N-bodies speedup (50%!)

| No Comments | No TrackBacks

So I've been playing with this n-bodies thing again, and while I haven't managed to get all that much closer to the likes of C, C++, or even the current Haskell entry. BUT! I did manage to make my program almost exactly twice as fast as it was.

I used a few simple tricks from the performance part of the haskell wiki, mainly the use of unsafeRead and unsafeWrite from the Arrays page, and the use of -fexcess-precision to speed up the Double computation.

For those interested in my progress (no one), here's my current code:

{-# OPTIONS -O2
-funbox-strict-fields
-fexcess-precision 
-fvia-C
-optc-mfpmath=sse
-optc-msse2#-}

{-# LANGUAGE BangPatterns          #-}
{-# LANGUAGE UnboxedTuples         #-}
{-# LANGUAGE MagicHash             #-}
{-# LANGUAGE MultiParamTypeClasses #-}

module Main where

import System.Environment
import Data.Array.Base
import GHC.Exts
import GHC.ST
import GHC.Arr
import Control.Monad
import Control.Monad.ST
import Data.List
import qualified Data.Array.Unboxed as U
import Data.Array.Base
import Foreign.Storable
import Text.Printf
import Data.STRef


data Vec3 = V {-# UNPACK #-} !Double {-# UNPACK #-} !Double {-# UNPACK #-} !Double  -- deriving (Show)
data Body = B {  
                 pos ::  {-# UNPACK #-} !Vec3,
                 vel ::  {-# UNPACK #-} !Vec3,
                 mass :: {-# UNPACK #-} !Double
               } -- deriving (Show)


main = do
    n <- getArgs >>= \xs -> if null xs then return 1000 else (readIO.head) xs
    let (e,e') = runST $ do
            ps <- planets
            offset ps
            e1 <- energy ps
            advn n ps
            e2 <- energy ps
            return (e1,e2)
    prtnum e
    prtnum e'

advn n ps = replicateM_ n (advance deltat ps)



------------------

offset ps = do
    v  <- newSTRef (V 0 0 0)
    p  <- readPos ps 0
    ve <- readVel ps 0
    forM_ index1 $ \i -> do
        vel <- readVel ps i
        m <- readMass ps i
        modifySTRef v (.+. (m *. vel))
    modifySTRef v (./ solar_mass)
    v' <- readSTRef v
    writeVel ps 0 (ve .-. v')


advance dt ps = do
    forM_ index1 $ \i -> do
        -- (B p v m) <- readBody ps i
        p <- readPos ps i
        v <- readVel ps i
        m <- readMass ps i
        vvar <- newSTRef v

        forM_ (index2 i) $ \j -> do
            -- (B p' v' m') <- readBody ps j
            p' <- readPos ps j
            v' <- readVel ps j
            m' <- readMass ps j
            let dp = p .-. p'
                dst = mag dp
                magnit = dt / (dst * dst * dst)
                change = magnit *. dp
                (sca1,sca2) = (m' *. change, m *. change)
            modifySTRef vvar (.-. sca1)
            writeVel ps j (v' .+. sca2)
        vvar'@(V nx ny nz) <- readSTRef vvar
        writeVel' ps i nx ny nz
        writePos ps i $ move' dt (B p vvar' m)

index1 = [0..numPs]
-- index1 = map (*7) [0..numPs]
index2 i = [(i+1)..numPs]
-- index2 = map (\x -> (x*7,x)) [0..numPs]
-- index3 = tail . tails $ index1

energy ps = do
    e <- newSTRef (0 :: Double)

    forM_ index1 $ \i -> do
        b1 <- readBody ps i
        modifySTRef e ((+) (kinetic b1))

        forM_ (index2 i) $ \j -> do
            b2 <- readBody ps j
            let x = potential b1 b2
            modifySTRef e (subtract x)
    readSTRef e



move dt (B p v m) = B (p .+. dt *. v) v m
move' dt (B p v m) = p .+. dt *. v


-----------------------

writePos :: STUArray s Int Double -> Int -> Vec3  -> ST s ()
writePos !arr !i !(V x y z) =
    case i*7 of
        i'@(I# n) ->
            do
                unsafeWrite arr i'              x
                unsafeWrite arr (I# (n +# 1#))  y
                unsafeWrite arr (I# (n +# 2#))  z


writePos' :: STUArray s Int Double -> Int -> Double -> Double -> Double -> ST s ()
writePos' !arr !i !x !y !z =
    case i*7 of
        i'@(I# n) ->
            do
                unsafeWrite arr i'              x
                unsafeWrite arr (I# (n +# 1#))  y
                unsafeWrite arr (I# (n +# 2#))  z


writeVel :: STUArray s Int Double -> Int -> Vec3  -> ST s ()
writeVel !arr !i !(V vx vy vz) =
    case i*7 of
        i'@(I# n) ->
            do
                unsafeWrite arr (I# (n +# 3#)) vx
                unsafeWrite arr (I# (n +# 4#)) vy
                unsafeWrite arr (I# (n +# 5#)) vz

writeVel' :: STUArray s Int Double -> Int -> Double -> Double -> Double -> ST s ()
writeVel' !arr !i !vx !vy !vz =
    case i*7 of
        i'@(I# n) ->
            do
                unsafeWrite arr (I# (n +# 3#)) vx
                unsafeWrite arr (I# (n +# 4#)) vy
                unsafeWrite arr (I# (n +# 5#)) vz

-- writeMass :: STUArray s Int Double -> Int -> Double  -> ST s ()
-- writeMass arr i m =
--     do
--         unsafeWrite arr (i+6) m

writeBody :: STUArray s Int Double -> Int -> Body  -> ST s ()
writeBody !arr !i !(B !(V x y z) !(V vx vy vz) m) =
    case i*7 of
        i'@(I# n) ->
            do
                unsafeWrite arr (i')            x
                unsafeWrite arr (I# (n +# 1#))  y
                unsafeWrite arr (I# (n +# 2#))  z
                unsafeWrite arr (I# (n +# 3#))  vx
                unsafeWrite arr (I# (n +# 4#))  vy
                unsafeWrite arr (I# (n +# 5#))  vz
                unsafeWrite arr (I# (n +# 6#))  m


readPos :: STUArray s Int Double -> Int -> ST s Vec3
readPos !arr !i =
    case i*7 of
        i'@(I# n) ->
            do
                x <- unsafeRead arr i'
                y <- unsafeRead arr (I# (n +# 1#))
                z <- unsafeRead arr (I# (n +# 2#))
                return (V x y z)

readVel :: STUArray s Int Double -> Int -> ST s Vec3
readVel !arr !i =
    case i*7 of
        i'@(I# n) ->
            do
                vx <- unsafeRead arr (I# (n +# 3#))
                vy <- unsafeRead arr (I# (n +# 4#))
                vz <- unsafeRead arr (I# (n +# 5#))
                return (V vx vy vz)

readMass :: STUArray s Int Double -> Int -> ST s Double
readMass !arr !i = 
    case i*7 of
        i'@(I# n) -> unsafeRead arr (i*7+6)


readBody :: STUArray s Int Double -> Int -> ST s Body
readBody !arr !i =
    case i*7 of
        i'@(I# n) ->
            do
                x   <- unsafeRead arr i'
                y   <- unsafeRead arr (I# (n +# 1#))
                z   <- unsafeRead arr (I# (n +# 2#))
                vx  <- unsafeRead arr (I# (n +# 3#))
                vy  <- unsafeRead arr (I# (n +# 4#))
                vz  <- unsafeRead arr (I# (n +# 5#))
                m   <- unsafeRead arr (I# (n +# 6#))
                return (B (V x y z) (V vx vy vz) m)

-----------------------
solar_mass, deltat, days_per_year :: Double
days_per_year = 365.24
dp = days_per_year
solar_mass    = 4 * pi * pi
deltat       = 0.01
numPs       = length planetsList - 1

-- planets'' = newArray (0,(numPs+1)*7) 0 :: ST s (STUArray s Int Double)
    -- (B (V 0 0 0) (V 0 0 0) 1)

-- ms = U.listArray (0,numPs+1) (map mass planetsList)

planets = do
    arr <- newArray (0,(numPs+1)*7) 0 :: ST s (STUArray s Int Double)
    -- unsafeReplaceUArray arr (zip index1 planetsList)
    forM_ (zip index1 planetsList) $ \(i,p) -> do
            -- unsafeIOToST (print (i, pos p))
            writeBody arr i p
    return arr



planetsList =
    [B  (V  0.0
            0.0
            0.0) 
        (V  0.0
            0.0
            0.0) 
        solar_mass,

     B  (V  (4.84143144246472090e+00)
            (-1.16032004402742839e+00)
            (-1.03622044471123109e-01)) 
        (V  (1.66007664274403694e-03*dp) 
            (7.69901118419740425e-03*dp)
            ((-6.90460016972063023e-05)*dp)) 
        (9.54791938424326609e-04 * solar_mass),

     B  (V  (8.34336671824457987e+00)
            (4.12479856412430479e+00)
            (-4.03523417114321381e-01))
        (V  ((-2.76742510726862411e-03)*dp)
            (4.99852801234917238e-03*dp)
            (2.30417297573763929e-05*dp)) 
        (2.85885980666130812e-04 * solar_mass),

     B  (V  (1.28943695621391310e+01)
            (-1.51111514016986312e+01)
            (-2.23307578892655734e-01)) 
        (V  (2.96460137564761618e-03*dp)
            (2.37847173959480950e-03*dp)
            ((-2.96589568540237556e-05)*dp)) 
        (4.36624404335156298e-05 * solar_mass),

     B  (V  (1.53796971148509165e+01)
            (-2.59193146099879641e+01)
            (1.79258772950371181e-01)) 
        (V  (2.68067772490389322e-03*dp)
            (1.62824170038242295e-03*dp)
            ((-9.51592254519715870e-05)*dp)) 
        (5.15138902046611451e-05 * solar_mass)
    ]


-----------------------

infix 4 .-.
infix 4 .+.
-- infix 5 .*.
infixr 5 *.
infixl 5 ./

{-# INLINE (.+.) #-}
{-# INLINE (.-.) #-}
-- {-# INLINE (.*.) #-}
V x y z .+. V x' y' z' = V (x+x') (y+y') (z+z')  
V (D# x) (D# y) (D# z) .-. V (D# x') (D# y') (D# z') = 
    V (D# (x -## x'))
      (D# (y -## y'))
      (D# (z -## z'))  
-- V x y z .*. V x' y' z' = V (x*x') (y*y') (z*z')

{-# INLINE (*.) #-}
n *. (V x y z) = V (n*x) (n*y) (n*z)
{-# INLINE (./) #-}
V x y z ./ n = V (x/n) (y/n) (z/n)

-- {-# INLINE momentum #-}
momentum :: Body -> Vec3
momentum (B _ v m) = m *. v

{-# INLINE kinetic #-}
kinetic x =  0.5 * mass x * (mag2.vel) x

-- kinetic'


potential a b =
    let dst = dist' a b
    in --if dst <= 0
        --then 0
        --else 
        (mass a * mass b) / dist' a b

{-# INLINE mag #-}
mag :: Vec3 -> Double
mag = sqrt . mag2

{-# INLINE mag2 #-}
mag2 (V x y z) = x*x + y*y + z*z

{-# INLINE dist #-}
dist v1 v2 = sqrt $ dist2 v1 v2

{-# INLINE dist2 #-}
dist2 v1 v2 = mag2 $ v1 .-. v2

{-# INLINE dist' #-}
dist' b1 b2 = dist (pos b1) (pos b2)

prtnum :: Double -> IO ()
prtnum x = putStrLn $ printf "%.9f" x

-------------------- tests

thing2 pla = do
    arr <- pla
    unsafeIOToST (putStrLn "thing2")
    forM_ [0..numPs*7] $ \i -> do
        p <- readArray arr i
        unsafeIOToST (print (i,p))

thing3 = do
    arr <- planets
    offset arr
    thing2 planets
    unsafeIOToST (putStrLn "\n")
    -- thing2 arr
    e <- energy arr
    return e

No TrackBacks

TrackBack URL: http://axman6.homeip.net/cgi-bin/mt/mt-tb.cgi/19

Leave a comment

February 2009

Sun Mon Tue Wed Thu Fri Sat
1 2 3 4 5 6 7
8 9 10 11 12 13 14
15 16 17 18 19 20 21
22 23 24 25 26 27 28
OpenID accepted here Learn more about OpenID
Powered by Movable Type 4.23-en

About this Entry

This page contains a single entry by Alex Mason published on February 3, 2009 11:45 PM.

STUArray woes was the previous entry in this blog.

N-bodies evolution is the next entry in this blog.

Find recent content on the main index or look in the archives to find all content.