The Q6600
Benchmarks Game

n-body Rust #5 program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by Cristi Cobzarenco
// vectorized by Felix Schütt

#![cfg(target_feature="sse2")]

use std::ops::{Add, Sub, Mul};
#[cfg(target_arch="x86")]
use std::arch::x86::*;
#[cfg(target_arch="x86_64")]
use std::arch::x86_64::*;

/// π, taken from the standard library rather than a hand-typed literal
/// (same `f64` value as the former `3.141592653589793`).
const PI: f64 = std::f64::consts::PI;
/// Mass assigned to the sun (4π²); every other mass in `BODIES` is written as
/// a multiple of it.
// NOTE(review): 4π² looks like the value of G·M_sun in AU/year units — confirm.
const SOLAR_MASS: f64 = 4.0 * PI * PI;
/// Factor applied to the velocity components listed in `BODIES`.
const DAYS_PER_YEAR: f64 = 365.24;
/// Number of simulated bodies.
const N_BODIES: usize = 5;
/// Number of unordered pairs of bodies, `n * (n - 1) / 2`.
const N_PAIRS: usize = N_BODIES * (N_BODIES - 1) / 2;

/// Starting state of the simulated bodies: the sun followed by Jupiter,
/// Saturn, Uranus and Neptune.
///
/// Every `Vec3` lists three components plus a fourth lane that is `0.0`
/// throughout this table. Velocity components are multiplied by
/// `DAYS_PER_YEAR` and masses by `SOLAR_MASS`.
// NOTE(review): the raw figures look like AU, AU/day and solar masses —
// confirm against the data source.
const BODIES: [Planet; N_BODIES] = [
    // Sun
    Planet {
        pos: Vec3([0.0, 0.0, 0.0, 0.0]),
        vel: Vec3([0.0, 0.0, 0.0, 0.0]),
        mass: SOLAR_MASS,
    },
    // Jupiter
    Planet {
        pos: Vec3([4.84143144246472090e+00,
                   -1.16032004402742839e+00,
                   -1.03622044471123109e-01,
                   0.0]),
        vel: Vec3([1.66007664274403694e-03 * DAYS_PER_YEAR,
                   7.69901118419740425e-03 * DAYS_PER_YEAR,
                   -6.90460016972063023e-05 * DAYS_PER_YEAR,
                   0.0]),
        mass: 9.54791938424326609e-04 * SOLAR_MASS,
    },
    // Saturn
    Planet {
        pos: Vec3([8.34336671824457987e+00,
                   4.12479856412430479e+00,
                   -4.03523417114321381e-01,
                   0.0]),
        vel: Vec3([-2.76742510726862411e-03 * DAYS_PER_YEAR,
                    4.99852801234917238e-03 * DAYS_PER_YEAR,
                    2.30417297573763929e-05 * DAYS_PER_YEAR,
                    0.0]),
        mass: 2.85885980666130812e-04 * SOLAR_MASS,
    },
    // Uranus
    Planet {
        pos: Vec3([1.28943695621391310e+01,
                   -1.51111514016986312e+01,
                   -2.23307578892655734e-01,
                   0.0]),
        vel: Vec3([2.96460137564761618e-03 * DAYS_PER_YEAR,
                   2.37847173959480950e-03 * DAYS_PER_YEAR,
                   -2.96589568540237556e-05 * DAYS_PER_YEAR,
                   0.0]),
        mass: 4.36624404335156298e-05 * SOLAR_MASS,
    },
    // Neptune
    Planet {
        pos: Vec3([1.53796971148509165e+01,
                   -2.59193146099879641e+01,
                   1.79258772950371181e-01,
                   0.0]),
        vel: Vec3([2.68067772490389322e-03 * DAYS_PER_YEAR,
                   1.62824170038242295e-03 * DAYS_PER_YEAR,
                   -9.51592254519715870e-05 * DAYS_PER_YEAR,
                   0.0]),
        mass: 5.15138902046611451e-05 * SOLAR_MASS,
    },
];

/// A 3d vector type with overloaded operators to improve readability.
///
/// The components are stored in a four-lane array; the 16-byte alignment lets
/// each half of the array be accessed with aligned SSE2 loads and stores.
#[repr(align(16))]
#[derive(Debug, Copy, Clone)]
struct Vec3(pub [f64;4]);

impl Vec3 {
    /// Returns the vector whose four lanes are all `0.0`.
    fn zero() -> Self { Vec3([0.0; 4]) }

    /// Returns the square root of [`Vec3::squared_norm`].
    fn norm(&self) -> f64 { self.squared_norm().sqrt() }

    /// Returns the sum of the squares of all four lanes.
    ///
    /// The fourth lane takes part in the sum, so it has to be `0.0` for the
    /// result to be the squared length of the 3d vector.
    fn squared_norm(&self) -> f64 {
        // Pointer to the whole array (not to a single element) so that the
        // two-lane loads below stay inside the region it was derived from.
        let lanes = self.0.as_ptr();

        // SAFETY: `lanes` covers four `f64`s, so reading two lanes at offset 0
        // and two at offset 2 is in bounds. `Vec3` is `repr(align(16))` and
        // the 32-byte array is its only field, so both addresses satisfy the
        // 16-byte alignment `_mm_load_pd` requires. The remaining intrinsics
        // work on registers only; SSE2 is required by the file-level `cfg`.
        unsafe {
            let lo_pair = _mm_load_pd(lanes);
            let hi_pair = _mm_load_pd(lanes.add(2));

            // sums = [l0² + l2², l1² + l3²]
            let sums = _mm_add_pd(_mm_mul_pd(lo_pair, lo_pair),
                                  _mm_mul_pd(hi_pair, hi_pair));

            let lo = _mm_cvtsd_f64(sums);
            let hi = _mm_cvtsd_f64(_mm_unpackhi_pd(sums, sums));

            lo + hi
        }
    }
}

/// Applies the packed-double SSE2 intrinsic `$op` lane-wise to two four-lane
/// operands and evaluates to the resulting `[f64; 4]`.
///
/// `$a` and `$b` must be (references to) values whose `.0` field is a
/// 16-byte-aligned `[f64; 4]`, such as `Vec3`. `$op` names a binary
/// `__m128d` intrinsic such as `_mm_add_pd`. Each operand expression is
/// evaluated once.
macro_rules! simd_op {
    ($a:expr, $b:expr, $op:ident) => ({
        // A plain `[f64; 4]` local is only guaranteed 8-byte alignment, which
        // is not enough for `_mm_store_pd`; this wrapper forces 16 bytes.
        #[repr(align(16))]
        struct SimdOpTarget([f64; 4]);

        let lhs: &[f64; 4] = &$a.0;
        let rhs: &[f64; 4] = &$b.0;
        let mut target = SimdOpTarget([0.0; 4]);

        // SAFETY: all three pointers are derived from whole four-element
        // arrays, so the two-lane accesses at offsets 0 and 2 are in bounds.
        // `target` is 16-byte aligned by its `repr`; the caller guarantees the
        // same for the operands (see the macro documentation).
        unsafe {
            let l = lhs.as_ptr();
            let r = rhs.as_ptr();
            let t = target.0.as_mut_ptr();

            _mm_store_pd(t, $op(_mm_load_pd(l), _mm_load_pd(r)));
            _mm_store_pd(t.add(2), $op(_mm_load_pd(l.add(2)), _mm_load_pd(r.add(2))));
        }

        target.0
    })
}

/// Lane-wise sum of two vectors (`lhs + rhs`).
impl Add for Vec3 {
    type Output = Vec3;

    fn add(self, rhs: Vec3) -> Vec3 {
        let lanes = simd_op!(&self, &rhs, _mm_add_pd);
        Vec3(lanes)
    }
}

/// Lane-wise difference of two vectors (`lhs - rhs`).
impl Sub for Vec3 {
    type Output = Vec3;

    fn sub(self, rhs: Vec3) -> Vec3 {
        let lanes = simd_op!(&self, &rhs, _mm_sub_pd);
        Vec3(lanes)
    }
}

/// Multiplies every lane of the vector by the scalar `rhs`.
impl Mul<f64> for Vec3 {
    type Output = Self;

    fn mul(self, rhs: f64) -> Self {
        // The result is written straight into a `Vec3` (16-byte aligned); a
        // plain `[f64; 4]` local is only guaranteed 8-byte alignment, which
        // would make the aligned stores below invalid.
        let mut scaled = Vec3([0.0; 4]);
        let src = self.0.as_ptr();
        let dst = scaled.0.as_mut_ptr();

        // SAFETY: `src` and `dst` are derived from whole four-element arrays,
        // so the two-lane accesses at offsets 0 and 2 are in bounds. Both
        // arrays are the only field of a `repr(align(16))` `Vec3`, so every
        // address used meets the 16-byte alignment that `_mm_load_pd` and
        // `_mm_store_pd` require.
        unsafe {
            let factor = _mm_set1_pd(rhs);

            _mm_store_pd(dst, _mm_mul_pd(_mm_load_pd(src), factor));
            _mm_store_pd(dst.add(2), _mm_mul_pd(_mm_load_pd(src.add(2)), factor));
        }

        scaled
    }
}

/// State of one simulated body.
#[derive(Debug, Clone, Copy)]
struct Planet {
    /// Position; `advance` adds `vel * dt` to it on every step.
    pos: Vec3,
    /// Velocity; `update_velocities` adjusts it once per pair the body
    /// belongs to.
    vel: Vec3,
    /// Mass; the entries of `BODIES` give it as a multiple of `SOLAR_MASS`.
    mass: f64,
}

/// Fills `diff` with `bodies[i].pos - bodies[j].pos` for every pair `i < j`.
///
/// Pairs are written in the order (0,1), (0,2), …, (0,n-1), (1,2), …, which
/// is the order the other pair-indexed routines consume them in.
fn pairwise_diffs(bodies: &[Planet; N_BODIES], diff: &mut [Vec3; N_PAIRS]) {
    let mut slots = diff.iter_mut();
    let mut remaining = &bodies[..];
    // Peel off the leading body and pair it with everything after it.
    while let Some((first, others)) = remaining.split_first() {
        for other in others {
            *slots.next().unwrap() = first.pos - other.pos;
        }
        remaining = others;
    }
}

/// For each pair difference `d` in `diff`, stores `dt / |d|³` in the matching
/// slot of `mag`.
///
/// The cube of the distance is formed as `|d|² * sqrt(|d|²)`.
fn magnitudes(diff: &[Vec3; N_PAIRS], dt: f64, mag: &mut [f64; N_PAIRS]) {
    for (slot, delta) in mag.iter_mut().zip(diff.iter()) {
        let dist_sq = delta.squared_norm();
        let dist_cubed = dist_sq * dist_sq.sqrt();
        *slot = dt / dist_cubed;
    }
}

/// Applies one time step of mutual gravitational acceleration to the
/// velocities of all bodies.
///
/// `diff` and `mag` are overwritten with the pairwise position differences
/// and the `dt / distance³` factors. Then, for every pair `i < j`, body `i`
/// loses `diff * (mass_j * mag)` and body `j` gains `diff * (mass_i * mag)`.
fn update_velocities(bodies: &mut [Planet; N_BODIES], dt: f64,
                     diff: &mut [Vec3; N_PAIRS], mag: &mut [f64; N_PAIRS]) {
    pairwise_diffs(bodies, diff);
    magnitudes(diff, dt, mag);

    // Both scratch arrays are indexed by pair, in the same order as the
    // nested loop below visits the pairs.
    let mut pair_terms = diff.iter().zip(mag.iter());
    let mut remaining = &mut bodies[..];
    while let Some(first) = shift_mut_ref(&mut remaining) {
        for other in remaining.iter_mut() {
            let (&delta, &scale) = pair_terms.next().unwrap();
            first.vel = first.vel - delta * (other.mass * scale);
            other.vel = other.vel + delta * (first.mass * scale);
        }
    }
}

/// Moves the system forward by one step of length `dt`: velocities are
/// updated first, then every position is advanced by its new velocity times
/// `dt`.
///
/// `diff` and `mag` are scratch buffers that get overwritten on every call;
/// the caller owns them so they are not recreated for each step.
fn advance(bodies: &mut [Planet; N_BODIES], dt: f64,
           diff: &mut [Vec3; N_PAIRS], mag: &mut [f64; N_PAIRS]) {
    update_velocities(bodies, dt, diff, mag);
    bodies
        .iter_mut()
        .for_each(|planet| planet.pos = planet.pos + planet.vel * dt);
}

/// Returns the total energy of the system: for each body its kinetic term
/// `|vel|² * mass / 2`, minus `mass_i * mass_j / distance` for every pair of
/// bodies.
fn energy(bodies: &[Planet; N_BODIES]) -> f64 {
    let mut total = 0.0;
    for (i, bi) in bodies.iter().enumerate() {
        let kinetic = bi.vel.squared_norm() * bi.mass / 2.0;

        // Sum of mass_j / distance over the bodies that come after `bi`, so
        // each pair is counted once.
        let mut pull = 0.0;
        for bj in &bodies[i + 1..] {
            pull = pull + bj.mass / (bi.pos - bj.pos).norm();
        }

        total += kinetic - bi.mass * pull;
    }
    total
}

/// Sets the velocity of the first body (the sun) so that the momentum of the
/// whole system sums to zero.
fn offset_momentum(bodies: &mut [Planet; N_BODIES]) {
    let mut momentum = Vec3::zero();
    for body in bodies.iter() {
        momentum = momentum + body.vel * body.mass;
    }

    let sun = &mut bodies[0];
    sun.vel = momentum * (-1.0 / sun.mass);
}

/// Runs the simulation for the number of steps given as the first
/// command-line argument and prints the system energy before and after.
fn main() {
    // A missing, non-UTF-8 or unparsable argument falls back to 1000 steps.
    let steps: i32 = std::env::args_os()
        .nth(1)
        .and_then(|raw| raw.to_str().and_then(|text| text.parse().ok()))
        .unwrap_or(1000);

    let mut system = BODIES;
    // Scratch buffers reused by every call to `advance`.
    let mut diff_scratch = [Vec3::zero(); N_PAIRS];
    let mut mag_scratch = [0.0f64; N_PAIRS];

    offset_momentum(&mut system);
    println!("{:.9}", energy(&system));

    let dt = 0.01;
    for _ in 0..steps {
        advance(&mut system, dt, &mut diff_scratch, &mut mag_scratch);
    }

    println!("{:.9}", energy(&system));
}

/// Detaches the first element of the slice behind `r` and returns a mutable
/// reference to it, leaving `*r` pointing at the remaining elements.
///
/// Returns `None`, and leaves `*r` untouched, when the slice is empty.
fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> {
    if r.is_empty() {
        return None;
    }
    // Move the full-lifetime slice out of `*r` (leaving an empty one behind)
    // so the head and the tail can both keep the lifetime `'a`.
    let (head, tail) = std::mem::take(r).split_first_mut()?;
    *r = tail;
    Some(head)
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
rustc 1.44.0 (49cae5576 2020-06-01)
LLVM version: 9.0


Fri, 05 Jun 2020 20:35:00 GMT

MAKE:
/opt/src/rust-1.44.0/bin/rustc -C opt-level=3 -C target-cpu=core2 -C lto -C codegen-units=1 -C llvm-args='-unroll-threshold=500' nbody.rs -o nbody.rust-5.rust_run

15.26s to complete and log all make actions

COMMAND LINE:
./nbody.rust-5.rust_run 50000000

PROGRAM OUTPUT:
-0.169075164
-0.169059907