source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
contributed by Mark C. Lewis
modified slightly by Chad Whipkey
converted from java to c++,added sse support, by Branimir Maksimovic
modified by Vaclav Zeman
modified by Vaclav Haisman to use explicit SSE2 intrinsics and constexpr
*/
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <array>
#include <immintrin.h>
constexpr double PI = 3.141592653589793;
constexpr double SOLAR_MASS = 4 * PI * PI;
constexpr double DAYS_PER_YEAR = 365.24;
class Body {
public:
alignas(16) double x, y, z;
/*double filler;*/
alignas(16) double vx, vy, vz;
alignas(16) double mass;
Body() = default;
Body& offsetMomentum(double px, double py, double pz){
vx = -px / SOLAR_MASS;
vy = -py / SOLAR_MASS;
vz = -pz / SOLAR_MASS;
return *this;
}
};
constexpr Body jupiter(){
return Body {
/*x =*/ 4.84143144246472090e+00,
/*y =*/ -1.16032004402742839e+00,
/*z =*/ -1.03622044471123109e-01,
/*vx =*/ 1.66007664274403694e-03 * DAYS_PER_YEAR,
/*vy =*/ 7.69901118419740425e-03 * DAYS_PER_YEAR,
/*vz =*/ -6.90460016972063023e-05 * DAYS_PER_YEAR,
/*mass =*/ 9.54791938424326609e-04 * SOLAR_MASS
};
}
constexpr Body saturn(){
return Body {
/*x = */8.34336671824457987e+00,
/*y = */4.12479856412430479e+00,
/*z = */-4.03523417114321381e-01,
/*vx = */-2.76742510726862411e-03 * DAYS_PER_YEAR,
/*vy = */4.99852801234917238e-03 * DAYS_PER_YEAR,
/*vz = */2.30417297573763929e-05 * DAYS_PER_YEAR,
/*mass =*/ 2.85885980666130812e-04 * SOLAR_MASS
};
}
constexpr Body uranus(){
return Body {
/*x =*/ 1.28943695621391310e+01,
/*y =*/ -1.51111514016986312e+01,
/*z =*/ -2.23307578892655734e-01,
/*vx = */2.96460137564761618e-03 * DAYS_PER_YEAR,
/*vy = */2.37847173959480950e-03 * DAYS_PER_YEAR,
/*vz = */-2.96589568540237556e-05 * DAYS_PER_YEAR,
/*mass =*/ 4.36624404335156298e-05 * SOLAR_MASS
};
}
constexpr Body neptune(){
return Body {
/*x = */ 1.53796971148509165e+01,
/*y = */ -2.59193146099879641e+01,
/*z = */ 1.79258772950371181e-01,
/*vx = */ 2.68067772490389322e-03 * DAYS_PER_YEAR,
/*vy = */ 1.62824170038242295e-03 * DAYS_PER_YEAR,
/*vz = */-9.51592254519715870e-05 * DAYS_PER_YEAR,
/*mass = */ 5.15138902046611451e-05 * SOLAR_MASS
};
}
constexpr Body sun(){
return Body {
/*x = */0,
/*y = */0,
/*z = */0,
/*vx = */0,
/*vy = */0,
/*vz = */0,
/*mass = */ SOLAR_MASS
};
}
class NBodySystem {
private:
std::array<Body, 5> bodies;
static constexpr int bodies_size = 5;
public:
NBodySystem()
: bodies {{
sun(),
jupiter(),
saturn(),
uranus(),
neptune()
}}
{
double px = 0.0;
double py = 0.0;
double pz = 0.0;
for(unsigned i=0; i != bodies_size; ++i) {
px += bodies[i].vx * bodies[i].mass;
py += bodies[i].vy * bodies[i].mass;
pz += bodies[i].vz * bodies[i].mass;
}
bodies[0].offsetMomentum(px,py,pz);
}
void advance(double dt) {
constexpr unsigned N = ((bodies_size-1)*bodies_size)/2;
struct alignas(16) R {
double dx,dy,dz;
};
static R r[1000];
alignas(32) static double mag[1000];
for(int i=0,k=0; i != bodies_size-1; ++i) {
Body const & iBody = bodies[i];
for(int j=i+1; j != bodies_size; ++j,++k) {
r[k].dx = iBody.x - bodies[j].x;
r[k].dy = iBody.y - bodies[j].y;
r[k].dz = iBody.z - bodies[j].z;
}
}
for(int i=0; i != N; i+=2) {
__m128d const dx = _mm_loadh_pd(_mm_loadl_pd(dx,&r[i].dx),&r[i+1].dx);
__m128d const dy = _mm_loadh_pd(_mm_loadl_pd(dy,&r[i].dy),&r[i+1].dy);
__m128d const dz = _mm_loadh_pd(_mm_loadl_pd(dz,&r[i].dz),&r[i+1].dz);
//__m128d dSquared = dx*dx + dy*dy + dz*dz;
__m128d const dSquared = _mm_add_pd(
_mm_add_pd(_mm_mul_pd(dx, dx), _mm_mul_pd(dy, dy)),
_mm_mul_pd(dz, dz));
__m128d distance =
_mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(dSquared)));
for(int j=0;j!=2;++j)
{
distance = _mm_sub_pd(
_mm_mul_pd(distance, _mm_set1_pd(1.5)),
_mm_mul_pd(
_mm_mul_pd(
_mm_mul_pd(_mm_set1_pd(0.5), dSquared),
distance),
_mm_mul_pd(distance, distance)
)
);
}
__m128d const dmag = _mm_mul_pd(
_mm_div_pd(_mm_set1_pd(dt), dSquared), distance);
_mm_store_pd(&mag[i],dmag);
}
for(int i=0,k=0; i != bodies_size-1; ++i) {
Body& iBody = bodies[i];
for(int j=i+1; j != bodies_size; ++j,++k) {
Body & jBody = bodies[j];
double const jmm = jBody.mass * mag[k];
iBody.vx -= r[k].dx * jmm;
iBody.vy -= r[k].dy * jmm;
iBody.vz -= r[k].dz * jmm;
double const imm = iBody.mass * mag[k];
jBody.vx += r[k].dx * imm;
jBody.vy += r[k].dy * imm;
jBody.vz += r[k].dz * imm;
}
}
for (int i = 0; i != bodies_size; ++i) {
Body& iBody = bodies[i];
iBody.x += dt * iBody.vx;
iBody.y += dt * iBody.vy;
iBody.z += dt * iBody.vz;
}
}
double energy(){
double e = 0.0;
for (int i=0; i != bodies_size; ++i) {
Body const iBody = bodies[i];
e += 0.5 * iBody.mass *
( iBody.vx * iBody.vx
+ iBody.vy * iBody.vy
+ iBody.vz * iBody.vz );
for (int j=i+1; j != bodies_size; ++j) {
Body const & jBody = bodies[j];
double const dx = iBody.x - jBody.x;
double const dy = iBody.y - jBody.y;
double const dz = iBody.z - jBody.z;
double const distance = sqrt(dx*dx + dy*dy + dz*dz);
e -= (iBody.mass * jBody.mass) / distance;
}
}
return e;
}
};
int main(int argc, char** argv) {
int n = atoi(argv[1]);
NBodySystem bodies;
printf("%.9f\n", bodies.energy());
for (int i=0; i<n; ++i)
bodies.advance(0.01);
printf("%.9f\n", bodies.energy());
}