The Computer Language
24.11 Benchmarks Game

mandelbrot C# aot program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   AVX2 version written from scratch by Chris Lomont
*/

using System;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Threading.Tasks;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

public class MandelBrot
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static unsafe byte GetByte(double* pCrb, double Ciby)
    {
        var res = 0;
        for (var i=0; i<8; i+=2)
        {
            var vCrbx = Unsafe.Read<Vector<double>>(pCrb+i);
            var vCiby = new Vector<double>(Ciby);
            var Zr = vCrbx;
            var Zi = vCiby;
            int b = 0, j = 49;
            do
            {
                for (int counter = 0; counter < 7; counter++)
                {
                    var nZr = Zr * Zr - Zi * Zi + vCrbx;
                    var ZrZi = Zr * Zi;
                    Zi = ZrZi + ZrZi + vCiby;
                    Zr = nZr;
                    j--;
                }

                var t = Zr * Zr + Zi * Zi;
                if (t[0]>4.0) { b|=2; if (b==3) break; }
                if (t[1]>4.0) { b|=1; if (b==3) break; }
            } while (j>0);
            res = (res << 2) + b;
        }
        return (byte)(res^-1);
    }

    public static unsafe void MainOld(string[] args)
    {
        var size = args.Length==0 ? 200 : int.Parse(args[0]);
        Console.Out.WriteAsync(String.Concat("P4\n",size," ",size,"\n"));
        var Crb = new double[size+2];
        var lineLength = size >> 3;
        var data = new byte[size * lineLength];
        fixed (double* pCrb = &Crb[0])
        fixed (byte* pdata = &data[0])
        {
            var value = new Vector<double>(
                  new double[] {0,1,0,0,0,0,0,0}
            );
            var invN = new Vector<double>(2.0/size);
            var onePtFive = new Vector<double>(1.5);
            var step = new Vector<double>(2);
            for (var i=0; i<size; i+=2)
            {
                Unsafe.Write(pCrb+i, value*invN-onePtFive);
                value += step;
            }
            var _Crb = pCrb;
            var _pdata = pdata;
            Parallel.For(0, size, y =>
            {
                var Ciby = _Crb[y]+0.5;
                for (var x=0; x<lineLength; x++)
                {
                    _pdata[y*lineLength+x] = GetByte(_Crb+x*8, Ciby);
                }
            });
            Console.OpenStandardOutput().Write(data, 0, data.Length);
        }
    }

    // x86 version, AVX2
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static byte Process8(double x, double y, double dx)
    {
        // initial x coords
        var x01 = Vector256.Create(x+0*dx,x+1*dx,x+2*dx,x+3*dx);
        var x02 = Vector256.Create(x+4*dx,x+5*dx,x+6*dx,x+7*dx);
        
        // initial y coords
        var y0  = Vector256.Create(y); 
        
        Vector256<double> x1 = x01,y1 = y0; // current iteration 1
        Vector256<double> x2 = x02,y2 = y0; // current iteration 2

        Vector256<double> four = Vector256.Create(4.0); // 4 in each slot        

        var pass = 0;

        // temp space, C# requires init.
        Vector256<double> 
            x12=Vector256<double>.Zero,
            y12=Vector256<double>.Zero,
            x22=Vector256<double>.Zero,
            y22=Vector256<double>.Zero;

        // bit masks for results
        uint res1=1,res2=1;

        while (pass < 49 && (res1 != 0 || res2 != 0))
        {

            // do several between checks a time like other code
            for (var p = 0 ; p < 7; ++p)
            {
                // unroll loop 2x to decrease register stalls

                // squares x*x and y*y
                x12 = Avx2.Multiply(x1,x1);
                y12 = Avx2.Multiply(y1,y1);
                x22 = Avx2.Multiply(x2,x2);
                y22 = Avx2.Multiply(y2,y2);

                // mixed products x*y
                var xy1 = Avx2.Multiply(x1, y1);
                var xy2 = Avx2.Multiply(x2, y2);

                // diff of squares x*x - y*y
                var ds1 = Avx2.Subtract(x12, y12);
                var ds2 = Avx2.Subtract(x22, y22);

                // 2*x*y
                xy1 = Avx2.Add(xy1, xy1);
                xy2 = Avx2.Add(xy2, xy2);

                // next iters
                y1 = Avx2.Add(xy1, y0);
                y2 = Avx2.Add(xy2, y0);
                x1 = Avx2.Add(ds1, x01);
                x2 = Avx2.Add(ds2, x02);
            }
            pass+=7;

            // numbers overflow, which gives an Infinity or NaN, which, 
            // when compared N < 4, results in false, which is what we want

            // sum of squares x*x + y*y, compare to 4 (escape mandelbrot)
            var ss1  = Avx2.Add(x12, y12);
            var ss2  = Avx2.Add(x22, y22);

            // compare - puts all 0 in reg if false, else all 1 (=NaN bitwise)
            // when each register is 0, then all points escaped, so exit
            var cmp1 = Avx.Compare(ss1,four,
                    FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
            var cmp2 = Avx.Compare(ss2,four,
                    FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);

            // take top bit from each byte
            res1 = (uint)Avx2.MoveMask(Vector256.AsByte(cmp1));
            res2 = (uint)Avx2.MoveMask(Vector256.AsByte(cmp2));
        }        

        // can make a mask of bits in any order, which is the +7, +6, .., +1, +0
        res1 &= 
            (1<<( 0+7)) |
            (1<<( 8+6)) |
            (1<<(16+5)) |
            (1<<(24+4));
        res2 &= 
            (1<<( 0+3)) |
            (1<<( 8+2)) |
            (1<<(16+1)) |
            (1<<(24+0));

        var res = res1|res2;
        res |= res>>16;
        res |= res>>8;
        return (byte)(res);
    }

    static void Test(byte [] data)
    {
        var filename = "mandelbrot-output.txt";
        if (!File.Exists(filename))
        {
            System.Console.WriteLine($"Cannot open file {filename}");
            return;
        }
        var len = data.Length;
        var truth = File.ReadAllBytes(filename);
        Array.Copy(truth,truth.Length-len,truth,0,len);
        for (var i = 0; i < len; ++i)
        {
            if (data[i] != truth[i])
            {
                var bits = data[i]^truth[i];
                System.Console.Write($"ERROR: Mismatch {i}: {data[i]:X2} != {truth[i]:X2}, ^={bits:X2},");
                var x = ((i*8)%200);
                var y = (i*8)/200;
                while (bits != 0)
                {
                    if ((bits&1) != 0)
                        System.Console.Write($"({x},{y}), ");
                    ++x;
                    bits>>=1;                    
                }
                System.Console.WriteLine();

            }
        }

    }

    static byte Process8a(double x0_, double y0, double delta)
    {
        var ans = 0;
        for (var bit = 0; bit < 8; ++bit)
        {

            var x0 = x0_+delta*bit;
            double x1 = x0, y1 = y0;
            for (var pass = 0; pass < 49; ++pass)
            {
                var xt = x1*x1-y1*y1+x0;
                y1 = 2*x1*y1+y0;
                x1 = xt;
            }
            ans <<= 1;
            if (x1*x1 + y1*y1 <= 4.0)
                ans |= 1;
            x0 += delta;
        }
        return (byte)ans;
    }

    public static void MainNew(string[] args)
    {

        var size = args.Length<2 ? 200 : int.Parse(args[1]);
        Console.Out.WriteAsync(String.Concat("P4\n",size," ",size,"\n"));
        var lineLength = size >> 3;
        var data = new byte[size * lineLength];

        // step size
        var delta = 2.0/size; // (0.5 - (-1.5))/size;

        Parallel.For(0, size, y =>
        {
            var yd = y*delta-1;
            for (var x=0; x<lineLength; x++)
            {
                var xd = (x*8)*delta-1.5;
                data[y*lineLength+x] = Process8(xd,yd,delta);
            }
        }
        );
        //if (size == 200)
        //    Test(data);
        Console.OpenStandardOutput().Write(data, 0, data.Length);
    }


    public static void Main(string[] args)
    {
        if (System.Runtime.Intrinsics.X86.Avx2.IsSupported)
            MainNew(args);
        else
            MainOld(args);
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET SDK 9.0.100
Host Version: 9.0.0
Commit: 9d5a6a9aa4

<OutputType>Exe
<TargetFramework>net9.0
<ImplicitUsings>enable
<Nullable>enable
<AllowUnsafeBlocks>true
<ServerGarbageCollection>true
<ConcurrentGarbageCollection>true
<PublishAot>true
<OptimizationPreference>Speed
<IlcInstructionSet>native


 Thu, 14 Nov 2024 00:43:26 GMT

MAKE:
cp mandelbrot.csharpaot Program.cs
cp Include/csharpaot/program.csproj .
mkdir obj
cp Include/csharpaot/project.assets.json ./obj
/opt/src/dotnet-sdk-9.0.100/dotnet publish
  Determining projects to restore...
  Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/mandelbrot/tmp/program.csproj (in 995 ms).
  program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/mandelbrot/tmp/bin/Release/net9.0/linux-x64/program.dll
  Generating native code
  program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/mandelbrot/tmp/bin/Release/net9.0/linux-x64/publish/

25.26s to complete and log all make actions

COMMAND LINE:
 ./bin/Release/net9.0/linux-x64/native/program 16000

(BINARY) PROGRAM OUTPUT NOT SHOWN