The Q6600
Benchmarks Game

regex-redux C# aot #5 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 
   Regex-Redux by Josh Goldfoot
   order variants by execution time by Anthony Lloyd
*/

using System;
using System.Threading.Tasks;
using System.Text.RegularExpressions;

/// <summary>
/// Regex-redux benchmark: reads a sequence file from standard input, counts the
/// matches of nine fixed patterns, applies five chained substitutions, and
/// prints the counts followed by three lengths.
/// </summary>
public static class regexredux
{
    /// <summary>Builds a <see cref="Regex"/> for <paramref name="re"/> with <see cref="RegexOptions.Compiled"/>.</summary>
    /// <param name="re">The regular-expression pattern.</param>
    /// <returns>A new <see cref="Regex"/> instance for the pattern.</returns>
    static Regex regex(string re)
    {
        // NOTE(review): the original remark here said the option is "not compiled on
        // .Net Core". That appears to apply only to early .NET Core releases; on the
        // 3.1 runtime this program targets the flag should be honoured - confirm.
        return new Regex(re, RegexOptions.Compiled);
    }

    /// <summary>Counts the matches of pattern <paramref name="r"/> in <paramref name="s"/>.</summary>
    /// <param name="s">The text to search.</param>
    /// <param name="r">The regular-expression pattern.</param>
    /// <returns>The pattern, a single space, and the match count, e.g. <c>"abc 3"</c>.</returns>
    static string regexCount(string s, string r)
    {
        // Walk the matches one at a time so that only a counter is kept,
        // rather than materialising a collection of every match.
        int count = 0;
        for (var match = regex(r).Match(s); match.Success; match = match.NextMatch())
        {
            count++;
        }
        return r + " " + count;
    }

    /// <summary>
    /// Program entry point. Reads all of standard input, removes every '&gt;' through
    /// the end of its line as well as every remaining newline, then runs the
    /// substitution chain and the nine pattern counts as parallel tasks.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        var sequences = Console.In.ReadToEnd();
        var initialLength = sequences.Length;
        sequences = Regex.Replace(sequences, ">.*\n|\n", "");

        // The substitution chain is started first; each step consumes the
        // previous step's output, so it runs sequentially inside one task.
        var magicTask = Task.Run(() =>
        {
            var newseq = regex("tHa[Nt]").Replace(sequences, "<4>");
            newseq = regex("aND|caN|Ha[DS]|WaS").Replace(newseq, "<3>");
            newseq = regex("a[NSt]|BY").Replace(newseq, "<2>");
            newseq = regex("<[^>]*>").Replace(newseq, "|");
            newseq = regex("\\|[^|][^|]*\\|").Replace(newseq, "-");
            return newseq.Length;
        });

        // Launch order is deliberate (the file header credits ordering the variants
        // by execution time); it is independent of the order the results are printed in.
        var variant2 = Task.Run(() => regexCount(sequences, "[cgt]gggtaaa|tttaccc[acg]"));
        var variant3 = Task.Run(() => regexCount(sequences, "a[act]ggtaaa|tttacc[agt]t"));
        var variant7 = Task.Run(() => regexCount(sequences, "agggt[cgt]aa|tt[acg]accct"));
        var variant6 = Task.Run(() => regexCount(sequences, "aggg[acg]aaa|ttt[cgt]ccct"));
        var variant4 = Task.Run(() => regexCount(sequences, "ag[act]gtaaa|tttac[agt]ct"));
        var variant5 = Task.Run(() => regexCount(sequences, "agg[act]taaa|ttta[agt]cct"));
        var variant1 = Task.Run(() => regexCount(sequences, "agggtaaa|tttaccct"));
        var variant9 = Task.Run(() => regexCount(sequences, "agggtaa[cgt]|[acg]ttaccct"));
        var variant8 = Task.Run(() => regexCount(sequences, "agggta[cgt]a|t[acg]taccct"));

        // Synchronous writes: the previous WriteLineAsync calls discarded their
        // Tasks, so ordering and completion before exit were never awaited.
        // Each .Result blocks until that task has finished, so results are
        // printed in variant order 1..9 regardless of completion order.
        var output = Console.Out;
        output.WriteLine(variant1.Result);
        output.WriteLine(variant2.Result);
        output.WriteLine(variant3.Result);
        output.WriteLine(variant4.Result);
        output.WriteLine(variant5.Result);
        output.WriteLine(variant6.Result);
        output.WriteLine(variant7.Result);
        output.WriteLine(variant8.Result);
        output.WriteLine(variant9.Result);
        // Blank line, then the raw input length and the cleaned sequence length.
        output.WriteLine("\n" + initialLength + "\n" + sequences.Length);
        // Finally the length after all five substitutions.
        output.WriteLine(magicTask.Result.ToString());
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET Core SDK   3.1.201
Host Version: 3.1.3; Commit: 4a9f85e9f8
<ServerGarbageCollection>true
<ConcurrentGarbageCollection>true
/p:PublishReadyToRun=true
/p:RuntimeIdentifier=linux-x64


Thu, 07 May 2020 22:06:26 GMT

MAKE:
cp regexredux.csharpaot-5.csharpaot Program.cs
cp Include/csharpcore/tmp.csproj .
mkdir obj
cp Include/csharpcore/project.assets.json ./obj
/usr/bin/dotnet publish -c Release --no-restore --no-self-contained /p:PublishReadyToRun=true /p:RuntimeIdentifier=linux-x64
Microsoft (R) Build Engine version 16.5.0+d4cbfca49 for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.

  tmp -> /home/dunham/benchmarksgame_quadcore/regexredux/tmp/bin/Release/netcoreapp3.1/linux-x64/tmp.dll
  tmp -> /home/dunham/benchmarksgame_quadcore/regexredux/tmp/bin/Release/netcoreapp3.1/linux-x64/publish/

9.56s to complete and log all make actions

COMMAND LINE:
./bin/Release/netcoreapp3.1/linux-x64/tmp 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361