The Computer Language
24.09 Benchmarks Game

regex-redux C# .NET #5 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 
   Regex-Redux by Josh Goldfoot
   order variants by execution time by Anthony Lloyd
*/

using System;
using System.Threading.Tasks;
using System.Text.RegularExpressions;

/// <summary>
/// regex-redux benchmark: reads a FASTA-formatted text from standard input,
/// strips the header lines and newlines, counts matches of nine fixed
/// patterns, applies a fixed chain of substitutions, and prints the counts
/// followed by the three lengths (raw input, stripped input, substituted).
/// </summary>
public static class regexredux
{
    /// <summary>Creates a compiled <see cref="Regex"/> for the pattern <paramref name="re"/>.</summary>
    /// <param name="re">Regular-expression pattern.</param>
    /// <returns>A new regex instance built with <see cref="RegexOptions.Compiled"/>.</returns>
    static Regex regex(string re)
    {
        // Each pattern is built once and then run across the entire input, so the
        // one-off construction cost of RegexOptions.Compiled is paid for faster matching.
        return new Regex(re, RegexOptions.Compiled);
    }

    /// <summary>Counts the non-overlapping matches of <paramref name="r"/> in <paramref name="s"/>.</summary>
    /// <param name="s">Text to scan.</param>
    /// <param name="r">Regular-expression pattern.</param>
    /// <returns>The pattern, a single space, and the match count.</returns>
    static string regexCount(string s, string r)
    {
        int count = 0;
        for (var match = regex(r).Match(s); match.Success; match = match.NextMatch())
        {
            count++;
        }
        return $"{r} {count}";
    }

    /// <summary>Program entry point; command-line arguments are ignored.</summary>
    /// <param name="args">Unused.</param>
    public static void Main(string[] args)
    {
        var sequences = Console.In.ReadToEnd();
        var initialLength = sequences.Length;

        // Drop every ">..." description line (including its newline) and all remaining newlines.
        sequences = Regex.Replace(sequences, ">.*\n|\n", "");

        // The substitution chain is inherently sequential (each step consumes the
        // previous step's output), so it runs as one task alongside the counts.
        var magicTask = Task.Run(() =>
        {
            var newseq = regex("tHa[Nt]").Replace(sequences, "<4>");
            newseq = regex("aND|caN|Ha[DS]|WaS").Replace(newseq, "<3>");
            newseq = regex("a[NSt]|BY").Replace(newseq, "<2>");
            newseq = regex("<[^>]*>").Replace(newseq, "|");
            newseq = regex("\\|[^|][^|]*\\|").Replace(newseq, "-");
            return newseq.Length;
        });

        // The variants are deliberately queued out of numeric order (the file header
        // credits this ordering as "by execution time"); they are printed in numeric
        // order below.
        var variant2 = Task.Run(() => regexCount(sequences, "[cgt]gggtaaa|tttaccc[acg]"));
        var variant3 = Task.Run(() => regexCount(sequences, "a[act]ggtaaa|tttacc[agt]t"));
        var variant7 = Task.Run(() => regexCount(sequences, "agggt[cgt]aa|tt[acg]accct"));
        var variant6 = Task.Run(() => regexCount(sequences, "aggg[acg]aaa|ttt[cgt]ccct"));
        var variant4 = Task.Run(() => regexCount(sequences, "ag[act]gtaaa|tttac[agt]ct"));
        var variant5 = Task.Run(() => regexCount(sequences, "agg[act]taaa|ttta[agt]cct"));
        var variant1 = Task.Run(() => regexCount(sequences, "agggtaaa|tttaccct"));
        var variant9 = Task.Run(() => regexCount(sequences, "agggtaa[cgt]|[acg]ttaccct"));
        var variant8 = Task.Run(() => regexCount(sequences, "agggta[cgt]a|t[acg]taccct"));

        // Write synchronously: the previous WriteLineAsync calls discarded their
        // returned tasks, so nothing in this method guaranteed that the writes had
        // completed (or completed in order) before Main returned. Each .Result
        // blocks until that task has finished.
        var output = Console.Out;
        output.WriteLine(variant1.Result);
        output.WriteLine(variant2.Result);
        output.WriteLine(variant3.Result);
        output.WriteLine(variant4.Result);
        output.WriteLine(variant5.Result);
        output.WriteLine(variant6.Result);
        output.WriteLine(variant7.Result);
        output.WriteLine(variant8.Result);
        output.WriteLine(variant9.Result);
        // Leading "\n" yields the blank separator line required before the lengths.
        output.WriteLine("\n" + initialLength + "\n" + sequences.Length);
        output.WriteLine(magicTask.Result.ToString());
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET SDK 8.0.301
Host Version: 8.0.6
Commit: 3b8b000a0e

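<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>
<PublishAot>false</PublishAot>
<OptimizationPreference>Speed</OptimizationPreference>
<IlcInstructionSet>native</IlcInstructionSet>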


 Wed, 29 May 2024 21:44:08 GMT

MAKE:
cp regexredux.csharpcore-5.csharpcore Program.cs
cp Include/csharpcore/program.csproj .
mkdir obj
cp Include/csharpcore/project.assets.json ./obj
~/dotnet/dotnet build -c Release --use-current-runtime  	
  Determining projects to restore...
  Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj (in 845 ms).
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/Program.cs(12,21): warning CS8981: The type name 'regexredux' only contains lower-cased ascii characters. Such names may become reserved for the language. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj]
  program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net8.0/linux-x64/program.dll

Build succeeded.

/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/Program.cs(12,21): warning CS8981: The type name 'regexredux' only contains lower-cased ascii characters. Such names may become reserved for the language. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj]
    1 Warning(s)
    0 Error(s)

Time Elapsed 00:00:05.65

7.64s to complete and log all make actions

COMMAND LINE:
 ./bin/Release/net8.0/linux-x64/program 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361