The Computer Language
24.04 Benchmarks Game

regex-redux C# .NET program

source code

/* The Computer Language Benchmarks Game
 * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 * 
 * regex-dna program contributed by Isaac Gouy 
*/

using System;
using System.Text.RegularExpressions;

class regexredux
{
   // One substitution step: `code` is the regex pattern to search for and
   // `alternatives` is the text written in place of every match.
   struct IUB 
   {
      public string code;
      public string alternatives;

      public IUB(string code, string alternatives) {
         this.code = code;
         this.alternatives = alternatives;
      }
   }


   // Reads a FASTA file from standard input, prints the number of matches of
   // each variant pattern, applies the substitution patterns in order, and
   // finally prints three lengths: the raw input, the input with description
   // lines and new-lines removed, and the text after all substitutions.
   static void Main(string[] args){

      // read FASTA sequence
      string text = Console.In.ReadToEnd();
      int rawLength = text.Length;

      // remove FASTA sequence descriptions and new-lines
      Regex stripper = new Regex(">.*\n|\n", RegexOptions.Compiled);
      text = stripper.Replace(text, "");
      int strippedLength = text.Length;

      // regex match: one output line per pattern, "<pattern> <count>"
      string[] patterns = {
         "agggtaaa|tttaccct",
         "[cgt]gggtaaa|tttaccc[acg]",
         "a[act]ggtaaa|tttacc[agt]t",
         "ag[act]gtaaa|tttac[agt]ct",
         "agg[act]taaa|ttta[agt]cct",
         "aggg[acg]aaa|ttt[cgt]ccct",
         "agggt[cgt]aa|tt[acg]accct",
         "agggta[cgt]a|t[acg]taccct",
         "agggtaa[cgt]|[acg]ttaccct"
      };

      for (int i = 0; i < patterns.Length; i++) {
         string pattern = patterns[i];
         int hits = CountMatches(pattern, text);
         Console.WriteLine("{0} {1}", pattern, hits);
      }

      // regex substitution: each step runs on the output of the previous one,
      // so the order of this table matters
      IUB[] steps = {
         new IUB("tHa[Nt]", "<4>"),
         new IUB("aND|caN|Ha[DS]|WaS", "<3>"),
         new IUB("a[NSt]|BY", "<2>"),
         new IUB("<[^>]*>", "|"),
         new IUB("\\|[^|][^|]*\\|" , "-")
      };

      for (int i = 0; i < steps.Length; i++) {
         Regex rewriter = new Regex(steps[i].code, RegexOptions.Compiled);
         text = rewriter.Replace(text, steps[i].alternatives);
      }

      Console.WriteLine("\n{0}\n{1}\n{2}", 
         rawLength, strippedLength, text.Length);
   }


   // Counts the successive matches of `pattern` in `input` by walking the
   // Match chain until a match is unsuccessful.
   static int CountMatches(string pattern, string input) {
      Regex matcher = new Regex(pattern, RegexOptions.Compiled);

      int total = 0;
      Match hit = matcher.Match(input);
      while (hit.Success) {
         total++;
         hit = hit.NextMatch();
      }
      return total;
   }
}

    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET SDK 8.0.204
Host Version: 8.0.4
Commit: 2d7eea2529
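<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>
<PublishAot>false</PublishAot>
<OptimizationPreference>Speed</OptimizationPreference>
<IlcInstructionSet>native</IlcInstructionSet>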


 Fri, 26 Apr 2024 01:06:58 GMT

MAKE:
cp regexredux.csharpcore Program.cs
cp Include/csharpcore/program.csproj .
mkdir obj
cp Include/csharpcore/project.assets.json ./obj
/usr/bin/dotnet build -c Release --use-current-runtime  	
MSBuild version 17.9.8+b34f75857 for .NET
  Determining projects to restore...
  Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj (in 180 ms).
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/Program.cs(10,7): warning CS8981: The type name 'regexredux' only contains lower-cased ascii characters. Such names may become reserved for the language. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj]
  program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net8.0/linux-x64/program.dll

Build succeeded.

/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/Program.cs(10,7): warning CS8981: The type name 'regexredux' only contains lower-cased ascii characters. Such names may become reserved for the language. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj]
    1 Warning(s)
    0 Error(s)

Time Elapsed 00:00:03.80

5.42s to complete and log all make actions

COMMAND LINE:
 ./bin/Release/net8.0/linux-x64/program 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361