source code
/* The Computer Language Benchmarks Game
 * contributed by Josh Goldfoot
 */
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
/// <summary>
/// Regex-redux driver: reads a FASTA stream from standard input, counts matches of each
/// variant pattern, and reports sequence lengths before and after a chain of substitutions.
/// </summary>
class regexredux
{
    /// <summary>
    /// Reads all of standard input, removes FASTA description lines and newlines, prints one
    /// "pattern count" line per variant, then prints the original length, the cleaned length
    /// and the length after the substitution chain.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // read FASTA sequence
        String sequence = Console.In.ReadToEnd();
        int initialLength = sequence.Length;

        // remove FASTA sequence descriptions and new-lines
        sequence = Regex.Replace(sequence, ">.*\n|\n", "");
        int codeLength = sequence.Length;

        // The substitution chain runs on a separate task so it overlaps with the counting below.
        Task<int> substitution = Task.Run(() =>
        {
            // regex substitution: each step feeds the next, only the final length is needed
            string newseq = Regex.Replace(sequence, "tHa[Nt]", "<4>");
            newseq = Regex.Replace(newseq, "aND|caN|Ha[DS]|WaS", "<3>");
            newseq = Regex.Replace(newseq, "a[NSt]|BY", "<2>");
            newseq = Regex.Replace(newseq, "<[^>]*>", "|");
            newseq = Regex.Replace(newseq, "\\|[^|][^|]*\\|", "-");
            return newseq.Length;
        });

        // divide large sequence into chunks (one per logical processor) and search each in parallel
        int[][] sums = Chunks(sequence).AsParallel().Select(CountRegexes).ToArray();

        // Add up the per-chunk counts for each variant and print them in variant order.
        var variants = Variants.variantsCopy();
        for (int i = 0; i < variants.Length; i++)
        {
            Console.WriteLine("{0} {1}", variants[i], sums.Sum(a => a[i]));
        }

        // NOTE(review): the head of this call was missing from the file; only the three
        // arguments survived. The format string (blank line, then one value per line) is
        // reconstructed and should be confirmed against the expected output.
        // Reading .Result blocks until the substitution task has finished.
        Console.WriteLine("\n{0}\n{1}\n{2}",
            initialLength, codeLength, substitution.Result);
    }

    /// <summary>
    /// Splits <paramref name="sequence"/> into <see cref="Environment.ProcessorCount"/>
    /// consecutive, non-overlapping substrings. Every chunk except the last has length
    /// <c>sequence.Length / ProcessorCount</c>; the last chunk takes whatever remains.
    /// </summary>
    /// <param name="sequence">The cleaned sequence to split.</param>
    /// <returns>A lazily produced sequence of chunks, in original order.</returns>
    /// <remarks>
    /// NOTE(review): chunks are disjoint and each one is searched independently, so a match
    /// that straddles a chunk boundary is not counted by <see cref="CountRegexes"/>.
    /// </remarks>
    private static IEnumerable<string> Chunks(string sequence)
    {
        int numChunks = Environment.ProcessorCount;
        int start = 0;
        int chunkSize = sequence.Length / numChunks;
        while (--numChunks >= 0)
        {
            if (numChunks > 0)
            {
                yield return sequence.Substring(start, chunkSize);
            }
            else
            {
                // Final chunk: take the remainder so characters lost to integer division are kept.
                yield return sequence.Substring(start);
            }
            start += chunkSize;
        }
    }

    /// <summary>
    /// Counts the non-overlapping matches of every variant pattern within one chunk.
    /// </summary>
    /// <param name="chunk">The piece of the sequence to search.</param>
    /// <returns>
    /// An array parallel to <c>Variants.variantsCopy()</c>: element <c>i</c> is the number of
    /// matches of variant <c>i</c> found in <paramref name="chunk"/>.
    /// </returns>
    private static int[] CountRegexes(string chunk)
    {
        // regex match
        string[] variants = Variants.variantsCopy();
        int[] counts = new int[variants.Length];
        for (int i = 0; i < variants.Length; i++)
        {
            for (var m = Regex.Match(chunk, variants[i]); m.Success; m = m.NextMatch())
            {
                counts[i]++;
            }
        }
        return counts;
    }
}
/// <summary>
/// Holds the regular-expression variants whose matches are counted in the input sequence.
/// </summary>
public class Variants
{
    /// <summary>
    /// Returns a newly allocated array of the nine variant patterns, in reporting order.
    /// </summary>
    /// <returns>A fresh <c>string[]</c> on every call, so callers may not share state through it.</returns>
    /// <remarks>
    /// NOTE(review): the array contents were truncated in this file. The patterns below are
    /// restored from the program output recorded further down, which prints each variant
    /// followed by its count; confirm against the original program.
    /// </remarks>
    public static string[] variantsCopy()
    {
        return new string[] {
            "agggtaaa|tttaccct",
            "[cgt]gggtaaa|tttaccc[acg]",
            "a[act]ggtaaa|tttacc[agt]t",
            "ag[act]gtaaa|tttac[agt]ct",
            "agg[act]taaa|ttta[agt]cct",
            "aggg[acg]aaa|ttt[cgt]ccct",
            "agggt[cgt]aa|tt[acg]accct",
            "agggta[cgt]a|t[acg]taccct",
            "agggtaa[cgt]|[acg]ttaccct"
        };
    }
}
notes, command-line, and program output
64-bit Ubuntu quad core
.NET SDK 9.0.100
Host Version: 9.0.0
Commit: 9d5a6a9aa4
Thu, 06 Feb 2025 03:30:31 GMT
cp regexredux.csharpaot-4.csharpaot Program.cs
cp Include/csharpaot/program.csproj .
/opt/src/dotnet-sdk-9.0.100/dotnet publish -r linux-x64 -c Release
Determining projects to restore...
Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj (in 903 ms).
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/Program.cs(13,7): warning CS8981: The type name 'regexredux' only contains lower-cased ascii characters. Such names may become reserved for the language. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj]
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net9.0/linux-x64/program.dll
Generating native code
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net9.0/linux-x64/publish/
26.94s to complete and log all make actions
./bin/Release/net9.0/linux-x64/native/program 0 < regexredux-input5000000.txt
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178