source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
Regex-Redux by Josh Goldfoot
order variants by execution time by Anthony Lloyd
*/
using System;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
/// <summary>
/// Regex-Redux benchmark program. Reads all of standard input, removes every
/// '>'-to-end-of-line span and every newline, counts the matches of nine
/// patterns in the remaining text, runs a fixed chain of five replacements on
/// it, and prints the counts followed by three lengths.
/// </summary>
public static class regexredux
{
    // The nine count patterns, listed in the order their results are printed.
    static readonly string[] Variants =
    {
        "agggtaaa|tttaccct",
        "[cgt]gggtaaa|tttaccc[acg]",
        "a[act]ggtaaa|tttacc[agt]t",
        "ag[act]gtaaa|tttac[agt]ct",
        "agg[act]taaa|ttta[agt]cct",
        "aggg[acg]aaa|ttt[cgt]ccct",
        "agggt[cgt]aa|tt[acg]accct",
        "agggta[cgt]a|t[acg]taccct",
        "agggtaa[cgt]|[acg]ttaccct",
    };

    // Indices into Variants giving the order in which the count tasks are
    // started; it differs from the print order on purpose.
    // NOTE(review): the file header credits "order variants by execution time",
    // so this is presumably a scheduling optimisation - confirm before changing.
    static readonly int[] LaunchOrder = { 1, 2, 6, 5, 3, 4, 0, 8, 7 };

    // Replacement steps applied one after another, each to the previous result.
    static readonly (string Pattern, string Replacement)[] MagicSteps =
    {
        ("tHa[Nt]", "<4>"),
        ("aND|caN|Ha[DS]|WaS", "<3>"),
        ("a[NSt]|BY", "<2>"),
        ("<[^>]*>", "|"),
        ("\\|[^|][^|]*\\|", "-"),
    };

    /// <summary>Creates a <see cref="Regex"/> for <paramref name="re"/> with RegexOptions.Compiled.</summary>
    /// <param name="re">The regular expression pattern.</param>
    /// <returns>A new Regex instance for the pattern.</returns>
    static Regex regex(string re)
    {
        // Original author's note: "Not compiled on .Net Core, hence poor benchmark results."
        // NOTE(review): whether RegexOptions.Compiled actually takes effect depends on
        // the runtime and publish mode - confirm for the deployment target.
        return new Regex(re, RegexOptions.Compiled);
    }

    /// <summary>Counts the matches of pattern <paramref name="r"/> in <paramref name="s"/>.</summary>
    /// <param name="s">The text to search.</param>
    /// <param name="r">The regular expression pattern.</param>
    /// <returns>The pattern, a single space, and the number of matches.</returns>
    static string regexCount(string s, string r)
    {
        int c = 0;
        for (var m = regex(r).Match(s); m.Success; m = m.NextMatch())
        {
            c++;
        }
        return r + " " + c;
    }

    /// <summary>
    /// Program entry point: reads standard input, runs the replacement chain and
    /// the nine counts as parallel tasks, and writes the results to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        var sequences = Console.In.ReadToEnd();
        var initialLength = sequences.Length;
        sequences = Regex.Replace(sequences, ">.*\n|\n", "");

        // The replacement chain is started before any of the count tasks.
        var magicTask = Task.Run(() =>
        {
            var text = sequences;
            foreach (var (pattern, replacement) in MagicSteps)
            {
                text = regex(pattern).Replace(text, replacement);
            }
            return text.Length;
        });

        // Start the count tasks in LaunchOrder, but store each one at its print position.
        var counts = new Task<string>[Variants.Length];
        foreach (var index in LaunchOrder)
        {
            var pattern = Variants[index];
            counts[index] = Task.Run(() => regexCount(sequences, pattern));
        }

        // Synchronous writes: the previous WriteLineAsync calls discarded their
        // Tasks, so nothing in this method guaranteed the writes had completed
        // (or completed in order) before Main returned.
        var output = Console.Out;
        foreach (var count in counts)
        {
            output.WriteLine(count.Result);
        }
        output.WriteLine("\n" + initialLength + "\n" + sequences.Length);
        output.WriteLine(magicTask.Result.ToString());
    }
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
.NET SDK 9.0.100
Host Version: 9.0.0
Commit: 9d5a6a9aa4
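<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>
<PublishAot>true</PublishAot>
<OptimizationPreference>Speed</OptimizationPreference>
<IlcInstructionSet>native</IlcInstructionSet>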
Thu, 14 Nov 2024 01:18:29 GMT
MAKE:
cp regexredux.csharpaot-5.csharpaot Program.cs
cp Include/csharpaot/program.csproj .
mkdir obj
cp Include/csharpaot/project.assets.json ./obj
/opt/src/dotnet-sdk-9.0.100/dotnet publish
Determining projects to restore...
Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj (in 995 ms).
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/Program.cs(12,21): warning CS8981: The type name 'regexredux' only contains lower-cased ascii characters. Such names may become reserved for the language. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.csproj]
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net9.0/linux-x64/program.dll
Generating native code
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net9.0/linux-x64/publish/
26.55s to complete and log all make actions
COMMAND LINE:
./bin/Release/net9.0/linux-x64/native/program 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361