source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
Regex-Redux by Josh Goldfoot
order variants by execution time by Anthony Lloyd
*/
using System;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
/// <summary>
/// Regex-Redux benchmark program. Reads all of standard input, removes every
/// span from a '&gt;' to the end of its line plus all remaining newlines, counts
/// the matches of nine patterns in parallel, runs a chain of five replacements,
/// and prints the counts followed by three lengths.
/// </summary>
public static class regexredux
{
    /// <summary>
    /// Creates a <see cref="Regex"/> for <paramref name="re"/> with
    /// <see cref="RegexOptions.Compiled"/> requested.
    /// </summary>
    /// <param name="re">The regular-expression pattern.</param>
    /// <returns>A new <see cref="Regex"/> instance for the pattern.</returns>
    static Regex regex(string re)
    {
        // NOTE(review): the original author remarked that this option was not
        // honoured on .NET Core ("hence poor benchmark results"). That likely
        // depends on the runtime version -- confirm against the target runtime.
        return new Regex(re, RegexOptions.Compiled);
    }

    /// <summary>
    /// Counts the successive matches of pattern <paramref name="r"/> in
    /// <paramref name="s"/>.
    /// </summary>
    /// <param name="s">The text to search.</param>
    /// <param name="r">The regular-expression pattern.</param>
    /// <returns>The pattern, a single space, and the match count.</returns>
    static string regexCount(string s, string r)
    {
        int c = 0;
        for (var m = regex(r).Match(s); m.Success; m = m.NextMatch())
        {
            c++;
        }
        return r + " " + c;
    }

    /// <summary>
    /// Program entry point: reads standard input, performs the counting and
    /// replacement work on thread-pool tasks, and writes the report to
    /// standard output.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        var input = Console.In.ReadToEnd();
        var initialLength = input.Length;

        // Kept in its own local (never reassigned) so the lambdas below all
        // capture the stripped text.
        var sequences = Regex.Replace(input, ">.*\n|\n", "");

        var magicTask = Task.Run(() =>
        {
            var newseq = regex("tHa[Nt]").Replace(sequences, "<4>");
            newseq = regex("aND|caN|Ha[DS]|WaS").Replace(newseq, "<3>");
            newseq = regex("a[NSt]|BY").Replace(newseq, "<2>");
            newseq = regex("<[^>]*>").Replace(newseq, "|");
            newseq = regex("\\|[^|][^|]*\\|").Replace(newseq, "-");
            return newseq.Length;
        });

        Func<string, Task<string>> count =
            pattern => Task.Run(() => regexCount(sequences, pattern));

        // The start order deliberately differs from the print order; the file
        // header credits ordering the variants by execution time.
        var variant2 = count("[cgt]gggtaaa|tttaccc[acg]");
        var variant3 = count("a[act]ggtaaa|tttacc[agt]t");
        var variant7 = count("agggt[cgt]aa|tt[acg]accct");
        var variant6 = count("aggg[acg]aaa|ttt[cgt]ccct");
        var variant4 = count("ag[act]gtaaa|tttac[agt]ct");
        var variant5 = count("agg[act]taaa|ttta[agt]cct");
        var variant1 = count("agggtaaa|tttaccct");
        var variant9 = count("agggtaa[cgt]|[acg]ttaccct");
        var variant8 = count("agggta[cgt]a|t[acg]taccct");

        // Synchronous writes: the previous WriteLineAsync calls discarded their
        // Tasks, so ordering depended on the console writer completing inline.
        var output = Console.Out;
        var inPrintOrder = new[]
        {
            variant1, variant2, variant3, variant4, variant5,
            variant6, variant7, variant8, variant9
        };
        foreach (var variant in inPrintOrder)
        {
            // .Result blocks until that variant's count is available.
            output.WriteLine(variant.Result);
        }

        output.WriteLine("\n" + initialLength + "\n" + sequences.Length);
        output.WriteLine(magicTask.Result.ToString());
    }
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
.NET Core SDK 3.1.201
Host Version: 3.1.3; Commit: 4a9f85e9f8
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>
Thu, 07 May 2020 21:05:03 GMT
MAKE:
cp regexredux.csharpcore-5.csharpcore Program.cs
cp Include/csharpcore/tmp.csproj .
mkdir obj
cp Include/csharpcore/project.assets.json ./obj
/usr/bin/dotnet build -c Release --no-restore
Microsoft (R) Build Engine version 16.5.0+d4cbfca49 for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
tmp -> /home/dunham/benchmarksgame_quadcore/regexredux/tmp/bin/Release/netcoreapp3.1/tmp.dll
Build succeeded.
0 Warning(s)
0 Error(s)
Time Elapsed 00:00:04.51
6.57s to complete and log all make actions
COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp3.1/tmp.dll 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361