regex-redux F# .NET Core #7 program
source code
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// regex-dna program contributed by David Grenier
// converted from regex-dna program
// parallelization by Peter Kese
// Use PCRE.NET by Anthony Lloyd
open PCRE
// Entire contents of standard input, read in one go.
let input = System.Console.In.ReadToEnd()
// Builds a PCRE regex for the pattern `s` with the Compiled and NoUtfCheck
// option flags combined.
let inline regex s =
    let options = PcreOptions.Compiled ||| PcreOptions.NoUtfCheck
    PcreRegex(s, options)
// The input with every ">...\n" line and every remaining newline removed.
let text =
    let stripper = regex ">.*\n|\n"
    stripper.Replace(input, "")
// Match settings shared by all counting jobs; sets the NoUtfCheck match option.
let settings =
    let matchSettings = PcreMatchSettings()
    matchSettings.AdditionalOptions <- PcreMatchOptions.NoUtfCheck
    matchSettings
// Creates an async job that counts the matches of `pattern` in `text`
// and yields the line "<pattern> <count>".
let inline regexCount pattern = async {
    let matcher = regex pattern
    let hits = Seq.length (matcher.Matches(text, settings))
    return pattern + " " + string hits
}
// Async job that applies the substitution rules to `text` one after another
// (each rule works on the output of the previous one) and yields three
// newline-prefixed numbers: the raw input length, the stripped text length,
// and the length after all substitutions.
let replaceTask = async {
    let substitutions =
        [ "tHa[Nt]", "<4>"
          "aND|caN|Ha[DS]|WaS", "<3>"
          "a[NSt]|BY", "<2>"
          "<[^>]*>", "|"
          "\\|[^|][^|]*\\|", "-" ]
    // Walk the rule list in order, threading the rewritten string through.
    let rec applyAll (current: string) (rules: (string * string) list) =
        match rules with
        | [] -> current
        | (pattern, replacement) :: rest ->
            applyAll ((regex pattern).Replace(current, replacement)) rest
    let reduced = applyAll text substitutions
    // The leading empty item produces the initial "\n" of the report.
    return
        String.concat "\n"
            [ ""
              string input.Length
              string text.Length
              string (String.length reduced) ]
}
// Runs the replace job and the nine counting jobs in parallel and waits for
// all of them. Index 0 holds the replace report; indices 1-9 hold the counts
// in the order the patterns are listed here.
let results =
    let countJobs =
        [ "[cgt]gggtaaa|tttaccc[acg]"
          "a[act]ggtaaa|tttacc[agt]t"
          "agggt[cgt]aa|tt[acg]accct"
          "aggg[acg]aaa|ttt[cgt]ccct"
          "ag[act]gtaaa|tttac[agt]ct"
          "agg[act]taaa|ttta[agt]cct"
          "agggtaaa|tttaccct"
          "agggtaa[cgt]|[acg]ttaccct"
          "agggta[cgt]a|t[acg]taccct" ]
        |> List.map (fun pattern -> regexCount pattern)
    (replaceTask :: countJobs)
    |> Async.Parallel
    |> Async.RunSynchronously
// Print the nine count lines in the fixed report order, then the replace
// report (index 0) last.
for index in [ 7; 1; 2; 5; 6; 4; 3; 9; 8; 0 ] do
    stdout.WriteLine(results.[index])
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
.NET Core SDK 3.1.201
Host Version: 3.1.3; Commit: 4a9f85e9f8
<ServerGarbageCollection>true
<ConcurrentGarbageCollection>true
Thu, 07 May 2020 20:03:10 GMT
MAKE:
cp regexredux.fsharpcore-7.fsharpcore Program.fs
cp Include/fsharpcore/tmp.fsproj .
mkdir obj
cp Include/fsharpcore/project.assets.json ./obj
/usr/bin/dotnet build -c Release --no-restore
Microsoft (R) Build Engine version 16.5.0+d4cbfca49 for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.
tmp -> /home/dunham/benchmarksgame_quadcore/regexredux/tmp/bin/Release/netcoreapp3.1/tmp.dll
Build succeeded.
0 Warning(s)
0 Error(s)
Time Elapsed 00:00:11.31
14.08s to complete and log all make actions
COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp3.1/tmp.dll 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361