The Computer Language
24.04 Benchmarks Game

regex-redux F# .NET #2 program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// regex-dna program contributed by Valentin Kraevskiy
// converted from regex-dna program

open System.Text.RegularExpressions

let regex s = Regex (s, RegexOptions.Compiled)
let input = stdin.ReadToEnd ()
let text = (regex ">.*\n|\n").Replace (input, "")

["agggtaaa|tttaccct"
 "[cgt]gggtaaa|tttaccc[acg]"
 "a[act]ggtaaa|tttacc[agt]t"
 "ag[act]gtaaa|tttac[agt]ct"
 "agg[act]taaa|ttta[agt]cct"
 "aggg[acg]aaa|ttt[cgt]ccct"
 "agggt[cgt]aa|tt[acg]accct"
 "agggta[cgt]a|t[acg]taccct"
 "agggtaa[cgt]|[acg]ttaccct"]
|> List.iter (fun s ->
         printf "%s %i\n" s ((regex s).Matches text).Count)

let newText =
    ["tHa[Nt]", "<4>"
     "aND|caN|Ha[DS]|WaS", "<3>"
     "a[NSt]|BY", "<2>"
     "<[^>]*>", "|"
     "\\|[^|][^|]*\\|" , "-"]
     |> List.fold (fun s (code, alt) -> 
            (regex code).Replace (s, alt)) text

printf "\n%i\n%i\n%i\n" input.Length text.Length newText.Length
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET SDK 8.0.200
Host Version: 8.0.2
Commit: 1381d5ebd2
<ServerGarbageCollection>true
F# 8.0



 Mon, 01 Apr 2024 23:26:20 GMT

MAKE:
cp regexredux.fsharpcore-2.fsharpcore Program.fs
cp Include/fsharpcore/tmp.fsproj .
mkdir obj
cp Include/fsharpcore/project.assets.json ./obj
/usr/bin/dotnet build -c Release --no-self-contained -r linux-x64 	
MSBuild version 17.9.6+a4ecab324 for .NET
  Determining projects to restore...
  Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/tmp.fsproj (in 753 ms).
  tmp -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net8.0/linux-x64/tmp.dll

Build succeeded.
    0 Warning(s)
    0 Error(s)

Time Elapsed 00:00:08.25

10.35s to complete and log all make actions

COMMAND LINE:
 ./bin/Release/net8.0/linux-x64/tmp 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361