The Computer Language
22.05 Benchmarks Game

regex-redux F# .NET #6 program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// regex-dna program contributed by David Grenier
// converted from regex-dna program
// parallelization by Peter Kese
// order variants by execution time by Anthony Lloyd

open System.Text.RegularExpressions

/// Builds a `Regex` for pattern `s` with `RegexOptions.Compiled`.
let inline regex s =
    new Regex(s, RegexOptions.Compiled)
// Entire standard input, read once up front.
let input = stdin.ReadToEnd()

// `input` with every `>`-prefixed run up to its newline, and every remaining
// newline character, removed.
let text =
    let headersAndNewlines = regex ">.*\n|\n"
    headersAndNewlines.Replace(input, "")

/// Async computation that counts the matches of `pattern` in `text` and
/// yields "<pattern> <count>".
let inline regexCount pattern =
    // Walks the match chain with Match/NextMatch until a failed match.
    // Kept outside the async builder because mutable locals cannot be
    // captured by the builder's closures.
    let countMatches () =
        let mutable hits = 0
        let mutable current = (regex pattern).Match text
        while current.Success do
            hits <- hits + 1
            current <- current.NextMatch()
        hits
    // The count is computed only when the async is actually run.
    async { return pattern + " " + string (countMatches ()) }

/// Async computation that applies the substitution rules to `text` in order
/// and yields a string holding three lengths, each preceded by a newline:
/// `input`, `text`, and the text after all substitutions.
let replaceTask = async {
    // (pattern, replacement) pairs; each rule runs on the output of the previous one.
    let substitutions =
        [
            "tHa[Nt]", "<4>"
            "aND|caN|Ha[DS]|WaS", "<3>"
            "a[NSt]|BY", "<2>"
            "<[^>]*>", "|"
            "\\|[^|][^|]*\\|", "-"
        ]
    let applyRule (current: string) (pattern: string, replacement: string) =
        (regex pattern).Replace(current, replacement)
    let rewritten = List.fold applyRule text substitutions
    let rewrittenLength = string (String.length rewritten)
    return "\n" + string input.Length + "\n" + string text.Length + "\n" + rewrittenLength
}

// Runs the replace task and all nine counting tasks via Async.Parallel and
// blocks until every one has finished. Index 0 is the replace task; indices
// 1-9 are the counts in the order the patterns are listed here.
let results =
    let countTasks =
        [
            "[cgt]gggtaaa|tttaccc[acg]"
            "a[act]ggtaaa|tttacc[agt]t"
            "agggt[cgt]aa|tt[acg]accct"
            "aggg[acg]aaa|ttt[cgt]ccct"
            "ag[act]gtaaa|tttac[agt]ct"
            "agg[act]taaa|ttta[agt]cct"
            "agggtaaa|tttaccct"
            "agggtaa[cgt]|[acg]ttaccct"
            "agggta[cgt]a|t[acg]taccct"
        ]
        |> List.map (fun pattern -> regexCount pattern)
    (replaceTask :: countTasks)
    |> Async.Parallel
    |> Async.RunSynchronously

// Print the results in this fixed index order: the nine pattern counts
// first, then the replace-task summary (index 0) last.
for index in [ 7; 1; 2; 5; 6; 4; 3; 9; 8; 0 ] do
    stdout.WriteLine results.[index]
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET SDK 6.0.101
Host Version: 6.0.1;
Commit: 3a25a7f1cc
<ServerGarbageCollection>true


Tue, 10 May 2022 03:50:27 GMT

MAKE:
cp regexredux.fsharpcore-6.fsharpcore Program.fs
cp Include/fsharpcore/tmp.fsproj .
mkdir obj
cp Include/fsharpcore/project.assets.json ./obj
/usr/bin/dotnet build -c Release --no-restore --no-self-contained -r ubuntu-x64 
Microsoft (R) Build Engine version 17.1.1+a02f73656 for .NET
Copyright (C) Microsoft Corporation. All rights reserved.

  tmp -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net6.0/ubuntu-x64/tmp.dll

Build succeeded.
    0 Warning(s)
    0 Error(s)

Time Elapsed 00:00:07.54

9.05s to complete and log all make actions

COMMAND LINE:
/usr/bin/dotnet ./bin/Release/net6.0/ubuntu-x64/tmp.dll 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361