regex-redux F# .NET #4 program
source code
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// regex-dna program contributed by David Grenier
// converted from regex-dna program
open System.Text.RegularExpressions
open System.Threading
/// Swaps the first two arguments of a curried function:
/// `flip f a b` evaluates `f b a`.
let inline flip f a b = (b, a) ||> f
/// Adapts a function that takes a pair into one that takes its two
/// components as separate curried arguments: `curry f a b` evaluates `f (a, b)`.
let inline curry f a b = (a, b) |> f
/// Builds a `Regex` for the pattern `s` with `RegexOptions.Compiled`.
let regex (s: string) : Regex = new Regex(s, RegexOptions.Compiled)
/// `replace rx replacement input` returns `input` with every match of the
/// pattern `rx` replaced by `replacement`.
/// The regex is built as soon as `rx` is supplied, so a partially applied
/// `replace rx` (or `replace rx replacement`) can be reused without rebuilding it.
let replace rx =
    let compiled = regex rx
    fun (replacement: string) (input: string) -> compiled.Replace(input, replacement)
/// `matchCount rx input` returns the number of matches of the pattern `rx`
/// found in `input`. The regex is built when `rx` is supplied.
let matchCount rx =
    let compiled = regex rx
    fun (input: string) -> compiled.Matches(input).Count
// Everything available on standard input, exactly as read.
let input = stdin.ReadToEnd ()

// The input with every ">..." line (up to and including its newline)
// removed, and then every remaining newline removed.
let text =
    let withoutHeaders = replace ">.*\n" "" input
    replace "\n" "" withoutHeaders
/// Parallel helpers that share work between `Environment.ProcessorCount` workers.
module Array =
    module Parallel =
        /// Calls `f i` once for each index `i` in `0 .. count - 1` and returns
        /// when every call has finished.
        /// One worker is created per processor. Each worker repeatedly claims the
        /// next index by atomically decrementing a shared counter, so indices are
        /// handed out from `count - 1` down to `0`; which worker runs which index
        /// is not fixed. `f` must therefore be safe to call concurrently.
        let loop (count: int) f =
            // Shared countdown: the value returned by Decrement is the claimed index.
            let remaining = ref count
            let rec worker () =
                async {
                    match Interlocked.Decrement remaining with
                    | i when i < 0 -> return ()
                    | i ->
                        f i
                        // Continue inside this same computation. Starting a fresh,
                        // detached computation per item nests one synchronous start
                        // inside another for every index and takes failures of `f`
                        // out of the parallel join; `return!` is a tail call whose
                        // exceptions surface through Async.RunSynchronously.
                        return! worker ()
                }
            Array.init System.Environment.ProcessorCount (fun _ -> worker ())
            |> Async.Parallel
            |> Async.RunSynchronously
            |> ignore

        /// Creates an array of length `len` whose element `i` is `f i`,
        /// computing the elements with `loop`.
        let init len f =
            let result = Array.zeroCreate len
            // Each index is written by exactly one call, so the writes do not overlap.
            loop len (fun i -> result.[i] <- f i)
            result

        /// Returns a new array holding `f` applied to each element of `arr`,
        /// in the same order, computed with `init`.
        let map f arr = init (Array.length arr) (fun i -> f arr.[i])
// Count the matches of each variant pattern in `text` (in parallel), then
// print one "<pattern> <count>" line per variant, in the order listed.
do
    let variants =
        [|
            "agggtaaa|tttaccct"
            "[cgt]gggtaaa|tttaccc[acg]"
            "a[act]ggtaaa|tttacc[agt]t"
            "ag[act]gtaaa|tttac[agt]ct"
            "agg[act]taaa|ttta[agt]cct"
            "aggg[acg]aaa|ttt[cgt]ccct"
            "agggt[cgt]aa|tt[acg]accct"
            "agggta[cgt]a|t[acg]taccct"
            "agggtaa[cgt]|[acg]ttaccct"
        |]
    let describe variant = sprintf "%s %d" variant (matchCount variant text)
    let lines = Array.Parallel.map describe variants
    for line in lines do
        printfn "%s" line
/// Splits `arr` into `windowCount` consecutive chunks, applies `f` to each
/// chunk in parallel and returns the results in chunk order.
///
/// For `windowCount >= 2` the first `windowCount - 1` chunks each hold
/// `len / (windowCount - 1)` elements and the last chunk holds the
/// `len % (windowCount - 1)` leftover elements (possibly none).
/// For `windowCount = 1` the whole array is the single chunk.
/// Raises `ArgumentException` when `windowCount` is less than 1.
///
/// `f` sees every chunk in isolation, so anything that spans a chunk
/// boundary in `arr` is not visible to it as a whole.
let chunkedMap windowCount f arr =
    if windowCount < 1 then
        invalidArg "windowCount" "windowCount must be at least 1"
    elif windowCount = 1 then
        // The general formula divides by (windowCount - 1); one window is simply everything.
        [| f arr |]
    else
        let len = Array.length arr
        let size = len / (windowCount - 1)
        Array.Parallel.init windowCount (fun i ->
            // The final window only carries the remainder left by the equal-sized ones.
            let chunkLen =
                if i + 1 = windowCount then len % (windowCount - 1) else size
            f (Array.sub arr (i * size) chunkLen))
/// Runs the five substitutions below on a string, one after another in the
/// listed order; each substitution works on the output of the previous one.
/// The regexes are built once, when this value is initialised.
let applyPatterns =
    [
        "tHa[Nt]", "<4>"
        "aND|caN|Ha[DS]|WaS", "<3>"
        "a[NSt]|BY", "<2>"
        "<[^>]*>", "|"
        "\\|[^|][^|]*\\|", "-"
    ]
    |> List.map (fun (pattern, replacement) -> replace pattern replacement)
    |> List.reduce (>>)
// Print a blank line, then the raw input length, the cleaned sequence length
// and the length of the sequence after all substitutions.
// The substitutions run over the whole sequence in one piece: cutting it into
// chunks first would drop every match that straddles a chunk boundary and
// restart the "|...|" pairing in each chunk, which changes the final length.
text
|> applyPatterns
|> String.length
|> printfn "\n%i\n%i\n%i" input.Length text.Length
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
.NET SDK 8.0.301
Host Version: 8.0.6
Commit: 3b8b000a0e
<ServerGarbageCollection>true</ServerGarbageCollection>
F# 8.0
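<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>
<PublishAot>false</PublishAot>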
Tue, 04 Jun 2024 05:09:31 GMT
MAKE:
cp regexredux.fsharpcore-4.fsharpcore Program.fs
cp Include/fsharpcore/program.fsproj .
mkdir obj
cp Include/fsharpcore/project.assets.json ./obj
~/dotnet/dotnet build -c Release --use-current-runtime
Determining projects to restore...
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.fsproj : warning NU1900: Error occurred while getting package vulnerability data: Unable to load the service index for source https://api.nuget.org/v3/index.json.
Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.fsproj (in 6.05 sec).
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.fsproj : warning NU1900: Error occurred while getting package vulnerability data: Unable to load the service index for source https://api.nuget.org/v3/index.json.
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/bin/Release/net8.0/linux-x64/program.dll
Build succeeded.
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.fsproj : warning NU1900: Error occurred while getting package vulnerability data: Unable to load the service index for source https://api.nuget.org/v3/index.json.
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/regexredux/tmp/program.fsproj : warning NU1900: Error occurred while getting package vulnerability data: Unable to load the service index for source https://api.nuget.org/v3/index.json.
2 Warning(s)
0 Error(s)
Time Elapsed 00:00:14.17
16.55s to complete and log all make actions
COMMAND LINE:
./bin/Release/net8.0/linux-x64/program 0 < regexredux-input50000.txt
UNEXPECTED OUTPUT
13c13
< 273969
---
> 273927
PROGRAM OUTPUT:
agggtaaa|tttaccct 3
[cgt]gggtaaa|tttaccc[acg] 12
a[act]ggtaaa|tttacc[agt]t 43
ag[act]gtaaa|tttac[agt]ct 27
agg[act]taaa|ttta[agt]cct 58
aggg[acg]aaa|ttt[cgt]ccct 16
agggt[cgt]aa|tt[acg]accct 15
agggta[cgt]a|t[acg]taccct 18
agggtaa[cgt]|[acg]ttaccct 20
508411
500000
273969