source code
# The Computer Language Benchmarks Game
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
#
# contributed by Daniel Jones
# fixed by David Campbell
# modified by Jarret Revels, Alex Arslan, Yichao Yu
using Printf
# Patterns whose matches are counted in the cleaned sequence. Every entry is an
# alternation of two 8-character alternatives. They are stored as plain strings
# (and compiled with `Regex` at the point of use) because the pattern text
# itself is printed next to its match count.
const variants = String[
    "agggtaaa|tttaccct",
    "[cgt]gggtaaa|tttaccc[acg]",
    "a[act]ggtaaa|tttacc[agt]t",
    "ag[act]gtaaa|tttac[agt]ct",
    "agg[act]taaa|ttta[agt]cct",
    "aggg[acg]aaa|ttt[cgt]ccct",
    "agggt[cgt]aa|tt[acg]accct",
    "agggta[cgt]a|t[acg]taccct",
    "agggtaa[cgt]|[acg]ttaccct",
]
# Ordered `(regex, replacement)` rewrite rules. They are applied one after the
# other, each to the result of the previous one, so the order matters: the
# first three rules emit `<n>` markers, the fourth turns any `<...>` run into
# `|`, and the last collapses `|...|` spans into `-`.
const subs = Tuple{Regex,String}[
    (r"tHa[Nt]", "<4>"),
    (r"aND|caN|Ha[DS]|WaS", "<3>"),
    (r"a[NSt]|BY", "<2>"),
    (r"<[^>]*>", "|"),
    (r"\|[^|][^|]*\|", "-"),
]
"""
    perf_regex_dna(input::IO=stdin, output::IO=stdout;
                   patterns=variants, substitutions=subs)

Run the regex-redux workload on the text read from `input` and write the
report to `output`.

The function

1. reads all of `input` as a `String` and records its length in characters;
2. deletes everything from a `>` through the end of its line, plus every
   remaining newline, and records the length of the cleaned sequence;
3. for each entry of `patterns`, prints the pattern text followed by the number
   of non-overlapping matches found in the cleaned sequence;
4. applies every `(regex, replacement)` entry of `substitutions` in order, each
   one to the result of the previous one;
5. prints an empty line and then, one per line, the original length, the
   cleaned length and the length after all substitutions.

# Arguments
- `input::IO`: stream the sequence text is read from (default `stdin`).
- `output::IO`: stream the report is written to (default `stdout`).

# Keywords
- `patterns`: iterable of pattern strings to count (default: the file-level
  `variants` table).
- `substitutions`: iterable of `(regex, replacement)` pairs (default: the
  file-level `subs` table).

# Returns
`nothing`.
"""
function perf_regex_dna(input::IO=stdin, output::IO=stdout;
                        patterns=variants, substitutions=subs)
    raw = read(input, String)
    rawlen = length(raw)

    # Drop header text (from `>` to end of line) and join the remaining lines.
    seq = replace(raw, r">.*\n|\n" => "")
    cleanlen = length(seq)

    for pattern in patterns
        # The pattern stays a string so that its text can be echoed verbatim.
        @printf(output, "%s %d\n", pattern, count(Regex(pattern), seq))
    end

    # Each rule rewrites the output of the previous one, so order is significant.
    for (rx, replacement) in substitutions
        seq = replace(seq, rx => replacement)
    end

    println(output)
    println(output, rawlen)
    println(output, cleanlen)
    println(output, length(seq))
    return nothing
end
# Script entry point: run the benchmark as soon as the file is executed; the
# zero-argument call reads its input from stdin.
perf_regex_dna()
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
julia version 1.4.0
Tue, 05 May 2020 17:19:02 GMT
MAKE:
printenv JULIA_NUM_THREADS
4
printenv JULIA_LLVM_ARGS
0.08s to complete and log all make actions
COMMAND LINE:
/opt/src/julia-1.4.0/bin/julia -O3 --cpu-target=core2 -- regexredux.julia 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361