source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
contributed by Engin Kayraklioglu
derived from the converted regex-dna Chapel version by Ben Harshbarger
which was derived from the GNU C++ RE2 version by Alexey Zolotov
*/
use IO, Regex;
/*
  Program entry point.

  Reads all of stdin as bytes, deletes every match of `>.*\n|\n` from it,
  and then does two things concurrently on the cleaned text:

    - counts the matches of each pattern in `variants`;
    - applies the (pattern, replacement) pairs in `subst`, in order, to a
      separate copy of the text.

  Output: one "<pattern> <count>" line per variant, an empty line, then the
  size of the raw input, of the cleaned text, and of the substituted copy.

  args: not referenced in the body.
        NOTE(review): presumably declared so that extra command-line flags
        are passed to main instead of being rejected -- confirm.
*/
proc main(args: [] string) {
  // patterns whose matches are counted
  const variants = [
    b"agggtaaa|tttaccct",
    b"[cgt]gggtaaa|tttaccc[acg]",
    b"a[act]ggtaaa|tttacc[agt]t",
    b"ag[act]gtaaa|tttac[agt]ct",
    b"agg[act]taaa|ttta[agt]cct",
    b"aggg[acg]aaa|ttt[cgt]ccct",
    b"agggt[cgt]aa|tt[acg]accct",
    b"agggta[cgt]a|t[acg]taccct",
    b"agggtaa[cgt]|[acg]ttaccct"
  ];

  // (pattern, replacement) pairs; applied one after another, in this order
  const subst = [
    (b"tHa[Nt]", b"<4>"),
    (b"aND|caN|Ha[DS]|WaS", b"<3>"),
    (b"a[NSt]|BY", b"<2>"),
    (b"<[^>]*>", b"|"),
    (b"\\|[^|][^|]*\\|", b"-")
  ];

  // slurp the whole input and remember its size before any editing
  var dna = stdin.readAll(bytes);
  const initLen = dna.size;

  // drop everything the cleanup pattern matches (">..." lines and newlines)
  const cleanup = new regex(b">.*\n|\n");
  dna = dna.replace(cleanup, b"");

  // the substitutions work on their own copy so that counting can read
  // `dna` at the same time
  var reduced = dna;

  // one counter per variant, indexed like `variants`
  var counts: [variants.domain] int;

  // the sync block does not finish until the task started by `begin`
  // (and the forall) are done
  sync {
    // background task: run the substitutions over the copy
    begin with (ref reduced) {
      for (find, replacement) in subst {
        const finder = new regex(find);
        reduced = reduced.replace(finder, replacement);
      }
    }

    // meanwhile: count the matches of every variant in parallel
    forall (pattern, count) in zip(variants, counts) {
      const matcher = new regex(pattern);
      for hit in matcher.matches(dna) do
        count += 1;
    }
  }

  // report the per-variant counts
  for i in variants.domain do
    writeln(variants[i], " ", counts[i]);

  // blank separator line, then the three sizes
  writeln();
  writeln(initLen);
  writeln(dna.size);
  writeln(reduced.size);
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
chpl version 2.2.0
built with LLVM version 18.1.3
Copyright 2020-2024 Hewlett Packard Enterprise Development LP
Copyright 2004-2019 Cray Inc.
Sat, 05 Oct 2024 23:00:17 GMT
MAKE:
mv regexredux.chapel-3.chapel regexredux.chapel-3.chpl
/opt/src/chapel-2.2.0/bin/linux64-x86_64/chpl --fast regexredux.chapel-3.chpl -o regexredux.chapel-3.chapel_run
rm regexredux.chapel-3.chpl
20.16s to complete and log all make actions
COMMAND LINE:
./regexredux.chapel-3.chapel_run --n=0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361