source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
regex-dna program contributed by Jesse Millikan
Based on the Ruby version by jose fco. gonzalez
fixed by Matthew Wilson
ported to Node.js and sped up by Roman Pletnev
converted from regex-dna program
fixed by Josh Goldfoot
multi thread by Andrey Filatkin
sequential by Isaac Gouy
*/
const fs = require('fs');
/**
 * Runs the regex-redux benchmark sequentially.
 *
 * Reads the whole of /dev/stdin as ASCII, strips every line that starts
 * with '>' together with all newline characters, prints the match count
 * of each variant pattern against the stripped text, applies the ordered
 * substitution rules, and finally prints the three lengths (raw input,
 * stripped text, substituted text) after a blank line.
 */
function mainThread() {
  // Variant patterns; each is matched globally and case-insensitively.
  const variantPatterns = [
    /agggtaaa|tttaccct/ig,
    /[cgt]gggtaaa|tttaccc[acg]/ig,
    /a[act]ggtaaa|tttacc[agt]t/ig,
    /ag[act]gtaaa|tttac[agt]ct/ig,
    /agg[act]taaa|ttta[agt]cct/ig,
    /aggg[acg]aaa|ttt[cgt]ccct/ig,
    /agggt[cgt]aa|tt[acg]accct/ig,
    /agggta[cgt]a|t[acg]taccct/ig,
    /agggtaa[cgt]|[acg]ttaccct/ig
  ];

  // Substitution rules; later rules operate on the output of earlier ones,
  // so the order of this table matters.
  const substitutionRules = [
    [/tHa[Nt]/g, '<4>'],
    [/aND|caN|Ha[DS]|WaS/g, '<3>'],
    [/a[NSt]|BY/g, '<2>'],
    [/<[^>]*>/g, '|'],
    [/\|[^|][^|]*\|/g, '-']
  ];

  // The same binding is reused for the raw and stripped text so the raw
  // input is not kept alive longer than needed.
  let sequence = fs.readFileSync('/dev/stdin', 'ascii');
  const rawLength = sequence.length;
  sequence = sequence.replace(/^>.*\n|\n/mg, '');
  const strippedLength = sequence.length;

  variantPatterns.forEach((pattern) => {
    // String.prototype.match with a global regex yields null when nothing matches.
    const hits = sequence.match(pattern);
    const hitCount = hits === null ? 0 : hits.length;
    console.log(pattern.source, hitCount);
  });

  let substituted = sequence;
  for (const [pattern, replacement] of substitutionRules) {
    substituted = substituted.replace(pattern, replacement);
  }
  const substitutedLength = substituted.length;

  console.log(`\n${rawLength}\n${strippedLength}\n${substitutedLength}`);
}
// Entry point: run the benchmark as soon as the script is loaded.
mainThread();
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
v22.8.0
Wed, 04 Sep 2024 03:20:10 GMT
MAKE:
cp -L regexredux.node-4.node regexredux.js
0.14s to complete and log all make actions
COMMAND LINE:
/opt/src/node-v22.8.0/bin/node regexredux.js 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361