source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
regex-dna program contributed by Jesse Millikan
Based on the Ruby version by jose fco. gonzalez
fixed by Matthew Wilson
ported to Node.js and sped up by Roman Pletnev
converted from regex-dna program
fixed by Josh Goldfoot
multi thread by Andrey Filatkin
sequential by Isaac Gouy
*/
const fs = require('fs');
/**
 * Runs the whole regex-redux workload synchronously, start to finish.
 *
 * Steps:
 *   1. Read all of /dev/stdin as ASCII text.
 *   2. Remove every line that starts with '>' and every newline character.
 *   3. For each variant pattern, print its source text and the number of
 *      case-insensitive matches found in the cleaned text.
 *   4. Apply the five substitutions in order and print, after a blank line,
 *      the raw input length, the cleaned length and the final length.
 *
 * Takes no arguments and returns nothing; all results go to console.log.
 */
function mainThread() {
  // Patterns whose occurrences are counted (global + case-insensitive).
  const variantPatterns = [
    /agggtaaa|tttaccct/ig,
    /[cgt]gggtaaa|tttaccc[acg]/ig,
    /a[act]ggtaaa|tttacc[agt]t/ig,
    /ag[act]gtaaa|tttac[agt]ct/ig,
    /agg[act]taaa|ttta[agt]cct/ig,
    /aggg[acg]aaa|ttt[cgt]ccct/ig,
    /agggt[cgt]aa|tt[acg]accct/ig,
    /agggta[cgt]a|t[acg]taccct/ig,
    /agggtaa[cgt]|[acg]ttaccct/ig
  ];

  // Applied strictly in this order: later rules match the markers that
  // earlier rules insert ('<4>', '<3>', '<2>' and then '|').
  const substitutions = [
    { pattern: /tHa[Nt]/g, replacement: '<4>' },
    { pattern: /aND|caN|Ha[DS]|WaS/g, replacement: '<3>' },
    { pattern: /a[NSt]|BY/g, replacement: '<2>' },
    { pattern: /<[^>]*>/g, replacement: '|' },
    { pattern: /\|[^|][^|]*\|/g, replacement: '-' }
  ];

  const rawInput = fs.readFileSync('/dev/stdin', 'ascii');
  const sequence = rawInput.replace(/^>.*\n|\n/mg, '');

  for (const pattern of variantPatterns) {
    // String.prototype.match with a global regex yields null when nothing matches.
    const hits = sequence.match(pattern);
    const count = hits === null ? 0 : hits.length;
    console.log(pattern.source, count);
  }

  // Only the length of the fully substituted text is reported.
  const rewritten = substitutions.reduce(
    (text, { pattern, replacement }) => text.replace(pattern, replacement),
    sequence
  );

  console.log(`\n${rawInput.length}\n${sequence.length}\n${rewritten.length}`);
}
// Entry point: run the benchmark as soon as the script is loaded.
mainThread();
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
Version 3.9.2
node.js v14.2.0
Wed, 13 May 2020 17:34:37 GMT
MAKE:
mv regexredux.typescript-4.typescript regexredux.typescript-4.ts
/opt/src/node-v14.2.0-linux-x64/bin/tsc --strict --noEmitOnError --removeComments --lib es7 regexredux.typescript-4.ts
3.89s to complete and log all make actions
COMMAND LINE:
/opt/src/node-v14.2.0-linux-x64/bin/node --use_strict regexredux.typescript-4.js 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361