source code
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// regex-dna program contributed by Jesse Millikan
// Based on the Ruby version by jose fco. gonzalez
// fixed by Matthew Wilson
// ported to Node.js and sped up by Roman Pletnev
// converted from regex-dna program
// fixed by Josh Goldfoot
// multi thread by Andrey Filatkin
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');
const fs = require('fs');
// Entry point. This file is loaded both as the main program and as the worker
// script, so the role is chosen from the thread it is running on.
if (isMainThread) {
  // mainThread() is async: anything it throws (for example a failed read of
  // stdin) arrives as a promise rejection. Handle it explicitly so the failure
  // is always reported and the process exits with a non-zero status instead of
  // depending on the runtime's unhandled-rejection policy.
  mainThread().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
} else {
  workerThread(workerData);
}
/**
 * Main-thread half of the program.
 *
 * Reads all of standard input, records its length, removes every line that
 * begins with '>' together with all newline characters, and records the
 * cleaned length. The cleaned text is handed to a worker thread for the
 * substitution pass; meanwhile this thread prints, for each variant pattern,
 * the pattern source and its case-insensitive match count. Finally it prints a
 * blank line followed by the original length, the cleaned length and the
 * length reported by the worker.
 *
 * @returns {Promise<void>} settles once the final three lengths are printed.
 */
async function mainThread() {
  const variantPatterns = [
    /agggtaaa|tttaccct/ig,
    /[cgt]gggtaaa|tttaccc[acg]/ig,
    /a[act]ggtaaa|tttacc[agt]t/ig,
    /ag[act]gtaaa|tttac[agt]ct/ig,
    /agg[act]taaa|ttta[agt]cct/ig,
    /aggg[acg]aaa|ttt[cgt]ccct/ig,
    /agggt[cgt]aa|tt[acg]accct/ig,
    /agggta[cgt]a|t[acg]taccct/ig,
    /agggtaa[cgt]|[acg]ttaccct/ig
  ];

  // Starts a worker running this same file with the sequence as its
  // workerData, and resolves with the `data` field of a message it posts back.
  const replaceWork = (sequence) =>
    new Promise((resolve) => {
      const worker = new Worker(__filename, { workerData: sequence });
      worker.on('message', (message) => {
        resolve(message.data);
      });
    });

  const rawInput = fs.readFileSync('/dev/stdin', 'ascii');
  const initialLen = rawInput.length;

  const sequence = rawInput.replace(/^>.*\n|\n/mg, '');
  const cleanedLen = sequence.length;

  // Kick the worker off before counting so it is already running while the
  // synchronous matching loop below occupies this thread.
  const pendingEndLen = replaceWork(sequence);

  for (const pattern of variantPatterns) {
    const hits = sequence.match(pattern);
    console.log(pattern.source, hits === null ? 0 : hits.length);
  }

  const endLen = await pendingEndLen;
  console.log(`\n${initialLen}\n${cleanedLen}\n${endLen}`);
}
/**
 * Worker-thread half of the program.
 *
 * Runs the fixed sequence of global substitutions over `str` and posts the
 * length of the final string to the parent thread as `{ data: <length> }`.
 *
 * @param {string} str text to run the substitutions on.
 */
function workerThread(str) {
  // Order matters: every substitution sees the output of the previous one
  // (the last two operate on the '<n>' markers produced by the first three).
  const substitutions = [
    [/tHa[Nt]/g, '<4>'],
    [/aND|caN|Ha[DS]|WaS/g, '<3>'],
    [/a[NSt]|BY/g, '<2>'],
    [/<[^>]*>/g, '|'],
    [/\|[^|][^|]*\|/g, '-'],
  ];

  let rewritten = str;
  for (const [pattern, replacement] of substitutions) {
    rewritten = rewritten.replace(pattern, replacement);
  }

  parentPort.postMessage({ data: rewritten.length });
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
v14.2.0
Thu, 07 May 2020 02:24:10 GMT
MAKE:
cp -L regexredux.node-3.node regexredux.node-3.js
0.24s to complete and log all make actions
COMMAND LINE:
/opt/src/node-v14.2.0-linux-x64/bin/node regexredux.node-3.js 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361