source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
regex-dna program contributed by Jos Hirth, based on the JavaScript version
which was created by Jesse Millikan, jose fco. gonzalez, and Matthew Wilson
converted from regex-dna program
*/
import 'dart:io';
import 'dart:convert';
/// Reads all of standard input and passes the collected text to
/// [regexAllTheThings] once the stream is exhausted.
///
/// Input is decoded as UTF-8 and split with [LineSplitter]; every line is then
/// written back followed by a single `'\n'`. As a consequence each line
/// terminator recognised by [LineSplitter] is normalised to `'\n'`, and a
/// final line that lacks a terminator gains one.
void main() {
  final text = StringBuffer();
  stdin
      .transform(utf8.decoder)
      .transform(const LineSplitter())
      .listen(
    // LineSplitter never emits null, so no null guard is needed here.
    (line) {
      text.write(line);
      text.write('\n');
    },
    // Processing starts only after the whole input has been buffered.
    onDone: () => regexAllTheThings(text.toString()),
  );
}
/// Runs the regex workload on [fullText] and prints the results.
///
/// Steps, in order:
///  1. Record the length of [fullText] as received.
///  2. Remove every line that starts with `>` (including its newline) and
///     every remaining newline, then record the new length.
///  3. For each of the nine variant patterns, print the pattern followed by
///     the number of case-insensitive matches in the stripped text.
///  4. Apply the five case-sensitive substitutions one after another, each on
///     the result of the previous one, and record the final length.
///  5. Print a blank line followed by the three recorded lengths, one per
///     line (emitted with a single `print` call).
void regexAllTheThings(String fullText) {
  // Patterns whose occurrences are counted (matched case-insensitively).
  const variantPatterns = [
    'agggtaaa|tttaccct',
    '[cgt]gggtaaa|tttaccc[acg]',
    'a[act]ggtaaa|tttacc[agt]t',
    'ag[act]gtaaa|tttac[agt]ct',
    'agg[act]taaa|ttta[agt]cct',
    'aggg[acg]aaa|ttt[cgt]ccct',
    'agggt[cgt]aa|tt[acg]accct',
    'agggta[cgt]a|t[acg]taccct',
    'agggtaa[cgt]|[acg]ttaccct',
  ];

  // Pattern -> replacement. Map literals keep insertion order, and the order
  // matters: later substitutions operate on the output of earlier ones.
  const substitutions = {
    'tHa[Nt]': '<4>',
    'aND|caN|Ha[DS]|WaS': '<3>',
    'a[NSt]|BY': '<2>',
    '<[^>]*>': '|',
    r'\|[^|][^|]*\|': '-',
  };

  final lengthA = fullText.length;

  // Drop header lines and all newlines. The original author noted this
  // replace was "still ridiculously slow with r21658".
  var sequence =
      fullText.replaceAll(RegExp('^>.*\n|\n', multiLine: true), '');
  final lengthB = sequence.length;

  for (final source in variantPatterns) {
    final variant = RegExp(source, caseSensitive: false);
    print('${variant.pattern} ${variant.allMatches(sequence).length}');
  }

  for (final entry in substitutions.entries) {
    sequence = sequence.replaceAll(RegExp(entry.key), entry.value);
  }
  final lengthC = sequence.length;

  print('\n$lengthA\n$lengthB\n$lengthC');
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
Dart VM version: 2.8.1 (stable) (Unknown timestamp) on "linux_x64"
--snapshot-kind=app-jit
Thu, 07 May 2020 03:45:47 GMT
MAKE:
/usr/bin/dartanalyzer regexredux.dartsnapshot-2.dartsnapshot
make: /usr/bin/dartanalyzer: Command not found
make: [/home/dunham/8000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:448: regexredux.dartsnapshot-2.dartsnapshot_run] Error 127 (ignored)
/usr/bin/dart --snapshot=regexredux.dartsnapshot-2.snapshot --snapshot-kind=app-jit regexredux.dartsnapshot-2.dartsnapshot 0 < ../regexredux-input50000.txt
agggtaaa|tttaccct 3
[cgt]gggtaaa|tttaccc[acg] 12
a[act]ggtaaa|tttacc[agt]t 43
ag[act]gtaaa|tttac[agt]ct 27
agg[act]taaa|ttta[agt]cct 58
aggg[acg]aaa|ttt[cgt]ccct 16
agggt[cgt]aa|tt[acg]accct 15
agggta[cgt]a|t[acg]taccct 18
agggtaa[cgt]|[acg]ttaccct 20
508411
500000
273927
4.26s to complete and log all make actions
COMMAND LINE:
/usr/bin/dart regexredux.dartsnapshot-2.snapshot 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361