The Q6600
Benchmarks Game

regex-redux Dart #2 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   regex-dna program contributed by Jos Hirth, based on the JavaScript version
     which was created by Jesse Millikan, jose fco. gonzalez, and Matthew Wilson

   converted from regex-dna program
*/

import 'dart:io';
import 'dart:convert';

/// Reads all of standard input and hands it to [regexAllTheThings].
///
/// Stdin is decoded as UTF-8 and split into lines; every line is appended to
/// a buffer followed by a single `'\n'`. When the stream is done, the buffered
/// text is passed on for processing.
void main() {
  final buffer = StringBuffer();

  // Appends one decoded line plus a trailing newline; null events are skipped.
  void appendLine(String line) {
    if (line == null) return;
    buffer
      ..write(line)
      ..write('\n');
  }

  // Invoked once stdin is exhausted.
  void processInput() => regexAllTheThings(buffer.toString());

  stdin
      .transform(Utf8Decoder())
      .transform(LineSplitter())
      .listen(appendLine, onDone: processInput);
}

/// Runs the regex-redux workload on [fullText] and prints the results.
///
/// Steps, in order:
///
/// 1. Removes every line that starts with `>` and every remaining newline.
/// 2. Prints each of the nine variant patterns followed by the number of
///    case-insensitive matches it has in the stripped text.
/// 3. Applies the five substitution patterns one after another.
/// 4. Prints a blank line followed by three lengths, one per line: the text
///    as received, after stripping, and after the substitutions.
void regexAllTheThings(String fullText) {
  const variantPatterns = [
    'agggtaaa|tttaccct',
    '[cgt]gggtaaa|tttaccc[acg]',
    'a[act]ggtaaa|tttacc[agt]t',
    'ag[act]gtaaa|tttac[agt]ct',
    'agg[act]taaa|ttta[agt]cct',
    'aggg[acg]aaa|ttt[cgt]ccct',
    'agggt[cgt]aa|tt[acg]accct',
    'agggta[cgt]a|t[acg]taccct',
    'agggtaa[cgt]|[acg]ttaccct'
  ];

  // Ordered [pattern, replacement] pairs. The order matters: the last two
  // patterns match the '<n>' and '|' markers produced by the earlier ones.
  const substitutions = [
    ['tHa[Nt]', '<4>'],
    ['aND|caN|Ha[DS]|WaS', '<3>'],
    ['a[NSt]|BY', '<2>'],
    ['<[^>]*>', '|'],
    ['\\|[^|][^|]*\\|', '-']
  ];

  final variants = [
    for (final pattern in variantPatterns)
      RegExp(pattern, caseSensitive: false)
  ];

  final lengthA = fullText.length;

  // Original author's note: this replaceAll was still very slow as of r21658.
  var text = fullText.replaceAll(RegExp('^>.*\n|\n', multiLine: true), '');

  final lengthB = text.length;

  for (final variant in variants) {
    print('${variant.pattern} ${variant.allMatches(text).length}');
  }

  // Substitution patterns are case-sensitive (RegExp default).
  for (final pair in substitutions) {
    text = text.replaceAll(RegExp(pair[0]), pair[1]);
  }

  final lengthC = text.length;

  print('\n$lengthA\n$lengthB\n$lengthC');
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Dart VM version: 2.8.1 (stable) (Unknown timestamp) on "linux_x64"


Wed, 06 May 2020 20:09:22 GMT

MAKE:
/usr/bin/dartanalyzer regexredux.dart-2.dart
make: /usr/bin/dartanalyzer: Command not found
make: [/home/dunham/8000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:445: regexredux.dart-2.dart_run] Error 127 (ignored)

0.09s to complete and log all make actions

COMMAND LINE:
/usr/bin/dart  regexredux.dart-2.dart 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361