The Computer Language
24.06 Benchmarks Game

regex-redux Dart jit #2 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   regex-dna program contributed by Jos Hirth, based on the JavaScript version
     which was created by Jesse Millikan, jose fco. gonzalez, and Matthew Wilson

   converted from regex-dna program
*/

import 'dart:io';
import 'dart:convert';

/// Entry point: buffers all of standard input, then runs the benchmark.
///
/// Standard input is decoded as UTF-8 and split into lines. Every line is
/// appended to a buffer followed by a single `\n`. Once the input stream is
/// exhausted, the buffered text is handed to [regexAllTheThings].
void main() {
  final input = StringBuffer();

  stdin
      .transform(Utf8Decoder())
      .transform(LineSplitter())
      // `writeln` appends the line and then a '\n'.
      .forEach(input.writeln)
      .then((_) => regexAllTheThings(input.toString()));
}

/// Runs the regex-redux workload over [fullText] and prints the results.
///
/// The steps, in order:
///
/// 1. Removes every line that starts with `>` and every newline character
///    from [fullText].
/// 2. For each of the nine variant patterns, prints the pattern followed by a
///    space and the number of case-insensitive matches in the stripped text.
/// 3. Applies five case-sensitive substitutions one after another, each one
///    operating on the output of the previous one.
/// 4. Prints a blank line followed by three lengths, one per line: the length
///    of [fullText] as received, the length after step 1, and the length
///    after step 3.
///
/// Nothing is returned; all results go to standard output via [print].
void regexAllTheThings(String fullText) {
  const variantPatterns = [
    'agggtaaa|tttaccct',
    '[cgt]gggtaaa|tttaccc[acg]',
    'a[act]ggtaaa|tttacc[agt]t',
    'ag[act]gtaaa|tttac[agt]ct',
    'agg[act]taaa|ttta[agt]cct',
    'aggg[acg]aaa|ttt[cgt]ccct',
    'agggt[cgt]aa|tt[acg]accct',
    'agggta[cgt]a|t[acg]taccct',
    'agggtaa[cgt]|[acg]ttaccct',
  ];

  // Pattern -> replacement, applied in source order (map literals iterate in
  // insertion order). The order matters: the last two patterns match the
  // `<n>` and `|` markers produced by the earlier replacements.
  const substitutions = {
    'tHa[Nt]': '<4>',
    'aND|caN|Ha[DS]|WaS': '<3>',
    'a[NSt]|BY': '<2>',
    '<[^>]*>': '|',
    r'\|[^|][^|]*\|': '-',
  };

  final originalLength = fullText.length;

  // `multiLine: true` lets `^` anchor at the start of every line, so the
  // first alternative removes whole `>` lines (including their newline) and
  // the second removes the remaining newlines.
  // NOTE: the original author flagged this call as very slow ("r21658").
  var sequence =
      fullText.replaceAll(RegExp('^>.*\n|\n', multiLine: true), '');
  final strippedLength = sequence.length;

  for (final source in variantPatterns) {
    final variant = RegExp(source, caseSensitive: false);
    print('${variant.pattern} ${variant.allMatches(sequence).length}');
  }

  for (final entry in substitutions.entries) {
    sequence = sequence.replaceAll(RegExp(entry.key), entry.value);
  }

  print('\n$originalLength\n$strippedLength\n${sequence.length}');
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Dart SDK version: 3.4.1 (stable)
Tue May 21 15:46:25 2024



 Tue, 28 May 2024 23:17:29 GMT

MAKE:
/opt/src/dart-sdk/bin/dart analyze 
Analyzing tmp...
No issues found!

1.76s to complete and log all make actions

COMMAND LINE:
 /opt/src/dart-sdk/bin/dart run  regexredux.dartjit-2.dartjit 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361