The Computer Language
Benchmarks Game

regex-redux Python 3 program

source code

# The Computer Language Benchmarks Game
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
#
# regex-dna program contributed by Dominique Wahli
# 2to3
# mp by Ahmad Syukri
# modified by Justin Peel
# converted from regex-dna program

from sys import stdin
from re import sub, findall
from multiprocessing import Pool

def init(arg: str) -> None:
    """Pool initializer: publish *arg* as the module-level ``seq``.

    ``var_find`` reads ``seq`` from module globals, so each worker process
    needs it set once before it starts handling patterns.
    """
    global seq
    seq = arg

def var_find(f):
    """Return how many non-overlapping matches of regex *f* occur in ``seq``.

    ``seq`` is the module-level string installed by ``init``.
    """
    matches = findall(f, seq)
    return len(matches)

def main():
    """Run the regex-redux benchmark on the text read from stdin.

    Prints, in order:
      * one line per variant pattern: the pattern and its match count in
        the cleaned sequence,
      * a blank line,
      * the length of the raw input,
      * the length after header/newline removal,
      * the length after all substitution patterns have been applied.
    """
    seq = stdin.read()
    ilen = len(seq)

    # Drop every '>...' run through its line end (description lines in the
    # benchmark input, presumably FASTA headers) and all remaining newlines.
    seq = sub('>.*\n|\n', '', seq)
    clen = len(seq)

    variants = (
          'agggtaaa|tttaccct',
          '[cgt]gggtaaa|tttaccc[acg]',
          'a[act]ggtaaa|tttacc[agt]t',
          'ag[act]gtaaa|tttac[agt]ct',
          'agg[act]taaa|ttta[agt]cct',
          'aggg[acg]aaa|ttt[cgt]ccct',
          'agggt[cgt]aa|tt[acg]accct',
          'agggta[cgt]a|t[acg]taccct',
          'agggtaa[cgt]|[acg]ttaccct')

    # Every worker gets the cleaned sequence once through the initializer,
    # so only the short pattern strings are sent per task.  The context
    # manager releases the worker processes as soon as all counts have been
    # consumed instead of leaving the pool open for the rest of the run.
    with Pool(initializer=init, initargs=(seq,)) as pool:
        # imap yields results in submission order, so zip pairs each
        # pattern with its own count.
        for variant, count in zip(variants, pool.imap(var_find, variants)):
            print(variant, count)

    # Applied strictly in this order: the later patterns match the markers
    # ('<4>', '<3>', '<2>', then '|') produced by the earlier replacements.
    subst = (
          ('tHa[Nt]', '<4>'),
          ('aND|caN|Ha[DS]|WaS', '<3>'),
          ('a[NSt]|BY', '<2>'),
          ('<[^>]*>', '|'),
          ('\\|[^|][^|]*\\|', '-'))
    for pattern, replacement in subst:
        seq = sub(pattern, replacement, seq)

    print()
    print(ilen)
    print(clen)
    print(len(seq))

if __name__ == "__main__":
    # Run the benchmark only when executed as a script, not on import.
    main()
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Python 3.8.0


Tue, 15 Oct 2019 19:59:20 GMT

MAKE:
mv regexredux.python3 regexredux.py
pytype .
ninja: Entering directory `/home/dunham/benchmarksgame_quadcore/regexredux/tmp/.pytype'
[1/1] check regexredux
FAILED: /home/dunham/benchmarksgame_quadcore/regexredux/tmp/.pytype/pyi/regexredux.pyi 
pytype-single --imports_info /home/dunham/benchmarksgame_quadcore/regexredux/tmp/.pytype/imports/regexredux.imports --module-name regexredux -V 3.8 -o /home/dunham/benchmarksgame_quadcore/regexredux/tmp/.pytype/pyi/regexredux.pyi --analyze-annotated --nofail --quick /home/dunham/benchmarksgame_quadcore/regexredux/tmp/regexredux.py
Python versions > 3.7 are not yet supported.
ninja: build stopped: subcommand failed.
Computing dependencies
Analyzing 1 sources with 0 local dependencies
make: [/home/dunham/8000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:513: regexredux.python3_run] Error 1 (ignored)

5.11s to complete and log all make actions

COMMAND LINE:
/opt/src/Python-3.8.0/bin/python3 -OO regexredux.py 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361