source code
# The Computer Language Benchmarks Game
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
#
# contributed by A. Sinan Unur
use strict;
# Patterns whose occurrences are counted in the cleaned input sequence.
# Each entry is an alternation of two 8-character patterns; all entries
# after the first replace one position with a three-letter character class.
#
# The strings are kept as plain text because they serve two purposes: they
# are interpolated into a match to do the counting, and they are printed
# verbatim as the label of each result line. Matching is case-sensitive.
# No precompiled copies are kept: each pattern is matched against the
# sequence exactly once, so a separate qr// table would never be used.
my @variants = qw/
agggtaaa|tttaccct
[cgt]gggtaaa|tttaccc[acg]
a[act]ggtaaa|tttacc[agt]t
ag[act]gtaaa|tttac[agt]ct
agg[act]taaa|ttta[agt]cct
aggg[acg]aaa|ttt[cgt]ccct
agggt[cgt]aa|tt[acg]accct
agggta[cgt]a|t[acg]taccct
agggtaa[cgt]|[acg]ttaccct
/;
# Ordered list of rewrite steps. Each element is a closure that performs one
# global substitution directly on its first argument (the caller's variable
# is modified through the @_ alias, so the string is never copied) and
# returns the result of that s///g.
#
# The order matters: the first three rules insert "<N>" markers, the fourth
# collapses every "<...>" marker to "|", and the fifth replaces each
# "|...|" run (at least one non-"|" character between the bars) with "-".
my @iub;
for my $rule (
    [ qr{ tHa [Nt] }x,                    '<4>' ],
    [ qr{ aND | caN | Ha[DS] | WaS }x,    '<3>' ],
    [ qr{ a [NSt] | BY }x,                '<2>' ],
    [ qr{ < [^>]* > }x,                   '|'   ],
    [ qr{ \| [^|] [^|]* \| }x,            '-'   ],
) {
    # Copy into fresh lexicals so every closure captures its own pair.
    my ( $pattern, $replacement ) = @{$rule};
    push @iub, sub { $_[0] =~ s/$pattern/$replacement/g };
}
# Read all of standard input in a single slurp ($/ is undefined only for the
# duration of the do-block).
my $seq = do { local $/ = undef; <STDIN> };
my $input_length = length $seq;

# Remove every span running from a '>' to the end of its line, and every
# remaining newline, leaving one continuous string.
$seq =~ s/>.*\n|\n//g;
my $cleaned_length = length $seq;

# Count the matches of each variant. This must happen before the rewrite
# steps below, which modify $seq in place.
my @results;
for my $variant (@variants) {
    my $count = () = $seq =~ /$variant/g;
    push @results, $count;
}

# Apply the rewrite steps in their defined order; each one edits $seq directly.
for my $rewrite (@iub) {
    $rewrite->($seq);
}

# report
for my $i ( 0 .. $#variants ) {
    print "$variants[$i] $results[$i]\n";
}
# A blank separator line, then the raw, cleaned and rewritten lengths.
for my $line ( '', $input_length, $cleaned_length, length $seq ) {
    print "$line\n";
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
This is perl 5, version 30, subversion 0 (v5.30.0)
built for x86_64-linux-thread-multi
Sun, 10 May 2020 22:51:56 GMT
COMMAND LINE:
/opt/src/perl-5.30.0/bin/perl regexredux.perl 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361