The Computer Language
24.09 Benchmarks Game

regex-redux PHP program

source code

<?php
/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   regex-dna program contributed by Danny Sauer
   modified by Josh Goldfoot
   modified by Sergey Khripunov
   modified by Craig Russell
   converted from regex-dna program
*/

// Open the System V message queue that the forked workers use to hand their
// counts back to the parent. The IPC key is built from this script's path and
// a one-character project id taken from the low 8 bits of the current time.
$projectId = chr(time() & 255);
$tok = ftok(__FILE__, $projectId);
$queue = msg_get_queue($tok);

// The nine patterns to count, in output order. Each one is a pair of
// alternatives joined by "|"; the keys are the implicit list indexes 0-8.
$variants = [
    'agggtaaa|tttaccct',
    '[cgt]gggtaaa|tttaccc[acg]',
    'a[act]ggtaaa|tttacc[agt]t',
    'ag[act]gtaaa|tttac[agt]ct',
    'agg[act]taaa|ttta[agt]cct',
    'aggg[acg]aaa|ttt[cgt]ccct',
    'agggt[cgt]aa|tt[acg]accct',
    'agggta[cgt]a|t[acg]taccct',
    'agggtaa[cgt]|[acg]ttaccct',
];

// IUB replacement rules kept as two parallel lists: the pattern at $IUB[i] is
// replaced by the string at $IUBnew[i]. When both lists are passed to
// preg_replace() the patterns are applied in list order, so the last two rules
// operate on the "<n>" markers written by the first three.
$IUB = [
    '/tHa[Nt]/S',
    '/aND|caN|Ha[DS]|WaS/S',
    '/a[NSt]|BY/S',
    '/<[^>]*>/S',
    '/\\|[^|][^|]*\\|/S',
];
$IUBnew = [
    '<4>',
    '<3>',
    '<2>',
    '|',
    '-',
];


// slurp all of standard input and record how many bytes were read
$contents = file_get_contents('php://stdin');
$initialLength = strlen($contents);

// drop every line that starts with ">" and every newline character, then
// record the length of what is left
$headerOrNewline = '/^>.*$|\n/mS';
$contents = preg_replace($headerOrNewline, '', $contents);
$codeLength = strlen($contents);

// do regexp counts
// $messages maps each regex to its count, in $variants order. array_flip()
// seeds it with the variant indexes; every entry is overwritten with a real
// count, either here in the parent or from a worker's queue message.
$messages = array_flip($variants);
$workers = $results = array();
foreach ($variants as $key => $regex) {
   // Variants 0-7 are processed in pairs: each even key forks one worker,
   // which counts that variant and the odd-keyed variant after it.
   if ($key < 8 && $key % 2 === 0) {
      $pid = pcntl_fork();
      // pcntl_fork() returns -1 on failure; only a real child pid is recorded,
      // so the number of workers equals the number of messages to expect.
      if ($pid > 0) $workers[] = $pid;
   }
   if ($pid === 0) {
      // worker: collect "regex,count" for both variants of the pair
      $results[] = $regex . ',' .
         preg_match_all('/' . $regex . '/iS', $contents, $discard);
      if ($key % 2 === 1) {
         // pair finished: send one raw (unserialized) message and stop
         msg_send($queue, 2, implode(';', $results), false, false, $errno);
         exit;
      }
   }
   else if ($pid === -1 || $key > 7) {
      // parent: counts the last variant itself, and also any pair whose fork
      // failed, instead of waiting for a worker that does not exist
      $messages[$regex] =
         preg_match_all('/' . $regex . '/iS', $contents, $discard);
   }
}

// receive and output the counts
// One message is read per recorded worker. Each message is a raw string of
// "regex,count" entries joined by ";".
foreach ($workers as $worker) {
   if (!msg_receive($queue, 2, $msgtype, 4096, $message, false, 0, $errno)) {
      // Without this message some counts would stay at their placeholder
      // values, so stop instead of printing wrong numbers.
      msg_remove_queue($queue);
      file_put_contents('php://stderr', "msg_receive failed (errno $errno)\n");
      exit(1);
   }
   foreach (explode(';', $message) as $line) {
      $tmp = explode(',', $line, 2);
      $messages[$tmp[0]] = $tmp[1];
   }
}

// reap every worker, not just the first one to finish
foreach ($workers as $worker) {
   pcntl_waitpid($worker, $status);
}

// System V queues outlive the process, so remove this one explicitly
msg_remove_queue($queue);

foreach ($messages as $regex => $count) {
   echo $regex, ' ', $count, "\n";
}

// run the IUB substitutions over the sequence and measure the result
$contents = preg_replace($IUB, $IUBnew, $contents);
$finalLength = strlen($contents);

// blank line, then: bytes read, bytes after stripping, bytes after replacing
echo "\n{$initialLength}\n{$codeLength}\n{$finalLength}\n";
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
PHP 8.3.11 (cli)
(built: Sep  5 2024 12:34:23) (NTS)
Copyright (c) The PHP Group
Zend Engine v4.3.11,
with Zend OPcache v8.3.11,
Copyright (c) Zend Technologies


 Thu, 05 Sep 2024 21:11:29 GMT

COMMAND LINE:
 /opt/src/php-8.3.11/bin/php -dzend_extension=/opt/src/php-8.3.11/lib/php/extensions/no-debug-non-zts-20230831/opcache.so -dopcache.enable_cli=1 -dopcache.jit_buffer_size=64M -n -d memory_limit=512M regexredux.php 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361