The Computer Language
24.04 Benchmarks Game

k-nucleotide PHP #4 program

source code

<?
/* 
   The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   contributed by Damien Bonvillain
   fixed by Isaac Gouy
   improved by Levi Cameron
*/

$sequence = read_sequence('THREE');

fclose(STDIN);

$jobs = array(
   array('write_freq', 1),
   array('write_freq', 2),
   array('write_count', 'GGT'),
   array('write_count', 'GGTA'),
   array('write_count', 'GGTATT'),
   array('write_count', 'GGTATTTTAATT'),
   array('write_count', 'GGTATTTTAATTTATAGT'),
);

$tok = ftok(__FILE__, chr(time() & 255));
$queue = msg_get_queue($tok);

$parent = TRUE;
$count = count($jobs);
for ($i = 1; $i < $count; ++$i) {
   $pid = pcntl_fork();
   if ($pid === -1) {
      die('could not fork');
   } else if ($pid) {
      continue;
   }
   $parent = FALSE;
   break;
}
if ($parent) {
   $i = 0;
}

$func = $jobs[$i][0];
$arg =  $jobs[$i][1];

ob_start();

$func($sequence, $arg);

$result = array($i, ob_get_clean());

if (!msg_send($queue, 2, $result, TRUE, FALSE, $errno)) {
   var_dump("$errno");
   var_dump(msg_stat_queue($queue));
}

if (!$parent) {
   exit(0);
}

$results = array();
foreach($jobs as $job) {
    msg_receive($queue, 2, $msgtype, 4096, $result, TRUE);
   $results[$result[0]] = $result[1];
   pcntl_wait($s);
}

ksort($results);
foreach ($results as $result) {
   echo $result;
}

msg_remove_queue($queue);


/* functions definitions follow */
function read_sequence($id) {
   $id = '>' . $id;
   $ln_id = strlen($id);
   $fd = STDIN;

   // reach sequence three
   do {
      $line = stream_get_line($fd, 250, "\n");
	  // if EOF then we couldn't find the sequence
	  if (feof($fd)) exit(-1);
   } while (strncmp($line, $id, $ln_id) !== 0);

   ob_start();	// for repeated string concatenations, output buffering is fastest
   // next, read the content of the sequence
   while (!feof($fd)) {
      $line = stream_get_line($fd, 250, "\n");
      if (!isset($line[0])) continue;
      $c = $line[0];
      if ($c === ';') continue;
      if ($c === '>') break;
      // append the uppercase sequence fragment,
      // must get rid of the CR/LF or whatever if present
      echo $line;
   }
   return strtoupper(ob_get_clean());
}

function write_freq($sequence, $key_length) {
   $map = generate_frequencies($sequence, $key_length);
   uasort($map, 'freq_name_comparator');
   foreach($map as $key => $val) {
      printf ("%s %.3f\n", $key, $val);
   }
   echo "\n";
}

function write_count($sequence, $key) {
   $map = generate_frequencies($sequence, strlen($key), false);
   if (isset($map[$key])) $value = $map[$key];
   else $value = 0;
   printf ("%d\t%s\n", $value, $key);
}

/**
 * Returns a map (key, count or freq(default))
 */
function generate_frequencies($sequence, $key_length, $compute_freq = true) {
   $result = array();
   $total = strlen($sequence) - $key_length;
   $i = $total;
   if ($key_length === 1) { 
      do {
         $x = &$result[$sequence[$i--]];
         $x++;
      } while ($i);
   } else {
      do {
         $x = &$result[substr($sequence, $i--, $key_length)];
         $x++;
      } while ($i);
   }
   if($compute_freq) {
      foreach($result as $k => $v) {
         $result[$k] = $v * 100 / $total;
      }
   }
   return $result;
}

function freq_name_comparator($a, $b) {
   if ($a == $b) return 0;
   return  ($a < $b) ? 1 : -1;
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
PHP 8.3.3 (cli)
(built: Mar  3 2024 08:47:21) (NTS)
Copyright (c) The PHP Group
Zend Engine v4.3.3,
with Zend OPcache v8.3.3,
Copyright (c) Zend Technologies


 Fri, 08 Mar 2024 23:47:23 GMT

COMMAND LINE:
 /opt/src/php-8.3.3/bin/php -dzend_extension=/opt/src/php-8.3.3/lib/php/extensions/no-debug-non-zts-20230831/opcache.so -dopcache.enable_cli=1 -dopcache.jit_buffer_size=64M -n -d memory_limit=1024M knucleotide.php-4.php 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT