The Computer Language
24.11 Benchmarks Game

k-nucleotide PHP #8 program

source code

<? /* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
   
   Naive transliteration from bearophile's program
   contributed by Isaac Gouy   
*/


/**
 * Reads the sequence section whose header line starts with ">THREE" from STDIN.
 *
 * Input is consumed up to and including that header line; the lines that
 * follow are collected until the next line starting with ">" or end of input.
 *
 * @return string[] the collected lines with trailing whitespace removed
 */
function seq_lines(): array {
   // Compare against false explicitly: fgets() returns false at end of input,
   // but a plain truthiness test would also stop on a final line that is
   // just "0" (no trailing newline), because PHP treats "0" as falsy.
   while (($line = fgets(STDIN)) !== false) {
      if (str_starts_with($line, ">THREE")) break;
   }
   $lines = [];
   while (($line = fgets(STDIN)) !== false) {
      if (str_starts_with($line, ">")) break;
      $lines[] = rtrim($line);
   }
   return $lines;
}

/**
 * Tallies every overlapping fragment of length $bases that occurs in $seq.
 *
 * @param int    $bases fragment length
 * @param string $seq   sequence to scan
 * @return array fragment => number of occurrences, in order of first appearance;
 *               empty when $seq is shorter than $bases
 */
function base_counts($bases, $seq) {
   $tally = [];
   // Number of start positions at which a full-length fragment still fits.
   $limit = strlen($seq) + 1 - $bases;
   for ($pos = 0; $pos < $limit; ++$pos) {
      $fragment = substr($seq, $pos, $bases);
      $tally[$fragment] = ($tally[$fragment] ?? 0) + 1;
   }
   return $tally;
}

/**
 * Computes the percentage frequency of each fragment of length $bases in $seq.
 *
 * @param int    $bases fragment length
 * @param string $seq   sequence to scan
 * @return array fragment => percentage of all fragment positions,
 *               ordered by descending occurrence count
 */
function sorted_freq($bases, $seq) {
   $counts = base_counts($bases, $seq);
   arsort($counts);
   // Total number of fragment positions; the denominator for every percentage.
   $total = strlen($seq) + 1 - $bases;
   // array_map() with a single array keeps the keys and the sorted order.
   return array_map(static fn($n) => 100.0 * $n / $total, $counts);
}

/**
 * Counts how often the fragment $code occurs in $seq, overlaps included.
 *
 * @param string $code fragment to look for
 * @param string $seq  sequence to scan
 * @return int number of occurrences, 0 when the fragment never appears
 */
function specific_count($code, $seq) {
    // Tally all fragments of the same length as $code, then look ours up.
    $tally = base_counts(strlen($code), $seq);
    return $tally[$code] ?? 0;
}

/**
 * Reads the sequence from STDIN and prints the report:
 * percentage frequencies of all 1- and 2-letter fragments (each table
 * followed by a blank line), then the occurrence counts of five fixed
 * fragments.
 */
function main() {
   // Join the lines into one string and upper-case it in a single pass.
   $seq = strtoupper(implode('', seq_lines()));

   for ($len = 1; $len <= 2; $len++) {
      foreach (sorted_freq($len, $seq) as $fragment => $percent) {
         printf("%s %.3f\n", $fragment, $percent);
      }
      echo "\n";
   }

   $codes = ["GGT", "GGTA", "GGTATT", "GGTATTTTAATT", "GGTATTTTAATTTATAGT"];
   foreach ($codes as $code) {
      printf("%d\t%s\n", specific_count($code, $seq), $code);
   }
}

// Script entry point: run the whole report when the file is executed.
main();
?>
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
PHP 8.4.1 (cli)
(built: Nov 22 2024 14:22:47) (NTS)
Copyright (c) The PHP Group
Zend Engine v4.4.1,
with Zend OPcache v8.4.1,
Copyright (c) Zend Technologies


 Sat, 23 Nov 2024 04:43:01 GMT

COMMAND LINE:
 /opt/src/php-8.4.1/bin/php -dzend_extension=/opt/src/php-8.4.1/lib/php/extensions/no-debug-non-zts-20240924/opcache.so -dopcache.enable_cli=1 -dopcache.jit_buffer_size=64M -n -d memory_limit=1024M knucleotide.php-8.php 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT