source code
<? /* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
Naive transliteration from bearophile's program
contributed by Isaac Gouy
*/
/**
 * Read the ">THREE" record from standard input.
 *
 * Lines are consumed from STDIN until one starting with ">THREE" is seen;
 * the lines that follow are collected until the next line starting with ">"
 * or the end of input, whichever comes first.
 *
 * @return array collected lines with trailing whitespace (including the
 *               line terminator) removed; empty when no ">THREE" line exists
 */
function seq_lines()
{
    // Compare against false explicitly: fgets() only returns false when no
    // more data can be read, whereas a plain truthiness test would also stop
    // on a final, unterminated line consisting solely of "0".
    while (($line = fgets(STDIN)) !== false) {
        if (str_starts_with($line, ">THREE")) {
            break;
        }
    }

    $lines = [];
    while (($line = fgets(STDIN)) !== false) {
        // Any following header line marks the end of the wanted record.
        if (str_starts_with($line, ">")) {
            break;
        }
        $lines[] = rtrim($line);
    }

    return $lines;
}
/**
 * Tally every overlapping fragment of a fixed length within a sequence.
 *
 * @param int    $bases length of each fragment
 * @param string $seq   sequence to scan
 * @return array number of occurrences keyed by fragment, in order of first
 *               appearance; empty when the sequence is shorter than $bases
 */
function base_counts($bases, $seq)
{
    $tally = [];
    // Highest offset at which a full-length fragment still fits.
    $last = strlen($seq) - $bases;
    for ($pos = 0; $pos <= $last; ++$pos) {
        $fragment = substr($seq, $pos, $bases);
        $tally[$fragment] = ($tally[$fragment] ?? 0) + 1;
    }
    return $tally;
}
/**
 * Compute the percentage frequency of every fragment of a given length.
 *
 * @param int    $bases length of each fragment
 * @param string $seq   sequence to analyse
 * @return array percentage (float) keyed by fragment, ordered from the most
 *               to the least frequent fragment
 */
function sorted_freq($bases, $seq)
{
    $tally = base_counts($bases, $seq);
    arsort($tally);

    // Total number of fragment positions in the sequence.
    $windows = strlen($seq) + 1 - $bases;

    // array_map() keeps the keys when it is given exactly one array, so the
    // fragment => value association and the sorted order both survive.
    return array_map(
        static fn ($hits) => 100.0 * $hits / $windows,
        $tally
    );
}
/**
 * Count how many times one particular fragment occurs in a sequence.
 *
 * The count is taken from the full tally of all fragments that have the
 * same length as $code.
 *
 * @param string $code fragment to look for
 * @param string $seq  sequence to search
 * @return int number of occurrences, 0 when the fragment never appears
 */
function specific_count($code, $seq)
{
    $tally = base_counts(strlen($code), $seq);
    return $tally[$code] ?? 0;
}
/**
 * Run the whole program: read the sequence, then print the reports.
 *
 * Output consists of the frequency tables for fragments of length 1 and 2
 * (one "fragment percentage" line per entry with three decimals, each table
 * followed by an empty line) and then one "count<TAB>fragment" line for each
 * of five fixed fragments.
 */
function main()
{
    // Upper-casing the joined string yields the same bytes as upper-casing
    // each line before joining.
    $seq = strtoupper(implode('', seq_lines()));

    foreach ([1, 2] as $width) {
        foreach (sorted_freq($width, $seq) as $fragment => $percent) {
            printf("%s %.3f\n", $fragment, $percent);
        }
        echo "\n";
    }

    $targets = [
        "GGT",
        "GGTA",
        "GGTATT",
        "GGTATTTTAATT",
        "GGTATTTTAATTTATAGT",
    ];
    foreach ($targets as $code) {
        printf("%d\t%s\n", specific_count($code, $seq), $code);
    }
}
// Invoke main() at top level so the program runs when the script is executed.
main();
?>
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
PHP 8.4.1 (cli)
(built: Nov 22 2024 14:22:47) (NTS)
Copyright (c) The PHP Group
Zend Engine v4.4.1,
with Zend OPcache v8.4.1,
Copyright (c) Zend Technologies
Sat, 23 Nov 2024 04:43:01 GMT
COMMAND LINE:
/opt/src/php-8.4.1/bin/php -dzend_extension=/opt/src/php-8.4.1/lib/php/extensions/no-debug-non-zts-20240924/opcache.so -dopcache.enable_cli=1 -dopcache.jit_buffer_size=64M -n -d memory_limit=1024M knucleotide.php-8.php 0 < knucleotide-input25000000.txt
PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754
AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902
1471758 GGT
446535 GGTA
47336 GGTATT
893 GGTATTTTAATT
893 GGTATTTTAATTTATAGT