The Computer Language
23.03 Benchmarks Game

k-nucleotide Pharo Smalltalk #2 program

source code

"* The Computer Language Benchmarks Game
    http://shootout.alioth.debian.org/
    contributed by Isaac Gouy *"!

"BenchmarksGame declares no instance variables, class variables or pool
 dictionaries; it serves as the holder for the class-side benchmark methods
 of this file."
Object subclass: #BenchmarksGame
   instanceVariableNames: ''
   classVariableNames: ''
   poolDictionaries: ''
   category: ''!


!BenchmarksGame class methodsFor: 'private'!

substringFrequencies: aString for: aLength using: aDictionary
   "Tally every substring of aString that is aLength characters long:
    each occurrence adds 1 to that substring's entry in aDictionary
    (an entry that is not yet present counts from 0).
    Answer aDictionary."
   | lastStart |
   lastStart := aString size - aLength + 1.
   1 to: lastStart do: [:start | | stop key previous |
      stop := start + aLength - 1.
      key := aString copyFrom: start to: stop.
      previous := aDictionary at: key ifAbsent: [0].
      aDictionary at: key put: previous + 1].
   ^aDictionary!


readFasta: sequenceName from: input
   "Skip forward in input to the first line that starts with $> and matches
    the pattern '>', sequenceName, '*' ; then gather the following lines up
    to the next line starting with $> or the end of input, leaving out lines
    that start with $; . Answer an Association whose key is the matched
    header line (nil when no header matched) and whose value is the gathered
    lines concatenated without separators.

    Blank lines are tolerated: they are never treated as a header and add
    nothing to the sequence. Previously at: 1 was sent to every line, which
    signals an error on an empty line."
   | prefix line description buffer |

   prefix := '>', sequenceName, '*'.

   "Phase 1: consume lines until the wanted header is found or input ends."
   [description notNil or: [input atEnd]] whileFalse: [
      line := input nextLine.
      ((line notEmpty and: [(line at: 1) = $>]) and: [prefix match: line])
         ifTrue: [description := line]].

   "Phase 2: collect sequence lines; the header of the next record, if any,
    is read by the loop condition and ends the loop."
   buffer := ReadWriteStream on: (String new: 1028).
   [input atEnd or: [
      line := input nextLine.
      line notEmpty and: [(line at: 1) = $>]]]
      whileFalse: [
         (line isEmpty or: [(line at: 1) = $;])
            ifFalse: [buffer nextPutAll: line]].

   ^Association key: description value: buffer contents!


knucleotideFrom: input to: output
   "Read sequence THREE from input, convert it to uppercase, and write to
    output: the frequency tables for fragments of length 1 and 2 (each
    followed by an empty line), then the occurrence counts of five fixed
    fragments."
   | dna reportPercentages reportOccurrences |

   dna := (self readFasta: 'THREE' from: input) value asUppercase.

   "Writes one line per distinct fragment of the given length: the fragment,
    a space, and its share of all fragments as a percentage with 3 decimals.
    Lines go from highest count to lowest; equal counts are ordered by
    descending key."
   reportPercentages := [:fragmentLength | | tally ranked total |
      ranked := SortedCollection sortBlock: [:left :right |
         left value = right value
            ifTrue: [right key < left key]
            ifFalse: [right value < left value]].
      tally := self
         substringFrequencies: dna
         for: fragmentLength
         using: Dictionary new.
      total := 0.0.
      tally associationsDo: [:entry |
         ranked add: entry.
         total := total + entry value].
      ranked do: [:entry | | share |
         share := (entry value / total) * 100.0.
         output nextPutAll: entry key.
         output nextPutAll: ' '.
         output print: share digits: 3.
         output lf]].

   "Writes the number of occurrences of one fragment (0 when it never
    occurs), a tab, and the fragment itself."
   reportOccurrences := [:fragment | | tally occurrences |
      tally := self
         substringFrequencies: dna
         for: fragment size
         using: Dictionary new.
      occurrences := tally at: fragment ifAbsent: [0].
      output print: occurrences; tab; nextPutAll: fragment; lf].

   #(1 2) do: [:fragmentLength |
      reportPercentages value: fragmentLength.
      output lf].

   #('GGT' 'GGTA' 'GGTATT' 'GGTATTTTAATT' 'GGTATTTTAATTTATAGT')
      do: [:fragment | reportOccurrences value: fragment]! !


!BenchmarksGame class methodsFor: 'initialize-release'!

do: n
   "Run the k-nucleotide report, reading from Stdio stdin through a buffered
    character stream and writing to Stdio stdout. The argument n is not used."
   | bufferedStdin characterInput |
   bufferedStdin := ZnBufferedReadStream on: Stdio stdin.
   characterInput := ZnCharacterReadStream on: bufferedStdin.
   self knucleotideFrom: characterInput to: Stdio stdout! !


!StdioStream methodsFor: 'benchmarks game'!

tab
   "Put one tab character on the receiver."
   | tabCharacter |
   tabCharacter := Character tab.
   self nextPut: tabCharacter!

print: number digits: decimalPlaces
   "Write number on the receiver in fixed-point form with decimalPlaces
    digits after the decimal point. When decimalPlaces is zero or negative,
    write number rounded to the nearest integer instead.
    Answer the receiver in both cases.

    Fix: the non-positive branch used to answer the printString without
    writing it, so nothing reached the stream."
   | precision rounded |
   decimalPlaces <= 0 ifTrue: [
      self nextPutAll: number rounded printString.
      ^self].
   precision := (10 raisedTo: decimalPlaces negated) asFloat.
   rounded := number roundTo: precision.
   "Only the part of the ScaledDecimal printString that precedes the first
    $s is written."
   self nextPutAll: 
      ((rounded asScaledDecimal: decimalPlaces) printString copyUpTo: $s)! !


    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Pharo 9.0.21
Dec 7 2022 20:44:42
Compiler: 5.4.0 20160609


Mon, 23 Jan 2023 18:18:30 GMT

MAKE:
cp /opt/src/pharo-vm-Linux-x86_64-stable/Pharo10-SNAPSHOT-64bit-2314c3f.image knucleotide.pharo-2.pharo_run.image
cp /opt/src/pharo-vm-Linux-x86_64-stable/Pharo10-SNAPSHOT-64bit-2314c3f.changes knucleotide.pharo-2.pharo_run.changes
ln -s /opt/src/pharo-vm-Linux-x86_64-stable/Pharo10.0-64bit-2314c3f.sources .
cat Include/pharo/make.st
"Build script: files in the program to be measured, then saves the image
 and quits."
| prog |

"Delete every window in World whose model reports that its edits can be
 discarded."
(SystemWindow windowsIn: World
      satisfying: [:w | w model canDiscardEdits])
   do: [:w | w delete].

   "load program to be measured"
"NOTE(review): system attribute 3 is presumably the argument that follows
 the script name on the VM command line - confirm."
prog := Smalltalk getSystemAttribute: 3.
"Nothing is filed in when that attribute is nil."
(prog notNil) ifTrue: [prog asFileReference fileIn].

"Clean up and collect garbage before the image is saved; the final
 statement saves the image and quits."
ImageCleaner cleanUpForRelease.
Smalltalk garbageCollect.
SmalltalkImage current snapshot: true andQuit: true.
/opt/src/pharo-vm-Linux-x86_64-stable/pharo --headless knucleotide.pharo-2.pharo_run.image Include/pharo/make.st knucleotide.pharo-2.pharo
cat Include/pharo/main.st

"Run the benchmark, passing system attribute 3 converted to an integer."
BenchmarksGame do: (Smalltalk getSystemAttribute: 3) asInteger.!
"Quit without saving the image."
SmalltalkImage current snapshot: false andQuit: true!



26.81s to complete and log all make actions

COMMAND LINE:
/opt/src/pharo-vm-Linux-x86_64-stable/pharo --headless knucleotide.pharo-2.pharo_run.image Include/pharo/main.st 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT