The Computer Language
Benchmarks Game

k-nucleotide Pharo Smalltalk program

source code

"* The Computer Language Benchmarks Game
    unoptimized program contributed long ago by Isaac Gouy *"!

Object subclass: #BenchmarksGame
   instanceVariableNames: ''
   classVariableNames: ''
   poolDictionaries: ''
   category: ''!

!BenchmarksGame class methodsFor: 'private'!

substringFrequencies: aString for: aLength using: aDictionary
   | m |
   m := aString size - aLength + 1.
   1 to: m do: [:i | | fragment |
      fragment := aString copyFrom: i to: i + aLength - 1.

      aDictionary at: fragment put: 
         (aDictionary at: fragment ifAbsent: [0]) + 1 

readFasta: sequenceName from: input
   | sc gt lf prefix description buffer byte |
   sc := $; asciiValue.
   gt := $> asciiValue.
   lf := Character lf asciiValue.
   prefix := '>',sequenceName.

   "* find start of particular fasta sequence *"
   [(input atEnd) or: [
         (input peek = gt)
            ifTrue: [((line := (input upTo: lf) asString)
               findString: prefix) = 1]
            ifFalse: [input skipTo: lf. false].
   ] whileFalse.

   "* line-by-line read - it would be a lot faster to block read *"
   description := line.
   buffer := ReadWriteStream on: (String new: 1028).
   [(input atEnd) or: [(byte := input peek) = gt]] whileFalse: [
      (byte = sc)
         ifTrue: [input upTo: lf]
         ifFalse: [buffer nextPutAll: (input upTo: lf) asString]
   ^Association key: description value: buffer contents!

knucleotideFrom: input to: output
   | sequence writeFrequencies writeCount |

   sequence := (self readFasta: 'THREE' from: input) value asUppercase.

   writeFrequencies :=
      [:k | | frequencies count |
      frequencies := SortedCollection sortBlock: [:a :b|
         (a value = b value) ifTrue: [b key < a key] ifFalse: [b value < a value]].

      count := 0.0.
      (self substringFrequencies: sequence for: k using: Dictionary new)
         associationsDo: [:each|
            frequencies add: each. count := count + each value].

      frequencies do: [:each | | percentage |
         percentage := (each value / count) * 100.0.
            nextPutAll: each key; nextPutAll: ' ';
            print: percentage digits: 3; nl]]. 

   writeCount := [:nucleotideFragment | | frequencies count |
      frequencies := self substringFrequencies: sequence 
         for: nucleotideFragment size
         using: Dictionary new.
      count := frequencies at: nucleotideFragment ifAbsent: [0].
      output print: count; tab; nextPutAll: nucleotideFragment; nl].

   writeFrequencies value: 1. output nl.
   writeFrequencies value: 2. output nl.

   writeCount value: 'GGT'.
   writeCount value: 'GGTA'.
   writeCount value: 'GGTATT'.
   writeCount value: 'GGTATTTTAATT'.
   writeCount value: 'GGTATTTTAATTTATAGT'! !

!BenchmarksGame class methodsFor: 'initialize-release'!

do: n
   self knucleotideFrom: Stdio stdin to: Stdio stdout! !

!StdioStream methodsFor: 'benchmarks game'!

   self nextPut: Character tab!

   self nextPut: Character lf!

print: number digits: decimalPlaces
   | precision rounded |
   decimalPlaces <= 0 ifTrue: [^ number rounded printString].
   precision := (10 raisedTo: decimalPlaces negated) asFloat.
   rounded := number roundTo: precision.
   self nextPutAll: 
      ((rounded asScaledDecimal: decimalPlaces) printString copyUpTo: $s)! !

!StdioStream instance methodsFor: 'positioning'!

skipTo: anObject 
   "Set the access position of the receiver to be past the next occurrence of 
   anObject. Answer whether anObject is found."

   [self atEnd]
      whileFalse: [self next = anObject ifTrue: [^true]].
   ^false! !


notes, command-line, and program output

64-bit Ubuntu quad core
Pharo8.0.0 build: 1122, commit: bbcdf97

Sun, 26 Jan 2020 02:41:06 GMT

cp /opt/src/pharo64-linux-stable/Pharo8.0.0-0-64bit-bbcdf97.image knucleotide.pharo_run.image
cp /opt/src/pharo64-linux-stable/Pharo8.0.0-0-64bit-bbcdf97.changes knucleotide.pharo_run.changes
ln -s /opt/src/pharo64-linux-stable/Pharo8.0-32bit-bbcdf97.sources .
cat Include/pharo/

| prog |

(SystemWindow windowsIn: World
      satisfying: [:w | w model canDiscardEdits])
   do: [:w | w delete].

   "load program to be measured"
prog := Smalltalk getSystemAttribute: 3.
(prog notNil) ifTrue: [FileStream fileIn: prog].

ImageCleaner cleanUpForRelease.
Smalltalk garbageCollect.
SmalltalkImage current snapshot: true andQuit: true.

/opt/src/pharo64-linux-stable/pharo -headless knucleotide.pharo_run.image Include/pharo/ knucleotide.pharo 2>/dev/null
NewUndeclaredWarning: BenchmarksGame class>>readFasta:from: (line is Undeclared)
NewUndeclaredWarning: BenchmarksGame class>>readFasta:from: (line is Undeclared)
cat Include/pharo/

BenchmarksGame do: (Smalltalk getSystemAttribute: 3) asInteger.!
SmalltalkImage current snapshot: false andQuit: true!

36.39s to complete and log all make actions

/opt/src/pharo64-linux-stable/pharo -headless knucleotide.pharo_run.image Include/pharo/ 0 < knucleotide-input25000000.txt

A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT

pthread_setschedparam failed: Operation not permitted
This VM uses a separate heartbeat thread to update its internal clock
and handle events.  For best operation, this thread should run at a
higher priority, however the VM was unable to change the priority.  The
effect is that heavily loaded systems may experience some latency
issues.  If this occurs, please create the appropriate configuration
file in /etc/security/limits.d/ as shown below:

cat <<END | sudo tee /etc/security/limits.d/pharo.conf
*      hard    rtprio  2
*      soft    rtprio  2

and report to the pharo mailing list whether this improves behaviour.

You will need to log out and log back in for the limits to take effect.
For more information please see