The Computer Language
23.03 Benchmarks Game

k-nucleotide VW Smalltalk program

source code

"* The Computer Language Benchmarks Game
    http://shootout.alioth.debian.org/
    unoptimized program contributed long ago by Isaac Gouy *"!

"Defines the class BenchmarksGame in the Smalltalk.Core namespace as a
 subclass of Core.Object. It declares no instance variables and no class
 instance variables; the methods filed in below are added on the class side."
Smalltalk.Core defineClass: #BenchmarksGame
	superclass: #{Core.Object}
	indexedType: #none
	private: false
	instanceVariableNames: ''
	classInstanceVariableNames: ''
	imports: ''
	category: ''!


!Core.BenchmarksGame class methodsFor: 'private'!

substringFrequencies: aString for: aLength using: aDictionary
   "Tally every substring of aString that is aLength characters long.
    Each substring is used as a key of aDictionary and its value is
    increased by one per occurrence (a key that is not yet present counts
    as 0). Answers aDictionary."
   aLength to: aString size do: [:stop | | piece seenSoFar |
      "stop is the index of the last character of the current substring"
      piece := aString copyFrom: stop - aLength + 1 to: stop.
      seenSoFar := aDictionary at: piece ifAbsent: [0].
      aDictionary at: piece put: seenSoFar + 1].
   ^aDictionary!


readFasta: sequenceName from: input
   "Scan input for the FASTA record whose header line begins with
    > followed by sequenceName, then gather the sequence lines after it.

    sequenceName - the name expected directly after the > marker
    input        - a read stream over FASTA text whose lines are
                   separated by Character cr

    Answers an Association. Its key is the matching header line, or nil
    when no header matches. Its value is the concatenation of the lines
    that follow the header up to the next line starting with > or the end
    of input; lines starting with ; are skipped and line ends are not
    included."
   | prefix newline buffer description char |
   prefix := '>',sequenceName.
   newline := Character cr.

   "* find start of particular fasta sequence *"
   "description is assigned only on a match, so the header of some other
    sequence is never reported when the wanted one is missing"
   [description isNil and: [input atEnd not]] whileTrue: [
      (input peek = $>)
         ifTrue: [ | header |
            header := input upTo: newline.
            ((header indexOfSubCollection: prefix startingAt: 1) = 1)
               ifTrue: [description := header]]
         ifFalse: [input skipThrough: newline]].

   "* line-by-line read - it would be a lot faster to block read *"
   buffer := ReadWriteStream on: (String new: 1028).
   [(input atEnd) or: [(char := input peek) = $>]] whileFalse: [
      (char = $;)
         ifTrue: [input upTo: newline]
         ifFalse: [buffer nextPutAll: (input upTo: newline)]
      ].
   ^Association key: description value: buffer contents !


knucleotideFrom: input to: output
   "Read the FASTA sequence named THREE from input, convert it to
    uppercase, and write on output: a percentage table for fragments of
    length 1 and another for length 2 (each followed by an empty line),
    then the occurrence counts of five fixed fragments."
   | dna reportPercentages reportOccurrences |

   dna := (self readFasta: 'THREE' from: input) value asUppercase.

   "One line per distinct fragment of the given length: the fragment, a
    space, and its share of all fragments as a percentage with 3 digits.
    Larger counts come first; equal counts are ordered by larger key first."
   reportPercentages := [:fragmentLength | | ranked total tally |
      ranked := SortedCollection sortBlock: [:a :b |
         b value < a value
            or: [a value = b value and: [b key < a key]]].
      total := 0.0.
      tally := self
         substringFrequencies: dna
         for: fragmentLength
         using: Dictionary new.
      tally associationsDo: [:entry |
         ranked add: entry.
         total := total + entry value].
      ranked do: [:entry |
         output nextPutAll: entry key.
         output nextPutAll: ' '.
         output print: (entry value / total) * 100.0 digits: 3.
         output nl]].

   "One line for the given fragment: its count, a tab, and the fragment.
    A fragment that never occurs is reported with a count of 0."
   reportOccurrences := [:fragment | | tally occurrences |
      tally := self
         substringFrequencies: dna
         for: fragment size
         using: Dictionary new.
      occurrences := tally at: fragment ifAbsent: [0].
      output print: occurrences.
      output tab.
      output nextPutAll: fragment.
      output nl].

   #(1 2) do: [:fragmentLength |
      reportPercentages value: fragmentLength.
      output nl].

   #('GGT' 'GGTA' 'GGTATT' 'GGTATTTTAATT' 'GGTATTTTAATTTATAGT')
      do: [:fragment | reportOccurrences value: fragment]! !


!Core.BenchmarksGame class methodsFor: 'initialize-release'!

do: n
   "Entry point: run the k-nucleotide report reading from Stdin and
    writing to Stdout. The argument n is not used by this method.
    Answers an empty string.
    NOTE(review): the empty string is presumably answered so that the
    -evaluate command line option prints nothing extra - confirm."
   self knucleotideFrom: Stdin to: Stdout.
   ^''! !


!Core.Stream methodsFor: 'benchmarks game'!

tab
   "Write a single tab character on the receiver."
   self nextPut: Character tab!

nl
   "Write a single line feed character (Character lf) on the receiver."
   self nextPut: Character lf!

print: number digits: decimalPlaces
   "Convert number with asFixedPoint: decimalPlaces and write its printed
    form on the receiver, after removing every $s character from that
    printed form."
   | fixed printed |
   fixed := number asFixedPoint: decimalPlaces.
   printed := fixed printString.
   self nextPutAll: (printed copyWithout: $s)! !


    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
VisualWorks® 8.3
Aug 19 2017


Mon, 23 Jan 2023 20:02:24 GMT

MAKE:
cp /opt/src/vw8.3pul/image/visualnc64.im knucleotide.vw_run.im
/opt/src/vw8.3pul/bin/visual knucleotide.vw_run.im -nogui -pcl MatriX -filein knucleotide.vw -doit 'ObjectMemory snapshotThenQuit'

Autoloading MatriX from $(VISUALWORKS)/preview/matrix/MatriX.pcl
Autoloading Xtreams-Support from $(VISUALWORKS)/xtreams/Xtreams-Support.pcl
Autoloading Xtreams-Core from $(VISUALWORKS)/xtreams/Xtreams-Core.pcl
Autoloading Xtreams-Terminals from $(VISUALWORKS)/xtreams/Xtreams-Terminals.pcl
Autoloading Xtreams-Transforms from $(VISUALWORKS)/xtreams/Xtreams-Transforms.pcl
Autoloading Xtreams-Substreams from $(VISUALWORKS)/xtreams/Xtreams-Substreams.pcl
Autoloading Xtreams-Multiplexing from $(VISUALWORKS)/xtreams/Xtreams-Multiplexing.pcl
Filing in from:
	knucleotide.vw
BenchmarksGame class<private
BenchmarksGame class<initialize-release
Stream<benchmarks game
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/knucleotide.vw_run.im created at January 23, 2023 12:01:16 PM
6.22s to complete and log all make actions

COMMAND LINE:
/opt/src/vw8.3pul/bin/visual knucleotide.vw_run.im -nogui -evaluate "BenchmarksGame do: 0" < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT