The Computer Language
24.12 Benchmarks Game

k-nucleotide Lisp SBCL #3 program

source code

;;   The Computer Language Benchmarks Game
;;   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
;;   contributed by Vsevolod Dyomkin


(defun slice-and-dice (gene k ht)
  (declare (optimize (speed 3) (safety 0) (debug 0))
	   (type simple-base-string gene)
	   (type fixnum k))
  (clrhash ht)
;  (setf ht (make-hash-table :size (expt 2 k)))
  (if (= k 1)
      (dotimes (i (the fixnum (- (length gene) k)))
        (incf (the fixnum (gethash (schar gene i) ht 0))))
      (loop for i from 0 to (the fixnum (- (length gene) k))
         with gene-num = 0 and mask = (1- (expt 4 k)) do
           (setf gene-num (logand mask (letter-to-num gene i gene-num)))
           (unless (< i (the fixnum (1- k)))
             (incf (the fixnum (gethash gene-num ht 0))))))
  ht)

(declaim (inline letter-to-num))
(defun letter-to-num (gene i idx)
  (declare (optimize (speed 3) (safety 0) (debug 0))
           (type fixnum i))
  (ecase (schar gene i)
    (#\A (ash idx 2))
    (#\C (logxor (ash idx 2) 1))
    (#\G (logxor (ash idx 2) 2))
    (#\T (logxor (ash idx 2) 3))))

(declaim (inline gene-to-num))
(defun gene-to-num (gene)
  (let ((gene-num 0))
    (dotimes (i (length gene))
      (declare (fixnum i))
      (setf gene-num (letter-to-num gene i gene-num)))
    gene-num))

(declaim (inline num-to-gene))
(defun num-to-gene (num k)
  (declare (optimize (speed 3) (safety 0) (debug 0))
           (type fixnum num)
           (type fixnum k))
  (let ((gene (make-array k :element-type 'base-char))
        (acgt #(#\A #\C #\G #\T)))
    (dotimes (i k)
      (let ((pos (* i 2)))
        (declare (type fixnum pos))
        (setf (aref gene (- k i 1))
              (aref acgt (+ (if (logbitp pos num) 1 0)
                            (if (logbitp (1+ pos) num) 2 0))))))
    gene))

(defun discard-data-header (stream)
  (loop for line = (read-line stream nil nil)
	while (and line (string/= ">THREE" line :end2 6))))

(defun read-data (stream)
  (declare (optimize (speed 3) (safety 0) (debug 0)))
  (let ((data (make-array 0 :element-type 'base-char :adjustable t :fill-pointer 0)))
    (do ((line (read-line stream nil 'eof) (read-line stream nil 'eof)))
        ((or (eq line 'eof) (char= (schar line 0) #\>)))
      (if (not (char= (schar line 0) #\;))
          (dotimes (char-index (length line))
            (vector-push-extend (char-upcase (the base-char (schar line char-index))) data))))
    (coerce data 'simple-base-string)))

(defun print-precentages (gene k ht)
  (declare (type simple-base-string gene)
	   (type fixnum k))
  (let* ((gene-table (slice-and-dice gene k ht))
	 sorted-frequencies
	 (sum 0))
    (maphash (lambda (key v)
	       (incf sum v)
	       (push (cons key v)
                     sorted-frequencies))
	     gene-table)
    (setf sorted-frequencies 
	  (sort sorted-frequencies
		(lambda (v1 v2)
                  (cond ((> (cdr v1) (cdr v2)) t)
                        ((< (cdr v1) (cdr v2)) nil)
                        (t (< (car v1) (car v2)))))))
    (dolist (entry sorted-frequencies)
      (format t "~a ~,3F~%"
	      (if (= k 1) (car entry)
                  (num-to-gene (car entry) k))
	      (* (/ (cdr entry) (float sum)) 100.0)))
    (terpri)))

(defun print-occurence (slice gene k ht)
  (let ((gene-table (slice-and-dice gene k ht)))
    (format t "~A~C~A~%"
            (gethash (gene-to-num slice) gene-table 0)
            #\Tab
            slice)))

(defun main ()
  (declare (optimize (speed 0) (safety 1)))
  (discard-data-header *standard-input*)
  (let ((gene (read-data *standard-input*))
        (ht (make-hash-table :rehash-size 5.0)))
    (print-precentages gene 1 ht)
    (print-precentages gene 2 ht)
    (print-occurence "GGT" gene 3 ht)
    (print-occurence "GGTA" gene 4 ht)
    (print-occurence "GGTATT" gene 6 ht)
    (print-occurence "GGTATTTTAATT" gene 12 ht)
    (print-occurence "GGTATTTTAATTTATAGT" gene 18 ht)))


(defun test ()
  (with-open-file (*standard-input* "knucleotide-input.txt"
				    :direction :input)
    (with-open-file (*standard-output* "knucleotide-output.txt"
				       :direction :output
				       :if-does-not-exist :create
				       :if-exists :supersede)
      (main))))
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
SBCL 2.4.8


 Fri, 06 Sep 2024 21:23:25 GMT

MAKE:
cp: 'knucleotide.sbcl-3.sbcl' and './knucleotide.sbcl-3.sbcl' are the same file
SBCL built with: /opt/src/sbcl-2.4.8/bin/sbcl --userinit /dev/null --batch --eval '(load "knucleotide.sbcl-3.sbcl_compile")'
### START knucleotide.sbcl-3.sbcl_compile
(handler-bind ((sb-ext:defconstant-uneql      (lambda (c) (abort c))))      (require :sb-concurrency)      (load (compile-file "knucleotide.sbcl-3.sbcl" ))) (save-lisp-and-die "sbcl.core" :purify t)
### END knucleotide.sbcl-3.sbcl_compile

; compiling file "/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/knucleotide.sbcl-3.sbcl" (written 26 APR 2018 12:48:18 PM):

; file: /home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/knucleotide.sbcl-3.sbcl
; in: DEFUN SLICE-AND-DICE
;     (EXPT 4 K)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 3) because:
;       The result is a (VALUES (OR (INTEGER 1 1) (INTEGER 4)) &OPTIONAL), not a (VALUES
;                                                                                 FIXNUM
;                                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 4) because:
;       The second argument is a (OR (INTEGER 2 9223372036854775806)
;                                    (INTEGER 0 0)), not a (UNSIGNED-BYTE 62).
;       The result is a (VALUES (OR (INTEGER 1 1) (INTEGER 4)) &OPTIONAL), not a (VALUES
;                                                                                 (UNSIGNED-BYTE
;                                                                                  64)
;                                                                                 &OPTIONAL).
;       etc.

;     (1- (EXPT 4 K))
; 
; note: forced to do GENERIC-- (cost 10)
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a (RATIONAL 0), not a FIXNUM.
;       The result is a (VALUES (INTEGER -1) &OPTIONAL), not a (VALUES FIXNUM
;                                                                      &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a (RATIONAL 0), not a FIXNUM.
;       The result is a (VALUES (INTEGER -1) &OPTIONAL), not a (VALUES FIXNUM
;                                                                      &OPTIONAL).
;       etc.

;     (LOGAND MASK (LETTER-TO-NUM GENE I GENE-NUM))
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a (INTEGER -1), not a FIXNUM.
;       The second argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline (unsigned-byte 64) arithmetic (cost 3) because:
;       The first argument is a (INTEGER -1), not a (UNSIGNED-BYTE 64).
;       The second argument is a INTEGER, not a (UNSIGNED-BYTE 64).
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES
;                                                          (UNSIGNED-BYTE 64)
;                                                          &OPTIONAL).
;       etc.

;     (EXPT 4 K)
; 
; note: doing unsigned word to integer coercion (cost 20), for:
;       the first result of inline ASH

; in: DECLAIM (INLINE LETTER-TO-NUM)
;     (DECLAIM (INLINE LETTER-TO-NUM))
; ==>
;   (SB-C::%PROCLAIM '(INLINE LETTER-TO-NUM) (SB-C:SOURCE-LOCATION))
; 
; caught STYLE-WARNING:
;   Proclaiming COMMON-LISP-USER::LETTER-TO-NUM to be INLINE, but 1 call to it was
;   previously compiled. A declaration of NOTINLINE at the call site will eliminate
;   this warning, as will proclaiming and defining the function before its first
;   potential use.

; in: DEFUN LETTER-TO-NUM
;     (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

;     (LOGXOR (ASH IDX 2) 1)
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

;     (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

;     (LOGXOR (ASH IDX 2) 2)
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

;     (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

;     (LOGXOR (ASH IDX 2) 3)
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; in: DEFUN GENE-TO-NUM
;     (LETTER-TO-NUM GENE I GENE-NUM)
; --> BLOCK ECASE LET COND IF 
; ==>
;   (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; --> BLOCK ECASE LET COND IF IF LOGXOR 
; ==>
;   (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; --> BLOCK ECASE LET COND IF IF 
; ==>
;   (LOGXOR (ASH IDX 2) 1)
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; --> BLOCK ECASE LET COND IF IF IF LOGXOR 
; ==>
;   (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; --> BLOCK ECASE LET COND IF IF IF 
; ==>
;   (LOGXOR (ASH IDX 2) 2)
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; --> BLOCK ECASE LET COND IF IF IF IF LOGXOR 
; ==>
;   (ASH IDX 2)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; --> BLOCK ECASE LET COND IF IF IF IF 
; ==>
;   (LOGXOR (ASH IDX 2) 3)
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.

; in: DEFUN PRINT-OCCURENCE
;     (GENE-TO-NUM SLICE)
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline ASH (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline ASH (cost 3) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; note: forced to do full call
;       unable to do inline fixnum arithmetic (cost 1) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       unable to do inline fixnum arithmetic (cost 2) because:
;       The first argument is a INTEGER, not a FIXNUM.
;       The result is a (VALUES INTEGER &OPTIONAL), not a (VALUES FIXNUM
;                                                                 &OPTIONAL).
;       etc.
; 
; compilation unit finished
;   caught 1 STYLE-WARNING condition
;   printed 25 notes


; wrote /home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/knucleotide.sbcl-3.fasl
; compilation finished in 0:00:00.172
### START knucleotide.sbcl-3.sbcl_run
(main) (quit)
### END knucleotide.sbcl-3.sbcl_run


2.61s to complete and log all make actions

COMMAND LINE:
 /opt/src/sbcl-2.4.8/bin/sbcl  --noinform --core sbcl.core --userinit /dev/null --load knucleotide.sbcl-3.sbcl_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT