regex-redux Lisp SBCL #3 program
source code
;;; The Computer Language Benchmarks Game
;;; https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
;;;
;;; regex-dna program contributed by: Witali Kusnezow 2009-03-02
;;; converted from regex-dna program
(eval-when (:compile-toplevel :load-toplevel :execute)
(require :asdf)
(require :cl-ppcre)
#+sb-thread
(progn
(define-alien-routine sysconf long (name int))
(use-package :sb-thread)))
(eval-when (:compile-toplevel)
(setf cl-ppcre:*regex-char-code-limit* 128))
(defconstant +regex-list+
'("agggtaaa|tttaccct"
"[cgt]gggtaaa|tttaccc[acg]"
"a[act]ggtaaa|tttacc[agt]t"
"ag[act]gtaaa|tttac[agt]ct"
"agg[act]taaa|ttta[agt]cct"
"aggg[acg]aaa|ttt[cgt]ccct"
"agggt[cgt]aa|tt[acg]accct"
"agggta[cgt]a|t[acg]taccct"
"agggtaa[cgt]|[acg]ttaccct"))
(defconstant +alternatives+
'(("tHa[Nt]" "<4>") ("aND|caN|Ha[DS]|WaS" "<3>")
("a[NSt]|BY" "<2>") ("<[^>]*>" "|")
("\\|[^|][^|]*\\|" "-")))
#+sb-thread
(progn
(defconstant +cpu-count+ (sysconf 84))
(defvar *mutex* (make-mutex))
(defvar *aux-mutex* (make-mutex))
(defmacro bg (&body body) `(make-thread (lambda () ,@body)))
(defmacro join-all (&body body)
`(mapcar
#'join-thread
(loop for item in (list ,@body)
append (if (consp item) item (list item))))))
(defun read-all
(stream &aux (buf-size (* 1024 1024))
(size 0)
(buf-list
(loop
for buf = (make-string buf-size :element-type 'base-char)
for len = (read-sequence buf stream)
do (incf size len)
collect (if (< len buf-size) (subseq buf 0 len) buf)
while (= len buf-size))))
(declare (type fixnum size))
(loop with res-string = (make-string size :element-type 'base-char)
with i of-type fixnum = 0
for str in buf-list
do (setf (subseq res-string i) (the simple-base-string str))
(incf i (length (the simple-base-string str)))
finally (return res-string)))
(defun length-to-replace (match)
(loop for x in match
sum (- (the fixnum (cdr x))
(the fixnum (car x))) of-type fixnum))
(defun replace-aux
(match replacement target-string result-string
&key (match-begin 0) (match-end -1)
(match-length (length match))
&aux
(len (length replacement))
(first-match (if (zerop match-begin) '(0 . 0) (nth (1- match-begin) match)))
(target-start (cdr first-match))
(result-start (+ (the fixnum (* len match-begin))
(- target-start
(the fixnum (length-to-replace (subseq match 0 match-begin)))))))
(declare (type fixnum match-begin match-end match-length target-start result-start len)
(type list match)
(type simple-base-string result-string target-string)
(type vector replacement))
(loop with (i j) of-type fixnum = (list result-start target-start)
with mmatch = (if (> match-begin match-end)
match (subseq match match-begin match-end))
for pair in mmatch
do (setf (subseq result-string i) (subseq target-string j (car pair))
i (+ i (- (the fixnum (car pair)) j))
(subseq result-string i) replacement
j (cdr pair)
i (+ i len))
finally (if (or (minusp match-end) (<= match-length match-end))
(setf (subseq result-string i ) (subseq target-string j))))
nil)
#+sb-thread
(defun parts
(parts-num len
&aux
(ranges (loop with (step rest) of-type fixnum = (multiple-value-list (floor len parts-num))
with i of-type fixnum = 0 while (< i len)
collect i into res of-type fixnum
do (incf i step)(if (plusp rest) (progn (incf i) (decf rest)) )
finally (return (append res (list len))))
))
(declare (type fixnum len parts-num)
(type list ranges))
(mapcar #'cons ranges (subseq ranges 1)))
(defun replace-all
(regexp replacement target-string
&aux (rmatch '()) (match '())
(result-string (make-string 0 :element-type 'base-char)))
(declare (type simple-base-string result-string target-string)
(type vector replacement))
(cl-ppcre:do-scans
(match-start match-end reg-starts reg-ends regexp target-string nil)
(push (cons match-start match-end) rmatch))
(if rmatch
(progn
(setf match (reverse rmatch)
result-string (make-string
(+ (- (length target-string)
(length-to-replace match))
(the fixnum (* (length replacement)
(length match)))) :element-type 'base-char))
#-sb-thread
(replace-aux match replacement target-string result-string)
#+sb-thread
(mapcar #'join-thread
(loop with len of-type fixnum = (length match)
with parts-list = (parts +cpu-count+ len)
with current of-type fixnum = 0
repeat +cpu-count+
collect
(bg (let (range)
(with-mutex (*mutex*)
(setf range (nth current parts-list))
(incf current))
(replace-aux match replacement target-string result-string
:match-begin (car range) :match-end (cdr range)
:match-length len)))))
result-string)
target-string))
(defun main (&optional (stream *standard-input*)
&aux (sequence (read-all stream))
(size (length sequence)))
(declare (type simple-base-string sequence))
(setf sequence (replace-all ">[^\\n]*\\n|\\n" "" sequence))
#-sb-thread
(progn
(loop for regex in +regex-list+ do
(format t "~a ~a~%" regex
(/ (length
(the list
(cl-ppcre:all-matches regex sequence))) 2)))
(format t "~%~a~%~a~%" size (length sequence))
(loop for pair in +alternatives+ do
(setf sequence (replace-all (car pair) (cadr pair) sequence )))
(format t "~a~%" (length sequence)))
#+sb-thread
(let* ((len (length +regex-list+))
(result (make-list (1+ len))))
(join-all
(loop with idx of-type fixnum = 0
repeat len
collect
(bg (let (reg cur)
(with-mutex (*aux-mutex*)
(setf cur idx reg (nth cur +regex-list+))
(incf idx))
(setf (nth cur result)
(format nil "~a ~a" reg
(/ (length
(the list
(cl-ppcre:all-matches reg sequence))) 2))))))
(bg (loop with seq = (copy-seq sequence)
for pair in +alternatives+ do
(setf seq (replace-all (car pair) (cadr pair) seq ))
finally (setf (nth len result)
(format nil "~%~a~%~a~%~a" size (length sequence) (length seq))))))
(format t "~{~a~%~}" result))
)
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
SBCL 2.0.4
Fri, 22 May 2020 00:24:22 GMT
MAKE:
cp: 'regexredux.sbcl-3.sbcl' and './regexredux.sbcl-3.sbcl' are the same file
SBCL built with: /usr/local/bin/sbcl --userinit /dev/null --batch --eval '(load "regexredux.sbcl-3.sbcl_compile")'
### START regexredux.sbcl-3.sbcl_compile
(handler-bind ((sb-ext:defconstant-uneql (lambda (c) (abort c)))) (require :sb-concurrency) (load (compile-file "regexredux.sbcl-3.sbcl" ))) (save-lisp-and-die "sbcl.core" :purify t)
### END regexredux.sbcl-3.sbcl_compile
; compiling file "/home/dunham/8000-benchmarksgame/bench/regexredux/regexredux.sbcl-3.sbcl" (written 20 MAY 2020 03:20:36 PM):
; compiling (REQUIRE :ASDF)
; compiling (REQUIRE :CL-PPCRE)While evaluating the form starting at line 1, column 0
of #P"/home/dunham/benchmarksgame_quadcore/regexredux/tmp/regexredux.sbcl-3.sbcl_compile":
debugger invoked on a SB-INT:EXTENSION-FAILURE in thread
#<THREAD "main thread" RUNNING {1000518083}>:
Don't know how to REQUIRE CL-PPCRE.
See also:
The SBCL Manual, Variable *MODULE-PROVIDER-FUNCTIONS*
The SBCL Manual, Function REQUIRE
;
; compilation unit aborted
; caught 1 fatal ERROR condition
; compilation aborted after 0:03:49.463
### START regexredux.sbcl-3.sbcl_run
(main) (quit)
### END regexredux.sbcl-3.sbcl_run
231.75s to complete and log all make actions
COMMAND LINE:
/usr/local/bin/sbcl --noinform --core sbcl.core --userinit /dev/null --load regexredux.sbcl-3.sbcl_run 0 < regexredux-input50000.txt
PROGRAM FAILED
PROGRAM OUTPUT:
could not open file "sbcl.core"
open: No such file or directory