reverse-complement OCaml #4 program
source code
(* The Computer Language Benchmarks Game
* https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
contributed by Ingo Bormuth
modified by Fabrice Le Fessant
*)
let verbose = false
let rec print_args list =
match list with
[] -> ()
| x :: tail ->
Printf.fprintf stderr " %d" x;
print_args tail
let enter name list =
if verbose then begin
Printf.fprintf stderr "%s:" name;
print_args list;
Printf.fprintf stderr "\n%!"
end
let arch64 =
match Sys.word_size with
32 -> false
| 64 -> true
| _ -> assert false
module LineReader : sig
(* read all non empty lines *)
val read :
(* from : *) Unix.file_descr ->
(* max_line_length : *) int ->
(* handler : *) (string -> int -> int -> unit) -> unit
end = struct
let read ic maxlen handler =
let s = String.create maxlen in
let rec iter begin_pos pos =
let to_read = maxlen - pos in
if to_read < 32000 then begin
let len = pos - begin_pos in
String.blit s begin_pos s 0 len;
iter 0 len
end else
let nread = Unix.read ic s pos to_read in
if nread = 0 then raise End_of_file;
let end_pos = pos + nread in
iter2 begin_pos pos end_pos
and iter2 begin_pos pos end_pos =
if pos = end_pos then
iter begin_pos end_pos
else
match s.[pos] with
'\n' | '\r' ->
if pos > begin_pos then
handler s begin_pos (pos - begin_pos);
iter2 (pos+1) (pos+1) end_pos
| _ ->
iter2 begin_pos (pos+1) end_pos
in
iter 0 0
end
let t = String.make 256 ' '
let b = String.make 61 '\n'
let bi = ref 1
let _ =
String.blit "TVGHEFCDIJMLKNOPQYSAABWXRZ" 0 t 65 26;
String.blit t 65 t 97 26
;;
let t =
let s = Array.create 256 ' ' in
for i = 0 to 255 do
s.(i) <- t.[i]
done;
s
;;
module Fasta : sig
val clear : unit -> unit
val flush : unit -> unit
val print : string -> int -> int -> unit
end = struct
let printed = ref 0
let clear () = printed := 0
let flush () =
if !printed > 0 then print_newline ();
printed := 0
let rec print s pos len =
if len > 60 then begin
output stdout s pos 60;
output_char stdout '\n';
print s (pos + 60) (len-60)
end else
if len > 0 then
begin
output stdout s pos len;
printed := len
end
let print s pos len =
let to_print = 60 - !printed in
if len < to_print then begin
output stdout s pos len;
printed := !printed + len
end else begin
output stdout s pos to_print;
output_char stdout '\n';
printed := 0;
print s (pos + to_print) (len - to_print);
end
end
module IMPLEMENTATION32 : sig
val main : unit -> unit
end = struct
module BigRevBuffer : sig
val clear : unit -> unit
val length : unit -> int
val add : string -> int -> int -> unit
(* val iter : (string -> int -> int -> unit) -> unit *)
val reverse_iter : unit -> unit
end = struct
(* don't allocate any buffers on x64 *)
let nbuffers = if arch64 then 0 else 256
let buffer_len = 1_000_000
let buffers = Array.init nbuffers (fun _ -> String.create buffer_len)
let buffer_pos = Array.create nbuffers buffer_len
let last_buffer = ref 0
let clear () =
last_buffer := 0;
for i = 0 to 255 do
buffer_pos.(i) <- buffer_len;
done
let length () =
!last_buffer * buffer_len + (buffer_len - buffer_pos.(!last_buffer))
let rec blit_rev src end_pos dst dpos len =
if len > 0 then begin
dst.[dpos] <- t.(Char.code src.[end_pos]);
blit_rev src (end_pos-1) dst (dpos+1) (len-1)
end
let blit_rev src spos dst dpos len =
let end_pos = spos + len - 1 in
blit_rev src end_pos dst dpos len
let rec add s pos len =
if len > 0 then
let b = buffers.(!last_buffer) in
let bpos = buffer_pos.(!last_buffer) in
if bpos > len then begin
let new_pos = bpos - len in
blit_rev s pos b new_pos len;
buffer_pos.(!last_buffer) <- new_pos
end else begin
blit_rev s pos b 0 bpos;
buffer_pos.(!last_buffer) <- 0;
incr last_buffer;
add s (pos + bpos) (len - bpos)
end
(*
let iter f =
let rec iter f i last_buffer =
if i < last_buffer then begin
f buffers.(i) 0 buffer_len;
iter f (i+1) last_buffer
end
else
let pos = buffer_pos.(last_buffer) in
f buffers.(last_buffer) pos (buffer_len - pos)
in
iter f 0 !last_buffer
let rev_iter f =
let rec iter f i =
if i >= 0 then begin
f buffers.(i) 0 buffer_len;
iter f (i-1)
end
in
let pos = buffer_pos.(!last_buffer) in
f buffers.(!last_buffer) pos (buffer_len - pos);
iter f (!last_buffer-1)
*)
let reverse_iter f =
let rec iter i =
if i >= 0 then begin
Fasta.print buffers.(i) 0 buffer_len;
iter (i-1)
end
in
let pos = buffer_pos.(!last_buffer) in
if pos < buffer_len then
Fasta.print buffers.(!last_buffer) pos (buffer_len - pos);
iter (!last_buffer-1)
end
let reverse () =
if BigRevBuffer.length () > 0 then begin
Fasta.clear ();
BigRevBuffer.reverse_iter ();
BigRevBuffer.clear ();
Fasta.flush ()
end
let main () =
try
LineReader.read Unix.stdin 1_000_000
(fun s pos len ->
if s.[pos] = '>' then begin
reverse ();
output stdout s pos len;
output_char stdout '\n';
end else
BigRevBuffer.add s pos len
)
with End_of_file -> reverse ()
| e ->
Printf.fprintf stderr "exception %s\n%!" (Printexc.to_string e);
exit 2
end
module IMPLEMENTATION64 : sig
val main : unit -> unit
end = struct
let buffer_len = 150_000_000
let buffer = if arch64 then String.create buffer_len else ""
let buffer_pos = ref buffer_len
let wait_for = ref None
let reverse () =
begin
match !wait_for with
None -> ()
| Some ix ->
let s = String.create 1 in
ignore (Unix.read ix s 0 1)
end;
let len = buffer_len - !buffer_pos in
if len > 0 then begin
Fasta.clear ();
Fasta.print buffer !buffer_pos len;
Fasta.flush ();
buffer_pos := buffer_len;
end
let maxlen = 10_000_000
let inbuf = String.create maxlen
let rec iter1 begin_pos () pos =
let to_read = maxlen - pos in
if to_read < 32_000 then begin
let len = pos - begin_pos in
String.blit inbuf begin_pos inbuf 0 len;
iter1 0 () len
end else
let nread = Unix.read Unix.stdin inbuf pos to_read in
if nread = 0 then raise End_of_file;
let end_pos = pos + nread in
iter2 begin_pos pos end_pos
and iter2 begin_pos pos end_pos =
if pos = end_pos then
iter1 begin_pos () end_pos
else
match inbuf.[pos] with
'\n' ->
iter2 (pos+1) (pos+1) end_pos
| '>' ->
iter4 begin_pos (pos+1) end_pos
| c ->
let c = t.(Char.code c) in
decr buffer_pos;
buffer.[!buffer_pos] <- c;
iter2 begin_pos (pos+1) end_pos
and iter3 begin_pos () pos =
let to_read = maxlen - pos in
if to_read < 32000 then begin
let len = pos - begin_pos in
String.blit inbuf begin_pos inbuf 0 len;
iter3 0 () len
end else
let nread = Unix.read Unix.stdin inbuf pos to_read in
if nread = 0 then raise End_of_file;
let end_pos = pos + nread in
iter4 begin_pos pos end_pos
and iter4 begin_pos pos end_pos =
if pos = end_pos then
iter3 begin_pos () end_pos
else
match inbuf.[pos] with
'\n' | '\r' ->
if pos > begin_pos then begin
if !buffer_pos < buffer_len then begin
let (ix, ox) = Unix.pipe () in
match Unix.fork () with
| -1 -> assert false
| 0 ->
reverse ();
output stdout inbuf begin_pos (pos - begin_pos);
output_char stdout '\n';
ignore (Unix.write ox "X" 0 1);
Unix.close ox;
exit 0;
| _ ->
wait_for := Some ix;
buffer_pos := buffer_len;
end else begin
output stdout inbuf begin_pos (pos - begin_pos);
output_char stdout '\n';
flush stdout;
end
end;
iter2 (pos+1) (pos+1) end_pos
| _ ->
iter4 begin_pos (pos+1) end_pos
let read () =
iter1 0 () 0
let main () =
enter "main64" [];
try
read ()
with End_of_file -> reverse ()
| e ->
Printf.fprintf stderr "exception %s\n%!" (Printexc.to_string e);
exit 2
end
let _ =
if arch64 then
IMPLEMENTATION64.main ()
else
IMPLEMENTATION32.main ()
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
The OCaml native-code compiler, version 4.10.0
Tue, 05 May 2020 23:16:38 GMT
MAKE:
mv revcomp.ocaml-4.ocaml revcomp.ocaml-4.ml
/opt/src/ocaml-4.10.0/bin/ocamlopt -noassert -unsafe -fPIC -nodynlink -inline 100 -O3 unix.cmxa -ccopt -march=core2 revcomp.ocaml-4.ml -o revcomp.ocaml-4.ocaml_run
File "revcomp.ocaml-4.ml", line 42, characters 12-25:
42 | let s = String.create maxlen in
^^^^^^^^^^^^^
Alert deprecated: Stdlib.String.create
Use Bytes.create instead.
File "revcomp.ocaml-4.ml", line 48, characters 17-18:
48 | String.blit s begin_pos s 0 len;
^
Error: This expression has type bytes but an expression was expected of type
string
make: [/home/dunham/8000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:320: revcomp.ocaml-4.ocaml_run] Error 2 (ignored)
rm revcomp.ocaml-4.ml
3.93s to complete and log all make actions
COMMAND LINE:
./revcomp.ocaml-4.ocaml_run 0 < revcomp-input250000.txt
MAKE ERROR