The Q6600
Benchmarks Game

reverse-complement OCaml #3 program

source code

(* The Computer Language Benchmarks Game
 * https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 *
 * Contributed by Paolo Ribeca
 *)

let chars_per_line = 60
and lines_per_worker =
  match Sys.word_size with
  | 32 -> 200000
  | 64 -> 500000
  | _ -> assert false

let rc_table = String.make 256 '\000'
let _ =
  rc_table.[Char.code 'A'] <- 'T'; rc_table.[Char.code 'T'] <- 'A';
  rc_table.[Char.code 'w'] <- 'W'; rc_table.[Char.code 's'] <- 'S';
  rc_table.[Char.code 'a'] <- 'T'; rc_table.[Char.code 't'] <- 'A';
  rc_table.[Char.code 'C'] <- 'G'; rc_table.[Char.code 'G'] <- 'C';
  rc_table.[Char.code 'c'] <- 'G'; rc_table.[Char.code 'g'] <- 'C';
  rc_table.[Char.code 'U'] <- 'A'; rc_table.[Char.code 'u'] <- 'A';
  rc_table.[Char.code 'M'] <- 'K'; rc_table.[Char.code 'K'] <- 'M';
  rc_table.[Char.code 'm'] <- 'K'; rc_table.[Char.code 'k'] <- 'M';
  rc_table.[Char.code 'R'] <- 'Y'; rc_table.[Char.code 'Y'] <- 'R';
  rc_table.[Char.code 'r'] <- 'Y'; rc_table.[Char.code 'y'] <- 'R';
  rc_table.[Char.code 'W'] <- 'W'; rc_table.[Char.code 'S'] <- 'S';
  rc_table.[Char.code 'w'] <- 'W'; rc_table.[Char.code 's'] <- 'S';
  rc_table.[Char.code 'V'] <- 'B'; rc_table.[Char.code 'B'] <- 'V';
  rc_table.[Char.code 'v'] <- 'B'; rc_table.[Char.code 'b'] <- 'V';
  rc_table.[Char.code 'H'] <- 'D'; rc_table.[Char.code 'D'] <- 'H';
  rc_table.[Char.code 'h'] <- 'D'; rc_table.[Char.code 'd'] <- 'H';
  rc_table.[Char.code 'N'] <- 'N'; rc_table.[Char.code 'n'] <- 'N'

let _ =
  let aug_chars_per_line = chars_per_line + 1
  and in_ack, out_ack = Unix.pipe () and in_end, out_end = Unix.pipe ()
  and put out_pipe () =
    if Unix.write out_pipe " " 0 1 <> 1 then
      failwith "Pipe problem"
  and get in_pipe () =
    let res = " " in
    if Unix.read in_pipe res 0 1 <> 1 then
      failwith "Pipe problem" in
  let put_ack = put out_ack and get_ack = get in_ack
  and put_end_ack = put out_end and get_end_ack = get in_end in
  let rec spawn tag beg first =
    let output_tag () =
      print_string tag;
      print_char '\n';
      flush stdout
    and buf = String.create (lines_per_worker * chars_per_line + 2)
    and len = ref (String.length beg) in
    String.blit beg 0 buf 0 !len;
    let process_buffer () =
      let red_len = !len - 1 in
      let mid_point = red_len / 2 in
      for i = 0 to mid_point do
	let ri = red_len - i and tmp = buf.[i] in
	buf.[i] <- rc_table.[Char.code buf.[ri]];
	buf.[ri] <- rc_table.[Char.code tmp]
      done
    and write_by_cols rem eol =
      let len = !len and dne = ref 0 in
      if rem > 0 then begin
	let to_do = min rem (len - !dne) in
	output stdout buf !dne to_do;
	output_char stdout '\n';
	dne := !dne + to_do
      end;
      while len - !dne >= chars_per_line do
	output stdout buf !dne chars_per_line;
	output_char stdout '\n';
	dne := !dne + chars_per_line
      done;
      let rem = len - !dne in
      if rem > 0 then begin
	output stdout buf !dne rem;
	if eol then
	  output_char stdout '\n'
      end;
      flush stdout;
      if eol then
	0
      else
	rem in
    try
      for i = 2 to lines_per_worker do
	really_input stdin buf !len aug_chars_per_line;
	let new_len = ref (!len + chars_per_line) in
	if buf.[!len] = '>' || buf.[!new_len] <> '\n' then begin
	  while buf.[!len] <> '>' do
	    incr len
	  done;
	  let ptr = ref !len in
	  (* Needed to patch the hideous bug in the output of the C program *)
	  if buf.[!len - 1] <> '\n' then begin
	    String.blit buf !len buf (!len + 1) aug_chars_per_line;
	    buf.[!len] <- '\n';
	    incr new_len;
	    incr ptr
	  end else
	    decr len;
	  while !ptr < !new_len && buf.[!ptr] <> '\n' do
	    incr ptr
	  done;
	  match Unix.fork () with
	  | 0 ->
	      let aug_len = !len + 1 in
	      if !ptr = !new_len then
		spawn
		  (String.sub buf
		    aug_len (!new_len - aug_len) ^ input_line stdin)
		  "" true
	      else
		let aug_ptr = !ptr + 1 in
		spawn
		  (String.sub buf aug_len (!ptr - aug_len))
		  (String.sub buf aug_ptr (!new_len - !ptr) ^ input_line stdin)
		  true
	  | _ ->
	      get_ack ();
	      output_tag ();
	      process_buffer ();
	      let rem = write_by_cols 0 first in
	      if first then
		put_ack ();
	      exit rem
	end;
	len := !new_len
      done;
      match Unix.fork () with
      | 0 -> spawn tag "" false
      | pid ->
	  process_buffer ();
	  match Unix.waitpid [] pid with
	  | _, Unix.WEXITED rem ->
	      let rem = write_by_cols (chars_per_line - rem) first in
	      if first then
		put_ack ();
	      exit rem
	  | _ -> assert false
    with End_of_file ->
      while buf.[!len] <> '\n' do
	incr len
      done;
      get_ack ();
      put_end_ack ();
      output_tag ();
      process_buffer ();
      let rem = write_by_cols 0 first in
      if first then
	put_ack ();
      exit rem in
  match Unix.fork () with
  | 0 ->
      put_ack ();
      spawn (read_line ()) "" true
  | _ ->
      get_end_ack ();
      get_ack ();
      exit 0
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
The OCaml native-code compiler, version 4.10.0


Tue, 05 May 2020 23:06:39 GMT

MAKE:
mv revcomp.ocaml-3.ocaml revcomp.ocaml-3.ml
/opt/src/ocaml-4.10.0/bin/ocamlopt -noassert -unsafe -fPIC -nodynlink -inline 100 -O3 unix.cmxa -ccopt -march=core2 revcomp.ocaml-3.ml -o revcomp.ocaml-3.ocaml_run
File "revcomp.ocaml-3.ml", line 16, characters 2-33:
16 |   rc_table.[Char.code 'A'] <- 'T'; rc_table.[Char.code 'T'] <- 'A';
       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Alert deprecated: Stdlib.String.unsafe_set
File "revcomp.ocaml-3.ml", line 16, characters 2-10:
16 |   rc_table.[Char.code 'A'] <- 'T'; rc_table.[Char.code 'T'] <- 'A';
       ^^^^^^^^
Error: This expression has type string but an expression was expected of type
         bytes
make: [/home/dunham/8000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:320: revcomp.ocaml-3.ocaml_run] Error 2 (ignored)
rm revcomp.ocaml-3.ml

1.00s to complete and log all make actions

COMMAND LINE:
./revcomp.ocaml-3.ocaml_run 0 < revcomp-input250000.txt

MAKE ERROR