k-nucleotide OCaml #2 program
source code
(* The Computer Language Benchmarks Game
* https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
*
* contributed by Troestler Christophe
* modified by Mauricio Fernandez
*)
module C(S : sig
val k : int
val dna : string
end) =
struct
let dna, k = S.dna, S.k
module K = struct
type t = int
let equal k1 k2 =
let rec cmp n ka kb =
if n = 0 then true
else if dna.[ka] = dna.[kb] then cmp (n - 1) (ka + 1) (kb + 1)
else false
in cmp k k1 k2
let hash n =
let h = ref 0 in
for i = n to n + k - 1 do h := !h * 5 + Char.code dna.[i] done;
!h
end
let c = 0x40000
include Hashtbl.Make(K)
let h = create c
let count () =
for i = 0 to String.length dna - k - 1 do
try incr (find h i) with Not_found -> add h i (ref 1)
done
let compare_freq ((k1:string),(f1:float)) (k2, f2) =
if f1 > f2 then -1 else if f1 < f2 then 1 else String.compare k1 k2
let write_frequencies () =
count ();
let tot = float(fold (fun _ n t -> !n + t) h 0) in
let frq =
fold (fun off n l ->
(String.sub dna off k, 100. *. float !n /. tot) :: l) h [] in
let frq = List.sort compare_freq frq in
String.concat ""
(List.map (fun (k,f) -> Printf.sprintf "%s %.3f\n" k f) frq)
let write_count seq =
assert (String.length seq = k);
count ();
String.blit seq 0 dna 0 k;
Printf.sprintf "%d\t%s" (try !(find h 0) with Not_found -> 0) seq
end
(* Extract DNA sequence "THREE" from stdin *)
let dna_three =
let is_not_three s = try String.sub s 0 6 <> ">THREE" with _ -> true in
while is_not_three(input_line stdin) do () done;
let buf = Buffer.create 1000 in
(* Skip possible comment *)
(try while true do
let line = input_line stdin in
if line.[0] <> ';' then
(Buffer.add_string buf (String.uppercase line); raise Exit)
done with _ -> ());
(* Read the DNA sequence *)
(try while true do
let line = input_line stdin in
if line.[0] = '>' then raise End_of_file;
Buffer.add_string buf (String.uppercase line)
done with End_of_file -> ());
Buffer.contents buf
let invoke (f : 'a -> 'b) x : unit -> 'b =
let input, output = Unix.pipe() in
match Unix.fork() with
| -1 -> Unix.close input; Unix.close output; (let v = f x in fun () -> v)
| 0 ->
Unix.close input;
let output = Unix.out_channel_of_descr output in
Marshal.to_channel output (try `Res(f x) with e -> `Exn e) [];
close_out output;
exit 0
| pid ->
Unix.close output;
let input = Unix.in_channel_of_descr input in fun () ->
let v = Marshal.from_channel input in
ignore (Unix.waitpid [] pid);
close_in input;
match v with `Res x -> x | `Exn e -> raise e
let parallelize f l =
List.iter (fun g -> print_endline (g ())) (List.map (invoke f) l)
let () =
parallelize
(fun i ->
let module M = C(struct let k = i let dna = dna_three end) in
M.write_frequencies ()) [1; 2];
parallelize
(fun k ->
let module M = C(struct let k = String.length k let dna = dna_three end) in
M.write_count k)
["GGT"; "GGTA"; "GGTATT"; "GGTATTTTAATT"; "GGTATTTTAATTTATAGT"]
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
The OCaml native-code compiler, version 4.10.0
Tue, 05 May 2020 22:35:21 GMT
MAKE:
mv knucleotide.ocaml-2.ocaml knucleotide.ocaml-2.ml
/opt/src/ocaml-4.10.0/bin/ocamlopt -noassert -unsafe -fPIC -nodynlink -inline 100 -O3 unix.cmxa -ccopt -march=core2 knucleotide.ocaml-2.ml -o knucleotide.ocaml-2.ocaml_run
File "knucleotide.ocaml-2.ml", line 55, characters 22-25:
55 | String.blit seq 0 dna 0 k;
^^^
Error: This expression has type string but an expression was expected of type
bytes
make: [/home/dunham/8000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:320: knucleotide.ocaml-2.ocaml_run] Error 2 (ignored)
rm knucleotide.ocaml-2.ml
0.99s to complete and log all make actions
COMMAND LINE:
./knucleotide.ocaml-2.ocaml_run 0 < knucleotide-input250000.txt
MAKE ERROR