The Computer Language
25.03 Benchmarks Game

k-nucleotide Chapel #4 program

source code

/* The Computer Language Benchmarks Game

   contributed by Ben McDonald
   derived from the Chapel #3 version by Ben Harshbarger and Brad Chamberlain
*/

use IO, Map, Sort;

// Length of the byte buffer 'main' uses while scanning input lines for
// the '>TH' header. As a 'config param' it can be overridden at compile
// time.
// NOTE(review): 61 presumably covers a 60-character line plus its
// newline -- confirm against the input format.
config param columns = 61;

// Program entry point.
//
// Reads the input on file descriptor 0, skips ahead to the line that
// begins with '>TH', loads every following line into one byte array,
// upper-cases it, and prints the frequency tables for 1- and
// 2-nucleotide sequences followed by the counts of five fixed sequences.
//
// args: command-line arguments (not used by this routine).
proc main(args: [] string) {
  // Create a non-locking version of 'stdin' and query its size
  const consoleIn = new file(0),
        fileLen = consoleIn.size,
        stdin = consoleIn.reader(locking=false);

  // Read line-by-line until we see a line beginning with '>TH'
  var buff: [1..columns] uint(8),
      lineSize = 0,
      numRead = 0;

  do {
    lineSize = stdin.readLine(buff);
    numRead += lineSize;
  } while lineSize > 0 && !startsWithThree(buff);

  // Read in the rest of the file. The array is sized for everything that
  // has not been consumed yet, which is an upper bound because newlines
  // are stripped as the lines are stored.
  var dataDom = {1..fileLen-numRead},
      data: [dataDom] uint(8),
      idx = 1;

  do {
    lineSize = stdin.readLine(data[idx..], stripNewline=true);
    idx += lineSize;
  } while lineSize > 0;

  // Resize our array to the amount actually read.
  // NOTE(review): 'idx' starts at 1, so assuming readLine returns the
  // number of elements it stored, this keeps one slot more than was
  // filled; the helpers use 'data.size - nclSize' as their upper bound,
  // which looks consistent with that -- confirm before changing either.
  dataDom = {1..idx};

  // Make everything uppercase. The offset is subtracted from every
  // element unconditionally, so the data is assumed to be lowercase.
  forall d in data do
    d -= ("a".toByte() - "A".toByte());

  writeFreqs(data, 1);
  writeFreqs(data, 2);
  writeCount(data, "GGT");
  writeCount(data, "GGTA");
  writeCount(data, "GGTATT");
  writeCount(data, "GGTATTTTAATT");
  writeCount(data, "GGTATTTTAATTTATAGT");
}

// Print the relative frequency of every sequence of length 'nclSize'
// found in 'data', one per line, in order of decreasing frequency, and
// finish the table with a blank line.
//
// data:    the array of sequence bytes produced by 'main'
// nclSize: sequence length (compile-time constant)
proc writeFreqs(data, param nclSize) {
  const freqs = calculate(data, nclSize);

  // create an array of (frequency, sequence) tuples; the count comes
  // first so that sorting the tuples orders them by frequency
  var arr = for (s,f) in zip(freqs.keys(), freqs.values()) do (f,s.val);

  // print the array, sorted by decreasing frequency, as a percentage
  // with three digits after the decimal point
  sort(arr, new reverseComparator());
  for (f, s) in arr do
    writef("%s %.3dr\n", decode(s, nclSize),
           (100.0 * f) / (data.size - nclSize));

  // blank line separating this table from whatever is printed next
  writeln();
}

// Print how many times the sequence 'str' occurs in 'data', as
// "<count>\t<sequence>".
//
// data: the array of sequence bytes produced by 'main'
// str:  the sequence to count (compile-time string)
proc writeCount(data, param str) {
  const strBytes = str.bytes(),
        // count every sequence that has the same length as 'str'
        freqs = calculate(data, str.numBytes),
        // encode 'str' itself to obtain the key to look up
        d = hash(strBytes, strBytes.domain.low, str.numBytes);

  // a sequence that never occurs is reported with a count of 0
  writeln(freqs.get(d, 0), "\t", decode(d.val, str.numBytes));
}

// Count the occurrences of every sequence of length 'nclSize' in 'data'.
//
// data:    the array of sequence bytes produced by 'main'
// nclSize: sequence length (compile-time constant)
//
// Returns a map from the encoded sequence ('hashVal') to its count.
proc calculate(data, param nclSize) {
  var freqs = new map(hashVal, int);

  // sync variable used as a lock around the shared map; it starts full,
  // i.e. available
  var lock: sync bool = true;
  const numTasks = here.maxTaskPar;
  coforall tid in 1..numTasks with (ref freqs) {
    // each task accumulates into its own map first
    var myFreqs = new map(hashVal, int);

    // task 'tid' handles start positions tid, tid+numTasks, ...
    for i in tid..(data.size - nclSize) by numTasks do
      myFreqs[hash(data, i, nclSize)] += 1;

    // merge this task's counts into the shared map, one task at a time
    lock.readFE();      // acquire lock
    for (k,v) in zip(myFreqs.keys(), myFreqs.values()) do
      freqs[k] += v;
    lock.writeEF(true); // release lock
  }

  return freqs;
}

// Lookup tables for the 2-bit nucleotide encoding.

// code (0..3) -> one-letter string
const toChar: [0..3] string = ["A", "C", "T", "G"];

// byte value -> code; slots that are not assigned below keep the
// default value 0
var toNum: [0..127] int;

// fill in the inverse mapping for the four letters in 'toChar'
forall (code, letter) in zip(toChar.domain, toChar) do
  toNum[letter.toByte()] = code;

// Turn an encoded sequence back into its string form.
//
// data:    the encoded value, two bits per nucleotide with the last
//          nucleotide in the lowest bits; taken by 'in' intent so the
//          local copy can be shifted without affecting the caller
// nclSize: number of nucleotides to decode (compile-time constant)
//
// Returns the decoded string of length 'nclSize'.
inline proc decode(in data, param nclSize) {
  var ret: string;

  for i in 1..nclSize {
    // the lowest two bits hold the rightmost remaining nucleotide, so
    // each decoded letter is prepended to the result
    ret = toChar[(data & 3)] + ret;
    data >>= 2;
  }

  return ret;
}

// Encode 'size' consecutive bytes of 'str', starting at index 'beg',
// into a single integer using two bits per byte (via 'toNum').
//
// str:  indexable collection of byte values
// beg:  index of the first byte to encode
// size: number of bytes to encode (compile-time constant)
//
// Returns the encoded value wrapped in a 'hashVal'.
inline proc hash(str, beg, param size) {
  var data = 0;

  for i in 0..size-1 {
    // make room for the next code, then store it in the low two bits
    data <<= 2;
    data |= toNum[str[beg+i]];
  }

  return new hashVal(data);
}

// Report whether the bytes at indices 1..3 of 'data' spell ">TH".
//
// data: indexable collection of byte values with at least indices 1..3
//
// Returns true when the three bytes match, false otherwise.
inline proc startsWithThree(data) {
  return data[1] == ">".toByte() &&
         data[2] == "T".toByte() &&
         data[3] == "H".toByte();
}

// Map key wrapping an encoded nucleotide sequence.
record hashVal: hashable {
  // the encoded sequence, as produced by 'hash'
  var val: int;

  // Hash function for the 'hashable' interface: the encoded value
  // itself, cast to uint, serves as the hash.
  proc hash() {
    return val: uint;
  }
}


notes, command-line, and program output

64-bit Ubuntu quad core
chpl version  2.3.0
built with LLVM version 19.1.1
Copyright 2020-2024
Hewlett Packard
Enterprise Development LP
Copyright 2004-2019 Cray Inc.

 Sun, 23 Feb 2025 17:54:09 GMT

mv knucleotide.chapel-4.chapel knucleotide.chapel-4.chpl
/opt/src/chapel-2.3.0/bin/linux64-x86_64/chpl --fast knucleotide.chapel-4.chpl -o knucleotide.chapel-4.chapel_run
rm knucleotide.chapel-4.chpl

28.96 seconds to complete and log all make actions

 ./knucleotide.chapel-4.chapel_run --n=0 < knucleotide-input25000000.txt

A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT