source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
contributed by Ben McDonald
derived from the Chapel #3 version by Ben Harshbarger and Brad Chamberlain
*/
use IO, Map, Sort;
// Size, in bytes, of the line buffer main() uses while scanning for the
// '>TH' header line. Declared 'config param', so it can be overridden at
// compile time.
// NOTE(review): 61 presumably corresponds to 60 sequence characters plus a
// newline — confirm against the input format.
config param columns = 61;
// Program entry point: reads stdin, skips everything up to and including the
// first line that starts with ">TH", loads the remaining bytes as one
// contiguous sequence, and prints the 1- and 2-nucleotide frequency tables
// followed by the occurrence counts of five fixed patterns.
//
// args: command-line arguments; not used by this procedure.
proc main(args: [] string) {
  // Open stdin and a binary reader channel
  const consoleIn = openfd(0),
        fileLen = consoleIn.size,
        stdinNoLock = consoleIn.reader(kind=ionative, locking=false);

  // Read line-by-line until we see a line beginning with '>TH'
  var buff: [1..columns] uint(8),
      lineSize = 0,
      numRead = 0;

  while stdinNoLock.readline(buff, lineSize) && !startsWithThree(buff) do
    numRead += lineSize;

  // Read in the rest of the file. The array is sized for everything that
  // has not been counted in 'numRead' yet, which is an upper bound on what
  // is left to read.
  var dataDom = {1..fileLen-numRead},
      data: [dataDom] uint(8),
      idx = 1;

  // Each line is read starting at 'idx'; advancing by one less than the
  // number of bytes read makes the next line start on top of the previous
  // line's final byte.
  // NOTE(review): presumably that final byte is the newline, which is how
  // line breaks get squeezed out of the sequence — confirm against the
  // channel.readline documentation.
  while stdinNoLock.readline(data, lineSize, idx) do
    idx += lineSize - 1;

  // Resize our array to the amount actually read
  dataDom = {1..idx};

  // Make everything uppercase. Only bytes in 'a'..'z' are shifted: an
  // unconditional subtraction would corrupt bytes that are already uppercase
  // (or are not letters at all) and silently skew the counts.
  const lowerA = "a".toByte(),
        lowerZ = "z".toByte(),
        caseGap = lowerA - "A".toByte();

  forall d in data {
    if lowerA <= d && d <= lowerZ then
      d -= caseGap;
  }

  writeFreqs(data, 1);
  writeFreqs(data, 2);
  writeCount(data, "GGT");
  writeCount(data, "GGTA");
  writeCount(data, "GGTATT");
  writeCount(data, "GGTATTTTAATT");
  writeCount(data, "GGTATTTTAATTTATAGT");
}
// Prints every nclSize-long sequence found in 'data' together with its
// frequency as a percentage (three decimals), most frequent first, and then
// a blank line.
//
// data:    array of nucleotide bytes; forwarded to calculate().
// nclSize: compile-time length of the sequences to tally.
proc writeFreqs(data, param nclSize) {
  const freqs = calculate(data, nclSize);

  // create an array of (frequency, sequence) tuples
  var arr = for (s,f) in freqs.items() do (f,s.val);

  // Sort in place by decreasing frequency. Sort.sort is used because the
  // Array.sorted method is deprecated.
  sort(arr, comparator=reverseComparator);

  // The divisor matches the upper bound of the window loop in calculate(),
  // i.e. the number of windows that were counted.
  for (f, s) in arr do
    writef("%s %.3dr\n", decode(s, nclSize),
           (100.0 * f) / (data.size - nclSize));

  writeln();
}
// Prints how many times the pattern 'str' occurs in 'data', then a tab, then
// the pattern as decoded back from its packed representation.
//
// data: array of nucleotide bytes; forwarded to calculate().
// str:  compile-time pattern string; its byte length selects the window size.
proc writeCount(data, param str) {
  const strBytes = str.bytes(),
        freqs = calculate(data, str.numBytes),
        d = hash(strBytes, strBytes.domain.low, str.numBytes),
        // A pattern that never occurs has no entry in the map; report it as
        // 0 rather than indexing a missing key of a const map.
        count = if freqs.contains(d) then freqs[d] else 0;

  writeln(count, "\t", decode(d.val, str.numBytes));
}
// Counts every window of nclSize consecutive elements of 'data' and returns
// a map from the window's packed hashVal to its number of occurrences.
//
// Window start positions run from 1 to data.size - nclSize and are dealt out
// round-robin to here.maxTaskPar tasks. Each task tallies into its own map
// and then folds it into the shared result while holding a sync-variable
// lock.
// NOTE(review): start positions begin at 1, so 'data' is assumed to be
// 1-based — confirm for any new caller.
proc calculate(data, param nclSize) {
  const numTasks = here.maxTaskPar,
        lastStart = data.size - nclSize;

  var totals = new map(hashVal, int);
  var guard: sync bool = true;  // starts full, i.e. the lock is available

  coforall taskId in 1..numTasks with (ref totals) {
    // Private tally for this task: no synchronization needed while counting.
    var partial = new map(hashVal, int);
    for start in taskId..lastStart by numTasks {
      partial[hash(data, start, nclSize)] += 1;
    }

    guard.readFE();        // acquire: blocks until full, leaves it empty
    for (key, tally) in partial.items() {
      totals[key] += tally;
    }
    guard.writeEF(true);   // release: refill so another task may proceed
  }

  return totals;
}
// Lookup tables between nucleotide letters and their 2-bit codes.
// toChar maps a code (0..3) to its one-letter string; toNum is the inverse,
// indexed by the letter's byte value. Entries of toNum for all other byte
// values keep the default value 0.
const toChar: [0..3] string = ["A", "C", "T", "G"];
var toNum: [0..127] int;

forall code in toChar.domain {
  toNum[toChar[code].toByte()] = code;
}
// Turns a packed sequence back into its string of nucleotide letters.
//
// data:    packed value holding 2 bits per nucleotide, with the last
//          nucleotide in the lowest two bits.
// nclSize: compile-time number of nucleotides to unpack.
// Returns the nclSize-character string; the lowest two bits become the
// rightmost character.
inline proc decode(in data, param nclSize) {
  var text: string;
  // Walk from the lowest 2-bit group upwards, prepending each letter so the
  // highest group ends up first.
  for pos in 0..#nclSize do
    text = toChar[(data >> (2 * pos)) & 3] + text;
  return text;
}
// Packs 'size' consecutive elements of 'str', starting at index 'beg', into
// a single integer using 2 bits per element (via the toNum table) and wraps
// the result in a hashVal.
//
// str:  indexable collection of byte values.
// beg:  index of the first element to pack.
// size: compile-time number of elements to pack.
// The first element lands in the highest-order bits of the packed value.
inline proc hash(str, beg, param size) {
  var packed = 0;
  for offset in 0..#size {
    packed = (packed << 2) | toNum[str[beg + offset]];
  }
  return new hashVal(packed);
}
// Returns true when elements 1..3 of 'data' are the bytes '>', 'T', 'H',
// i.e. when the buffer begins with ">TH".
inline proc startsWithThree(data) {
  const marker = ">".toByte(),
        letterT = "T".toByte(),
        letterH = "H".toByte();

  return data[1] == marker && data[2] == letterT && data[3] == letterH;
}
// Key type for the frequency maps: wraps the packed integer encoding of a
// nucleotide sequence and supplies its own hash method.
record hashVal {
  var val: int;  // packed sequence, 2 bits per nucleotide

  // The packed value itself serves as the hash.
  proc hash() {
    return this.val;
  }
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
chpl version 1.29.0
built with LLVM version 14.0.0
Copyright 2020-2022
Hewlett Packard
Enterprise Development LP
Copyright 2004-2019 Cray Inc.
Wed, 25 Jan 2023 21:42:03 GMT
MAKE:
mv knucleotide.chapel-4.chapel knucleotide.chapel-4.chpl
/opt/src/chapel-1.29.0/bin/chpl --fast knucleotide.chapel-4.chpl -o knucleotide.chapel-4.chapel_run
knucleotide.chapel-4.chpl:13: In function 'main':
knucleotide.chapel-4.chpl:24: warning: channel.readline is deprecated. Use channel.readLine instead
knucleotide.chapel-4.chpl:24: warning: channel.readline is deprecated. Use channel.readLine instead
knucleotide.chapel-4.chpl:32: warning: channel.readline is deprecated. Use channel.readLine instead
knucleotide.chapel-4.chpl:32: warning: channel.readline is deprecated. Use channel.readLine instead
knucleotide.chapel-4.chpl:52: In function 'writeFreqs':
knucleotide.chapel-4.chpl:59: warning: 'Array.sorted' is deprecated - use Sort.sort instead
knucleotide.chapel-4.chpl:42: called as writeFreqs(data: [domain(1,int(64),false)] uint(8), param nclSize = 1) from function 'main'
knucleotide.chapel-4.chpl:52: In function 'writeFreqs':
knucleotide.chapel-4.chpl:59: warning: 'Array.sorted' is deprecated - use Sort.sort instead
knucleotide.chapel-4.chpl:43: called as writeFreqs(data: [domain(1,int(64),false)] uint(8), param nclSize = 2) from function 'main'
rm knucleotide.chapel-4.chpl
26.16s to complete and log all make actions
COMMAND LINE:
./knucleotide.chapel-4.chapel_run --n=0 < knucleotide-input25000000.txt
PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754
AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902
1471758 GGT
446535 GGTA
47336 GGTATT
893 GGTATTTTAATT
893 GGTATTTTAATTTATAGT