The Computer Language
24.04 Benchmarks Game

k-nucleotide Swift #2 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   contributed by Ralph Ganszky
*/

import Foundation
import Dispatch

// Lookup tables between the 2-bit nucleotide codes and their letters.
let i2c: [Int: String] = [0: "A", 1: "C", 2: "T", 3: "G"]
let c2i: [Character: Int] = ["A": 0, "C": 1, "T": 2, "G": 3]

// Number of iterations handed to concurrentPerform in getSequenceHash.
let ntasks = 8
// Nominal number of bases on a full sequence line of the input.
let linewidth = 60

// Serial queue (default attributes); getSequenceHash merges per-task tables on it.
let mQueue = DispatchQueue(label: "mergeQueue")
// Concurrent queue; not referenced in the visible part of this file.
let queue = DispatchQueue(label: "hashQueue", attributes: .concurrent)

/// Packs a run of 2-bit nucleotide codes into a single integer.
///
/// The first element of `seq` ends up in the most significant position.
///
/// - Parameters:
///   - n: Not read by this function; the packed length is `seq.count`.
///   - seq: Slice of codes, each OR-ed into the low two bits after a shift.
/// - Returns: The packed value, or 0 for an empty slice.
func compress(_ n: Int, seq: ArraySlice<Int8>) -> Int {
    return seq.reduce(0) { packed, code in
        (packed << 2) | Int(code)
    }
}

/// Expands a packed k-mer back into its letter form.
///
/// - Parameters:
///   - n: Packed value holding two bits per nucleotide, last nucleotide in
///     the lowest bits.
///   - len: Number of nucleotides to extract.
/// - Returns: A string of `len` letters looked up in `i2c`.
func decode(_ n: Int, _ len: Int) -> String {
    var letters = [String]()
    var remaining = n
    for _ in 0..<len {
        // `& 0x3` only yields 0...3, all of which are keys of i2c.
        letters.append(i2c[remaining & 0x3]!)
        remaining >>= 2
    }
    // Letters were collected from the last nucleotide to the first.
    return letters.reversed().joined()
}

/// Packs a nucleotide string into an integer, two bits per letter.
///
/// Each UTF-8 byte is reduced to `(byte & 0x6) >> 1`, which maps
/// A, C, T, G to 0, 1, 2, 3. The first letter lands in the most
/// significant position.
///
/// - Parameter seq: The nucleotide string to pack.
/// - Returns: The packed value, or 0 for an empty string.
func encode(_ seq: String) -> Int {
    return seq.utf8.reduce(0) { packed, byte in
        (packed << 2) | Int((Int8(byte) & 0x6) >> 1)
    }
}

/// Counts every k-mer of length `n` that occurs in `seq`.
///
/// The window start positions are divided into `ntasks` equal chunks that
/// are counted concurrently; each chunk's table is merged into the result
/// on the serial `mQueue`. Start positions left over after the even split
/// are counted sequentially afterwards.
///
/// - Parameters:
///   - n: Length of the k-mer. Keys use two bits per nucleotide, so they
///     are only distinct while `2 * n` fits in the width of `Int`.
///   - seq: Sequence of 2-bit nucleotide codes.
/// - Returns: A table from packed k-mer to occurrence count; empty when
///   `seq` is shorter than `n`.
func getSequenceHash(_ n: Int, seq: [Int8]) -> [Int:Int] {
    var hash = [Int:Int]()

    // Number of window start positions. Nothing to count when seq is too short.
    let windows = seq.count - (n - 1)
    guard windows > 0 else {
        return hash
    }

    let slice = windows / ntasks
    let mask = ~(-1 << (2*n))

    // With fewer windows than tasks the chunks would be empty (and their
    // ranges invalid), so everything is left to the sequential loop below.
    if slice > 0 {
        DispatchQueue.concurrentPerform(iterations: ntasks) { i in
            let first = i * slice
            var lHash = [Int:Int]()
            var idx = compress(n, seq: seq[first..<(first + n)])
            lHash[idx] = 1
            for l in (first + 1)..<(first + slice) {
                // Slide the window: drop the oldest code, append the newest.
                idx = ((idx << 2) & mask) | Int(seq[l + n - 1])
                lHash[idx, default: 0] += 1
            }
            // Merges are serialized on mQueue so `hash` has a single writer.
            mQueue.sync {
                hash.merge(lHash, uniquingKeysWith: +)
            }
        }
    }

    // Windows not covered by the even split.
    for i in (ntasks * slice)..<windows {
        let idx = compress(n, seq: seq[i..<(i + n)])
        hash[idx, default: 0] += 1
    }
    return hash
}

/// Reads the third sequence from standard input and returns it as 2-bit codes.
///
/// Lines are skipped until one whose first three bytes match those of
/// ">THREE". Every following line up to end of input is treated as sequence
/// data: its bytes, up to the first line terminator, are converted with
/// `(byte & 0x6) >> 1` (A, C, T, G map to 0, 1, 2, 3).
///
/// Only the bytes actually present on each line are used, so the result does
/// not depend on the line width, on a trailing newline being present, or on
/// the length of the last line.
///
/// - Returns: The nucleotide codes in input order; empty when the header is
///   missing or no sequence data follows it.
func readInputAndRewrite() -> [Int8] {
    let capacity: Int32 = 100
    let buf = UnsafeMutablePointer<Int8>.allocate(capacity: Int(capacity))
    defer {
        buf.deallocate()
    }

    // Read until sequence THREE (only the first three bytes are compared).
    let three = ">THREE".utf8.map{Int8($0)}
    while let line = fgets(buf, capacity, stdin) {
        if line[0] == three[0] && line[1] == three[1] && line[2] == three[2] {
            break
        }
    }

    // Read the sequence and convert it to bits as it arrives.
    var bitSeq = [Int8]()
    while let line = fgets(buf, capacity, stdin) {
        var pos = 0
        while true {
            let byte = line[pos]
            // Stop at the NUL written by fgets or at "\n" / "\r".
            if byte == 0 || byte == 10 || byte == 13 {
                break
            }
            bitSeq.append((byte & 0x6) >> 1)
            pos += 1
        }
    }
    return bitSeq
}

// Read sequence
let sequence = readInputAndRewrite()

/// Prints one "<k-mer> <percentage>" line per entry, followed by a blank line.
///
/// Entries are ordered by descending count; equal counts are ordered by
/// ascending k-mer name so the output does not depend on dictionary order.
///
/// - Parameters:
///   - counts: Table from packed k-mer to occurrence count.
///   - total: Sum of all counts, used as the percentage denominator.
///   - length: Number of nucleotides in each k-mer.
func printFrequencies(_ counts: [Int: Int], total: Int, length: Int) {
    let ranked = counts
        .map { (name: decode($0.key, length), count: $0.value) }
        .sorted { lhs, rhs in
            if lhs.count != rhs.count {
                return lhs.count > rhs.count
            }
            return lhs.name < rhs.name
        }
    for entry in ranked {
        let percent = 100.0 * Double(entry.count) / Double(total)
        print("\(entry.name) \(String(format: "%.3f", percent))")
    }
    print()
}

// Relative frequencies of single nucleotides
let hash = getSequenceHash(1, seq: sequence)
let total = hash.reduce(0) { $0 + $1.1 }
printFrequencies(hash, total: total, length: 1)

// Relative frequencies of nucleotide pairs
let hash2 = getSequenceHash(2, seq: sequence)
let total2 = hash2.reduce(0) { $0 + $1.1 }
printFrequencies(hash2, total: total2, length: 2)

// Occurrence counts of specific fragments
let hash3 = getSequenceHash(3, seq: sequence)
print("\(hash3[encode("GGT")] ?? 0)\tGGT")

let hash4 = getSequenceHash(4, seq: sequence)
print("\(hash4[encode("GGTA")] ?? 0)\tGGTA")

let hash6 = getSequenceHash(6, seq: sequence)
print("\(hash6[encode("GGTATT")] ?? 0)\tGGTATT")

let hash12 = getSequenceHash(12, seq: sequence)
print("\(hash12[encode("GGTATTTTAATT")] ?? 0)\tGGTATTTTAATT")

let hash18 = getSequenceHash(18, seq: sequence)
print("\(hash18[encode("GGTATTTTAATTTATAGT")] ?? 0)\tGGTATTTTAATTTATAGT")
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 5.10
(swift-5.10-RELEASE)


 Fri, 08 Mar 2024 01:17:22 GMT

MAKE:
/opt/src/swift-5.10-RELEASE/usr/bin/swiftc knucleotide.swift-2.swift -Ounchecked  -o knucleotide.swift-2.swift_run

16.13s to complete and log all make actions

COMMAND LINE:
 ./knucleotide.swift-2.swift_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT