The Computer Language
24.12 Benchmarks Game

k-nucleotide Swift program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   contributed by Ralph Ganszky
   modified by Michael Morrell

   No match for key (for example "GGTATTTTAATT") causes --
   "fatal error: unexpectedly found nil while unwrapping an Optional value"

*/

import Glibc
import Dispatch

// Number of work items the fragment counter splits the sequence into;
// `ntasks` is the name the counting code actually reads.
let ENABLED_THREAD_COUNT = 4
let ntasks = ENABLED_THREAD_COUNT

// Queue on which each work item merges its private counts into the shared
// result (created with default attributes; used via `sync` in getSequenceHash).
let mQueue = DispatchQueue(label: "mergeQueue")

/// Packs a run of small codes into one integer key, two bits per element.
///
/// The first element of `seq` ends up in the most significant position.
/// Elements are OR-ed in unmasked, so callers are expected to pass values
/// in 0...3 (assumption: the caller has already reduced bytes to 2-bit codes).
///
/// - Parameter seq: The codes to pack, in order.
/// - Returns: The packed key; 0 for an empty slice.
func compress(_ seq: ArraySlice<UInt8>) -> Int {
    return seq.reduce(0) { packed, code in
        (packed << 2) | Int(code)
    }
}

/// Counts how often every fragment of length `n` occurs in `seq`.
///
/// `seq` must already hold 2-bit codes (values 0...3). Each fragment is packed
/// into an `Int` key, two bits per element with the first element most
/// significant, so `n` has to be small enough for `2*n` bits to fit in `Int`.
///
/// The fragment start positions are split into `ntasks` equal slices that are
/// counted concurrently; each task merges its private table into the result on
/// `mQueue`. The start positions left over after the even split are counted
/// serially afterwards.
///
/// - Parameters:
///   - n: Fragment length. Values below 1 produce an empty result.
///   - seq: The 2-bit encoded sequence.
/// - Returns: Packed fragment key -> number of occurrences. Empty when `seq`
///   is shorter than `n`.
func getSequenceHash(_ n: Int, seq: [UInt8]) -> [Int:Int] {
    var hash = [Int:Int]()

    // Number of positions at which a full fragment of length n can start.
    let windows = seq.count - (n - 1)
    guard n > 0, windows > 0 else {
        return hash
    }

    let slice = windows / ntasks
    let remainder = windows % ntasks
    // Keeps the trailing n-1 elements of a key when the window slides by one.
    let mask = n > 1 ? ((1 << (2*(n-1))) - 1) : 0

    // With fewer fragments than tasks every slice is empty; running the
    // parallel part would read past the sequence and form an inverted range,
    // so the serial loop below handles all of them instead.
    if slice > 0 {
        DispatchQueue.concurrentPerform(iterations: ntasks) { i in
            var lHash = [Int:Int](minimumCapacity: 1 << min(n, 12))
            let first = i*slice

            // First fragment of this slice is packed from scratch ...
            var idx = compress(seq[first..<first+n])
            lHash[idx] = (lHash[idx] ?? 0) + 1

            // ... the remaining slice-1 fragments are derived by dropping the
            // oldest element and shifting in the next one.
            let startIdx = first + n
            let endIdx = startIdx + slice - 1
            for l in startIdx..<endIdx {
                idx = ((idx & mask) << 2) | Int(seq[l])
                lHash[idx] = (lHash[idx] ?? 0) + 1
            }

            // Merge under the queue so only one task touches `hash` at a time.
            mQueue.sync {
                for (key, value) in lHash {
                    hash[key] = (hash[key] ?? 0) + value
                }
            }
        }
    }

    // Fragments starting in the last `remainder` positions were not covered
    // by any slice.
    for i in (windows - remainder)..<windows {
        let idx = compress(seq[i..<i+n])
        hash[idx] = (hash[idx] ?? 0) + 1
    }
    return hash
}

/// 2-bit code assigned to each nucleotide letter.
let c2i: [Character:Int] = [ "A": 0, "C": 1, "T": 2, "G": 3 ]

/// Packs a nucleotide string into an integer key, two bits per letter.
///
/// The first letter ends up in the most significant position.
///
/// - Parameter seq: A string made only of the letters in `c2i`
///   (upper-case A, C, G, T).
/// - Returns: The packed key; 0 for an empty string.
/// - Note: Any other character is a programming error and stops the program
///   with a message naming the offending character.
func encode(_ seq: String) -> Int {
    var res = 0
    // String is itself a collection of Characters; the old `.characters`
    // view no longer exists in Swift 5 and later.
    for c in seq {
        guard let code = c2i[c] else {
            fatalError("encode: unsupported nucleotide \"\(c)\" in \"\(seq)\"")
        }
        res = (res << 2) | code
    }
    return res
}

/// Formats `num` rounded to exactly `precision` digits after the decimal point.
///
/// The value is scaled to an integer, rounded half away from zero, and the
/// fractional part is zero-padded, so the result always has the requested
/// number of decimals (e.g. 0.05 -> "0.050", 9.9996 -> "10.000").
///
/// - Parameters:
///   - num: The value to format. Non-finite values are returned using their
///     standard description. The scaled magnitude must fit in `Int`.
///   - precision: Number of fractional digits; values below 0 are treated as 0,
///     in which case only the integer part is returned.
/// - Returns: The formatted decimal string.
func roundDouble(_ num: Double, precision: Int) -> String {
    guard num.isFinite else {
        return "\(num)"
    }

    let digits = max(precision, 0)
    var factor = 1
    for _ in 0..<digits {
        factor *= 10
    }

    // Work on the magnitude so that truncation after adding 0.5 rounds
    // correctly for negative input too.
    let scaled = Int(abs(num) * Double(factor) + 0.5)
    // Avoid printing "-0.000" when a small negative value rounds to zero.
    let sign = (num < 0 && scaled != 0) ? "-" : ""
    let whole = scaled / factor

    guard digits > 0 else {
        return "\(sign)\(whole)"
    }

    var fraction = "\(scaled % factor)"
    while fraction.count < digits {
        fraction = "0" + fraction
    }
    return "\(sign)\(whole).\(fraction)"
}

/// Reads the ">THREE" record from standard input as raw bytes.
///
/// Lines are consumed and discarded up to and including the first line that is
/// exactly ">THREE Homo sapiens frequency". Every line after it, with its
/// newline already stripped by `readLine()`, is appended until end of input.
///
/// - Returns: The UTF-8 bytes of all lines following the header, concatenated;
///   empty when the header never appears or nothing follows it.
func readInput() -> [UInt8] {
    let header = ">THREE Homo sapiens frequency"

    // Skip ahead: stop once the header itself has been consumed or input ends.
    while let skipped = readLine(), skipped != header {}

    var bytes = [UInt8]()
    while let line = readLine() {
        bytes.append(contentsOf: line.utf8)
    }
    return bytes
}

// Read sequence
var sequence = readInput()

// rewrite bytes with 2bit code
// Bits 1-2 of the ASCII byte are enough to tell the four bases apart, in
// either letter case: A -> 0, C -> 1, T -> 2, G -> 3.
for i in sequence.indices {
    sequence[i] = (sequence[i] & 0x6) >> 1
}

// Letter for each 2-bit code (inverse of the encoding above).
let i2c = [ 0: "A", 1: "C", 2: "T", 3: "G" ]

/// Spells out a packed key of `length` 2-bit codes as nucleotide letters,
/// most significant code first.
func fragmentLabel(_ key: Int, length: Int) -> String {
    var letters = ""
    for position in (0..<length).reversed() {
        // The masked value is always 0...3, so the lookup cannot miss.
        letters += i2c[(key >> (2 * position)) & 3] ?? "?"
    }
    return letters
}

/// Prints every fragment of the given length with its share of all fragments
/// in percent, most frequent first, followed by a blank line.
/// Equal counts are ordered by label so the output does not depend on
/// Dictionary iteration order.
func printFrequencies(length: Int) {
    let counts = getSequenceHash(length, seq: sequence)
    let total = counts.reduce(0) { $0 + $1.value }
    let rows = counts
        .map { (label: fragmentLabel($0.key, length: length), count: $0.value) }
        .sorted { $0.count != $1.count ? $0.count > $1.count : $0.label < $1.label }
    for row in rows {
        print("\(row.label) \(roundDouble(100.0*Double(row.count)/Double(total), precision: 3))")
    }
    print()
}

printFrequencies(length: 1)
printFrequencies(length: 2)

// Occurrence counts of specific fragments. A fragment that never occurs is
// simply absent from the table and is reported as 0 instead of crashing.
for fragment in ["GGT", "GGTA", "GGTATT", "GGTATTTTAATT", "GGTATTTTAATTTATAGT"] {
    let counts = getSequenceHash(fragment.count, seq: sequence)
    print("\(counts[encode(fragment)] ?? 0)\t\(fragment)")
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 6.0
(swift-6.0-RELEASE)
Target: x86_64-unknown-linux-gnu


 Wed, 18 Sep 2024 23:16:17 GMT

MAKE:
/opt/src/swift-6.0-RELEASE/usr/bin/swiftc knucleotide.swift -Ounchecked -wmo  -o knucleotide.swift_run
knucleotide.swift:61:20: error: 'characters' is unavailable: Please use String directly
 59 | 
 60 | func encode(_ seq: String) -> Int {
 61 |     let cSeq = seq.characters
    |                    `- error: 'characters' is unavailable: Please use String directly
 62 |     var res = 0
 63 |     for c in cSeq {

Swift.String:5:16: note: 'characters' was obsoleted in Swift 5.0
3 |     public typealias CharacterView = String
4 |     @available(swift, deprecated: 3.2, obsoleted: 5.0, message: "Please use String directly")
5 |     public var characters: String { get set }
  |                `- note: 'characters' was obsoleted in Swift 5.0
6 |     @available(swift, deprecated: 3.2, obsoleted: 5.0, message: "Please mutate the String directly")
7 |     public mutating func withMutableCharacters<R>(_ body: (inout String) -> R) -> R

knucleotide.swift:73:21: error: 'characters' is unavailable: Please use String directly
 71 |     let number = Double(Int(num * exponent + 0.5)) / exponent
 72 |     var numberStr = "\(number)"
 73 |     while numberStr.characters.count < Int(log10(num)) + 2 + precision {
    |                     `- error: 'characters' is unavailable: Please use String directly
 74 |         numberStr = numberStr + "0"
 75 |     }

Swift.String:5:16: note: 'characters' was obsoleted in Swift 5.0
3 |     public typealias CharacterView = String
4 |     @available(swift, deprecated: 3.2, obsoleted: 5.0, message: "Please use String directly")
5 |     public var characters: String { get set }
  |                `- note: 'characters' was obsoleted in Swift 5.0
6 |     @available(swift, deprecated: 3.2, obsoleted: 5.0, message: "Please mutate the String directly")
7 |     public mutating func withMutableCharacters<R>(_ body: (inout String) -> R) -> R
make: [/home/dunham/all-benchmarksgame/2000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:483: knucleotide.swift_run] Error 1 (ignored)

9.51s to complete and log all make actions

COMMAND LINE:
 ./knucleotide.swift_run 0 < knucleotide-input250000.txt

MAKE ERROR