The Computer Language
24.12 Benchmarks Game

k-nucleotide Swift #7 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

   Naive transliteration from bearophile's program
   contributed by Isaac Gouy    
   (indices usage by Kedar Sukerkar SO)
*/

import Foundation

/// Reads standard input and returns the lines of the `>THREE` record.
///
/// Lines are consumed until one starting with `>THREE` has been read (or
/// input ends). Every following line is collected until the next line that
/// starts with `>` or the end of input; that terminating line is not included.
///
/// - Returns: The collected lines, in input order.
func seq_lines() -> [String] {
   // Discard everything up to and including the ">THREE" header line.
   while let candidate = readLine(), !candidate.hasPrefix(">THREE") {
      continue
   }

   // Gather lines until the next header (or end of input).
   var collected = [String]()
   while let row = readLine(), !row.hasPrefix(">") {
      collected.append(row)
   }
   return collected
}

/// Counts every overlapping substring of `bases` characters in `seq`.
///
/// - Parameters:
///   - bases: Window length in characters. Values below 1 yield an empty
///     result instead of trapping.
///   - seq: The sequence to scan.
/// - Returns: A dictionary mapping each distinct window to the number of
///   times it occurs. Empty when `seq` has fewer than `bases` characters.
func base_counts(_ bases: Int, _ seq: String) -> [String: Int] {
   var counts: [String: Int] = [:]

   // A non-positive window length has no windows (and would otherwise trap).
   guard bases > 0 else { return counts }

   // End of the first window; nil when the sequence is shorter than `bases`.
   guard var upper = seq.index(seq.startIndex, offsetBy: bases,
                               limitedBy: seq.endIndex) else {
      return counts
   }
   var lower = seq.startIndex

   // Slide both ends one character at a time so each step is a single
   // index advance rather than a fresh walk of `bases` characters.
   while true {
      counts[String(seq[lower..<upper]), default: 0] += 1
      if upper == seq.endIndex { break }
      lower = seq.index(after: lower)
      upper = seq.index(after: upper)
   }
   return counts
}

/// Computes the percentage frequency of every `bases`-long substring of `seq`.
///
/// - Parameters:
///   - bases: Substring length in characters.
///   - seq: The sequence to analyse.
/// - Returns: `(substring, percentage)` pairs ordered by descending count;
///   substrings with equal counts are ordered by ascending key so the result
///   does not depend on dictionary iteration order.
func sorted_freq(_ bases: Int, _ seq: String) -> [(String, Double)] {
   let tallies = base_counts(bases, seq)
   // Number of windows of length `bases` in `seq`.
   let size = Double(seq.count + 1 - bases)
   let ordered = tallies.sorted { lhs, rhs in
      // Primary: larger count first. Tie-break: key ascending, which keeps
      // the ordering deterministic across runs.
      if lhs.value != rhs.value { return lhs.value > rhs.value }
      return lhs.key < rhs.key
   }
   return ordered.map { ($0.key, 100.0 * Double($0.value) / size) }
}

/// Returns how many times `code` occurs in `seq`, counting overlapping matches.
///
/// - Parameters:
///   - code: The substring to look for.
///   - seq: The sequence to search.
/// - Returns: The occurrence count, or 0 when `code` never appears.
func specific_count(_ code: String, _ seq: String) -> Int {
    // Tally every window of the same length as `code`, then look it up.
    let tally = base_counts(code.count, seq)
    guard let hits = tally[code] else { return 0 }
    return hits
}

/// Reads the sequence from standard input and prints the k-nucleotide report:
/// frequency tables for window lengths 1 and 2, then occurrence counts for a
/// fixed list of fragments.
func main() {
   // Upper-case each input line and concatenate them into one sequence.
   let seq = seq_lines()
      .map { $0.uppercased() }
      .joined()

   // One frequency table per window length, each followed by a blank line.
   for base in 1...2 {
      let table = sorted_freq(base, seq)
      table.forEach { entry in
         print(entry.0, String(format: "%.3f", entry.1))
      }
      print()
   }

   // Occurrence count of each fragment, tab-separated from the fragment.
   let fragments = [
      "GGT", "GGTA", "GGTATT",
      "GGTATTTTAATT", "GGTATTTTAATTTATAGT",
   ]
   fragments.forEach { code in
      print("\(specific_count(code, seq))\t\(code)")
   }
}

// Script entry point: run the report when the program starts.
main()

    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 6.0
(swift-6.0-RELEASE)
Target: x86_64-unknown-linux-gnu


 Mon, 28 Oct 2024 16:26:57 GMT

MAKE:
/opt/src/swift-6.0-RELEASE/usr/bin/swiftc knucleotide.swift-7.swift -Ounchecked -wmo  -o knucleotide.swift-7.swift_run

2.57s to complete and log all make actions

COMMAND LINE:
 ./knucleotide.swift-7.swift_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT