The Computer Language
24.12 Benchmarks Game

k-nucleotide Go #8 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 
   Naive transliteration from bearophile's program 
   contributed by Isaac Gouy
*/

package main

import (
   "bufio"
   "fmt" 
   "os"
   "sort"
   "strings"
)

// seqLines reads standard input and returns the sequence lines of the
// ">THREE" record: every line that follows the first line starting with
// ">THREE", up to (but not including) the next line starting with ">" or the
// end of input. Lines are returned without their line terminators. The result
// is empty (never nil) when no such record is present.
//
// A read failure or an over-long line would otherwise leave the sequence
// silently truncated and every later count wrong, so such an error is
// reported on stderr and the program exits with status 1.
func seqLines() []string {
	// bufio.Scanner refuses lines longer than 64 KiB by default; raise the cap
	// so a sequence stored on a single long line is still read in full.
	const maxLineBytes = 1 << 30

	in := bufio.NewScanner(os.Stdin)
	in.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	// Skip everything up to and including the ">THREE" header line.
	for in.Scan() {
		if strings.HasPrefix(in.Text(), ">THREE") {
			break
		}
	}

	// Collect the record body until the next header or end of input.
	lines := []string{}
	for in.Scan() {
		line := in.Text()
		if strings.HasPrefix(line, ">") {
			break
		}
		lines = append(lines, line)
	}

	// Scan returning false covers both EOF and failure; Err tells them apart.
	if err := in.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading stdin:", err)
		os.Exit(1)
	}
	return lines
}

// baseCounts counts every overlapping substring of length bases in seq and
// returns a map from substring to its number of occurrences.
//
// The returned map is never nil. It is empty when bases is not positive or
// when seq is shorter than bases, since no substring of that length exists.
func baseCounts(bases int, seq string) map[string]int {
	counts := make(map[string]int)
	// A non-positive length has no meaningful substrings; a negative one
	// would also make the slice expression below panic.
	if bases <= 0 {
		return counts
	}
	// Number of window start positions; zero or negative when seq is too short.
	windows := len(seq) + 1 - bases
	for i := 0; i < windows; i++ {
		// A missing key reads as zero, so incrementing covers first sight too.
		counts[seq[i:i+bases]]++
	}
	return counts
}

   // data is one frequency record: a substring of the sequence together with
   // how often it occurs. sortedFreq builds these with positional literals,
   // so the field order must stay as declared.
   type data struct {
      // Key is the substring being counted.
      Key   string
      // Count is the number of occurrences of Key in the sequence.
      Count int
      // Percent is Count as a percentage of all substrings of that length;
      // sortedFreq initialises it to 0.0 before computing the real value.
      Percent float64      
   }

// sortedFreq returns one record per distinct substring of length bases in
// seq, carrying its occurrence count and its share (in percent) of all
// substrings of that length.
//
// Records are ordered by descending count; equal counts are ordered by
// ascending key. The tie-break matters: map iteration order is random and
// sort.Slice is not stable, so without it equal-count entries could come out
// in a different order on every run.
//
// The result is empty (never nil) when seq has no substring of that length.
func sortedFreq(bases int, seq string) []data {
	counts := baseCounts(bases, seq)

	// Total number of substrings of this length. It is only used as a divisor
	// when counts is non-empty, which implies it is positive.
	total := float64(len(seq) + 1 - bases)

	freqs := make([]data, 0, len(counts))
	for key, n := range counts {
		freqs = append(freqs, data{key, n, 100.0 * float64(n) / total})
	}

	sort.Slice(freqs, func(i, j int) bool {
		if freqs[i].Count != freqs[j].Count {
			return freqs[i].Count > freqs[j].Count
		}
		return freqs[i].Key < freqs[j].Key
	})
	return freqs
}

// specificCount reports how many times code occurs in seq, overlapping
// occurrences included. It tallies every substring of len(code) via
// baseCounts and looks code up in the result; an absent code yields 0.
func specificCount(code string, seq string) int {
	tally := baseCounts(len(code), seq)
	return tally[code]
}

// main prints the k-nucleotide report for the ">THREE" record on stdin:
// the 1-mer and 2-mer frequency tables (each followed by a blank line), then
// the occurrence count of five fixed codes, one "count<TAB>code" per line.
func main() {
	// Upper-case each input line and concatenate them into one sequence.
	var sb strings.Builder
	for _, line := range seqLines() {
		sb.WriteString(strings.ToUpper(line))
	}
	seq := sb.String()

	for k := 1; k <= 2; k++ {
		for _, f := range sortedFreq(k, seq) {
			fmt.Printf("%s %.3f\n", f.Key, f.Percent)
		}
		fmt.Printf("\n")
	}

	codes := []string{
		"GGT",
		"GGTA",
		"GGTATT",
		"GGTATTTTAATT",
		"GGTATTTTAATTTATAGT",
	}
	for _, code := range codes {
		fmt.Printf("%d\t%s\n", specificCount(code, seq), code)
	}
}

    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
go version go1.23.1 linux/amd64
GOAMD64=v2


 Wed, 11 Dec 2024 23:20:09 GMT

MAKE:
/opt/src/go1.23.1/go/bin/go build -o knucleotide.go-8.go_run knucleotide.go-8.go

0.35s to complete and log all make actions

COMMAND LINE:
 ./knucleotide.go-8.go_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT