The Computer Language
24.11 Benchmarks Game

k-nucleotide Rust program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by Renat Galimov


use std::sync::Arc;
use std::thread;
use std::collections::HashMap;
use std::vec::Vec;
use std::string::String;


/// Counts every overlapping fragment of `length` bytes in `input`.
///
/// Returns a map from each distinct fragment to the number of positions at
/// which it occurs. The map is empty when `length` is zero or when `input`
/// is shorter than `length`, because there is no fragment to count.
fn count_nucleotides(input: &[u8], length: usize) -> HashMap<Vec<u8>, usize> {
    let mut nucleotides = HashMap::<Vec<u8>, usize>::new();

    // `windows` panics on a zero window size, so rule that case out first.
    if length == 0 {
        return nucleotides;
    }

    // `windows` yields nothing when the input is shorter than `length`.
    for fragment in input.windows(length) {
        // Look the fragment up by borrowed slice so that an owned key is
        // only allocated the first time a fragment is seen, not per position.
        if let Some(count) = nucleotides.get_mut(fragment) {
            *count += 1;
        } else {
            nucleotides.insert(fragment.to_vec(), 1);
        }
    }

    nucleotides
}


/// Reads the sequence to analyse from standard input.
///
/// Lines are discarded up to and including the first one that contains
/// `"THREE"` (or until end of input). Every remaining line is upper-cased,
/// stripped of trailing `\r` / `\n` characters and appended to the result.
///
/// # Panics
///
/// Panics if reading a line from standard input fails.
fn read_input() -> Vec<u8> {
    let stdin = std::io::stdin();

    // One line buffer, cleared before each read, serves both phases.
    let mut line = String::new();

    // Phase 1: skip ahead to the header line mentioning "THREE".
    loop {
        line.clear();
        let bytes_read = stdin.read_line(&mut line).unwrap();
        if bytes_read == 0 || line.contains("THREE") {
            break;
        }
    }

    // Phase 2: collect everything after the header until end of input.
    let mut data = Vec::<u8>::new();

    loop {
        line.clear();
        if stdin.read_line(&mut line).unwrap() == 0 {
            break;
        }

        let upper = line.to_uppercase();
        let payload = upper.trim_end_matches(|c| c == '\r' || c == '\n');
        data.extend_from_slice(payload.as_bytes());
    }

    data
}


/// Prints each fragment in `items` with its share of the total count.
///
/// One line is written to standard output per fragment, in the form
/// `<fragment> <percentage>` with three decimal places. Lines are ordered by
/// descending count; fragments with equal counts are ordered by ascending
/// fragment bytes, so the output does not depend on hash-map iteration order.
/// Nothing is printed for an empty map.
fn print_percents(items: HashMap<Vec<u8>, usize>) {
    let mut sorted_items: Vec<(Vec<u8>, usize)> = items.into_iter().collect();

    let total_count: usize = sorted_items.iter().map(|item| item.1).sum();

    // Keys are unique, so this comparison never reports two entries as equal
    // and an unstable sort is therefore still deterministic.
    sorted_items.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));

    for (fragment, occurrences) in sorted_items {
        let percent = (occurrences * 100) as f64 / total_count as f64;
        // Lossy conversion: a fragment that is not valid UTF-8 is still
        // printed (with replacement characters) instead of aborting.
        println!("{} {:.3}", String::from_utf8_lossy(&fragment), percent);
    }
}


/// Prints how many times `pattern` was counted in `items`.
///
/// Writes `<count>\t<pattern>` to standard output; a pattern that is not a
/// key of `items` is reported with a count of zero.
fn print_count(pattern: &str, items: HashMap<Vec<u8>, usize>) {
    let occurrences = items.get(pattern.as_bytes()).copied().unwrap_or(0);

    println!("{}\t{}", occurrences, pattern);
}


/// Reads the input sequence, counts fragments of lengths 1, 2, 3, 4, 6, 12
/// and 18 on one thread per length, then prints the frequency tables for
/// lengths 1 and 2 followed by the counts of five fixed patterns.
///
/// # Panics
///
/// Panics if any counting thread panicked, rather than silently reporting
/// empty tables or zero counts for the missing result.
fn main() {
    let data = Arc::new(read_input());

    // Start every counting job before waiting on any of them.
    let mut handles = Vec::new();

    for &k in &[1usize, 2, 3, 4, 6, 12, 18] {
        let data_ptr = Arc::clone(&data);

        let handle = thread::spawn(move || count_nucleotides(&data_ptr, k));

        handles.push((k, handle));
    }

    let mut results = HashMap::<usize, HashMap<Vec<u8>, usize>>::new();

    for (k, handle) in handles {
        // A failed worker must not be ignored: its table would be missing and
        // the program would print wrong numbers.
        let counts = handle.join().expect("nucleotide counting thread panicked");
        results.insert(k, counts);
    }

    // Hands over the table for fragment length `k`, leaving nothing behind.
    let mut take = |k: usize| results.remove(&k).unwrap_or_default();

    print_percents(take(1));
    println!();
    print_percents(take(2));
    println!();

    print_count("GGT", take(3));
    print_count("GGTA", take(4));
    print_count("GGTATT", take(6));
    print_count("GGTATTTTAATT", take(12));
    print_count("GGTATTTTAATTTATAGT", take(18));
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
rustc 1.80.1 (3f5fd8dd4 2024-08-06)
LLVM version: 18.1.7


 Thu, 05 Sep 2024 22:39:15 GMT

MAKE:
/opt/src/rust-1.80.1/bin/rustc -C opt-level=3 -C target-cpu=ivybridge -C codegen-units=1 -L /opt/src/rust-libs --extern futures=/opt/src/rust-libs/libfutures-42762cd113366010.rlib --extern indexmap=/opt/src/rust-libs/libindexmap-d12549edf9fc074e.rlib knucleotide.rs -o knucleotide.rust_run
warning: unused `Result` that must be used
   --> knucleotide.rs:120:9
    |
120 | /         thread.join().map(|value| {
121 | |             results.insert(key, value)
122 | |         });
    | |__________^
    |
    = note: this `Result` may be an `Err` variant, which should be handled
    = note: `#[warn(unused_must_use)]` on by default
help: use `let _ = ...` to ignore the resulting value
    |
120 |         let _ = thread.join().map(|value| {
    |         +++++++

warning: 1 warning emitted

rm knucleotide.rs

9.43s to complete and log all make actions

COMMAND LINE:
 ./knucleotide.rust_run 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT