The Computer Language
Benchmarks Game

k-nucleotide C# .NET Core #2 program

source code

/* The Computer Language Benchmarks Game
   https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
 *
 * contributed by Isaac Gouy
 * modified by Antti Lankila for generics
 */

using System;
using System.IO;
using System.Collections.Generic;
using System.Text;

/// <summary>
/// Command-line driver: reads sequence text from standard input and prints
/// k-nucleotide statistics for the section introduced by the ">THREE" header.
/// </summary>
public class program {
    /// <summary>
    /// Skips input up to and including the first line that starts with ">THREE",
    /// then accumulates the following lines (upper-cased) until the next line
    /// starting with '>' or end of input. Lines starting with ';' and empty
    /// lines are ignored. The accumulated text is handed to
    /// <see cref="KNucleotide"/>, which prints frequency tables for lengths 1
    /// and 2 and occurrence counts for five fixed fragments.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args) {
        StringBuilder input = new StringBuilder();

        // The using block releases the reader (and the stdin stream it wraps)
        // as soon as the section has been read.
        using (StreamReader source = new StreamReader(Console.OpenStandardInput())) {
            string line;

            // Ordinal StartsWith is safe on empty or short header lines, where
            // indexing line[0] or calling Substring(1, 5) would throw.
            while ((line = source.ReadLine()) != null) {
                if (line.StartsWith(">THREE", StringComparison.Ordinal)) {
                    break;
                }
            }

            while ((line = source.ReadLine()) != null) {
                // An empty line carries no sequence data and has no first character.
                if (line.Length == 0) {
                    continue;
                }

                char first = line[0];
                if (first == '>') {
                    break;
                }
                if (first != ';') {
                    // Invariant casing keeps the keys independent of the current culture.
                    input.Append(line.ToUpperInvariant());
                }
            }
        }

        KNucleotide kn = new KNucleotide(input.ToString());
        // Drop the builder reference; the sequence now lives in the string held by kn.
        input = null;

        kn.WriteFrequencies(1);
        kn.WriteFrequencies(2);

        kn.WriteCount("GGT");
        kn.WriteCount("GGTA");
        kn.WriteCount("GGTATT");
        kn.WriteCount("GGTATTTTAATT");
        kn.WriteCount("GGTATTTTAATTTATAGT");
    }
}

/// <summary>
/// Counts how often each substring of a given length occurs in a sequence and
/// writes the results to the console.
/// </summary>
public class KNucleotide {
    /// <summary>
    /// Mutable counter box. Storing a reference type in the dictionary lets a
    /// count be incremented after a single lookup instead of a get followed by
    /// a set.
    /// </summary>
    internal class Value {
        internal int v;

        internal Value(int v)
        {
            this.v = v;
        }
    }

    // Reused across calls; cleared at the start of every GenerateFrequencies.
    private readonly Dictionary<string, Value> frequencies = new Dictionary<string, Value>();
    private readonly string sequence;

    /// <summary>Creates a counter over the given sequence.</summary>
    /// <param name="s">The sequence to analyse.</param>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public KNucleotide(string s)
    {
        if (s == null) {
            throw new ArgumentNullException(nameof(s));
        }
        sequence = s;
    }

    /// <summary>
    /// Writes one line per distinct substring of length
    /// <paramref name="nucleotideLength"/>, as "KEY percent" with three
    /// decimals, ordered by descending count and then by key, followed by an
    /// empty line.
    /// </summary>
    /// <param name="nucleotideLength">Length of the substrings to count.</param>
    public void WriteFrequencies(int nucleotideLength) {
        GenerateFrequencies(nucleotideLength);

        List<KeyValuePair<string, Value>> items = new List<KeyValuePair<string, Value>>(frequencies);
        items.Sort(SortByFrequencyAndCode);

        // Number of substring start positions in the sequence.
        int sum = sequence.Length - nucleotideLength + 1;
        foreach (KeyValuePair<string, Value> each in items) {
            double percent = each.Value.v * 100.0 / sum;
            // Invariant culture so the decimal separator is always '.'.
            Console.WriteLine(string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                "{0} {1:f3}", each.Key, percent));
        }
        Console.WriteLine("");
    }

    /// <summary>
    /// Writes "count&lt;TAB&gt;fragment", where count is the number of
    /// (possibly overlapping) occurrences of the fragment in the sequence.
    /// </summary>
    /// <param name="nucleotideFragment">The fragment to look for.</param>
    /// <exception cref="ArgumentNullException"><paramref name="nucleotideFragment"/> is null.</exception>
    public void WriteCount(string nucleotideFragment) {
        if (nucleotideFragment == null) {
            throw new ArgumentNullException(nameof(nucleotideFragment));
        }

        GenerateFrequencies(nucleotideFragment.Length);

        Value found;
        int count = frequencies.TryGetValue(nucleotideFragment, out found) ? found.v : 0;
        Console.WriteLine("{0}\t{1}", count, nucleotideFragment);
    }

    /// <summary>
    /// Rebuilds the frequency table for substrings of the given length by
    /// scanning every reading frame 0 .. length-1.
    /// </summary>
    private void GenerateFrequencies(int length) {
        frequencies.Clear();
        for (int frame = 0; frame < length; frame++) {
            KFrequency(frame, length);
        }
    }

    /// <summary>
    /// Counts the substrings of length <paramref name="k"/> that start at
    /// <paramref name="readingFrame"/>, readingFrame + k, readingFrame + 2k, ...
    /// </summary>
    private void KFrequency(int readingFrame, int k) {
        int n = sequence.Length - k + 1;
        for (int i = readingFrame; i < n; i += k) {
            // Substring allocates a new string for every position.
            string knucleo = sequence.Substring(i, k);

            // One lookup per position; a missing key is the normal first-seen
            // case, so it is handled without an exception.
            Value counter;
            if (frequencies.TryGetValue(knucleo, out counter)) {
                counter.v++;
            } else {
                frequencies.Add(knucleo, new Value(1));
            }
        }
    }

    /// <summary>
    /// Orders entries by descending count; equal counts are ordered by key
    /// using an ordinal comparison so the result does not depend on culture.
    /// </summary>
    private static int SortByFrequencyAndCode(KeyValuePair<string, Value> item1, KeyValuePair<string, Value> item2) {
        int comparison = item2.Value.v.CompareTo(item1.Value.v);
        if (comparison != 0) {
            return comparison;
        }
        return string.CompareOrdinal(item1.Key, item2.Key);
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
.NET Core SDK   3.0.100
Host Version: 3.0.0; Commit: 95a0a61858
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>


Mon, 23 Sep 2019 22:06:31 GMT

MAKE:
cp knucleotide.csharpcore-2.csharpcore Program.cs
cp Include/csharpcore/tmp.csproj .
/usr/bin/dotnet build -c Release
Microsoft (R) Build Engine version 16.3.0+0f4c62fea for .NET Core
Copyright (C) Microsoft Corporation. All rights reserved.

  Restore completed in 140.21 ms for /home/dunham/benchmarksgame_quadcore/knucleotide/tmp/tmp.csproj.
  tmp -> /home/dunham/benchmarksgame_quadcore/knucleotide/tmp/bin/Release/netcoreapp3.0/tmp.dll

Build succeeded.
    0 Warning(s)
    0 Error(s)

Time Elapsed 00:00:04.32

6.95s to complete and log all make actions

COMMAND LINE:
/usr/bin/dotnet ./bin/Release/netcoreapp3.0/tmp.dll 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT