source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
Naive transliteration from bearophile's program
contributed by Isaac Gouy
*/
using System.Collections.Generic;
/// <summary>
/// Counts nucleotide fragments in the section of standard input that follows
/// the line starting with ">THREE", then prints 1- and 2-base frequencies and
/// the occurrence counts of a fixed list of longer fragments.
/// </summary>
class Knucleotide {
    /// <summary>
    /// Reads standard input, discarding every line up to and including the
    /// first one that starts with ">THREE", then collects the lines that
    /// follow until the next line starting with ">" or the end of input.
    /// </summary>
    /// <returns>
    /// The collected lines in input order; empty when no ">THREE" line exists.
    /// </returns>
    static List<string> SeqLines()
    {
        // Pattern-matched locals avoid assigning the nullable ReadLine()
        // result to a non-nullable variable.
        while (Console.ReadLine() is { } header) {
            if (header.StartsWith(">THREE", StringComparison.Ordinal)) {
                break;
            }
        }

        var lines = new List<string>();
        while (Console.ReadLine() is { } line) {
            if (line.StartsWith(">", StringComparison.Ordinal)) {
                break;
            }
            lines.Add(line);
        }
        return lines;
    }

    /// <summary>
    /// Counts every overlapping fragment of <paramref name="bases"/>
    /// characters in <paramref name="seq"/>.
    /// </summary>
    /// <param name="bases">Fragment length in characters.</param>
    /// <param name="seq">Sequence to scan.</param>
    /// <returns>
    /// A map from fragment to number of occurrences; empty when
    /// <paramref name="bases"/> is greater than the sequence length.
    /// </returns>
    static Dictionary<string,int> BaseCounts(int bases, string seq)
    {
        var counts = new Dictionary<string,int>();
        // Number of start positions at which a full fragment still fits.
        int size = seq.Length + 1 - bases;
        for (int i = 0; i < size; i++) {
            var nucleo = seq.Substring(i, bases);
            counts[nucleo] = counts.TryGetValue(nucleo, out var seen) ? seen + 1 : 1;
        }
        return counts;
    }

    /// <summary>
    /// Computes the percentage frequency of every fragment of
    /// <paramref name="bases"/> characters in <paramref name="seq"/>.
    /// </summary>
    /// <param name="bases">Fragment length in characters.</param>
    /// <param name="seq">Sequence to scan.</param>
    /// <returns>
    /// Fragment/percentage pairs ordered by descending count; fragments with
    /// equal counts are ordered by ascending ordinal key.
    /// </returns>
    static List<KeyValuePair<string,double>> SortedFreq(int bases, string seq)
    {
        int size = seq.Length + 1 - bases;
        var counts = BaseCounts(bases, seq).ToList();

        // List<T>.Sort is not a stable sort, so ties need an explicit
        // secondary key to make the output order deterministic.
        counts.Sort((a, b) => {
            int byCount = b.Value.CompareTo(a.Value);
            return byCount != 0 ? byCount : string.CompareOrdinal(a.Key, b.Key);
        });

        var freqs = new List<KeyValuePair<string,double>>(counts.Count);
        foreach (var kvp in counts) {
            // counts is empty whenever size <= 0, so this never divides by zero.
            freqs.Add(new KeyValuePair<string,double>(
                kvp.Key, 100.0 * kvp.Value / size));
        }
        return freqs;
    }

    /// <summary>
    /// Returns how many times <paramref name="code"/> occurs in
    /// <paramref name="seq"/>, counting overlapping occurrences.
    /// </summary>
    /// <param name="code">Fragment to look up.</param>
    /// <param name="seq">Sequence to scan.</param>
    /// <returns>The occurrence count, or 0 when the fragment is absent.</returns>
    static int SpecificCount(string code, string seq)
    {
        // TryGetValue leaves the out value at default(int) == 0 on a miss.
        BaseCounts(code.Length, seq).TryGetValue(code, out int found);
        return found;
    }

    /// <summary>
    /// Program entry point: reads the sequence from standard input and writes
    /// the frequency tables and fragment counts to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        // Upper-case with the invariant culture so the result does not depend
        // on the machine's locale.
        var seq = string.Concat(SeqLines().Select(s => s.ToUpperInvariant()));

        // Fully qualified so the block does not need an extra using directive.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (int fragmentLength in new[] { 1, 2 }) {
            foreach (KeyValuePair<string,double> kvp in SortedFreq(fragmentLength, seq)) {
                // Invariant formatting keeps '.' as the decimal separator.
                Console.WriteLine(
                    string.Format(invariant, "{0} {1:f3}", kvp.Key, kvp.Value));
            }
            Console.WriteLine("");
        }

        foreach (string code in new[] { "GGT", "GGTA", "GGTATT",
                                        "GGTATTTTAATT", "GGTATTTTAATTTATAGT" }) {
            Console.WriteLine(
                string.Format(invariant, "{0}\t{1}", SpecificCount(code, seq), code));
        }
    }
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
.NET SDK 9.0.100
Host Version: 9.0.0
Commit: 9d5a6a9aa4
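<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<ServerGarbageCollection>true</ServerGarbageCollection>
<ConcurrentGarbageCollection>true</ConcurrentGarbageCollection>
<PublishAot>true</PublishAot>
<OptimizationPreference>Speed</OptimizationPreference>
<IlcInstructionSet>native</IlcInstructionSet>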
Thu, 14 Nov 2024 23:00:16 GMT
MAKE:
cp knucleotide.csharpaot-8.csharpaot Program.cs
cp Include/csharpaot/program.csproj .
mkdir obj
cp Include/csharpaot/project.assets.json ./obj
/opt/src/dotnet-sdk-9.0.100/dotnet publish
Determining projects to restore...
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/program.csproj : warning NU1900: Error occurred while getting package vulnerability data: Unable to load the service index for source https://api.nuget.org/v3/index.json.
Restored /home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/program.csproj (in 6.21 sec).
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/program.csproj : warning NU1900: Error occurred while getting package vulnerability data: Unable to load the service index for source https://api.nuget.org/v3/index.json.
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/Program.cs(17,22): warning CS8600: Converting null literal or possible null value to non-nullable type. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/program.csproj]
/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/Program.cs(21,22): warning CS8600: Converting null literal or possible null value to non-nullable type. [/home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/program.csproj]
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/bin/Release/net9.0/linux-x64/program.dll
Generating native code
program -> /home/dunham/all-benchmarksgame/benchmarksgame_i53330/knucleotide/tmp/bin/Release/net9.0/linux-x64/publish/
32.98s to complete and log all make actions
COMMAND LINE:
./bin/Release/net9.0/linux-x64/native/program 0 < knucleotide-input25000000.txt
PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754
AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902
1471758 GGT
446535 GGTA
47336 GGTATT
893 GGTATTTTAATT
893 GGTATTTTAATTTATAGT