The Computer Language
23.03 Benchmarks Game

k-nucleotide Matz's Ruby #7 program

source code

# The Computer Language Benchmarks Game
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
#
# contributed by Aaron Tavistock
# modified by Isaac Gouy because String#byteslice is not supported in Ruby 1.8.7

# Builds @frequencies by counting k-mers of @seq for every distinct key
# length that occurs in +keys+.
#
# One thread is started per distinct length; each thread only calls
# key_frequency and returns its table. The tables are collected with
# Thread#value and merged on the calling thread, so the shared
# @frequencies hash is never mutated by two threads at the same time.
#
# keys - Array of Strings; only their sizes are used here.
#
# Returns @frequencies, the merged Hash of everything key_frequency returned.
def find_frequencies(keys)
  @frequencies = {}

  workers = keys.map(&:size).uniq.map do |key_length|
    Thread.new { key_frequency(key_length, @seq) }
  end

  # Thread#value joins the thread and re-raises any exception it died with.
  workers.each { |worker| @frequencies.merge!(worker.value) }
  @frequencies
end

# Runs original_key_frequency(key_length, seq) in a forked child process and
# returns the child's result to the caller.
#
# The child marshals its result into a pipe. The parent first reads the whole
# pipe into a String and only then unmarshals it: calling Marshal.load directly
# on the pipe from several threads at once failed on Ruby 1.8.7 with
# "Marshal.load reentered at getc".
#
# key_length - Integer length of the k-mers to count.
# seq        - String sequence passed through to original_key_frequency.
#
# Returns whatever original_key_frequency returned in the child.
# Marshal.load raises if the child wrote nothing (e.g. it died early).
def forking_key_frequency(key_length, seq)
  reader, writer = IO.pipe

  pid = Process.fork do
    begin
      reader.close
      results = original_key_frequency(key_length, seq)
      Marshal.dump(results, writer)
    ensure
      writer.close
    end
  end

  # The parent must drop its write end, otherwise the read below never sees EOF.
  writer.close
  begin
    payload = reader.read
  ensure
    reader.close
    # Reap the child even when reading failed so no zombie is left behind.
    Process.waitpid(pid)
  end

  Marshal.load(payload)
end

# Counts every window of +key_length+ bytes in +seq+.
#
# key_length - Integer window size.
# seq        - String to scan.
#
# Returns a Hash (default value 0) mapping each window String to the number
# of times it occurs. The Hash is empty when key_length exceeds seq.size.
def key_frequency(key_length, seq)
  count = Hash.new(0)
  # The final window starts at seq.size - key_length, so that index must be
  # included; stopping one short would drop the last k-mer of the sequence.
  last_start = seq.size - key_length
  0.upto(last_start) do |start_index|
    # unpack with an @offset is used instead of byteslice for Ruby 1.8.7.
    key = seq.unpack("@#{start_index}a#{key_length}").first  # 1.8.7
    count[key] += 1
  end
  count
end

# Looks up the recorded count for each key in @frequencies.
#
# A key that never occurred has no entry in @frequencies; it is reported
# with a count of 0 rather than nil so callers can do arithmetic on it and
# print it.
#
# keys - Array of String keys.
#
# Returns an Array of [key, count] pairs in the same order as +keys+.
def frequency(keys)
  keys.map do |key|
    [key, @frequencies.fetch(key, 0)]
  end
end

# Formats each key's share of the sequence as "KEY pct", highest count first.
#
# keys - Array of String keys to report.
#
# Returns an Array of Strings: the upcased key, a space, and the count as a
# percentage of @seq.size with three decimals.
def percentage(keys)
  ranked = frequency(keys).sort { |left, right| right[1] <=> left[1] }
  ranked.map do |name, hits|
    share = (hits * 100).to_f / @seq.size
    format("%s %.3f", name.upcase, share)
  end
end

# Formats each key's count as "count<TAB>KEY", shortest key first.
#
# keys - Array of String keys to report.
#
# Returns an Array of Strings.
def count(keys)
  by_length = frequency(keys).sort_by { |pair| pair[0].size }
  by_length.map { |name, hits| "#{hits}\t#{name.upcase}" }
end

# Reads all of standard input and stores everything after +marker+ in @seq,
# with newlines and spaces removed.
#
# $stdin is read (rather than the STDIN constant) so input can be redirected.
#
# marker - String that precedes the sequence data in the input.
#
# Returns @seq, a frozen String.
# Raises ArgumentError when +marker+ does not occur in the input.
def load_sequence(marker)
  input = $stdin.read
  marker_idx = input.index(marker)
  raise ArgumentError, "marker #{marker.inspect} not found in input" if marker_idx.nil?

  # Keep everything from just past the marker to the end of the input.
  @seq = input[(marker_idx + marker.size)..-1]
  @seq.delete!("\n ")
  @seq.freeze
end

# Everywhere except the 'java' platform (JRuby), reroute key_frequency through
# the forking wrapper. The plain counting method stays reachable as
# original_key_frequency, which is what the wrapper calls in the child.
unless RUBY_PLATFORM == 'java'
  class << self
    alias_method :original_key_frequency, :key_frequency
    alias_method :key_frequency, :forking_key_frequency
  end
end

# Keys to report: the four single nucleotides, every ordered pair of them,
# and a fixed list of longer chains.
singles = %w(a t c g)
doubles = singles.map { |first| singles.map { |second| first + second } }.flatten
chains  = %w(ggt ggta ggtattttaatt ggtattttaatttatagt ggtatt).sort_by { |chain| chain.size }

# Load the sequence from standard input and count every key length needed.
load_sequence('>THREE Homo sapiens frequency')
find_frequencies(singles + doubles + chains)

# Two percentage tables, each followed by a blank line, then the chain counts.
[singles, doubles].each do |group|
  print percentage(group).join("\n"), "\n\n"
end
print count(chains).join("\n"), "\n"
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
ruby 1.8.7 (2014-01-28 patchlevel 376) [x86_64-linux]



Sat, 11 Feb 2023 01:10:27 GMT

COMMAND LINE:
/usr/share/rvm/rubies/ruby-1.8.7-head/bin/ruby  knucleotide.mri-7.mri 0 < knucleotide-input250000.txt

PROGRAM FAILED 


PROGRAM OUTPUT:

knucleotide.mri-7.mri:37:in `load': Marshal.load reentered at getc (RuntimeError)
	from knucleotide.mri-7.mri:101:in `join'
	from knucleotide.mri-7.mri:101:in `to_proc'
	from knucleotide.mri-7.mri:18:in `each'
	from knucleotide.mri-7.mri:18:in `find_frequencies'
	from knucleotide.mri-7.mri:97