The Q6600
Benchmarks Game

k-nucleotide Java OpenJ9 #3 program

source code

/* The Computer Language Benchmarks Game
 https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

 contributed by James McIlree
 ByteString code thanks to Matthieu Bentot and The Anh Tran
 modified by Andy Fingerhut 
 */

import java.util.*;
import java.io.*;
import java.util.concurrent.*;

/**
 * k-nucleotide benchmark: reads the DNA sequence that follows the
 * {@code >THREE} header on standard input, counts fixed-length fragments
 * (k-mers) in parallel, and prints relative frequencies for 1- and 2-mers
 * followed by absolute counts for a handful of specific fragments.
 */
public class knucleotide {
    /**
     * Single-byte charset used for every byte/char conversion. The input
     * loop narrows each char with a {@code (byte)} cast, and ISO-8859-1 is
     * the exact inverse of that mapping, so results never depend on the
     * platform default charset.
     */
    private static final java.nio.charset.Charset LATIN1 =
            java.nio.charset.StandardCharsets.ISO_8859_1;

    /**
     * Builds one counting task per (fragment length, starting offset) pair.
     * For a length {@code k} there are {@code k} tasks with offsets
     * {@code 0..k-1}; together they visit every window of length {@code k}.
     * Tasks are appended in the order of {@code fragmentLengths}, offsets
     * ascending within each length.
     *
     * @param sequence        the nucleotide bytes shared (read-only) by all tasks
     * @param fragmentLengths the fragment lengths to count
     * @return the tasks, ready to be submitted to an executor
     */
    static ArrayList<Callable<Map<ByteString, ByteString>>> createFragmentTasks(final byte[] sequence, int[] fragmentLengths) {
        ArrayList<Callable<Map<ByteString, ByteString>>> tasks = new ArrayList<>();
        for (int fragmentLength : fragmentLengths) {
            for (int index = 0; index < fragmentLength; index++) {
                final int offset = index;
                final int finalFragmentLength = fragmentLength;
                tasks.add(new Callable<Map<ByteString, ByteString>>() {
                    @Override
                    public Map<ByteString, ByteString> call() {
                        return createFragmentMap(sequence, offset, finalFragmentLength);
                    }
                });
            }
        }
        return tasks;
    }

    /**
     * Counts the fragments of length {@code fragmentLength} that start at
     * {@code offset}, {@code offset + fragmentLength},
     * {@code offset + 2 * fragmentLength}, and so on.
     *
     * @param sequence       the nucleotide bytes
     * @param offset         index of the first fragment
     * @param fragmentLength length of every fragment, also the step between fragments
     * @return a map in which each distinct fragment is both key and value;
     *         the value's {@code count} field holds the number of occurrences
     */
    static Map<ByteString, ByteString> createFragmentMap(byte[] sequence, int offset, int fragmentLength) {
        HashMap<ByteString, ByteString> map = new HashMap<>();
        int lastIndex = sequence.length - fragmentLength + 1;
        // One scratch key is reused for lookups; a fresh one is allocated
        // only after the current one has been stored in the map.
        ByteString key = new ByteString(fragmentLength);
        for (int index = offset; index < lastIndex; index += fragmentLength) {
            key.calculateHash(sequence, index);
            ByteString fragment = map.get(key);
            if (fragment != null) {
                fragment.count++;
            } else {
                map.put(key, key);
                key = new ByteString(fragmentLength);
            }
        }

        return map;
    }

    /**
     * Adds the counts of {@code map2} into {@code map1}.
     *
     * <p>Destructive: {@code map1} is modified in place, and entries that
     * exist only in {@code map2} are inserted into {@code map1} by reference
     * rather than copied.
     *
     * @param map1 the accumulator, modified in place
     * @param map2 the counts to add
     * @return {@code map1}
     */
    static Map<ByteString, ByteString> sumTwoMaps(Map<ByteString, ByteString> map1, Map<ByteString, ByteString> map2) {
        for (Map.Entry<ByteString, ByteString> entry : map2.entrySet()) {
            ByteString sum = map1.get(entry.getKey());
            if (sum != null) {
                sum.count += entry.getValue().count;
            } else {
                map1.put(entry.getKey(), entry.getValue());
            }
        }
        return map1;
    }

    /**
     * Formats every fragment as {@code "<FRAGMENT> <percent>\n"} with three
     * decimals, ordered by {@link ByteString#compareTo}, and terminates the
     * listing with an empty line.
     *
     * @param totalCount  the number the counts are divided by to obtain percentages
     * @param frequencies fragment counts as produced by {@link #createFragmentMap}
     * @return the formatted listing
     */
    static String writeFrequencies(float totalCount, Map<ByteString, ByteString> frequencies) {
        SortedSet<ByteString> list = new TreeSet<>(frequencies.values());
        StringBuilder sb = new StringBuilder();
        for (ByteString k : list) {
            // Locale.ROOT keeps '.' as the decimal separator and plain ASCII
            // upper-casing whatever the JVM's default locale is.
            sb.append(String.format(Locale.ROOT, "%s %.3f\n",
                    k.toString().toUpperCase(Locale.ROOT),
                    (float) (k.count) * 100.0f / totalCount));
        }

        return sb.append('\n').toString();
    }

    /**
     * Sums the occurrences of {@code nucleotideFragment} over all task
     * results and formats them as {@code "<count>\t<FRAGMENT>\n"}.
     *
     * <p>Every result map is probed; maps built for another fragment length
     * cannot contain an equal key, so they contribute nothing.
     *
     * @param futures            the completed counting tasks
     * @param nucleotideFragment the fragment to look up, in the same case as the sequence
     * @return the formatted count line
     * @throws Exception if retrieving a task result fails
     */
    static String writeCount(List<Future<Map<ByteString, ByteString>>> futures, String nucleotideFragment) throws Exception {
        ByteString key = new ByteString(nucleotideFragment.length());
        key.calculateHash(nucleotideFragment.getBytes(LATIN1), 0);

        int count = 0;
        for (Future<Map<ByteString, ByteString>> future : futures) {
            ByteString temp = future.get().get(key);
            if (temp != null) {
                count += temp.count;
            }
        }

        return count + "\t" + nucleotideFragment.toUpperCase(Locale.ROOT) + '\n';
    }

    /**
     * Skips input up to and including the first line starting with
     * {@code >THREE}, then concatenates all remaining lines (without line
     * terminators) into one byte array, one byte per char.
     *
     * @param in the reader to consume
     * @return the sequence bytes; empty if the header is never found
     * @throws IOException if reading fails
     */
    private static byte[] readSequence(BufferedReader in) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            if (line.startsWith(">THREE")) {
                break;
            }
        }

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte[] buffer = new byte[100];
        while ((line = in.readLine()) != null) {
            int length = line.length();
            if (length > buffer.length) {
                buffer = new byte[length];
            }
            for (int i = 0; i < length; i++) {
                buffer[i] = (byte) line.charAt(i);
            }
            baos.write(buffer, 0, length);
        }
        return baos.toByteArray();
    }

    /**
     * Program entry point: reads the sequence from standard input, counts
     * fragments on a fixed-size thread pool, and prints the report to
     * standard output.
     *
     * @param args ignored
     * @throws Exception if reading the input or running a task fails
     */
    public static void main(String[] args) throws Exception {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in, LATIN1));
        byte[] sequence = readSequence(in);

        ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        List<Future<Map<ByteString, ByteString>>> futures;
        try {
            int[] fragmentLengths = { 1, 2, 3, 4, 6, 12, 18 };
            // invokeAll returns only after every task has completed.
            futures = pool.invokeAll(createFragmentTasks(sequence, fragmentLengths));
        } finally {
            // Always release the (non-daemon) workers, even if invokeAll throws.
            pool.shutdown();
        }

        StringBuilder sb = new StringBuilder();

        // Task order: index 0 is the single 1-mer task; indices 1 and 2 are
        // the two 2-mer tasks (offsets 0 and 1), which are merged here.
        sb.append(writeFrequencies(sequence.length, futures.get(0).get()));
        sb.append(writeFrequencies(sequence.length - 1, sumTwoMaps(futures.get(1).get(), futures.get(2).get())));

        String[] nucleotideFragments = { "ggt", "ggta", "ggtatt", "ggtattttaatt", "ggtattttaatttatagt" };
        for (String nucleotideFragment : nucleotideFragments) {
            sb.append(writeCount(futures, nucleotideFragment));
        }

        System.out.print(sb.toString());
    }

    /**
     * Fixed-length byte string with a cached hash code and an occurrence
     * counter. It serves as both key and value in the fragment maps.
     *
     * <p>Equality and hashing consider only the bytes; ordering considers
     * {@code count} first (descending) and the text second (ascending).
     */
    static final class ByteString implements Comparable<ByteString> {
        public int hash, count = 1;
        public final byte bytes[];

        /**
         * Creates a zero-filled string of {@code size} bytes with a count of 1.
         *
         * @param size the fixed length in bytes
         */
        public ByteString(int size) {
            bytes = new byte[size];
        }

        /**
         * Copies {@code bytes.length} bytes from {@code k} starting at
         * {@code offset} into this string and recomputes the cached hash.
         *
         * @param k      the source array
         * @param offset index of the first byte to copy
         */
        public void calculateHash(byte k[], int offset) {
            int temp = 0;
            for (int i = 0; i < bytes.length; i++) {
                byte b = k[offset + i];
                bytes[i] = b;
                temp = temp * 31 + b;
            }
            hash = temp;
        }

        /** Returns the hash cached by the last {@link #calculateHash} call. */
        @Override
        public int hashCode() {
            return hash;
        }

        /**
         * Two byte strings are equal when their byte contents are equal.
         * Returns {@code false} for {@code null} and for any other type.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ByteString)) {
                return false;
            }
            return Arrays.equals(bytes, ((ByteString) obj).bytes);
        }

        /**
         * Orders by descending {@code count}, then by ascending text.
         */
        @Override
        public int compareTo(ByteString other) {
            if (other.count != count) {
                return Integer.compare(other.count, count);
            } else {
                // Without this case, if there are two or more strings
                // with exactly the same count in a Map, then the
                // TreeSet constructor called in writeFrequencies will
                // only add the first one, and the rest will not
                // appear in the output.  Also this is required to
                // satisfy the rules of the k-nucleotide problem.
                return toString().compareTo(other.toString());
            }
        }

        /** Returns the bytes decoded one-to-one as ISO-8859-1 characters. */
        @Override
        public String toString() {
            return new String(bytes, LATIN1);
        }
    }
}
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
openjdk 14 2020-03-17
OpenJDK Runtime Environment AdoptOpenJDK (build 14+36)
Eclipse OpenJ9 VM AdoptOpenJDK
build openj9-0.19.0 (JIT enabled, AOT enabled)



Tue, 05 May 2020 01:42:07 GMT

MAKE:
mv knucleotide.openj9-3.openj9 knucleotide.java
/opt/src/openj9-openjdk-14/bin/javac -d . -cp .:/opt/src/java-libs/fastutil-8.1.0.jar knucleotide.java

5.14s to complete and log all make actions

COMMAND LINE:
/opt/src/openj9-openjdk-14/bin/java -Xshareclasses  -cp .:/opt/src/java-libs/fastutil-8.1.0.jar knucleotide 0 < knucleotide-input25000000.txt

PROGRAM OUTPUT:
A 30.295
T 30.151
C 19.800
G 19.754

AA 9.177
TA 9.132
AT 9.131
TT 9.091
CA 6.002
AC 6.001
AG 5.987
GA 5.984
CT 5.971
TC 5.971
GT 5.957
TG 5.956
CC 3.917
GC 3.911
CG 3.909
GG 3.902

1471758	GGT
446535	GGTA
47336	GGTATT
893	GGTATTTTAATT
893	GGTATTTTAATTTATAGT