The Computer Language
24.11 Benchmarks Game

reverse-complement Python 3 #5 program

source code

# The Computer Language Benchmarks Game
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
#
# Contributed by Jeremy Zerfas.

from multiprocessing import Queue, Semaphore, Condition, Value, Process
from sys import stdin, stdout
# See comment farther down below regarding why it might be a good idea to call
# set_inheritable() with a file descriptor for stdin.
from os import cpu_count, read, write #, set_inheritable


def _write_All(file_Descriptor, data):
	"""Write every byte of data to file_Descriptor.

	os.write() returns the number of bytes it actually wrote, which may be
	fewer than the number requested, so keep writing the unwritten remainder
	until nothing is left.
	"""

	# A memoryview lets us drop the already written part without copying the
	# (potentially very large) remainder.
	remaining_Data=memoryview(data)
	while remaining_Data:
		bytes_Written=write(file_Descriptor, remaining_Data)
		remaining_Data=remaining_Data[bytes_Written:]


def process_Sequences(info_For_Remaining_Sequences_Data_To_Process
  , stdin_File_Descriptor, CPU_Cores_Available_For_Processing_Sequences
  , sequence_Was_Written_Condition, next_Sequence_Number_To_Output):
	"""Reverse complement sequences read from stdin and output them in order.

	Repeatedly takes the already read start of the next sequence from
	info_For_Remaining_Sequences_Data_To_Process, keeps reading from
	stdin_File_Descriptor until the following ">" or the end of input, puts
	whatever follows the sequence back (possibly starting another process to
	work on it), and then reverse complements the sequence and writes it to
	stdout once all preceding sequences have been written.

	Parameters:
	  info_For_Remaining_Sequences_Data_To_Process: object with get()/put()
	    holding a (bytes, sequence number) tuple, or (None, None) once the
	    end of input has been reached.
	  stdin_File_Descriptor: file descriptor that input is read from with
	    os.read().
	  CPU_Cores_Available_For_Processing_Sequences: semaphore that must be
	    acquired (without blocking) before another process is started and
	    that is released once when this call finishes.
	  sequence_Was_Written_Condition: condition used for waiting for our turn
	    to output and for notifying other processes after each output.
	  next_Sequence_Number_To_Output: shared object whose value attribute is
	    the number of the next sequence that may be written.

	Returns None.
	"""

	# This table is used to convert characters into the complementing
	# character (always in upper case). Every character that is not listed
	# below gets converted to a space except for the newline. Some of the
	# reverse complementing code requires newlines to remain unchanged when
	# complemented which is why the newline is mapped to itself.
	COMPLEMENT_LOOKUP=bytearray(b" "*256)
	COMPLEMENT_LOOKUP[ord("\n")]=ord("\n")
	for code, complement in zip(b"ABCDGHKMNRSTUVWY", b"TVGHCDMKNYSAABWR"):
		COMPLEMENT_LOOKUP[code]=complement
		# Lower case codes are 32 higher than their upper case versions.
		COMPLEMENT_LOOKUP[code+32]=complement
	COMPLEMENT_LOOKUP=bytes(COMPLEMENT_LOOKUP)

	# This controls the size of reads from the input.
	READ_SIZE=65536

	# This defines how many characters (not including the newline) a full line
	# of input should have.
	LINE_LENGTH=60


	# Keep processing sequences until we exit this loop when we get None for
	# remaining_Sequences_Data_To_Process.
	while True:

		# Get the remaining_Sequences_Data_To_Process and the sequence_Number
		# for the first sequence in it (assuming there still is any
		# remaining_Sequences_Data_To_Process).
		remaining_Sequences_Data_To_Process, sequence_Number= \
		  info_For_Remaining_Sequences_Data_To_Process.get()


		# Break out of the loop if there is no
		# remaining_Sequences_Data_To_Process. Also put a None, None tuple back
		# into info_For_Remaining_Sequences_Data_To_Process so that other
		# processes can also determine that there is no more
		# remaining_Sequences_Data_To_Process and release the CPU core this
		# process was using for processing sequences.
		if remaining_Sequences_Data_To_Process is None:
			info_For_Remaining_Sequences_Data_To_Process.put((None, None))
			CPU_Cores_Available_For_Processing_Sequences.release()
			break


		# The next loop will be adding data for the current sequence and will
		# stop adding data for the current sequence once it encounters a ">"
		# that indicates the start of a new sequence or it reaches the end of
		# input. If remaining_Sequences_Data_To_Process isn't an empty string,
		# then the first character will either be a non-">" character for data
		# that showed up before the first sequence header or a ">" for the start
		# of the current sequence. We need to remove the first character if it's
		# a ">" so that the next loop can function properly and if it's a
		# non-">" character we'll just be ignoring it later anyway (any data
		# before the initial sequence header is considered part of sequence zero
		# which the later code knows to just ignore). So regardless of what
		# character it is we just end up removing it from
		# remaining_Sequences_Data_To_Process and adding it to sequence.
		sequence=bytearray()
		sequence+=remaining_Sequences_Data_To_Process[0:1]
		remaining_Sequences_Data_To_Process= \
		  remaining_Sequences_Data_To_Process[1:]

		# Keep adding data from remaining_Sequences_Data_To_Process and then
		# from stdin to sequence until we encounter a ">" indicating the start
		# of a new sequence or we encounter the end of input.
		while True:

			# If we encounter a ">" then add everything before it to the current
			# sequence and add the ">" and everything following it to
			# remaining_Sequences_Data_To_Process which will then be added to
			# info_For_Remaining_Sequences_Data_To_Process later.
			if b">" in remaining_Sequences_Data_To_Process:
				preceding_Bytes, _, following_Bytes= \
				  remaining_Sequences_Data_To_Process.partition(b">")
				sequence+=preceding_Bytes
				remaining_Sequences_Data_To_Process=b">"+following_Bytes
				break

			sequence+=remaining_Sequences_Data_To_Process

			remaining_Sequences_Data_To_Process=read(stdin_File_Descriptor
			  , READ_SIZE)

			# Exit the loop if there is no more
			# remaining_Sequences_Data_To_Process. The following code will then
			# process any non-zero sequences and also add a None, None tuple to
			# info_For_Remaining_Sequences_Data_To_Process so that processes can
			# determine that the end of input has been reached.
			if not remaining_Sequences_Data_To_Process:
				break


		# If there is still any remaining_Sequences_Data_To_Process, add it to
		# info_For_Remaining_Sequences_Data_To_Process for one of the processes
		# to handle later. Otherwise add a None, None tuple to
		# info_For_Remaining_Sequences_Data_To_Process so that processes can
		# determine that the end of input has been reached.
		if remaining_Sequences_Data_To_Process:
			info_For_Remaining_Sequences_Data_To_Process.put( \
			  (remaining_Sequences_Data_To_Process, sequence_Number+1))

			# If the current sequence is anything but sequence zero, then also
			# try acquiring a CPU core for processing sequences and start
			# another process to process_Sequences while we start reverse
			# complementing and outputting the current sequence in just a moment.
			if sequence_Number>0 \
			  and CPU_Cores_Available_For_Processing_Sequences.acquire(False):
				Process(target=process_Sequences, args=(
				  info_For_Remaining_Sequences_Data_To_Process
				  , stdin_File_Descriptor
				  , CPU_Cores_Available_For_Processing_Sequences
				  , sequence_Was_Written_Condition
				  , next_Sequence_Number_To_Output)).start()
		else:
			info_For_Remaining_Sequences_Data_To_Process.put((None, None))


		# If the current sequence is anything but sequence zero, then now we
		# start reverse complementing and outputting that sequence.
		if sequence_Number>0:

			# Get the sequence_Header and save everything after the new line in
			# the header to temporary_Sequence_Data.
			sequence_Header, _, temporary_Sequence_Data= \
			  sequence.partition(b"\n")
			del sequence


			# Check to see if all the lines in the sequence data are an optimal
			# length (LINE_LENGTH characters plus a newline). If all the lines
			# are an optimal length, then this makes it much easier to reverse
			# the sequence since we won't need to deal with having to move
			# newlines around to their correct positions during/after the
			# reversal step. This is even more important for something like
			# CPython since this will allow us to use a library function (that
			# is typically implemented in a faster programming language) to do
			# the entire reversal step instead of having to use slower Python
			# code to do at least part of the reversal step.
			#
			# Only checking that the total length is a multiple of
			# LINE_LENGTH+1 is not enough (for example a 30 character line
			# followed by a 29 character line also adds up to 61 bytes), so we
			# additionally verify that every newline is located exactly at the
			# end of a full length line: there must be one newline per full
			# line and each full line must end with one.
			full_Line_Count, leftover_Bytes= \
			  divmod(len(temporary_Sequence_Data), LINE_LENGTH+1)
			if leftover_Bytes==0 \
			  and temporary_Sequence_Data.count(b"\n")==full_Line_Count \
			  and temporary_Sequence_Data[LINE_LENGTH::LINE_LENGTH+1] \
			  ==b"\n"*full_Line_Count:

				modified_Sequence_Data= \
				  temporary_Sequence_Data.translate(COMPLEMENT_LOOKUP)
				modified_Sequence_Data.reverse()
				# The newline that was originally at the end of the
				# temporary_Sequence_Data will now be at the start of the
				# modified_Sequence_Data which is good since the newline that
				# originally was at the start of the sequence data got
				# removed while partitioning the sequence above. Now just append
				# another newline to the end of the modified_Sequence_Data since
				# the newline that was originally at the start of the sequence
				# data got removed and hence never was moved to the end of the
				# modified_Sequence_Data when it was reversed.
				modified_Sequence_Data+=b"\n"

			else:

				# The lines in the sequence data are not an optimal length so in
				# this case we can't just have library functions do all of the
				# work and we'll instead need to use a little more code to help
				# out with moving newlines into their correct positions. We
				# start out by removing all newlines (that are in incorrect
				# positions) and then after reversing the stripped sequence data
				# we will then create the correct modified_Sequence_Data by
				# continuously appending data from the stripped sequence data
				# with newlines at the appropriate positions.
				temporary_Sequence_Data= \
				  temporary_Sequence_Data.translate(COMPLEMENT_LOOKUP, b"\n")
				temporary_Sequence_Data.reverse()
				# In addition to adding newlines after each line of the
				# modified_Sequence_Data, we also need to put one more at the
				# start.
				modified_Sequence_Data=bytearray(b"\n")
				for i in range(0, len(temporary_Sequence_Data), LINE_LENGTH):
					modified_Sequence_Data+= \
					  temporary_Sequence_Data[i:i+LINE_LENGTH]
					modified_Sequence_Data+=b"\n"

			del temporary_Sequence_Data


			# Wait for our turn to output the sequence_Header and
			# modified_Sequence_Data and then update
			# next_Sequence_Number_To_Output and notify any other processes that
			# may be waiting to output following sequences.
			with sequence_Was_Written_Condition:
				while next_Sequence_Number_To_Output.value<sequence_Number:
					sequence_Was_Written_Condition.wait()

				# A single os.write() call is allowed to write only part of the
				# data so _write_All() is used to make sure nothing gets lost.
				_write_All(stdout.fileno(), sequence_Header)
				_write_All(stdout.fileno(), modified_Sequence_Data)

				next_Sequence_Number_To_Output.value+=1
				sequence_Was_Written_Condition.notify_all()

			del modified_Sequence_Data


if __name__=="__main__":

	# Each Python process takes turns reading sequence data directly from the
	# stdin stream rather than passing lots of sequence data between
	# processes. CPython appears to reattach the sys.stdin file object to the
	# null device in other processes while leaving the underlying file
	# descriptor open, so the descriptor is recorded here and os.read() is used
	# on it later. Standard streams are currently inherited by default with
	# CPython; if that ever changes, calling
	# set_inheritable(stdin_File_Descriptor, True) right after this line would
	# make the inheritance explicit.
	stdin_File_Descriptor=stdin.fileno()

	# The semaphore limits how many additional processes may be started. It
	# starts at one less than the number of CPU cores reported by cpu_count()
	# because this main process also processes sequences. Be aware that
	# cpu_count() reports the cores that are online, which is not necessarily
	# the number available to this program (for example when taskset is used);
	# len(os.sched_getaffinity(0)) would be more accurate where it exists but
	# is less portable. Every process releases the semaphore once when it is
	# finished, which is also what lets the code at the bottom detect that all
	# of the input has been processed.
	CPU_Cores_Available_For_Processing_Sequences=Semaphore(
	  (cpu_count() or 1)-1)

	# The condition and the shared counter let processes wait for their turn
	# so that the sequences get output in the correct order. The first
	# sequence to output is sequence one.
	next_Sequence_Number_To_Output=Value("L", 1)
	sequence_Was_Written_Condition=Condition()

	# The queue carries a tuple of the already read start of the next
	# sequence(s) to process and the sequence number of the first sequence in
	# that data. A Queue is used since the tuples contain variable sized
	# strings, must be shared between multiple processes, and processes need
	# to be able to wait efficiently for something to be added when it is
	# empty. It is not used as much of a queue though, it only ever has at most
	# one item in it at any given time. It initially contains an empty string
	# (nothing has been read yet) with sequence number zero. Sequence zero is
	# anything that shows up before the initial sequence header in the input
	# and it does not get processed any further.
	info_For_Remaining_Sequences_Data_To_Process=Queue()
	info_For_Remaining_Sequences_Data_To_Process.put((b"", 0))


	# Start processing all the sequences in this process.
	process_Sequences(
	  info_For_Remaining_Sequences_Data_To_Process
	    =info_For_Remaining_Sequences_Data_To_Process
	  , stdin_File_Descriptor=stdin_File_Descriptor
	  , CPU_Cores_Available_For_Processing_Sequences
	    =CPU_Cores_Available_For_Processing_Sequences
	  , sequence_Was_Written_Condition=sequence_Was_Written_Condition
	  , next_Sequence_Number_To_Output=next_Sequence_Number_To_Output)


	# The main process could otherwise try exiting before its descendant
	# processes have finished or even before they have finished starting up.
	# The latter seems to sometimes cause problems with CPython (at least when
	# using the "spawn" process start method on Linux) and exiting before all
	# sequences are processed could also confuse other programs that monitor
	# this program. So wait here until every process is done, which is
	# indicated by them having released their CPU cores so that this main
	# process is able to acquire all of them.
	for _ in range(cpu_count() or 1):
		CPU_Cores_Available_For_Processing_Sequences.acquire()
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Python 3.13.0


 Sat, 12 Oct 2024 05:01:39 GMT

MAKE:
mv revcomp.python3-5.python3 revcomp.py
pyright .
0 errors, 0 warnings, 0 informations 

4.93s to complete and log all make actions

COMMAND LINE:
 /opt/src/Python-3.13.0/bin/python3 -OO revcomp.py 0 < revcomp-input100000001.txt

(TRUNCATED) PROGRAM OUTPUT:
>ONE Homo sapiens alu
CTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTC
GCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTC
AAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGC
CCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCT
CGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGG
CGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAG
TGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCT
GCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTT
GTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACC
TCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGC
GCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGA
TCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCC
GAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAG
AGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGC
CCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTT
GAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTG
CAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGAT
TACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCA
CCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTC
CCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTC
GCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTC
CCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGC
CACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAG
GCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGG
ATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCA
GGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCG
ATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGC
TAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAAC
TCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGA
GCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAG
TGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTC
AGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATT
TTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGG
TGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCG
GCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCG
GCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTA
GCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACG
GGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCC
TCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGAC
GGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACC
TCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAG
GCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATG
TTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAA
GTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCT
GTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGG
TTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCA
CGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGG
TCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTAC
AGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTG
GAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCT
CCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATT
TTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTG
ACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCAC
CGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCG
CGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCT
CCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAG
TAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATC
CGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTT
TTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCA
CTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGG
GATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTT
TCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGC
CTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGT
CTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGC
CTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCG
CGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGC
CAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCT
GGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGC
CCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAA
GCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCC
GGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCG
AACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCG
TGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTG
CAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGC
CTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGT
ATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTC
AGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGC
CCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATC
TCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGA
GTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAG
ACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCC
GCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGA
GACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCA
ACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTA
CAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACC
ATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCC
AAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGC
TCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCC
GGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCA
CCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGC
TGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGAT
TACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGG
CTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGAT
TCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTA
ATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTC
CTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGC
CACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTG
GCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAG
CCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTT
TAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTG
ATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGC
CTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGC
TCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGC
TGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGG
GTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTC
GGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGG
AGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTC
CGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGC
GCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTT
GGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGT
GCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGT
CGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTT
CAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACG
CCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTC
TCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAG
GCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGA
GTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCC
TGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTT
TGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGAC
CTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCG
CGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCG
ATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCC
CGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTA
GAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCG
CCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTT
TGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACT
GCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGA
TTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTC
ACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCT
CCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCT
CGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCT
CCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCG
CCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCA
GGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGG
GATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCC
AGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGC
GATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGG
CTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAA
CTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTG
AGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCA
GTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCT
CAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTAT
TTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAG
GTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCC
GGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTC
GGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGT
AGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGAC
GGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGC
CTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGA
CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC
CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA
GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT
GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA
AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC
TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG
GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC
ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG
GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA
CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT
GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC
TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT
TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT
GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA
CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC
GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC
TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA
GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT
CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGA