# The Computer Language Benchmarks Game
# Contributed by Jeremy Zerfas
# WARNING: I normally do my programming in other languages. This may not be an
# optimal Python program. Please contribute a better program if you can make a
# better program.
from ctypes import c_char, c_char_p, c_int, c_size_t, c_uint32, \
byref, CDLL, create_string_buffer, POINTER
from ctypes.util import find_library
from multiprocessing import Pipe, Process
from multiprocessing.connection import wait
from multiprocessing.sharedctypes import RawArray
from os import cpu_count
from sys import stdin
# We'll be using PCRE2 for our regular expression needs instead of using
# Python's built-in regular expression engine because it is significantly
# faster.
# By default, Python assumes that C functions will return int's but that is not
# correct for the following functions which return pointers instead so we tell
# it the correct types here.
# Function for searching a src_String for a pattern, replacing it with some
# specified replacement, and storing the result in dst_String which is also
# returned along with the updated dst_String_Length.
def replace(pattern, replacement, src_String, src_String_Length,
# Compile the pattern and also enable JIT compilation to make matching
# faster.
regex=PCRE2.pcre2_compile_8(pattern, c_size_t(len(pattern)), c_uint32(0),
byref(c_int()), byref(c_size_t()), None)
# Convert dst_String_Length from an int to c_size_t so that the
# pcre2_substitute_8() function will be able to access it properly.
# Have PCRE2 do most of the replacement work.
PCRE2.pcre2_substitute_8(regex, src_String, c_size_t(src_String_Length),
c_size_t(0), c_uint32(0x100), None, None,
return dst_String, dst_String_Length.value
# This function is used as the main function for the worker subprocesses. The
# worker subprocesses communicate with the manager process via worker_Pipe. The
# worker subprocesses receive tasks from the manager process, complete the
# appropriate task, and then send back a result to the manager process (except
# when the manager process tells the worker subprocess to exit). Tasks are
# performed on the sequences string which is shared between all the processes
# using shared memory.
def process_Task(worker_Pipe, sequences, sequences_Length):
# When a worker subprocess first starts up, it sends None to the manager
# process as an indicator that the worker subprocess is ready to process
# tasks.
# Start an infinite loop to process received tasks accordingly until a
# request to exit is received.
# Wait for a request.
# If there is no task to do, then just exit the loop and let this
# subprocess exit.
if task is None:
# If the task contains a tuple and the first element is an int, then
# this is a task requesting that a match_Count be performed for
# pattern_To_Count and that this is for the element located at
# index_For_Pattern_To_Count in count_Info.
elif type(task) is int:
# Compile the pattern and also enable JIT compilation to make
# matching faster.
c_size_t(len(pattern_To_Count)), c_uint32(0), byref(c_int()),
# Count how many matches there are for pattern in sequences.
c_size_t(match.contents), c_uint32(0), match_Data, None)>=0:
# Send the result back to the manager process.
# If we got here, then this is a task requesting that replacements be
# made to the sequences string using all the (pattern, replacement)
# tuples in the task.
# We'll use two strings when doing all the replacements, searching
# for patterns in prereplace_String and using postreplace_String to
# store the string after the replacements have been made. After
# each iteration these two then get swapped. Make both strings 10%
# larger than the sequences string in order to accommodate some of
# the replacements causing sequences to grow and also copy the
# sequences string into prereplace_String for the initial iteration.
# Iterate through all the (pattern, replacement) tuples in the task.
for pattern, replacement in task:
# Swap prereplace_String and postreplace_String in preparation
# for the next iteration.
prereplace_String, postreplace_String= \
# If any replacements were made, they'll be in prereplace_String
# instead of postreplace_String because of the swap done after each
# iteration. Send its length back to the manager process.
if __name__ == '__main__':
# Read in input from stdin and also get the input_Length.
# Set up some shared memory for the sequences string so that it can be
# shared between all the processes and make it the same length as the
# input.
# Find all sequence descriptions and new lines in input, replace them
# with empty strings, and store the result in the sequences string.
sequences, sequences_Length=replace(b">.*\\n|\\n", b"", input, input_Length,
# We'll be using the sequences string instead of the input string from now
# on so delete our reference to it since this can often free up the memory
# that was used by it.
# Start a worker subprocess on each processor that is available to us and
# send each worker subprocess the sequences string & a worker_Pipe to use
# for communicating with the manager process.
for i in range(cpu_count() or 1):
args=(worker_Pipe, sequences, sequences_Length)).start()
# Wait for the first worker subprocess to send us a None object that
# indicates it's ready to start processing tasks and then have it start
# working on performing all the replacements serially.
# Now the manager process needs to start managing all the worker subprocesses
# by waiting for them to become ready to process tasks, handling any results
# that they send back, sending them any remaining counting tasks, and then
# finally telling them when it's OK for them to exit (when there are no more
# tasks to process).
# Wait for any one of the manager_Pipes to receive something.
for manager_Pipe in wait(manager_Pipes):
# If the result is an int, then it's the postreplace_Length that
# resulted after applying all the replacements that were specified.
if type(result) is int:
# If the result is a tuple, then it's the results from one of the
# counting tasks for the patterns in count_Info. The first element
# is the index of the pattern that the result is for and the second
# element is the number of matches for it. Add the number of matches
# to count_Info.
elif type(result) is tuple:
# Send the worker subprocess the index_For_Next_Count and pattern to
# work on if we haven't reached the end of count_Info yet.
# If we have reached the end of count_Info then there are no more
# tasks to start working on so just send the worker subprocess None
# to indicate it can exit and also stop keeping track of the
# manager_Pipe for it.
# Print the match_Count for each pattern in count_Info.
for pattern, match_Count in count_Info:
# Print the size of the original input, the size of the input without the
# sequence descriptions & new lines, and the size after having made all the
# replacements.