source code
/* The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
contributed by Filip Sajdak
*/
#include <pcre.h>
#include <array>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
/// Creates an empty container with storage reserved up front.
///
/// @tparam T     default-constructible type offering `reserve(size)`
///               (for example `std::string`).
/// @param  size  capacity passed to `T::reserve`.
/// @return the default-constructed container after the `reserve` call.
template <typename T>
auto reserve(size_t size) {
    T container;
    container.reserve(size);
    return container;
}
/// Reads `in` until it stops being readable and returns everything that was
/// read as a single string.
///
/// @tparam initial_size  capacity reserved for the result before reading.
/// @tparam buffer_size   size in bytes of the intermediate read buffer; must
///                       be positive, otherwise the read loop could never
///                       reach end-of-stream.
/// @param  in            stream to drain.
/// @return the bytes read, in stream order.
template <auto initial_size = 16384, auto buffer_size = initial_size>
auto load(std::istream& in) {
    static_assert(buffer_size > 0, "load() needs a non-empty read buffer");

    std::string str;
    str.reserve(initial_size);

    // std::array lives in <array>; the header must be included explicitly,
    // a forward declaration pulled in by other headers is not enough.
    std::array<char, buffer_size> buffer{};
    while (in) {
        in.read(buffer.data(), buffer.size());
        // A short final read() puts the stream into a failed state, but
        // gcount() still reports how many bytes were stored in the buffer.
        str.append(buffer.data(), static_cast<std::size_t>(in.gcount()));
    }
    return str;
}
/// Wraps a raw pointer in a `std::unique_ptr` that releases it with `deleter`.
///
/// The deleter is stored by value (`std::decay_t<Deleter>`): functions decay
/// to function pointers and lvalue callables are copied. Deducing the deleter
/// type straight from the forwarding reference would make the smart pointer
/// hold a *reference* to whatever lvalue the caller passed, which dangles as
/// soon as that object goes out of scope.
///
/// @param ptr      pointer to take ownership of; may be null, in which case
///                 the deleter is never invoked.
/// @param deleter  callable invoked as `deleter(ptr)` on destruction.
/// @return `std::unique_ptr<T, std::decay_t<Deleter>>` owning `ptr`.
template<typename T, typename Deleter>
auto make_unique_with_deleter(T* ptr, Deleter&& deleter)
{
    using StoredDeleter = std::decay_t<Deleter>;
    return std::unique_ptr<T, StoredDeleter>(ptr, std::forward<Deleter>(deleter));
}
auto make_regex(const std::string_view pattern) {
char const* error;
int offset;
return make_unique_with_deleter(
pcre_compile(pattern.data(), 0, &error, &offset, NULL), pcre_free);
}
auto make_aid(const pcre* regex) {
char const* error;
return make_unique_with_deleter(
pcre_study(regex, PCRE_STUDY_JIT_COMPILE, &error), pcre_free_study);
}
/// Appends to `output` a copy of `source` in which every match of `pattern`
/// has been replaced by `replacement`.
///
/// Matching is done with `pcre_jit_exec`, scanning left to right and resuming
/// after each match. A zero-length match is replaced too; the byte following
/// it is then copied verbatim and the scan moves past it, so the loop always
/// makes progress. If the first `pcre_jit_exec` call fails, `source` is
/// appended unchanged.
///
/// @param pattern      regular expression to search for.
/// @param replacement  text inserted in place of each match (taken literally).
/// @param source       text to scan; its length is passed to PCRE as an `int`.
/// @param output       string the result is appended to (not cleared first).
/// @param stack        JIT stack handed to `pcre_jit_exec`.
static void replace(const std::string_view pattern, const std::string_view replacement,
                    const std::string_view source, std::string& output,
                    pcre_jit_stack* const stack) {
    const auto regex = make_regex(pattern);
    const auto aid = make_aid(regex.get());
    const int length = static_cast<int>(source.size());

    int pos = 0;
    // Room for one (start, end) offset pair; PCRE requires the vector size to
    // be a multiple of three.
    int match[3];
    while (pcre_jit_exec(regex.get(), aid.get(), source.data(), length, pos, 0,
                         match, 3, stack) >= 0) {
        // Text between the previous match and this one is copied unchanged.
        output.append(source.data() + pos, static_cast<std::size_t>(match[0] - pos));
        output.append(replacement.data(), replacement.size());
        pos = match[1];

        if (match[1] == match[0]) {
            // Empty match: searching again from the same offset would find it
            // forever. Copy one byte and step past it instead.
            if (pos >= length) {
                break;
            }
            output.push_back(source[static_cast<std::size_t>(pos)]);
            ++pos;
        }
    }
    // Whatever follows the last match.
    output.append(source.data() + pos, static_cast<std::size_t>(length - pos));
}
// Program entry point: reads all of standard input, builds `sequences` from it,
// prints how many times each pattern in count_Info matches `sequences`, and
// finally prints three lengths: the raw input, `sequences`, and the text left
// after applying every replace_Info substitution in turn.
int main(void){
// Turn off synchronisation between the C++ streams and C stdio.
std::ios::sync_with_stdio(false);
// count_Info: patterns whose matches in `sequences` are counted and printed.
// replace_Info: {pattern, replacement} pairs, applied one after another.
char const * const count_Info[]={
"agggtaaa|tttaccct",
"[cgt]gggtaaa|tttaccc[acg]",
"a[act]ggtaaa|tttacc[agt]t",
"ag[act]gtaaa|tttac[agt]ct",
"agg[act]taaa|ttta[agt]cct",
"aggg[acg]aaa|ttt[cgt]ccct",
"agggt[cgt]aa|tt[acg]accct",
"agggta[cgt]a|t[acg]taccct",
"agggtaa[cgt]|[acg]ttaccct"
}, * const replace_Info[][2]={
{"tHa[Nt]", "<4>"},
{"aND|caN|Ha[DS]|WaS", "<3>"},
{"a[NSt]|BY", "<2>"},
{"<[^>]*>", "|"},
{"\\|[^|][^|]*\\|", "-"}
};
auto sequences = reserve<std::string>(16384);
// Written by the replacement section below, read after the parallel region.
size_t postreplace_Size = 0;
std::string input = load(std::cin);
#pragma omp parallel
{
// Declared inside the parallel region, so every thread allocates (and, on
// leaving the region, frees) its own JIT stack.
auto stack = make_unique_with_deleter(
pcre_jit_stack_alloc(16384, 16384), pcre_jit_stack_free);
// One thread fills `sequences` with `input` minus every match of
// ">.*\n|\n". This `single` has no `nowait`, so the other threads wait at its
// end before they start reading `sequences`.
#pragma omp single
{
replace(">.*\\n|\\n", "", input, sequences, stack.get());
}
// One thread runs the chain of substitutions on a private copy of
// `sequences`; `nowait` lets the remaining threads go straight on to the
// counting loop.
#pragma omp single nowait
{
auto prereplace_String = sequences;
auto postreplace_String = reserve<std::string>(sequences.capacity());
for ( const auto& [pattern, replacement] : replace_Info ) {
postreplace_String.clear();
replace(pattern, replacement,
prereplace_String, postreplace_String, stack.get());
// The output of this pass becomes the input of the next one.
std::swap(prereplace_String, postreplace_String);
}
// After the final swap the end result sits in prereplace_String.
postreplace_Size = prereplace_String.size();
}
// The count patterns are distributed dynamically over the threads; each
// iteration compiles its own pattern and counts the matches in `sequences`.
#pragma omp for schedule(dynamic) ordered
for(auto i=0u; i < std::size(count_Info); i++){
auto regex = make_regex(count_Info[i]);
auto aid = make_aid(regex.get());
int count = 0;
// Each search resumes at the end offset of the previous match.
for(int pos = 0, match[3]; pcre_jit_exec(regex.get(), aid.get(),
sequences.data(), sequences.size(), pos, 0, match, 3,
stack.get()) >= 0; pos=match[1]) {
++count;
}
// `ordered` makes the result lines come out in count_Info order.
#pragma omp ordered
printf("%s %d\n", count_Info[i], count);
}
}
// Lengths of: the raw input, `sequences`, and the text after all substitutions.
printf("\n%zu\n%zu\n%zu\n", input.size(), sequences.size(), postreplace_Size);
return 0;
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
g++ (Ubuntu 14.2.0-4ubuntu2) 14.2.0
Tue, 22 Oct 2024 21:13:39 GMT
MAKE:
/usr/bin/g++ -c -pipe -O3 -fomit-frame-pointer -march=ivybridge -std=c++17 -fopenmp -flto regexredux.gpp-4.c++ -o regexredux.gpp-4.c++.o && \
/usr/bin/g++ regexredux.gpp-4.c++.o -o regexredux.gpp-4.gpp_run -fopenmp -flto -lpcre
regexredux.gpp-4.c++: In instantiation of ‘auto load(std::istream&) [with auto initial_size = 16384; auto buffer_size = 16384; std::istream = std::basic_istream<char>]’:
regexredux.gpp-4.c++:94:28: required from here
94 | std::string input = load(std::cin);
| ~~~~^~~~~~~~~~
regexredux.gpp-4.c++:23:26: error: invalid use of incomplete type ‘struct std::array<char, 16384>’
23 | auto buffer = std::array<char, buffer_size>();
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from /usr/include/c++/14/bits/stl_algobase.h:64,
from /usr/include/c++/14/string:51,
from /usr/include/c++/14/bits/locale_classes.h:40,
from /usr/include/c++/14/bits/ios_base.h:41,
from /usr/include/c++/14/ios:44,
from /usr/include/c++/14/ostream:40,
from /usr/include/c++/14/iostream:41,
from regexredux.gpp-4.c++:8:
/usr/include/c++/14/bits/stl_pair.h:99:12: note: declaration of ‘struct std::array<char, 16384>’
99 | struct array;
| ^~~~~
make: [/home/dunham/all-benchmarksgame/2000-benchmarksgame/nanobench/makefiles/u64q.programs.Makefile:54: regexredux.gpp-4.gpp_run] Error 1 (ignored)
rm regexredux.gpp-4.c++
2.45s to complete and log all make actions
COMMAND LINE:
./regexredux.gpp-4.gpp_run 0 < regexredux-input50000.txt
MAKE ERROR