source code
/* The Computer Language Benchmarks Game
* https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
*
* regex-dna program contributed by The Go Authors.
* modified by Tylor Arndt.
* modified by Chandra Sekar S to use optimized PCRE binding.
* modified by Matt Dellandrea.
* modified by Pavel Griaznov to use PCRE JIT compilation.
*/
package main
import (
"fmt"
"io/ioutil"
"os"
"runtime"
"github.com/GRbit/go-pcre"
)
// substitution pairs a regular-expression pattern with the literal text
// that is written in place of each of its matches.
type substitution struct {
	pattern, replacement string
}
// countMatches returns how many times the JIT-compiled pattern pat matches
// in b, scanning left to right and resuming each search just past the end of
// the previous match. It panics (via MustCompileJIT) if pat does not compile.
func countMatches(pat string, b []byte) int {
	re := pcre.MustCompileJIT(pat, 0, pcre.STUDY_JIT_COMPILE)
	// Creating the matcher already performs the first match attempt.
	matcher := re.Matcher(b, 0)

	count := 0
	found := matcher.Matches
	for found {
		count++
		// Drop everything up to the end offset of the current match so the
		// next attempt only sees the unscanned tail.
		b = b[matcher.Index()[1]:]
		found = matcher.Match(b, 0)
	}
	return count
}
// main reads all of standard input, removes every newline together with any
// text running from a '>' to the end of its line, and then, concurrently,
// counts the matches of each variant pattern and applies the substitution
// list in order. It prints one "pattern count" line per variant followed by
// a blank line and three lengths: the raw input, the stripped sequence, and
// the sequence after all substitutions. A read failure is reported on
// standard error and the process exits with status 2.
func main() {
	patterns := []string{
		"agggtaaa|tttaccct",
		"[cgt]gggtaaa|tttaccc[acg]",
		"a[act]ggtaaa|tttacc[agt]t",
		"ag[act]gtaaa|tttac[agt]ct",
		"agg[act]taaa|ttta[agt]cct",
		"aggg[acg]aaa|ttt[cgt]ccct",
		"agggt[cgt]aa|tt[acg]accct",
		"agggta[cgt]a|t[acg]taccct",
		"agggtaa[cgt]|[acg]ttaccct",
	}
	rewrites := []substitution{
		{"tHa[Nt]", "<4>"},
		{"aND|caN|Ha[DS]|WaS", "<3>"},
		{"a[NSt]|BY", "<2>"},
		{"<[^>]*>", "|"},
		{"\\|[^|][^|]*\\|", "-"},
	}

	runtime.GOMAXPROCS(runtime.NumCPU())

	input, err := ioutil.ReadAll(os.Stdin)
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
		os.Exit(2)
	}
	rawLen := len(input)

	// Strip the '>' description lines and every newline, leaving only the
	// sequence data.
	stripper := pcre.MustCompileJIT("(>[^\n]*)?\n", 0, pcre.STUDY_JIT_COMPILE)
	seq := stripper.ReplaceAll(input, []byte{}, 0)
	seqLen := len(seq)

	// One goroutine and one result channel per variant; keeping the channels
	// in pattern order lets the report be printed in that same order no
	// matter which count finishes first.
	counts := make([]chan int, 0, len(patterns))
	for _, p := range patterns {
		ch := make(chan int)
		counts = append(counts, ch)
		go func(out chan int, pat string) {
			out <- countMatches(pat, seq)
		}(ch, p)
	}

	// The substitutions feed into one another, so they run sequentially in
	// their own goroutine alongside the counting goroutines.
	finalLen := make(chan int)
	go func(data []byte) {
		for _, s := range rewrites {
			re := pcre.MustCompileJIT(s.pattern, 0, pcre.STUDY_JIT_COMPILE)
			data = re.ReplaceAll(data, []byte(s.replacement), 0)
		}
		finalLen <- len(data)
	}(seq)

	for i, p := range patterns {
		fmt.Printf("%s %d\n", p, <-counts[i])
	}
	fmt.Printf("\n%d\n%d\n%d\n", rawLen, seqLen, <-finalLen)
}
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
go version go1.14 linux/amd64
Sat, 16 May 2020 21:19:34 GMT
MAKE:
/opt/src/go1.14.linux-amd64/go/bin/go build -o regexredux.go-5.go_run
2.22s to complete and log all make actions
COMMAND LINE:
./regexredux.go-5.go_run 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361