source code
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by Francois Green
import Foundation
import Dispatch
// Read everything available on standard input in one go.
let input = FileHandle.standardInput.readDataToEndOfFile()
// Size of the raw input, in bytes.
let inputLength = input.count
// Decode the bytes as UTF-8. The force unwrap traps if the data is not valid
// UTF-8. Declared `var` so the text can be reassigned after decoding.
var sequence = String(data: input, encoding: String.Encoding.utf8)!
/// Compiles `pattern` into an `NSRegularExpression` with no options set.
///
/// Compilation uses `try!`, so an invalid pattern traps instead of throwing.
/// The call sites visible in this file pass only string literals.
let regex: (String) -> NSRegularExpression = {
    try! NSRegularExpression(pattern: $0, options: [])
}
// Remove every run that starts with ">" and extends through the next newline,
// plus every remaining newline character.
//
// NSRange is measured in UTF-16 code units, so the range comes from the
// string's UTF-16 view. The raw input byte count only equals that length for
// pure-ASCII input; for anything else it is larger and the range would extend
// past the end of the string.
sequence = regex(">[^\n]*\n|\n").stringByReplacingMatches(
    in: sequence,
    options: [],
    range: NSRange(location: 0, length: sequence.utf16.count),
    withTemplate: "")
// Size of the cleaned text in UTF-8 bytes.
let codeLength = sequence.utf8.count
// UTF-8 byte count of the text after all substitutions below have been applied.
// It stays nil until the background task has finished.
var resultLength: Int?
let group = DispatchGroup()
// Submit the work with `async(group:)` so it is associated with the group at
// submission time. Calling `group.enter()` from inside the closure is racy:
// a `group.wait()` issued before the closure starts running would return
// immediately and `resultLength` would still be nil.
DispatchQueue.global(qos: .background).async(group: group) {
    let substitutions = [
        (pattern: "tHa[Nt]", template: "<4>"),
        (pattern: "aND|caN|Ha[DS]|WaS", template: "<3>"),
        (pattern: "a[NSt]|BY", template: "<2>"),
        (pattern: "<[^>]*>", template: "|"),
        (pattern: "\\|[^|][^|]*\\|", template: "-")
    ]
    // Each substitution runs on the output of the previous one, so the order of
    // the table above is significant.
    resultLength = substitutions.reduce(sequence) { buffer, substitution in
        regex(substitution.pattern).stringByReplacingMatches(
            in: buffer,
            options: [],
            range: NSRange(location: 0, length: buffer.utf16.count),
            withTemplate: substitution.template)
    }.utf8.count
}
// Count and print the number of matches of each variant pattern in `sequence`.
//
// NSRange is measured in UTF-16 code units, so the search range is taken from
// the UTF-16 view; a UTF-8 byte count is larger for any non-ASCII text and
// would reach past the end of the string. The range is the same for every
// pattern, so it is computed once.
let variantSearchRange = NSRange(location: 0, length: sequence.utf16.count)
for variant in [
    "agggtaaa|tttaccct",
    "[cgt]gggtaaa|tttaccc[acg]",
    "a[act]ggtaaa|tttacc[agt]t",
    "ag[act]gtaaa|tttac[agt]ct",
    "agg[act]taaa|ttta[agt]cct",
    "aggg[acg]aaa|ttt[cgt]ccct",
    "agggt[cgt]aa|tt[acg]accct",
    "agggta[cgt]a|t[acg]taccct",
    "agggtaa[cgt]|[acg]ttaccct"
] {
    let matchCount = regex(variant).numberOfMatches(in: sequence, options: [], range: variantSearchRange)
    print(variant, matchCount)
}
// Block until the work tracked by `group` has completed.
group.wait()
// Unwrap before printing anything, so a missing result traps with no partial output.
let finalLength = resultLength!
// Emit a blank line, then the three lengths, one per line.
print("\n\(inputLength)\n\(codeLength)\n\(finalLength)")
notes, command-line, and program output
NOTES:
64-bit Ubuntu quad core
Swift version 5.1.3 (swift-5.1.3-RELEASE)
Target: x86_64-unknown-linux-gnu
Tue, 05 May 2020 20:55:55 GMT
MAKE:
/opt/src/swift-5.1.3-RELEASE-ubuntu18.04/usr/bin/swiftc regexredux.swift -Ounchecked -target-cpu core2 -o regexredux.swift_run
<unknown>:0: warning: argument unused during compilation: '-mcpu=core2'
12.41s to complete and log all make actions
COMMAND LINE:
./regexredux.swift_run 0 < regexredux-input5000000.txt
PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178
50833411
50000000
27388361