The Computer Language
22.05 Benchmarks Game

regex-redux Swift #2 program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// regex-dna program contributed by Daniel Muellenborn
// converted from regex-dna program

import Foundation

// Read all of standard input in one go. The buffer is never modified, so it is
// a constant; its byte count is what the final summary reports as input length.
let data = FileHandle.standardInput.readDataToEndOfFile()

// Decode the input as UTF-8. Stop with an explicit message if the bytes are not
// valid UTF-8 instead of relying on a bare force unwrap.
guard var sequence = String(data: data, encoding: .utf8) else {
  fatalError("standard input is not valid UTF-8")
}

// Size of the raw input in bytes, taken before `sequence` is rewritten.
let inputLength = data.count

/// Compiles a pattern string into an `NSRegularExpression` with no options set.
///
/// Compilation errors are not propagated: `try!` turns an invalid pattern into
/// an immediate crash. Every pattern visible in this script is a string literal.
let regex: (String) -> NSRegularExpression = {
  try! NSRegularExpression(pattern: $0, options: [])
}

// Remove every run from a '>' through the end of its line, and every remaining
// newline, leaving only the sequence characters.
// NSRange is measured in UTF-16 code units, so the range is taken from the
// string's UTF-16 view rather than from the byte count of the raw input. The
// two agree for pure-ASCII input but diverge (and the byte count overruns the
// string) as soon as a non-ASCII character is present.
let wholeInput = NSRange(location: 0, length: sequence.utf16.count)
sequence = regex(">[^\n]*\n|\n").stringByReplacingMatches(in: sequence, options: [], range: wholeInput, withTemplate: "")

// Length of the cleaned sequence in UTF-8 bytes.
let codeLength = sequence.utf8.count

// Patterns whose occurrences in the cleaned sequence are counted. Each pattern
// string is also printed verbatim next to its count.
let variants = [
  "agggtaaa|tttaccct",
  "[cgt]gggtaaa|tttaccc[acg]",
  "a[act]ggtaaa|tttacc[agt]t",
  "ag[act]gtaaa|tttac[agt]ct",
  "agg[act]taaa|ttta[agt]cct",
  "aggg[acg]aaa|ttt[cgt]ccct",
  "agggt[cgt]aa|tt[acg]accct",
  "agggta[cgt]a|t[acg]taccct",
  "agggtaa[cgt]|[acg]ttaccct",
]

// NSRange counts UTF-16 code units, so the search range is sized from the
// string's UTF-16 view, not from its UTF-8 byte count. `sequence` is not
// modified while counting, so the range is computed once for all patterns.
let searchRange = NSRange(location: 0, length: sequence.utf16.count)

// Counting is done sequentially: the original author noted that a parallel
// version (DispatchQueue.concurrentPerform over the variants) was slower.
let counts = variants.map { variant in
  (variant, regex(variant).numberOfMatches(in: sequence, options: [], range: searchRange))
}

// One line per variant: the pattern, a space, then its match count.
for (variant, count) in counts {
  print(variant, count)
}

// Substitution rules as (pattern, template) pairs. They are applied in the
// order listed, each one operating on the output of the previous rule.
let substitutionRules = [
  ("tHa[Nt]", "<4>"),
  ("aND|caN|Ha[DS]|WaS", "<3>"),
  ("a[NSt]|BY", "<2>"),
  ("<[^>]*>", "|"),
  ("\\|[^|][^|]*\\|", "-"),
]

// Compile every pattern up front, before any replacement is performed.
let replacements = substitutionRules.map { rule in (regex(rule.0), rule.1) }

for (expression, template) in replacements {
  // The range is recomputed on each pass because every replacement can change
  // the length of `sequence`.
  let wholeSequence = NSRange(location: 0, length: sequence.utf16.count)
  sequence = expression.stringByReplacingMatches(
    in: sequence,
    options: [],
    range: wholeSequence,
    withTemplate: template
  )
}

// Length in UTF-8 bytes of the sequence after all substitutions.
let resultLength = sequence.utf8.count

// Summary: an empty line, then the input, cleaned and final lengths, one per line.
print("\n\(inputLength)\n\(codeLength)\n\(resultLength)")
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 5.7-dev
(LLVM a177fc627109410,
Swift 31967c6df177cce)


Wed, 04 May 2022 23:33:58 GMT

MAKE:
/opt/src/swift-5.7-DEVELOPMENT-SNAPSHOT-2022-04-25-a-ubuntu20.04/usr/bin/swiftc regexredux.swift-2.swift -Ounchecked  -o regexredux.swift-2.swift_run

10.06s to complete and log all make actions

COMMAND LINE:
./regexredux.swift-2.swift_run 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361