The Computer Language
22.05 Benchmarks Game

regex-redux Swift program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by Francois Green


import Foundation
import Dispatch

// Read everything available on standard input in one go.
let input = FileHandle.standardInput.readDataToEndOfFile()

// Size of the raw input in bytes; reported in the final summary.
let inputLength = input.count

// Text form of the input. Declared `var` because it is overwritten later with
// the cleaned-up sequence. Traps if the input is not valid UTF-8.
var sequence = String(data: input, encoding: .utf8)!

/// Compiles `pattern` into an `NSRegularExpression` using default options.
///
/// An invalid pattern is treated as a programmer error: the `try!` traps
/// instead of propagating the failure to the caller.
let regex: (String) -> NSRegularExpression = {
  try! NSRegularExpression(pattern: $0, options: [])
}

// Remove every line that starts with ">" (up to and including its newline) and
// every remaining newline, leaving only the sequence characters.
//
// NSRange is expressed in UTF-16 code units, so the range length is taken from
// the string's UTF-16 view. The raw input byte count only happens to match for
// pure-ASCII input and would overrun the string otherwise.
sequence = regex(">[^\n]*\n|\n").stringByReplacingMatches(
  in: sequence,
  options: [],
  range: NSRange(location: 0, length: sequence.utf16.count),
  withTemplate: "")

// UTF-8 length of the cleaned sequence; reported in the final summary.
let codeLength = sequence.utf8.count

// UTF-8 length of the sequence after all substitutions below have been applied.
// Written by the background task and read only after `group.wait()` returns.
var resultLength: Int?

let group = DispatchGroup()

// Register the pending work with the group BEFORE dispatching it. Calling
// `enter()` from inside the closure would let the main thread reach
// `group.wait()` while the group is still empty; `wait()` would then return
// immediately and the later force-unwrap of `resultLength` would trap on nil.
group.enter()
DispatchQueue.global(qos: .background).async {
  // Apply the substitutions in order, feeding each pass's output into the next,
  // and keep only the UTF-8 length of the final string.
  resultLength = [
    (regex: "tHa[Nt]",            replacement: "<4>"),
    (regex: "aND|caN|Ha[DS]|WaS", replacement: "<3>"),
    (regex: "a[NSt]|BY",          replacement: "<2>"),
    (regex: "<[^>]*>",            replacement: "|"),
    (regex: "\\|[^|][^|]*\\|",    replacement: "-")
  ].reduce(sequence) { buffer, iub in
    // NSRange lengths are in UTF-16 code units, hence `utf16.count`.
    return regex(iub.regex).stringByReplacingMatches(
      in: buffer,
      options: [],
      range: NSRange(location: 0, length: buffer.utf16.count),
      withTemplate: iub.replacement)
  }.utf8.count
  // Balances the `enter()` above and releases the waiter.
  group.leave()
}

// Patterns whose match counts are reported, one per output line.
let variants = [
  "agggtaaa|tttaccct",
  "[cgt]gggtaaa|tttaccc[acg]",
  "a[act]ggtaaa|tttacc[agt]t",
  "ag[act]gtaaa|tttac[agt]ct",
  "agg[act]taaa|ttta[agt]cct",
  "aggg[acg]aaa|ttt[cgt]ccct",
  "agggt[cgt]aa|tt[acg]accct",
  "agggta[cgt]a|t[acg]taccct",
  "agggtaa[cgt]|[acg]ttaccct"
]

// The search range covers the whole cleaned sequence. NSRange is measured in
// UTF-16 code units, so the length comes from the UTF-16 view (not UTF-8), and
// it is computed once because it is the same for every pattern.
let fullSequenceRange = NSRange(location: 0, length: sequence.utf16.count)

// Print "<pattern> <number of matches>" for each variant, in order.
for variant in variants {
  let matchCount = regex(variant).numberOfMatches(in: sequence, options: [], range: fullSequenceRange)
  print(variant, matchCount)
}

// Block until the background substitution pass has signalled completion, so
// that `resultLength` has been assigned before it is unwrapped.
group.wait()

// Emit a blank line followed by the three lengths, one per line:
// raw input bytes, cleaned sequence length, and length after substitutions.
print("\n\(inputLength)\n\(codeLength)\n\(resultLength!)")
    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 5.7-dev
(LLVM a177fc627109410,
Swift 31967c6df177cce)


Wed, 04 May 2022 23:38:38 GMT

MAKE:
/opt/src/swift-5.7-DEVELOPMENT-SNAPSHOT-2022-04-25-a-ubuntu20.04/usr/bin/swiftc regexredux.swift -Ounchecked  -o regexredux.swift_run

9.65s to complete and log all make actions

COMMAND LINE:
./regexredux.swift_run 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361