The Computer Language
24.09 Benchmarks Game

regex-redux Swift #4 program

source code

// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by Francois Green
// variant concurrency added by Daniel Sell

import Foundation
import Dispatch

// Read all of standard input up front; its byte count is printed at the end
// of the program as the original input length.
let input = FileHandle.standardInput.readDataToEndOfFile()

let inputLength = input.count

/// Compiles `pattern` into an `NSRegularExpression` with default options.
///
/// `try!` is deliberate: every pattern passed in this file is a literal, so a
/// compilation failure is a programming error rather than a runtime condition.
let regex: (String) -> NSRegularExpression = { pattern in
    return try! NSRegularExpression(pattern: pattern, options: [])
}

// Decode the input as ASCII and, in the same expression, delete every run
// from a '>' through the end of its line together with all remaining newline
// characters. Binding the result once with `guard let` keeps `sequence`
// immutable, so the concurrent tasks that read it later share a value that
// can never change, and turns a decoding failure into a clear diagnostic
// instead of a force-unwrap.
guard let sequence = String(data: input, encoding: .ascii)?
    .replacingOccurrences(of: ">[^\n]*\n|\n", with: "",
                          options: .regularExpression)
else {
    FileHandle.standardError.write(
        Data("error: standard input could not be decoded as ASCII\n".utf8))
    exit(EXIT_FAILURE)
}

// Length of the cleaned sequence in UTF-8 bytes.
let codeLength = sequence.utf8.count

// UTF-8 byte length of the sequence after every substitution below has been
// applied. It is assigned by the background task, so it holds its final value
// only once `group` has been waited on.
var resultLength = 0

// Tracks the background work started by this program.
let group = DispatchGroup()

group.enter()
DispatchQueue.global().async {
    // Signal completion on every exit path, after `resultLength` is stored.
    defer { group.leave() }

    // Ordered (pattern, replacement) pairs. Each substitution runs on the
    // output of the previous one, so the order is significant.
    let substitutions = [
        ("tHa[Nt]",            "<4>"),
        ("aND|caN|Ha[DS]|WaS", "<3>"),
        ("a[NSt]|BY",          "<2>"),
        ("<[^>]*>",            "|"),
        ("\\|[^|][^|]*\\|",    "-"),
    ]

    var text = sequence
    for (pattern, replacement) in substitutions {
        text = text.replacingOccurrences(of: pattern, with: replacement,
                                         options: .regularExpression)
    }

    resultLength = text.utf8.count
}

// Patterns whose occurrences in `sequence` are counted and reported, in this
// order.
let variants = [
    "agggtaaa|tttaccct",
    "[cgt]gggtaaa|tttaccc[acg]",
    "a[act]ggtaaa|tttacc[agt]t",
    "ag[act]gtaaa|tttac[agt]ct",
    "agg[act]taaa|ttta[agt]cct",
    "aggg[acg]aaa|ttt[cgt]ccct",
    "agggt[cgt]aa|tt[acg]accct",
    "agggta[cgt]a|t[acg]taccct",
    "agggtaa[cgt]|[acg]ttaccct",
]

// One result slot per entry of `variants`, filled in by the tasks below.
var variantMatches = Array(repeating: 0, count: variants.count)

// Serializes the stores into `variantMatches`. Assigning an element is a
// mutating access to the whole array value, so unsynchronized writes from
// several threads are a data race even when every task uses its own index.
let variantMatchesLock = NSLock()

// The full extent of `sequence`, expressed in the UTF-16 units that NSRange
// and NSRegularExpression use. Computed once and shared by every task.
let searchRange = NSRange(sequence.startIndex..., in: sequence)

for (i, variant) in variants.enumerated() {
    group.enter()
    DispatchQueue.global().async {
        // Leave the group on every exit path, after the result is stored.
        defer { group.leave() }

        // The expensive part (compiling and matching) runs outside the lock.
        let matchCount = regex(variant).numberOfMatches(
            in: sequence, options: [], range: searchRange)

        variantMatchesLock.lock()
        variantMatches[i] = matchCount
        variantMatchesLock.unlock()
    }
}

// Block until every task registered with `group` has finished, so that all
// results are final before they are printed.
group.wait()

// One line per variant: the pattern, a space, then its match count.
for (pattern, matchCount) in zip(variants, variantMatches) {
    print(pattern, matchCount)
}

// A blank separator line, then the three lengths, one per line.
print()
for length in [inputLength, codeLength, resultLength] {
    print(length)
}

    

notes, command-line, and program output

NOTES:
64-bit Ubuntu quad core
Swift version 6.0
(swift-6.0-RELEASE)
Target: x86_64-unknown-linux-gnu


 Wed, 18 Sep 2024 23:42:55 GMT

MAKE:
/opt/src/swift-6.0-RELEASE/usr/bin/swiftc regexredux.swift-4.swift -Ounchecked -wmo  -o regexredux.swift-4.swift_run

13.86s to complete and log all make actions

COMMAND LINE:
 ./regexredux.swift-4.swift_run 0 < regexredux-input5000000.txt

PROGRAM OUTPUT:
agggtaaa|tttaccct 356
[cgt]gggtaaa|tttaccc[acg] 1250
a[act]ggtaaa|tttacc[agt]t 4252
ag[act]gtaaa|tttac[agt]ct 2894
agg[act]taaa|ttta[agt]cct 5435
aggg[acg]aaa|ttt[cgt]ccct 1537
agggt[cgt]aa|tt[acg]accct 1431
agggta[cgt]a|t[acg]taccct 1608
agggtaa[cgt]|[acg]ttaccct 2178

50833411
50000000
27388361