首页 > 解决方案 > Swift 5.1 正则表达式错误

问题描述

对于以下代码:

import Foundation

extension String {
    /// The whole string expressed as an `NSRange`, built from
    /// `startIndex ..< endIndex`.
    var fullRange: NSRange {
        return .init(self.startIndex ..< self.endIndex, in: self)
    }
    /// Returns the slice selected by a range of integer offsets.
    ///
    /// Both bounds are turned into `String.Index` values by stepping from
    /// `startIndex` with `index(_:offsetBy:)`, i.e. they are counted in
    /// `Character`s.
    public subscript(range: Range<Int>) -> Self.SubSequence {
        let st = self.index(self.startIndex, offsetBy: range.startIndex)
        let ed = self.index(self.startIndex, offsetBy: range.endIndex)
        let sub = self[st ..< ed]
        return sub
    }

    /// Splits the string around every match of the regular expression `pattern`.
    ///
    /// - Parameter pattern: The regular expression whose matches act as separators.
    /// - Returns: The pieces between the matches; a remainder after the last
    ///   match is appended only when `start < self.count`.
    /// - Throws: Any error thrown while creating the `NSRegularExpression`.
    ///
    /// NOTE(review): `m.range` is an `NSRange`, whose `location`/`length` are
    /// UTF-16 code-unit offsets, but they are fed to the `Int` subscript above,
    /// which counts `Character`s. The two disagree as soon as a `Character`
    /// spans more than one UTF-16 code unit (e.g. a base letter followed by a
    /// combining mark), which shifts every later slice.
    func split(regex pattern: String) throws -> [String] {
        let regex = try NSRegularExpression.init(pattern: pattern, options: [])
        let fRange = self.fullRange
        let match = regex.matches(in: self, options: [], range: fRange)

        var list = [String]()
        // Offset where the next piece begins; taken from NSRange values below.
        var start = 0
        for m in match {
            let r = m.range
            let end = r.location

            // Piece between the previous separator and this match.
            list.append(String(self[start ..< end]))
            // Continue right after the matched separator.
            start = end + r.length
        }
        // Remainder after the last match, if any.
        if start < self.count {
            list.append(String(self[start ..< self.count]))
        }
        return list
    }
}

// Splits a mixed Arabic/English string on a newline and prints the pieces.
print(try! "مرتفع جداً\nVery High".split(regex: "\n"))

输出应该是:

["مرتفع جداً", "Very High"]

但它是:

["مرتفع جداً\n", "ery High"]

那是因为正则表达式(在这种情况下)在偏移量 10 而不是 9 处匹配到 \n。

是我的代码有问题,还是正则表达式的错误?

标签: swift regex

解决方案


这不是一个错误。您正在尝试使用 Int 索引,而这种做法在 Unicode 环境中容易出错,强烈不建议使用。

下面的代码与您的代码等价,但使用了正确的 String.Index 类型,以及用于在 NSRange 与 Range<String.Index> 之间相互转换的专用 API。fullRange 和 subscript 已不再需要。

我只是省略了打印行。startIndex 和 endIndex 都是 String 的属性。

extension String {
    /// Splits the string at every match of a regular expression.
    ///
    /// Match ranges are converted from `NSRange` to `Range<String.Index>`, so
    /// all slicing is done with native string indices.
    ///
    /// - Parameter pattern: The regular expression whose matches separate the pieces.
    /// - Returns: The text between consecutive matches. A match at the very
    ///   start yields a leading empty string; text after the last match is
    ///   included only when it is non-empty.
    /// - Throws: Any error thrown while creating the `NSRegularExpression`.
    func split(regex pattern: String) throws -> [String] {
        let expression = try NSRegularExpression(pattern: pattern)
        let wholeString = NSRange(startIndex..<endIndex, in: self)

        // The matches come from this very string, so the conversion back to
        // `Range<String.Index>` is expected to succeed.
        let separators = expression
            .matches(in: self, options: [], range: wholeString)
            .map { Range($0.range, in: self)! }

        var pieces: [String] = []
        var cursor = startIndex
        for separator in separators {
            pieces.append(String(self[cursor..<separator.lowerBound]))
            cursor = separator.upperBound
        }

        // Nothing left after the final separator: no trailing piece.
        guard cursor < endIndex else { return pieces }
        pieces.append(String(self[cursor...]))
        return pieces
    }
}

// Same input and pattern as in the question, run against the String.Index-based split.
print(try! "مرتفع جداً\nVery High".split(regex: "\n"))

结果是["مرتفع جداً", "Very High"]


推荐阅读