Skip to content

Commit b716d50

Browse files
committed
Handle case insensitivity properly in CCC ranges
The prior implementation lowercased both endpoints of the range before comparing them, so a range that spans both cases, such as `/(?i)[X-c]/`, was rejected as invalid. This new approach uses simple case matching: it first tests whether the character itself is within the range and then, when matching case-insensitively, tests whether its uppercase or lowercase mapping is within the range. Fixes swiftlang#395
1 parent bae6401 commit b716d50

File tree

2 files changed

+55
-34
lines changed

2 files changed

+55
-34
lines changed

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -276,54 +276,48 @@ extension DSLTree.CustomCharacterClass.Member {
276276
}
277277
return c
278278
case let .range(low, high):
279-
// TODO:
280279
guard let lhs = low.literalCharacterValue, lhs.hasExactlyOneScalar else {
281280
throw Unsupported("\(low) in range")
282281
}
283282
guard let rhs = high.literalCharacterValue, rhs.hasExactlyOneScalar else {
284283
throw Unsupported("\(high) in range")
285284
}
285+
guard lhs <= rhs else {
286+
throw Unsupported("Invalid range \(low)-\(high)")
287+
}
286288

289+
let isCaseInsensitive = opts.isCaseInsensitive
287290
let isCharacterSemantic = opts.semanticLevel == .graphemeCluster
288291

289-
if opts.isCaseInsensitive {
290-
let lhsLower = lhs.lowercased()
291-
let rhsLower = rhs.lowercased()
292-
guard lhsLower <= rhsLower else { throw Unsupported("Invalid range \(lhs)-\(rhs)") }
293-
return { input, bounds in
294-
// TODO: check for out of bounds?
295-
let curIdx = bounds.lowerBound
296-
if isCharacterSemantic {
297-
guard input[curIdx].hasExactlyOneScalar else { return nil }
298-
if (lhsLower...rhsLower).contains(input[curIdx].lowercased()) {
299-
return input.index(after: curIdx)
300-
}
301-
} else {
302-
if (lhsLower...rhsLower).contains(input.unicodeScalars[curIdx].properties.lowercaseMapping) {
303-
return input.unicodeScalars.index(after: curIdx)
304-
}
305-
}
292+
return { input, bounds in
293+
// TODO: check for out of bounds?
294+
let curIdx = bounds.lowerBound
295+
let nextIndex = isCharacterSemantic
296+
? input.index(after: curIdx)
297+
: input.unicodeScalars.index(after: curIdx)
298+
if isCharacterSemantic && !input[curIdx].hasExactlyOneScalar {
306299
return nil
307300
}
308-
} else {
309-
guard lhs <= rhs else { throw Unsupported("Invalid range \(lhs)-\(rhs)") }
310-
return { input, bounds in
311-
// TODO: check for out of bounds?
312-
let curIdx = bounds.lowerBound
313-
if isCharacterSemantic {
314-
guard input[curIdx].hasExactlyOneScalar else { return nil }
315-
if (lhs...rhs).contains(input[curIdx]) {
316-
return input.index(after: curIdx)
317-
}
318-
} else {
319-
if (lhs...rhs).contains(Character(input.unicodeScalars[curIdx])) {
320-
return input.unicodeScalars.index(after: curIdx)
321-
}
322-
}
301+
let scalar = input.unicodeScalars[curIdx]
302+
let scalarRange = lhs.unicodeScalars.first! ... rhs.unicodeScalars.first!
303+
if scalarRange.contains(scalar) {
304+
return nextIndex
305+
}
306+
if !isCaseInsensitive {
323307
return nil
324308
}
309+
310+
let stringRange = String(lhs)...String(rhs)
311+
if (scalar.properties.changesWhenLowercased
312+
&& stringRange.contains(scalar.properties.lowercaseMapping))
313+
|| (scalar.properties.changesWhenUppercased
314+
&& stringRange.contains(scalar.properties.uppercaseMapping)) {
315+
return nextIndex
316+
}
317+
318+
return nil
325319
}
326-
320+
327321
case let .custom(ccc):
328322
return try ccc.generateConsumer(opts)
329323

Tests/RegexTests/MatchTests.swift

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,33 @@ extension RegexTests {
731731
firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: "abc",
732732
syntax: .experimental)
733733
firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: #""abc""#)
734+
735+
// Case sensitivity and ranges.
736+
for ch in "abcD" {
737+
firstMatchTest("[a-cD]", input: String(ch), match: String(ch))
738+
}
739+
for ch in "ABCd" {
740+
firstMatchTest("[a-cD]", input: String(ch), match: nil)
741+
}
742+
743+
for ch in "abcABCdD" {
744+
firstMatchTest("(?i)[a-cd]", input: String(ch), match: String(ch))
745+
firstMatchTest("(?i)[A-CD]", input: String(ch), match: String(ch))
746+
firstMatchTest("(?iu)[a-cd]", input: String(ch), match: String(ch))
747+
firstMatchTest("(?iu)[A-CD]", input: String(ch), match: String(ch))
748+
}
749+
750+
for ch in "XYZ[\\]^_`abcd" {
751+
firstMatchTest("[X-cd]", input: String(ch), match: String(ch))
752+
firstMatchTest("[X-cd]", input: String(ch), match: String(ch))
753+
firstMatchTest("(?u)[X-cd]", input: String(ch), match: String(ch))
754+
firstMatchTest("(?u)[X-cd]", input: String(ch), match: String(ch))
755+
}
756+
757+
for ch in "XYZ[\\]^_`abcxyzABCdD" {
758+
firstMatchTest("(?i)[X-cd]", input: String(ch), match: String(ch))
759+
firstMatchTest("(?iu)[X-cD]", input: String(ch), match: String(ch))
760+
}
734761
}
735762

736763
func testCharacterProperties() {

0 commit comments

Comments
 (0)