@@ -673,6 +673,11 @@ extension RegexTests {
673
673
}
674
674
firstMatchTest ( #"[\t-\t]"# , input: " \u{8} \u{A} \u{9} " , match: " \u{9} " )
675
675
676
+ firstMatchTest ( #"[12]"# , input: " 1️⃣ " , match: nil )
677
+ firstMatchTest ( #"[1-2]"# , input: " 1️⃣ " , match: nil )
678
+ firstMatchTest ( #"[\d]"# , input: " 1️⃣ " , match: " 1️⃣ " )
679
+ firstMatchTest ( #"(?P)[\d]"# , input: " 1️⃣ " , match: nil )
680
+
676
681
// Currently not supported in the matching engine.
677
682
for c : UnicodeScalar in [ " a " , " b " , " c " ] {
678
683
firstMatchTest ( #"[\c!-\C-#]"# , input: " def \( c) " , match: " \( c) " ,
@@ -1507,31 +1512,28 @@ extension RegexTests {
1507
1512
}
1508
1513
1509
1514
func testCanonicalEquivalenceCustomCharacterClass( ) throws {
1510
- // Expectation: Concatenations with custom character classes should be able
1511
- // to match within a grapheme cluster. That is, a regex should be able to
1512
- // match the scalar values that comprise a grapheme cluster in separate,
1513
- // or repeated, custom character classes.
1515
+ // Expectation: Custom character class matches do not cross grapheme
1516
+ // cluster boundaries by default. When matching with Unicode scalar
1517
+ // semantics, grapheme cluster boundaries are ignored, so matching
1518
+ // sequences of custom character classes can succeed.
1514
1519
1515
1520
matchTest (
1516
1521
#"[áéíóú]$"# ,
1517
1522
( eComposed, true ) ,
1518
1523
( eDecomposed, true ) )
1519
1524
1520
- // FIXME: Custom char classes don't use canonical equivalence with composed characters
1521
- firstMatchTest ( #"e[\u{301}]$"# , input: eComposed, match: eComposed,
1522
- xfail: true )
1523
- firstMatchTest ( #"e[\u{300}-\u{320}]$"# , input: eComposed, match: eComposed,
1524
- xfail: true )
1525
- firstMatchTest ( #"[a-z][\u{300}-\u{320}]$"# , input: eComposed, match: eComposed,
1526
- xfail: true )
1525
+ // Unicode scalar semantics
1526
+ firstMatchTest ( #"(?u)e[\u{301}]$"# , input: eDecomposed, match: eDecomposed)
1527
+ firstMatchTest ( #"(?u)e[\u{300}-\u{320}]$"# , input: eDecomposed, match: eDecomposed)
1528
+ firstMatchTest ( #"(?u)[e][\u{300}-\u{320}]$"# , input: eDecomposed, match: eDecomposed)
1529
+ firstMatchTest ( #"(?u)[e-e][\u{300}-\u{320}]$"# , input: eDecomposed, match: eDecomposed)
1530
+ firstMatchTest ( #"(?u)[a-z][\u{300}-\u{320}]$"# , input: eDecomposed, match: eDecomposed)
1527
1531
1528
- // FIXME: Custom char classes don't match decomposed characters
1529
- firstMatchTest ( #"e[\u{301}]$"# , input: eDecomposed, match: eDecomposed,
1530
- xfail: true )
1531
- firstMatchTest ( #"e[\u{300}-\u{320}]$"# , input: eDecomposed, match: eDecomposed,
1532
- xfail: true )
1533
- firstMatchTest ( #"[a-z][\u{300}-\u{320}]$"# , input: eDecomposed, match: eDecomposed,
1534
- xfail: true )
1532
+ // Grapheme cluster semantics
1533
+ firstMatchTest ( #"e[\u{301}]$"# , input: eComposed, match: nil )
1534
+ firstMatchTest ( #"e[\u{300}-\u{320}]$"# , input: eComposed, match: nil )
1535
+ firstMatchTest ( #"[e][\u{300}-\u{320}]$"# , input: eComposed, match: nil )
1536
+ firstMatchTest ( #"[a-z][\u{300}-\u{320}]$"# , input: eComposed, match: nil )
1535
1537
1536
1538
let flag = " 🇰🇷 "
1537
1539
firstMatchTest ( #"🇰🇷"# , input: flag, match: flag)
@@ -1540,27 +1542,15 @@ extension RegexTests {
1540
1542
firstMatchTest ( #"\u{1F1F0 1F1F7}"# , input: flag, match: flag)
1541
1543
1542
1544
// First Unicode scalar followed by CCC of regional indicators
1543
- firstMatchTest ( #"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"# , input: flag, match: flag,
1544
- xfail: true )
1545
-
1546
- // FIXME: CCC of Regional Indicator doesn't match with both parts of a flag character
1545
+ firstMatchTest ( #"(?u)^\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]$"# , input: flag, match: flag)
1546
+ // A CCC of regional indicators followed by the second Unicode scalar
1547
+ firstMatchTest ( #"(?u)^[\u{1F1E6}-\u{1F1FF}]\u{1F1F7}$"# , input: flag, match: flag)
1547
1548
// A CCC of regional indicators x 2
1548
- firstMatchTest ( #"[\u{1F1E6}-\u{1F1FF}]{2}"# , input: flag, match: flag,
1549
- xfail: true )
1549
+ firstMatchTest ( #"(?u)^[\u{1F1E6}-\u{1F1FF}]{2}$"# , input: flag, match: flag)
1550
1550
1551
- // FIXME: A single CCC of regional indicators matches the whole flag character
1552
- // A CCC of regional indicators followed by the second Unicode scalar
1553
- firstMatchTest ( #"[\u{1F1E6}-\u{1F1FF}]\u{1F1F7}"# , input: flag, match: flag,
1554
- xfail: true )
1555
1551
// A single CCC of regional indicators
1556
- firstMatchTest ( #"[\u{1F1E6}-\u{1F1FF}]"# , input: flag, match: nil ,
1557
- xfail: true )
1558
-
1559
- // A single CCC of actual flag emojis / combined regional indicators
1560
- firstMatchTest ( #"[🇦🇫-🇿🇼]"# , input: flag, match: flag)
1561
- // This succeeds (correctly) because \u{1F1F0} is lexicographically
1562
- // within the CCC range
1563
- firstMatchTest ( #"[🇦🇫-🇿🇼]"# , input: " \u{1F1F0} abc " , match: " \u{1F1F0} " )
1552
+ firstMatchTest ( #"^[\u{1F1E6}-\u{1F1FF}]$"# , input: flag, match: nil )
1553
+ firstMatchTest ( #"^(?u)[\u{1F1E6}-\u{1F1FF}]$"# , input: flag, match: nil )
1564
1554
}
1565
1555
1566
1556
func testAnyChar( ) throws {
0 commit comments