@@ -2741,7 +2741,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
2741
2741
if ( noper < tail
2742
2742
&& ( OP(noper) == flags
2743
2743
|| (flags == EXACT && OP(noper) == EXACT_ONLY8)
2744
- || (flags == EXACTFU && OP(noper) == EXACTFU_SS)))
2744
+ || (flags == EXACTFU && ( OP(noper) == EXACTFU_ONLY8
2745
+ || OP(noper) == EXACTFU_SS))) )
2745
2746
{
2746
2747
uc= (U8*)STRING(noper);
2747
2748
e= uc + STR_LEN(noper);
@@ -2958,7 +2959,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
2958
2959
if ( noper < tail
2959
2960
&& ( OP(noper) == flags
2960
2961
|| (flags == EXACT && OP(noper) == EXACT_ONLY8)
2961
- || (flags == EXACTFU && OP(noper) == EXACTFU_SS) ) )
2962
+ || (flags == EXACTFU && ( OP(noper) == EXACTFU_ONLY8
2963
+ || OP(noper) == EXACTFU_SS))) )
2962
2964
{
2963
2965
const U8 *uc= (U8*)STRING(noper);
2964
2966
const U8 *e= uc + STR_LEN(noper);
@@ -3182,7 +3184,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
3182
3184
if ( noper < tail
3183
3185
&& ( OP(noper) == flags
3184
3186
|| (flags == EXACT && OP(noper) == EXACT_ONLY8)
3185
- || (flags == EXACTFU && OP(noper) == EXACTFU_SS) ) )
3187
+ || (flags == EXACTFU && ( OP(noper) == EXACTFU_ONLY8
3188
+ || OP(noper) == EXACTFU_SS))) )
3186
3189
{
3187
3190
const U8 *uc= (U8*)STRING(noper);
3188
3191
const U8 *e= uc + STR_LEN(noper);
@@ -4671,6 +4674,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
4671
4674
EXACT | EXACT
4672
4675
EXACT_ONLY8 | EXACT
4673
4676
EXACTFU | EXACTFU
4677
+ EXACTFU_ONLY8 | EXACTFU
4674
4678
EXACTFU_SS | EXACTFU
4675
4679
EXACTFAA | EXACTFAA
4676
4680
EXACTL | EXACTL
@@ -4682,7 +4686,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
4682
4686
? NOTHING \
4683
4687
: ( EXACT == (X) || EXACT_ONLY8 == (X) ) \
4684
4688
? EXACT \
4685
- : ( EXACTFU == (X) || EXACTFU_SS == (X) ) \
4689
+ : ( EXACTFU == (X) \
4690
+ || EXACTFU_ONLY8 == (X) \
4691
+ || EXACTFU_SS == (X) ) \
4686
4692
? EXACTFU \
4687
4693
: ( EXACTFAA == (X) ) \
4688
4694
? EXACTFAA \
@@ -13759,6 +13765,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
13759
13765
* target string is (also) in UTF-8 */
13760
13766
bool requires_utf8_target = FALSE;
13761
13767
13768
+ bool has_micro_sign = FALSE;
13769
+
13762
13770
/* Allocate an EXACT node. The node_type may change below to
13763
13771
* another EXACTish node, but since the size of the node doesn't
13764
13772
* change, it works */
@@ -14222,6 +14230,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14222
14230
14223
14231
if (ender > 255) {
14224
14232
requires_utf8_target = TRUE;
14233
+ if (UNLIKELY(ender == GREEK_SMALL_LETTER_MU)) {
14234
+ has_micro_sign = TRUE;
14235
+ }
14225
14236
}
14226
14237
}
14227
14238
}
@@ -14264,6 +14275,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14264
14275
}
14265
14276
#endif
14266
14277
14278
+ else if (UNLIKELY(ender == MICRO_SIGN)) {
14279
+ has_micro_sign = TRUE;
14280
+ }
14281
+
14267
14282
/* Even when folding, we store just the input
14268
14283
* character, as we have an array that finds its fold
14269
14284
* quickly */
@@ -14481,6 +14496,16 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
14481
14496
else if (node_type == EXACTF) {
14482
14497
RExC_seen_d_op = TRUE;
14483
14498
}
14499
+
14500
+ /* The micro sign is the only character below 256 whose
14501
+ * fold is above 255 */
14502
+ if ( OP(REGNODE_p(ret)) == EXACTFU
14503
+ && requires_utf8_target
14504
+ && LIKELY(! has_micro_sign))
14505
+ {
14506
+ OP(REGNODE_p(ret)) = EXACTFU_ONLY8;
14507
+ }
14508
+
14484
14509
}
14485
14510
14486
14511
alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len,
@@ -19252,6 +19277,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
19252
19277
case EXACTFAA_NO_TRIE:
19253
19278
case EXACTFAA:
19254
19279
case EXACTFU:
19280
+ case EXACTFU_ONLY8:
19255
19281
case EXACTFLU8:
19256
19282
case EXACTFU_SS:
19257
19283
case EXACTFL:
0 commit comments