Skip to content

Commit f3af23e

Browse files
fix: Fix unicode Regex miscounting emoji length (#2942)
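Many emojis are two or more UTF-16 code units long, so a string's `.length` counts them as more than one character. The regex `u` (unicode) flag, which allows searching for punctuation, instead counts each emoji as a single character. Spreading the strings into arrays restores the correct character count.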
1 parent 8fb1711 commit f3af23e

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

src/Tokenizer.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,8 @@ export class _Tokenizer {
625625
const nextChar = match[1] || match[2] || '';
626626

627627
if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) {
628-
const lLength = match[0].length - 1;
628+
// unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below)
629+
const lLength = [...match[0]].length - 1;
629630
let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
630631

631632
const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
@@ -639,7 +640,7 @@ export class _Tokenizer {
639640

640641
if (!rDelim) continue; // skip single * in __abc*abc__
641642

642-
rLength = rDelim.length;
643+
rLength = [...rDelim].length;
643644

644645
if (match[3] || match[4]) { // found another Left Delim
645646
delimTotal += rLength;
@@ -658,7 +659,7 @@ export class _Tokenizer {
658659
// Remove extra characters. *a*** -> *a*
659660
rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
660661

661-
const raw = src.slice(0, lLength + match.index + rLength + 1);
662+
const raw = [...src].slice(0, lLength + match.index + rLength + 1).join('');
662663

663664
// Create `em` if smallest delimiter has odd char count. *a***
664665
if (Math.min(lLength, rLength) % 2) {

test/specs/new/emoji_inline.html

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<p>Situations where it fails:</p>
2+
<p><strong>test 💁</strong></p>
3+
<p><strong>💁 test</strong></p>
4+
<p><strong>🤓 test</strong></p>
5+
<p><strong>πŸ–οΈ test</strong></p>
6+
<p><strong>πŸ–οΈπŸ€“πŸ’ test</strong></p>
7+
<p>Situations where it works:</p>
8+
<p>**💁 **</p>
9+
<p><strong>⚠️ test</strong></p>
10+
<p>Here, the emoji rendering works, but the text doesn't get rendered in italic.</p>
11+
<p><em>💁 test</em></p>

test/specs/new/emoji_inline.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Situations where it fails:
2+
3+
**test 💁**
4+
5+
**💁 test**
6+
7+
**🤓 test**
8+
9+
**πŸ–οΈ test**
10+
11+
**πŸ–οΈπŸ€“πŸ’ test**
12+
13+
Situations where it works:
14+
15+
**💁 **
16+
17+
**⚠️ test**
18+
19+
Here, the emoji rendering works, but the text doesn't get rendered in italic.
20+
21+
*💁 test*

0 commit comments

Comments
(0)