Skip to content

Commit daa0281

Browse files
authored
fix(tokenizer): Decode entities after < (#1008)
1 parent 6a1b54d commit daa0281

File tree

2 files changed

+39
-36
lines changed

2 files changed

+39
-36
lines changed

src/Tokenizer.ts

Lines changed: 26 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,6 @@ const enum CharCodes {
2626
Eq = 0x3d, // "="
2727
Gt = 0x3e, // ">"
2828
Questionmark = 0x3f, // "?"
29-
LowerC = 0x63, // "c"
30-
LowerS = 0x73, // "s"
31-
LowerT = 0x74, // "t"
3229
UpperA = 0x41, // "A"
3330
LowerA = 0x61, // "a"
3431
UpperF = 0x46, // "F"
@@ -37,7 +34,6 @@ const enum CharCodes {
3734
LowerZ = 0x7a, // "z"
3835
LowerX = 0x78, // "x"
3936
OpeningSquareBracket = 0x5b, // "["
40-
ClosingSquareBracket = 0x5d, // "]"
4137
}
4238

4339
/** All the states the tokenizer can be in. */
@@ -402,38 +398,39 @@ export default class Tokenizer {
402398
* We allow anything that wouldn't end the tag.
403399
*/
404400
private isTagStartChar(c: number) {
405-
return isASCIIAlpha(c) || (this.xmlMode && !isEndOfTagSection(c));
401+
return this.xmlMode ? !isEndOfTagSection(c) : isASCIIAlpha(c);
406402
}
403+
404+
private startSpecial(sequence: Uint16Array, offset: number) {
405+
this.isSpecial = true;
406+
this.currentSequence = sequence;
407+
this.sequenceIndex = offset;
408+
this._state = State.SpecialStartSequence;
409+
}
410+
407411
private stateBeforeTagName(c: number) {
408-
if (c === CharCodes.Slash) {
409-
this._state = State.BeforeClosingTagName;
410-
} else if (c === CharCodes.Lt) {
411-
this.cbs.ontext(this.getSection());
412-
this.sectionStart = this._index;
413-
} else if (c === CharCodes.Gt || isWhitespace(c)) {
414-
this._state = State.Text;
415-
} else if (c === CharCodes.ExclamationMark) {
412+
if (c === CharCodes.ExclamationMark) {
416413
this._state = State.BeforeDeclaration;
417414
this.sectionStart = this._index + 1;
418415
} else if (c === CharCodes.Questionmark) {
419416
this._state = State.InProcessingInstruction;
420417
this.sectionStart = this._index + 1;
421-
} else if (!this.isTagStartChar(c)) {
422-
this._state = State.Text;
423-
} else {
418+
} else if (this.isTagStartChar(c)) {
424419
const lower = c | 0x20;
425420
this.sectionStart = this._index;
426-
if (!this.xmlMode && lower === CharCodes.LowerT) {
427-
this.isSpecial = true;
428-
this.currentSequence = Sequences.TitleEnd;
429-
this.sequenceIndex = 3;
430-
this._state = State.SpecialStartSequence;
421+
if (!this.xmlMode && lower === Sequences.TitleEnd[2]) {
422+
this.startSpecial(Sequences.TitleEnd, 3);
431423
} else {
432424
this._state =
433-
!this.xmlMode && lower === CharCodes.LowerS
425+
!this.xmlMode && lower === Sequences.ScriptEnd[2]
434426
? State.BeforeSpecialS
435427
: State.InTagName;
436428
}
429+
} else if (c === CharCodes.Slash) {
430+
this._state = State.BeforeClosingTagName;
431+
} else {
432+
this._state = State.Text;
433+
this.stateText(c);
437434
}
438435
}
439436
private stateInTagName(c: number) {
@@ -449,11 +446,10 @@ export default class Tokenizer {
449446
// Ignore
450447
} else if (c === CharCodes.Gt) {
451448
this._state = State.Text;
452-
} else if (!this.isTagStartChar(c)) {
453-
this._state = State.InSpecialComment;
454-
this.sectionStart = this._index;
455449
} else {
456-
this._state = State.InClosingTagName;
450+
this._state = this.isTagStartChar(c)
451+
? State.InClosingTagName
452+
: State.InSpecialComment;
457453
this.sectionStart = this._index;
458454
}
459455
}
@@ -617,16 +613,10 @@ export default class Tokenizer {
617613
}
618614
private stateBeforeSpecialS(c: number) {
619615
const lower = c | 0x20;
620-
if (lower === CharCodes.LowerC) {
621-
this.isSpecial = true;
622-
this.currentSequence = Sequences.ScriptEnd;
623-
this.sequenceIndex = 4;
624-
this._state = State.SpecialStartSequence;
625-
} else if (lower === CharCodes.LowerT) {
626-
this.isSpecial = true;
627-
this.currentSequence = Sequences.StyleEnd;
628-
this.sequenceIndex = 4;
629-
this._state = State.SpecialStartSequence;
616+
if (lower === Sequences.ScriptEnd[3]) {
617+
this.startSpecial(Sequences.ScriptEnd, 4);
618+
} else if (lower === Sequences.StyleEnd[3]) {
619+
this.startSpecial(Sequences.StyleEnd, 4);
630620
} else {
631621
this._state = State.InTagName;
632622
this.stateInTagName(c); // Consume the token again
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
{
2+
"name": "Entity after <",
3+
"options": {},
4+
"input": "<&amp;",
5+
"expected": [
6+
{
7+
"event": "text",
8+
"startIndex": 0,
9+
"endIndex": 5,
10+
"data": ["<&"]
11+
}
12+
]
13+
}

0 commit comments

Comments
 (0)