Skip to content

Commit 28c162b

Browse files
authored
refactor: Fix how indices are computed, add attrib indices (#929)
The previous approach had several shortcomings; e.g., _special_ comments like `</12>` or CDATA in HTML would have had their indices misreported. As a new feature, attributes will now have their indices set appropriately.
1 parent 4e25252 commit 28c162b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+926
-69
lines changed

src/Parser.spec.ts

Lines changed: 18 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -79,30 +79,6 @@ describe("API", () => {
7979
expect(text).toBe("0&#xn");
8080
});
8181

82-
test("should update the position", () => {
83-
const p = new Parser();
84-
85-
p.write("foo");
86-
87-
expect(p.startIndex).toBe(0);
88-
expect(p.endIndex).toBe(2);
89-
90-
p.write("<select>");
91-
92-
expect(p.startIndex).toBe(3);
93-
expect(p.endIndex).toBe(10);
94-
95-
p.write("<select>");
96-
97-
expect(p.startIndex).toBe(11);
98-
expect(p.endIndex).toBe(18);
99-
100-
p.parseChunk("</select>");
101-
102-
expect(p.startIndex).toBe(19);
103-
expect(p.endIndex).toBe(27);
104-
});
105-
10682
test("should not have the start index be greater than the end index", () => {
10783
const onopentag = jest.fn();
10884
const onclosetag = jest.fn();
@@ -134,22 +110,33 @@ describe("API", () => {
134110
});
135111

136112
test("should update the position when a single tag is spread across multiple chunks", () => {
137-
const p = new Parser();
113+
let called = false;
114+
const p = new Parser({
115+
onopentag() {
116+
called = true;
117+
expect(p.startIndex).toBe(0);
118+
expect(p.endIndex).toBe(12);
119+
},
120+
});
138121

139122
p.write("<div ");
140123
p.write("foo=bar>");
141124

142-
expect(p.startIndex).toBe(0);
143-
expect(p.endIndex).toBe(12);
125+
expect(called).toBe(true);
144126
});
145127

146128
test("should have the correct position for implied opening tags", () => {
147-
const p = new Parser();
129+
let called = false;
130+
const p = new Parser({
131+
onopentag() {
132+
called = true;
133+
expect(p.startIndex).toBe(0);
134+
expect(p.endIndex).toBe(3);
135+
},
136+
});
148137

149138
p.write("</p>");
150-
151-
expect(p.startIndex).toBe(0);
152-
expect(p.endIndex).toBe(3);
139+
expect(called).toBe(true);
153140
});
154141

155142
test("should parse <__proto__> (#387)", () => {

src/Parser.ts

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,11 @@ export class Parser {
196196
public startIndex = 0;
197197
/** The end index of the last event. */
198198
public endIndex = 0;
199+
/**
200+
* Store the start index of the current open tag,
201+
* so we can update the start index for attributes.
202+
*/
203+
private openTagStart = 0;
199204

200205
private tagname = "";
201206
private attribname = "";
@@ -212,7 +217,6 @@ export class Parser {
212217
cbs?: Partial<Handler> | null,
213218
private readonly options: ParserOptions = {}
214219
) {
215-
this.options = options;
216220
this.cbs = cbs ?? {};
217221
this.lowerCaseTagNames = options.lowerCaseTags ?? !options.xmlMode;
218222
this.lowerCaseAttributeNames =
@@ -224,24 +228,23 @@ export class Parser {
224228
this.cbs.onparserinit?.(this);
225229
}
226230

227-
private updatePosition(offset: number) {
228-
this.startIndex = this.tokenizer.getAbsoluteSectionStart() - offset;
229-
this.endIndex = this.tokenizer.getAbsoluteIndex();
230-
}
231-
232231
// Tokenizer event handlers
232+
233+
/** @internal */
233234
ontext(data: string): void {
234-
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
235-
this.endIndex = this.tokenizer.getAbsoluteIndex() - 1;
235+
const idx = this.tokenizer.getAbsoluteIndex();
236+
this.endIndex = idx;
236237
this.cbs.ontext?.(data);
238+
this.startIndex = idx;
237239
}
238240

239241
protected isVoidElement(name: string): boolean {
240242
return !this.options.xmlMode && voidElements.has(name);
241243
}
242244

245+
/** @internal */
243246
onopentagname(name: string): void {
244-
this.updatePosition(1);
247+
this.endIndex = this.tokenizer.getAbsoluteIndex();
245248

246249
if (this.lowerCaseTagNames) {
247250
name = name.toLowerCase();
@@ -251,6 +254,7 @@ export class Parser {
251254
}
252255

253256
private emitOpenTag(name: string) {
257+
this.openTagStart = this.startIndex;
254258
this.tagname = name;
255259

256260
const impliesClose =
@@ -277,7 +281,9 @@ export class Parser {
277281
if (this.cbs.onopentag) this.attribs = {};
278282
}
279283

284+
/** @internal */
280285
onopentagend(): void {
286+
this.startIndex = this.openTagStart;
281287
this.endIndex = this.tokenizer.getAbsoluteIndex();
282288

283289
if (this.attribs) {
@@ -287,11 +293,16 @@ export class Parser {
287293
if (this.cbs.onclosetag && this.isVoidElement(this.tagname)) {
288294
this.cbs.onclosetag(this.tagname);
289295
}
296+
290297
this.tagname = "";
298+
// Set `startIndex` for next node
299+
this.startIndex = this.endIndex + 1;
291300
}
292301

302+
/** @internal */
293303
onclosetag(name: string): void {
294-
this.updatePosition(2);
304+
this.endIndex = this.tokenizer.getAbsoluteIndex();
305+
295306
if (this.lowerCaseTagNames) {
296307
name = name.toLowerCase();
297308
}
@@ -319,8 +330,12 @@ export class Parser {
319330
this.emitOpenTag(name);
320331
this.closeCurrentTag();
321332
}
333+
334+
// Set `startIndex` for next node
335+
this.startIndex = this.endIndex + 1;
322336
}
323337

338+
/** @internal */
324339
onselfclosingtag(): void {
325340
if (
326341
this.options.xmlMode ||
@@ -329,35 +344,44 @@ export class Parser {
329344
) {
330345
this.closeCurrentTag();
331346
} else {
347+
// Ignore the fact that the tag is self-closing.
332348
this.onopentagend();
333349
}
334350
}
335351

336352
private closeCurrentTag() {
337353
const name = this.tagname;
338354
this.onopentagend();
339-
/*
340-
* Self-closing tags will be on the top of the stack
341-
* (cheaper check than in onclosetag)
342-
*/
355+
356+
// Self-closing tags will be on the top of the stack
343357
if (this.stack[this.stack.length - 1] === name) {
358+
// Reset the start index
359+
this.startIndex = this.openTagStart;
360+
344361
this.cbs.onclosetag?.(name);
345362
this.stack.pop();
346363
}
347364
}
348365

366+
/** @internal */
349367
onattribname(name: string): void {
368+
this.startIndex = this.tokenizer.getAbsoluteSectionStart();
369+
350370
if (this.lowerCaseAttributeNames) {
351371
name = name.toLowerCase();
352372
}
353373
this.attribname = name;
354374
}
355375

376+
/** @internal */
356377
onattribdata(value: string): void {
357378
this.attribvalue += value;
358379
}
359380

381+
/** @internal */
360382
onattribend(quote: string | undefined | null): void {
383+
this.endIndex = this.tokenizer.getAbsoluteIndex();
384+
361385
this.cbs.onattribute?.(this.attribname, this.attribvalue, quote);
362386
if (
363387
this.attribs &&
@@ -380,47 +404,70 @@ export class Parser {
380404
return name;
381405
}
382406

407+
/** @internal */
383408
ondeclaration(value: string): void {
409+
this.endIndex = this.tokenizer.getAbsoluteIndex();
410+
384411
if (this.cbs.onprocessinginstruction) {
385-
this.updatePosition(2);
386412
const name = this.getInstructionName(value);
387413
this.cbs.onprocessinginstruction(`!${name}`, `!${value}`);
388414
}
415+
416+
// Set `startIndex` for next node
417+
this.startIndex = this.endIndex + 1;
389418
}
390419

420+
/** @internal */
391421
onprocessinginstruction(value: string): void {
422+
this.endIndex = this.tokenizer.getAbsoluteIndex();
423+
392424
if (this.cbs.onprocessinginstruction) {
393-
this.updatePosition(2);
394425
const name = this.getInstructionName(value);
395426
this.cbs.onprocessinginstruction(`?${name}`, `?${value}`);
396427
}
428+
429+
// Set `startIndex` for next node
430+
this.startIndex = this.endIndex + 1;
397431
}
398432

433+
/** @internal */
399434
oncomment(value: string): void {
400-
this.updatePosition(4);
435+
this.endIndex = this.tokenizer.getAbsoluteIndex();
436+
401437
this.cbs.oncomment?.(value);
402438
this.cbs.oncommentend?.();
439+
440+
// Set `startIndex` for next node
441+
this.startIndex = this.endIndex + 1;
403442
}
404443

444+
/** @internal */
405445
oncdata(value: string): void {
406-
this.updatePosition(9);
446+
this.endIndex = this.tokenizer.getAbsoluteIndex();
447+
407448
if (this.options.xmlMode || this.options.recognizeCDATA) {
408449
this.cbs.oncdatastart?.();
409450
this.cbs.ontext?.(value);
410451
this.cbs.oncdataend?.();
411452
} else {
412-
this.oncomment(`[CDATA[${value}]]`);
453+
this.cbs.oncomment?.(`[CDATA[${value}]]`);
454+
this.cbs.oncommentend?.();
413455
}
456+
457+
// Set `startIndex` for next node
458+
this.startIndex = this.endIndex + 1;
414459
}
415460

461+
/** @internal */
416462
onerror(err: Error): void {
417463
this.cbs.onerror?.(err);
418464
}
419465

466+
/** @internal */
420467
onend(): void {
421468
if (this.cbs.onclosetag) {
422-
// Set start- and end indices for remaining tags
423-
this.startIndex = this.endIndex = this.tokenizer.getAbsoluteIndex();
469+
// Set the end index for all remaining tags
470+
this.endIndex = this.startIndex;
424471
for (
425472
let i = this.stack.length;
426473
i > 0;
@@ -440,6 +487,8 @@ export class Parser {
440487
this.attribname = "";
441488
this.attribs = null;
442489
this.stack = [];
490+
this.startIndex = 0;
491+
this.endIndex = 0;
443492
this.cbs.onparserinit?.(this);
444493
}
445494

src/__fixtures__/Events/01-simple.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
},
1111
{
1212
"event": "attribute",
13+
"startIndex": 4,
14+
"endIndex": 14,
1315
"data": ["class", "test", null]
1416
},
1517
{
@@ -25,6 +27,8 @@
2527
},
2628
{
2729
"event": "text",
30+
"startIndex": 15,
31+
"endIndex": 19,
2832
"data": ["adsf"]
2933
},
3034
{

src/__fixtures__/Events/02-template.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
},
2323
{
2424
"event": "attribute",
25+
"startIndex": 11,
26+
"endIndex": 30,
2527
"data": ["type", "text/template", "\""]
2628
},
2729
{
@@ -37,6 +39,8 @@
3739
},
3840
{
3941
"event": "text",
42+
"startIndex": 32,
43+
"endIndex": 49,
4044
"data": ["<h1>Heading1</h1>"]
4145
},
4246
{

src/__fixtures__/Events/03-lowercase_tags.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
},
1616
{
1717
"event": "attribute",
18+
"startIndex": 4,
19+
"endIndex": 14,
1820
"data": ["class", "test", null]
1921
},
2022
{
@@ -30,6 +32,8 @@
3032
},
3133
{
3234
"event": "text",
35+
"startIndex": 15,
36+
"endIndex": 19,
3337
"data": ["adsf"]
3438
},
3539
{

src/__fixtures__/Events/04-cdata.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
},
2828
{
2929
"event": "text",
30+
"startIndex": 5,
31+
"endIndex": 41,
3032
"data": [" asdf ><asdf></adsf><> fo"]
3133
},
3234
{

src/__fixtures__/Events/05-cdata-special.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
},
1717
{
1818
"event": "text",
19+
"startIndex": 8,
20+
"endIndex": 53,
1921
"data": ["/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"]
2022
},
2123
{

src/__fixtures__/Events/06-leading-lt.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
"expected": [
55
{
66
"event": "text",
7+
"startIndex": 0,
8+
"endIndex": 3,
79
"data": [">a>"]
810
}
911
]

0 commit comments

Comments
 (0)