Skip to content

Commit ef1bd28

Browse files
committed
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better.
1 parent dd73b0a commit ef1bd28

14 files changed

+458
-346
lines changed

.eslintrc.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@
223223
"unicorn/no-new-array": "off",
224224
"unicorn/no-null": "off",
225225
"unicorn/no-unsafe-regex": "off",
226+
"unicorn/no-useless-undefined": "off",
226227
"unicorn/prefer-at": "off",
227228
"unicorn/prefer-module": "off",
228229
"unicorn/prefer-string-replace-all": "off",

demo/markdownlint-browser.js

Lines changed: 200 additions & 175 deletions
Large diffs are not rendered by default.

doc-build/md051.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ letters, numbers, `-`, and `_`):
2626
[Link](#custom-name)
2727
```
2828

29-
Alternatively, an HTML `a` tag with an `id` or a `name` attribute can be used to
30-
define a fragment:
29+
Alternatively, any HTML tag with an `id` attribute or an `a` tag with a `name`
30+
attribute can be used to define a fragment:
3131

3232
```markdown
3333
<a id="bookmark"></a>

doc/Rules.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2176,8 +2176,8 @@ letters, numbers, `-`, and `_`):
21762176
[Link](#custom-name)
21772177
```
21782178

2179-
Alternatively, an HTML `a` tag with an `id` or a `name` attribute can be used to
2180-
define a fragment:
2179+
Alternatively, any HTML tag with an `id` attribute or an `a` tag with a `name`
2180+
attribute can be used to define a fragment:
21812181

21822182
```markdown
21832183
<a id="bookmark"></a>

doc/md051.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ letters, numbers, `-`, and `_`):
3434
[Link](#custom-name)
3535
```
3636

37-
Alternatively, an HTML `a` tag with an `id` or a `name` attribute can be used to
38-
define a fragment:
37+
Alternatively, any HTML tag with an `id` attribute or an `a` tag with a `name`
38+
attribute can be used to define a fragment:
3939

4040
```markdown
4141
<a id="bookmark"></a>

helpers/helpers.js

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ const inlineCommentStartRe =
1818
/(<!--\s*markdownlint-(disable|enable|capture|restore|disable-file|enable-file|disable-line|disable-next-line|configure-file))(?:\s|-->)/gi;
1919
module.exports.inlineCommentStartRe = inlineCommentStartRe;
2020

21-
// Regular expression for matching HTML elements
22-
const htmlElementRe = /<(([A-Za-z][A-Za-z\d-]*)(?:\s[^`>]*)?)\/?>/g;
23-
module.exports.htmlElementRe = htmlElementRe;
24-
2521
// Regular expressions for range matching
2622
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
2723
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
@@ -444,23 +440,6 @@ module.exports.flattenLists = function flattenLists(tokens) {
444440
return flattenedLists;
445441
};
446442

447-
/**
448-
* Calls the provided function for each specified inline child token.
449-
*
450-
* @param {Object} params RuleParams instance.
451-
* @param {string} type Token type identifier.
452-
* @param {Function} handler Callback function.
453-
* @returns {void}
454-
*/
455-
function forEachInlineChild(params, type, handler) {
456-
filterTokens(params, "inline", (token) => {
457-
for (const child of token.children.filter((c) => c.type === type)) {
458-
handler(child, token);
459-
}
460-
});
461-
}
462-
module.exports.forEachInlineChild = forEachInlineChild;
463-
464443
// Calls the provided function for each heading's content
465444
module.exports.forEachHeading = function forEachHeading(params, handler) {
466445
let heading = null;

helpers/micromark.cjs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,30 @@ function filterByTypes(tokens, allowed) {
204204
);
205205
}
206206

207+
/**
 * Filter a list of Micromark tokens for HTML tokens.
 *
 * Returns every "htmlText" token that filterByTypes finds in the given list.
 * An "htmlFlow" token is never returned itself; instead its htmlFlowChildren
 * list is queued and scanned the same way. Results are therefore ordered
 * breadth-first: matches from the initial list come first, followed by the
 * matches from each queued htmlFlowChildren list in the order queued.
 *
 * A falsy tokens argument yields an empty result. An "htmlFlow" token whose
 * htmlFlowChildren is not an array is skipped and does not affect the lists
 * queued after it.
 *
 * @param {Token[]} tokens Micromark tokens.
 * @returns {Token[]} Filtered tokens.
 */
function filterByHtmlTokens(tokens) {
  const result = [];
  const pending = tokens ? [ tokens ] : [];
  // Walk the queue by index instead of testing the truthiness of shift(), so
  // that nothing placed on the queue can end the scan while later lists are
  // still waiting to be processed.
  for (let index = 0; index < pending.length; index++) {
    const current = pending[index];
    for (const token of filterByTypes(current, [ "htmlFlow", "htmlText" ])) {
      if (token.type === "htmlText") {
        result.push(token);
        continue;
      }
      // token.type === "htmlFlow"
      // @ts-ignore
      const { htmlFlowChildren } = token;
      // Only queue real child lists; a missing list has nothing to scan.
      if (Array.isArray(htmlFlowChildren)) {
        pending.push(htmlFlowChildren);
      }
    }
  }
  return result;
}
230+
207231
/**
208232
* Returns a list of all nested child tokens.
209233
*
@@ -293,6 +317,7 @@ function tokenIfType(token, type) {
293317

294318
module.exports = {
295319
"parse": micromarkParse,
320+
filterByHtmlTokens,
296321
filterByPredicate,
297322
filterByTypes,
298323
flattenedChildren,

lib/md033.js

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"use strict";
44

55
const { addError } = require("../helpers");
6-
const { filterByTypes, getHtmlTagInfo } = require("../helpers/micromark.cjs");
6+
const { filterByHtmlTokens, getHtmlTagInfo } =
7+
require("../helpers/micromark.cjs");
78

89
const nextLinesRe = /[\r\n][\s\S]*$/;
910

@@ -15,34 +16,24 @@ module.exports = {
1516
let allowedElements = params.config.allowed_elements;
1617
allowedElements = Array.isArray(allowedElements) ? allowedElements : [];
1718
allowedElements = allowedElements.map((element) => element.toLowerCase());
18-
const pending = [ params.parsers.micromark.tokens ];
19-
let current = null;
20-
while ((current = pending.shift())) {
21-
const tokens = current;
22-
for (const token of filterByTypes(tokens, [ "htmlFlow", "htmlText" ])) {
23-
if (token.type === "htmlText") {
24-
const htmlTagInfo = getHtmlTagInfo(token);
25-
if (
26-
htmlTagInfo &&
27-
!htmlTagInfo.close &&
28-
!allowedElements.includes(htmlTagInfo.name.toLowerCase())
29-
) {
30-
const range = [
31-
token.startColumn,
32-
token.text.replace(nextLinesRe, "").length
33-
];
34-
addError(
35-
onError,
36-
token.startLine,
37-
"Element: " + htmlTagInfo.name,
38-
undefined,
39-
range
40-
);
41-
}
42-
} else {
43-
// token.type === "htmlFlow"
44-
pending.push(token.htmlFlowChildren);
45-
}
19+
for (const token of filterByHtmlTokens(params.parsers.micromark.tokens)) {
20+
const htmlTagInfo = getHtmlTagInfo(token);
21+
if (
22+
htmlTagInfo &&
23+
!htmlTagInfo.close &&
24+
!allowedElements.includes(htmlTagInfo.name.toLowerCase())
25+
) {
26+
const range = [
27+
token.startColumn,
28+
token.text.replace(nextLinesRe, "").length
29+
];
30+
addError(
31+
onError,
32+
token.startLine,
33+
"Element: " + htmlTagInfo.name,
34+
undefined,
35+
range
36+
);
4637
}
4738
}
4839
}

lib/md051.js

Lines changed: 80 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
"use strict";
44

5-
const { addError, addErrorDetailIf, escapeForRegExp, filterTokens,
6-
forEachInlineChild, forEachHeading, htmlElementRe } = require("../helpers");
5+
const { addError, addErrorDetailIf } = require("../helpers");
6+
const { filterByHtmlTokens, filterByTypes, getHtmlTagInfo } =
7+
require("../helpers/micromark.cjs");
78

89
// Regular expression for identifying HTML anchor names
910
const idRe = /\sid\s*=\s*['"]?([^'"\s>]+)/iu;
@@ -14,14 +15,14 @@ const anchorRe = /\{(#[a-z\d]+(?:[-_][a-z\d]+)*)\}/gu;
1415
* Converts a Markdown heading into an HTML fragment according to the rules
1516
* used by GitHub.
1617
*
17-
* @param {Object} inline Inline token for heading.
18+
* @param {Object} headingText Heading text token.
1819
* @returns {string} Fragment string for heading.
1920
*/
20-
function convertHeadingToHTMLFragment(inline) {
21-
const inlineText = inline.children
22-
.filter((token) => token.type !== "html_inline")
23-
.map((token) => token.content)
24-
.join("");
21+
function convertHeadingToHTMLFragment(headingText) {
22+
const inlineText =
23+
filterByTypes(headingText.children, [ "codeTextData", "data" ])
24+
.map((token) => token.text)
25+
.join("");
2526
return "#" + encodeURIComponent(
2627
inlineText
2728
.toLowerCase()
@@ -42,86 +43,96 @@ module.exports = {
4243
"description": "Link fragments should be valid",
4344
"tags": [ "links" ],
4445
"function": function MD051(params, onError) {
46+
const { tokens } = params.parsers.micromark;
4547
const fragments = new Map();
48+
4649
// Process headings
47-
forEachHeading(params, (heading, content, inline) => {
48-
const fragment = convertHeadingToHTMLFragment(inline);
50+
const headingTexts = filterByTypes(
51+
tokens,
52+
[ "atxHeadingText", "setextHeadingText" ]
53+
);
54+
for (const headingText of headingTexts) {
55+
const fragment = convertHeadingToHTMLFragment(headingText);
4956
const count = fragments.get(fragment) || 0;
5057
if (count) {
5158
fragments.set(`${fragment}-${count}`, 0);
5259
}
5360
fragments.set(fragment, count + 1);
5461
let match = null;
55-
while ((match = anchorRe.exec(content)) !== null) {
62+
while ((match = anchorRe.exec(headingText.text)) !== null) {
5663
const [ , anchor ] = match;
5764
if (!fragments.has(anchor)) {
5865
fragments.set(anchor, 1);
5966
}
6067
}
61-
});
68+
}
69+
6270
// Process HTML anchors
63-
const processHtmlToken = (token) => {
64-
let match = null;
65-
while ((match = htmlElementRe.exec(token.content)) !== null) {
66-
const [ tag, , element ] = match;
67-
const anchorMatch = idRe.exec(tag) ||
68-
(element.toLowerCase() === "a" && nameRe.exec(tag));
71+
for (const token of filterByHtmlTokens(tokens)) {
72+
const htmlTagInfo = getHtmlTagInfo(token);
73+
if (htmlTagInfo && !htmlTagInfo.close) {
74+
const anchorMatch = idRe.exec(token.text) ||
75+
(htmlTagInfo.name.toLowerCase() === "a" && nameRe.exec(token.text));
6976
if (anchorMatch) {
7077
fragments.set(`#${anchorMatch[1]}`, 0);
7178
}
7279
}
73-
};
74-
filterTokens(params, "html_block", processHtmlToken);
75-
forEachInlineChild(params, "html_inline", processHtmlToken);
76-
// Process link fragments
77-
forEachInlineChild(params, "link_open", (token) => {
78-
const { attrs, lineNumber, line } = token;
79-
const href = attrs.find((attr) => attr[0] === "href");
80-
const id = href && href[1];
81-
if (id && (id.length > 1) && (id[0] === "#") && !fragments.has(id)) {
82-
let context = id;
83-
let range = null;
84-
let fixInfo = null;
85-
const match = line.match(
86-
new RegExp(`\\[.*?\\]\\(${escapeForRegExp(context)}\\)`)
87-
);
88-
if (match) {
89-
[ context ] = match;
90-
const index = match.index;
91-
const length = context.length;
92-
range = [ index + 1, length ];
93-
fixInfo = {
94-
"editColumn": index + (length - id.length),
95-
"deleteCount": id.length,
96-
"insertText": null
97-
};
98-
}
99-
const idLower = id.toLowerCase();
100-
const mixedCaseKey = [ ...fragments.keys() ]
101-
.find((key) => idLower === key.toLowerCase());
102-
if (mixedCaseKey) {
103-
(fixInfo || {}).insertText = mixedCaseKey;
104-
addErrorDetailIf(
105-
onError,
106-
lineNumber,
107-
mixedCaseKey,
108-
id,
109-
undefined,
110-
context,
111-
range,
112-
fixInfo
113-
);
114-
} else {
115-
addError(
116-
onError,
117-
lineNumber,
118-
undefined,
119-
context,
120-
// @ts-ignore
121-
range
122-
);
80+
}
81+
82+
// Process link and definition fragments
83+
const parentChilds = [
84+
[ "link", "resourceDestinationString" ],
85+
[ "definition", "definitionDestinationString" ]
86+
];
87+
for (const [ parentType, definitionType ] of parentChilds) {
88+
const links = filterByTypes(tokens, [ parentType ]);
89+
for (const link of links) {
90+
const definitions = filterByTypes(link.children, [ definitionType ]);
91+
for (const definition of definitions) {
92+
if (
93+
(definition.text.length > 1) &&
94+
definition.text.startsWith("#") &&
95+
!fragments.has(definition.text)
96+
) {
97+
// eslint-disable-next-line no-undef-init
98+
let range = undefined;
99+
// eslint-disable-next-line no-undef-init
100+
let fixInfo = undefined;
101+
if (link.startLine === link.endLine) {
102+
range = [ link.startColumn, link.endColumn - link.startColumn ];
103+
fixInfo = {
104+
"editColumn": definition.startColumn,
105+
"deleteCount": definition.endColumn - definition.startColumn
106+
};
107+
}
108+
const definitionTextLower = definition.text.toLowerCase();
109+
const mixedCaseKey = [ ...fragments.keys() ]
110+
.find((key) => definitionTextLower === key.toLowerCase());
111+
if (mixedCaseKey) {
112+
// @ts-ignore
113+
(fixInfo || {}).insertText = mixedCaseKey;
114+
addErrorDetailIf(
115+
onError,
116+
link.startLine,
117+
mixedCaseKey,
118+
definition.text,
119+
undefined,
120+
link.text,
121+
range,
122+
fixInfo
123+
);
124+
} else {
125+
addError(
126+
onError,
127+
link.startLine,
128+
undefined,
129+
link.text,
130+
range
131+
);
132+
}
133+
}
123134
}
124135
}
125-
});
136+
}
126137
}
127138
};

0 commit comments

Comments
 (0)