diff --git a/CHANGELOG.md b/CHANGELOG.md index 596f9309b..17f1ff18b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ title: Changelog ### Bug Fixes - Attempting to highlight a supported language which is not enabled is now a warning, not an error, #2956. +- Improved compatibility with CommonMark's link parsing, #2959. ## v0.28.5 (2025-05-26) diff --git a/src/lib/converter/comments/textParser.ts b/src/lib/converter/comments/textParser.ts index 1c020a201..c2f705894 100644 --- a/src/lib/converter/comments/textParser.ts +++ b/src/lib/converter/comments/textParser.ts @@ -19,7 +19,6 @@ interface TextParserData { sourcePath: NormalizedPath; token: Token; pos: number; - i18n: TranslationProxy; warning: (msg: TranslatedString, token: Token) => void; files: FileRegistry; atNewLine: boolean; @@ -41,6 +40,7 @@ interface RelativeLink { */ export class TextParserReentryState { withinLinkLabel = false; + withinLinkDest = false; private lastPartWasNewline = false; checkState(token: Token) { @@ -48,11 +48,13 @@ export class TextParserReentryState { case TokenSyntaxKind.Code: if (/\n\s*\n/.test(token.text)) { this.withinLinkLabel = false; + this.withinLinkDest = false; } break; case TokenSyntaxKind.NewLine: if (this.lastPartWasNewline) { this.withinLinkLabel = false; + this.withinLinkDest = false; } break; } @@ -76,17 +78,18 @@ export function textContent( reentry: TextParserReentryState, ) { let lastPartEnd = 0; + let canEndMarkdownLink = true; const data: TextParserData = { sourcePath, token, pos: 0, // relative to the token - i18n, warning, files: files, atNewLine, }; function addRef(ref: RelativeLink) { + canEndMarkdownLink = true; outContent.push({ kind: "text", text: token.text.slice(lastPartEnd, ref.pos), @@ -116,10 +119,15 @@ export function textContent( } while (data.pos < token.text.length) { - const link = checkMarkdownLink(data, reentry); - if (link) { - addRef(link); - continue; + if (canEndMarkdownLink) { + const link = checkMarkdownLink(data, reentry); + if (link) { + addRef(link); + continue; + } + // If we're within a Markdown link, then `checkMarkdownLink` + // already scanned `token` up to a line feed (if any). + canEndMarkdownLink = !reentry.withinLinkLabel && !reentry.withinLinkDest; } const reference = checkReference(data); @@ -134,7 +142,9 @@ export function textContent( continue; } - data.atNewLine = token.text[data.pos] === "\n"; + const atNewLine = token.text[data.pos] === "\n"; + data.atNewLine = atNewLine; + if (atNewLine && !reentry.withinLinkDest) canEndMarkdownLink = true; ++data.pos; } @@ -160,53 +170,73 @@ function checkMarkdownLink( const { token, sourcePath, files } = data; let searchStart: number; - if (reentry.withinLinkLabel) { + if (reentry.withinLinkLabel || reentry.withinLinkDest) { searchStart = data.pos; - reentry.withinLinkLabel = false; } else if (token.text[data.pos] === "[") { searchStart = data.pos + 1; } else { return; } - const labelEnd = findLabelEnd(token.text, searchStart); - if (labelEnd === -1) { - // This markdown link might be split across multiple display parts - // [ `text` ](link) - // ^^ text - // ^^^^^^ code - // ^^^^^^^^ text - reentry.withinLinkLabel = true; - return; + if (!reentry.withinLinkDest) { + const labelEnd = findLabelEnd(token.text, searchStart); + if (labelEnd === -1 || token.text[labelEnd] === "\n") { + // This markdown link might be split across multiple lines or input tokens + // [prefix `code` suffix](target) + // ........^^^^^^................ + // Unless we encounter two consecutive line feeds, expect it to keep going. + reentry.withinLinkLabel = labelEnd !== data.pos || !data.atNewLine; + return; + } + reentry.withinLinkLabel = false; + if (!token.text.startsWith("](", labelEnd)) return; + searchStart = labelEnd + 2; } - if (token.text[labelEnd] === "]" && token.text[labelEnd + 1] === "(") { - const link = MdHelpers.parseLinkDestination( - token.text, - labelEnd + 2, - token.text.length, - ); - - if (link.ok) { - // Only make a relative-link display part if it's actually a relative link. - // Discard protocol:// links, unix style absolute paths, and windows style absolute paths. - if (isRelativePath(link.str)) { - const { target, anchor } = files.register( - sourcePath, - link.str as NormalizedPath, - ) || { target: undefined, anchor: undefined }; - return { - pos: labelEnd + 2, - end: link.pos, - target, - targetAnchor: anchor, - }; - } - - // This was a link, skip ahead to ensure we don't happen to parse - // something else as a link within the link. - data.pos = link.pos - 1; + // Skip whitespace (including line breaks) between "](" and the link destination. + // https://spec.commonmark.org/0.31.2/#links + const end = token.text.length; + let lookahead = searchStart; + for (let newlines = 0;; ++lookahead) { + if (lookahead === end) { + reentry.withinLinkDest = true; + return; } + switch (token.text[lookahead]) { + case "\n": + if (++newlines === 2) { + reentry.withinLinkDest = false; + return; + } + continue; + case " ": + case "\t": + continue; + } + break; + } + reentry.withinLinkDest = false; + + const link = MdHelpers.parseLinkDestination(token.text, lookahead, end); + if (link.ok) { + // Only make a relative-link display part if it's actually a relative link. + // Discard protocol:// links, unix style absolute paths, and windows style absolute paths. + if (isRelativePath(link.str)) { + const { target, anchor } = files.register( + sourcePath, + link.str as NormalizedPath, + ) || { target: undefined, anchor: undefined }; + return { + pos: lookahead, + end: link.pos, + target, + targetAnchor: anchor, + }; + } + + // This was a link, skip ahead to ensure we don't happen to parse + // something else as a link within the link. + data.pos = link.pos - 1; } } @@ -328,6 +358,10 @@ function isRelativePath(link: string) { function findLabelEnd(text: string, pos: number) { while (pos < text.length) { switch (text[pos]) { + case "\\": + ++pos; + if (pos < text.length && text[pos] === "\n") return pos; + break; case "\n": case "]": case "[": diff --git a/src/test/comments.test.ts b/src/test/comments.test.ts index cc1d9c6f1..27d50c9d2 100644 --- a/src/test/comments.test.ts +++ b/src/test/comments.test.ts @@ -5,7 +5,7 @@ import type { CommentParserConfig } from "../lib/converter/comments/index.js"; import { lexBlockComment } from "../lib/converter/comments/blockLexer.js"; import { lexLineComments } from "../lib/converter/comments/lineLexer.js"; import { type Token, TokenSyntaxKind } from "../lib/converter/comments/lexer.js"; -import { parseComment } from "../lib/converter/comments/parser.js"; +import { parseComment, parseCommentString } from "../lib/converter/comments/parser.js"; import { lexCommentString } from "../lib/converter/comments/rawLexer.js"; import { Comment, type CommentDisplayPart, CommentTag } from "../lib/models/index.js"; import { TestLogger } from "./TestLogger.js"; @@ -13,6 +13,68 @@ import { extractTagName } from "../lib/converter/comments/tagName.js"; import { FileRegistry } from "../lib/models/FileRegistry.js"; import { dedent, MinimalSourceFile, type NormalizedPath } from "#utils"; +const CONTENT_PARTS = ["text", "`code`"]; +const SEPARATORS = [" ", "\n", "\n ", "\n\n"]; +const MAX_CONTENT_PARTS = 3; +function* generateLinkTitleCases() { + const makeCase = (linkTitle: string) => { + const ok = !linkTitle.includes("\n\n"); + const input = `[${linkTitle}](./relative.md)`; + const expect: CommentDisplayPart[] = input + .replace("(./relative.md)", "(") + .split(/(`code`)/g) + .map((text) => { + const kind = text[0] === "`" ? "code" : "text"; + return { kind, text }; + }); + if (ok) { + expect.push( + { + kind: "relative-link", + text: "./relative.md", + target: 1, + targetAnchor: undefined, + }, + { kind: "text", text: ")" }, + ); + } else { + expect[expect.length - 1].text += "./relative.md)"; + } + expect[expect.length - 1].text += "\n["; + expect.push( + { kind: "code", text: "`code`" }, + { kind: "text", text: "](" }, + { + kind: "relative-link", + text: "./relative.md", + target: 1, + targetAnchor: undefined, + }, + { kind: "text", text: ")" }, + ); + return { input: input + "\n[`code`](./relative.md)", expect }; + }; + for (let n = 1; n <= MAX_CONTENT_PARTS; n++) { + // 3 bits for each part (except the first): + // selects a part from CONTENT_PARTS + // selects a preceding separator from SEPARATORS + for (let bits = 0; bits < 2 ** (3 * n); bits += 4) { + const inner = Array.from({ length: n }, (_, i) => { + const partSelections = bits >> (3 * i); + const part = CONTENT_PARTS[partSelections & 4 ? 1 : 0]; + const sepText = SEPARATORS[partSelections & 3]; + return i === 0 ? part : `${sepText}${part}`; + }).join(""); + // We also wrap the parts with arbitrary leading and trailing whitespace + for (const prefix of ["", ...SEPARATORS]) { + for (const suffix of ["", ...SEPARATORS]) { + yield makeCase(`${prefix}${inner}${suffix}`); + } + } + } + } +} + describe("Block Comment Lexer", () => { function lex(text: string): Token[] { return Array.from(lexBlockComment(text)); @@ -1500,6 +1562,51 @@ describe("Comment Parser", () => { ); }); + it("Recognizes markdown links which contain parentheses and escapes in the label", () => { + const comment = getComment(String.raw`/** + * [(parens) \[brackets\]](./relative.md) + * + * [ + * multi-line + * \[brackets\] + * (parens) + * ]( + * ./relative.md + * ) + */`); + + const link = { + kind: "relative-link", + text: "./relative.md", + target: 1, + targetAnchor: undefined, + } as const; + + equal( + comment.summary, + [ + { kind: "text", text: String.raw`[(parens) \[brackets\]](` }, + link, + { kind: "text", text: `)\n\n[\n multi-line\n ${String.raw`\[brackets\]`}\n (parens)\n](\n ` }, + link, + { kind: "text", text: "\n )" }, + ] satisfies CommentDisplayPart[], + ); + }); + + it("Parses markdown link titles with arbitrarily-separated arbitrary combinations of text and code", () => { + const embedInComment = (input: string) => { + const lines = input.split("\n"); + const embedded = `/**\n${lines.map(line => " * " + line).join("\n")}\n */`; + return getComment(embedded); + }; + + for (const { input, expect } of generateLinkTitleCases()) { + const comment = embedInComment(input); + equal(comment.summary, expect, `input: ${JSON.stringify(input)}`); + } + }); + it("Recognizes markdown reference definition blocks", () => { const comment = getComment(`/** * [1]: ./example.md @@ -1663,6 +1770,105 @@ describe("Comment Parser", () => { }); }); +describe("Raw Comment Parser", () => { + const config: CommentParserConfig = { + blockTags: new Set([ + "@param", + "@remarks", + "@module", + "@inheritDoc", + "@defaultValue", + ]), + inlineTags: new Set(["@link"]), + modifierTags: new Set([ + "@public", + "@private", + "@protected", + "@readonly", + "@enum", + "@event", + "@packageDocumentation", + ]), + jsDocCompatibility: { + defaultTag: true, + exampleTag: true, + ignoreUnescapedBraces: false, + inheritDocTag: false, + }, + suppressCommentWarningsInDeclarationFiles: false, + useTsLinkResolution: false, + commentStyle: "jsdoc", + }; + + function getComment(text: string) { + const files = new FileRegistry(); + const logger = new TestLogger(); + const content = lexCommentString(text); + const comment = parseCommentString( + content, + config, + new MinimalSourceFile(text, "/dev/zero" as NormalizedPath), + logger, + files, + ); + logger.expectNoOtherMessages(); + return comment; + } + + it("Recognizes markdown links which contain parentheses and escapes in the label", () => { + const comment = getComment(dedent(String.raw` + [(parens) \[brackets\]](./relative.md) + + [ + multi-line + \[brackets\] + (parens) + ]( + ./relative.md + ) + `)); + + const link = { + kind: "relative-link", + text: "./relative.md", + target: 1, + targetAnchor: undefined, + } as const; + + equal( + comment.content, + [ + { kind: "text", text: String.raw`[(parens) \[brackets\]](` }, + link, + { kind: "text", text: `)\n\n[\n multi-line\n ${String.raw`\[brackets\]`}\n (parens)\n](\n ` }, + link, + { kind: "text", text: "\n )" }, + ] satisfies CommentDisplayPart[], + ); + }); + + it("Parses markdown link titles with arbitrarily-separated arbitrary combinations of text and code", () => { + for (const { input, expect } of generateLinkTitleCases()) { + const comment = getComment(input); + equal(comment.content, expect, `input: ${JSON.stringify(input)}`); + } + }); +}); + +describe("Markdown Link Title Generation", () => { + const inputs: string[] = []; + for (const { input } of generateLinkTitleCases()) { + inputs.push(input); + } + const inputsSet = new Set(inputs); + equal(inputsSet.size, inputs.length, "each generated input must be unique"); + + const expectCount = Array.from({ length: MAX_CONTENT_PARTS }, (_, i) => i + 1) + .map(n => (SEPARATORS.length * CONTENT_PARTS.length) ** n / SEPARATORS.length * (SEPARATORS.length + 1) ** 2) + .reduce((a, b) => a + b); + equal(inputsSet.size, expectCount, "generated input count"); +}); + describe("extractTagName", () => { it("Handles simple name", () => { equal(extractTagName("T - abc"), { name: "T", newText: "abc" });