diff --git a/src/generators/metadata/constants.mjs b/src/generators/metadata/constants.mjs index e8ac140b..93bc6765 100644 --- a/src/generators/metadata/constants.mjs +++ b/src/generators/metadata/constants.mjs @@ -1,3 +1,7 @@ +// These openers/closers are used to determine if a type string is well-formed +export const TYPE_OPENERS = new Set(['<', '(', '{', '[']); +export const TYPE_CLOSERS = new Set(['>', ')', '}', ']']); + // On "About this Documentation", we define the stability indices, and thus // we don't need to check it for stability references export const IGNORE_STABILITY_STEMS = ['documentation']; @@ -56,8 +60,5 @@ export const DOC_API_HEADING_TYPES = [ }, ]; -// This regex is used to match basic TypeScript generic types (e.g., Promise) -export const TYPE_GENERIC_REGEX = /^([^<]+)<([^>]+)>$/; - // This is the base URL of the Man7 documentation export const DOC_MAN_BASE_URL = 'http://man7.org/linux/man-pages/man'; diff --git a/src/generators/metadata/utils/__tests__/transformers.test.mjs b/src/generators/metadata/utils/__tests__/transformers.test.mjs index ef625d4c..cbb5836e 100644 --- a/src/generators/metadata/utils/__tests__/transformers.test.mjs +++ b/src/generators/metadata/utils/__tests__/transformers.test.mjs @@ -75,4 +75,41 @@ describe('transformTypeToReferenceLink', () => { '[``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)> & [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)>' ); }); + + it('should transform a function returning a Generic type', () => { + strictEqual( + transformTypeToReferenceLink('(err: Error) => Promise', {}), + '(err: [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)>' + ); + }); + + it('should respect precedence: Unions (|) are weaker than Intersections (&)', () => { + strictEqual( + transformTypeToReferenceLink('string | number & boolean', {}), + '[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)' + ); + }); + + it('should handle extreme nested combinations of functions, arrays, generics, unions, and intersections', () => { + const input = + '(str: string[]) => Promise, Map>'; + + const expected = + '(str: [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)[]) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)>, [``](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)<[``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)>>'; + + strictEqual(transformTypeToReferenceLink(input, {}), expected); + }); + + it('should parse functions with array destructuring in callbacks returning functions with object destructuring', () => { + const input = + '(cb: ([first, second]: string[]) => void) => ({ id, name }: User) => boolean'; + + const expected = + '(cb: ([first, second]: [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)[]) => ``) => ({ id, name }: [``](userLink)) => [``](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)'; + + strictEqual( + transformTypeToReferenceLink(input, { User: 'userLink' }), + expected + ); + }); }); diff --git a/src/generators/metadata/utils/transformers.mjs b/src/generators/metadata/utils/transformers.mjs index 7fac4e85..dd6de8b9 100644 --- a/src/generators/metadata/utils/transformers.mjs +++ b/src/generators/metadata/utils/transformers.mjs @@ -1,9 +1,6 @@ -import { - DOC_MAN_BASE_URL, - DOC_API_HEADING_TYPES, - TYPE_GENERIC_REGEX, -} from '../constants.mjs'; +import { DOC_MAN_BASE_URL, DOC_API_HEADING_TYPES } from '../constants.mjs'; import { slug } from './slugger.mjs'; +import { parseType } from './typeParser.mjs'; import { transformNodesToString } from '../../../utils/unist.mjs'; import BUILTIN_TYPE_MAP from '../maps/builtin.json' with { type: 'json' }; import MDN_TYPE_MAP from '../maps/mdn.json' with { type: 'json' }; @@ -22,84 +19,7 @@ export const transformUnixManualToLink = ( ) => { return `[\`${text}\`](${DOC_MAN_BASE_URL}${sectionNumber}/${command}.${sectionNumber}${sectionLetter}.html)`; }; -/** - * Safely splits the string by `|` or `&` at the top level (ignoring those - * inside `< >`), and returns both the pieces and the separator used. - * - * @param {string} str The type string to split - * @returns {{ pieces: string[], separator: string }} The split pieces and the separator string used to join them (` | ` or ` & `) - */ -const splitByOuterSeparator = str => { - const pieces = []; - let current = ''; - let depth = 0; - let separator; - - for (const char of str) { - if (char === '<') { - depth++; - } else if (char === '>') { - depth--; - } else if ((char === '|' || char === '&') && depth === 0) { - pieces.push(current); - current = ''; - separator ??= ` ${char} `; - continue; - } - current += char; - } - - pieces.push(current); - return { pieces, separator }; -}; - -/** - * Attempts to parse and format a basic Generic type (e.g., Promise). - * It also supports union and multi-parameter types within the generic brackets. - * - * @param {string} typePiece The plain type piece to be evaluated - * @param {Function} transformType The function used to resolve individual types into links - * @returns {string|null} The formatted Markdown link, or null if no match is found - */ -const formatBasicGeneric = (typePiece, transformType) => { - const genericMatch = typePiece.match(TYPE_GENERIC_REGEX); - - if (genericMatch) { - const baseType = genericMatch[1].trim(); - const innerType = genericMatch[2].trim(); - - const baseResult = transformType(baseType.replace(/\[\]$/, '')); - const baseFormatted = baseResult - ? `[\`<${baseType}>\`](${baseResult})` - : `\`<${baseType}>\``; - // Split while capturing delimiters (| or ,) to preserve original syntax - const parts = innerType.split(/([|,])/); - - const innerFormatted = parts - .map(part => { - const trimmed = part.trim(); - // If it is a delimiter, return it as is - if (trimmed === '|') { - return ' | '; - } - - if (trimmed === ',') { - return ', '; - } - - const innerRes = transformType(trimmed.replace(/\[\]$/, '')); - return innerRes - ? `[\`<${trimmed}>\`](${innerRes})` - : `\`<${trimmed}>\``; - }) - .join(''); - - return `${baseFormatted}<${innerFormatted}>`; - } - - return null; -}; /** * This method replaces plain text Types within the Markdown content into Markdown links * that link to the actual relevant reference for such type (either internal or external link) @@ -111,7 +31,10 @@ const formatBasicGeneric = (typePiece, transformType) => { export const transformTypeToReferenceLink = (type, record) => { // Removes the wrapping curly braces that wrap the type references // We keep the angle brackets `<>` intact here to parse Generics later - const typeInput = type.replace(/[{}]/g, ''); + const typeInput = type + .trim() + .replace(/^\{(.*)\}$/, '$1') + .trim(); /** * Handles the mapping (if there's a match) of the input text @@ -150,32 +73,7 @@ export const transformTypeToReferenceLink = (type, record) => { return ''; }; - const { pieces: outerPieces, separator } = splitByOuterSeparator(typeInput); - - const typePieces = outerPieces.map(piece => { - // This is the content to render as the text of the Markdown link - const trimmedPiece = piece.trim(); - - // 1. Attempt to format as a basic Generic type first - const genericMarkdown = formatBasicGeneric(trimmedPiece, transformType); - if (genericMarkdown) { - return genericMarkdown; - } - - // 2. Fallback to the logic for plain types - // This is what we will compare against the API types mappings - // The ReGeX below is used to remove `[]` from the end of the type - const result = transformType(trimmedPiece.replace(/\[\]$/, '')); - - // If we have a valid result and the piece is not empty, we return the Markdown link - if (trimmedPiece.length && result.length) { - return `[\`<${trimmedPiece}>\`](${result})`; - } - }); - - // Filter out pieces that we failed to map and then join the valid ones - // using the same separator that appeared in the original type string - const markdownLinks = typePieces.filter(Boolean).join(separator); + const markdownLinks = parseType(typeInput, transformType); // Return the replaced links or the original content if they all failed to be replaced // Note that if some failed to get replaced, only the valid ones will be returned diff --git a/src/generators/metadata/utils/typeParser.mjs b/src/generators/metadata/utils/typeParser.mjs new file mode 100644 index 00000000..5d977379 --- /dev/null +++ b/src/generators/metadata/utils/typeParser.mjs @@ -0,0 +1,236 @@ +import { TYPE_OPENERS, TYPE_CLOSERS } from '../constants.mjs'; + +/** True when the `>` at `i` is the tail of `=>` and shouldn't pop depth. */ +const isArrowTail = (str, i) => str[i] === '>' && str[i - 1] === '='; + +/** + * Walks `str` once, invoking `onToken(i, char)` for each character that + * sits at depth 0. `onToken` may return: + * - a number: advance the cursor by that many extra positions + * - `true`: stop iteration altogether + */ +const walkAtDepthZero = (str, onToken) => { + let depth = 0; + for (let i = 0; i < str.length; i++) { + const char = str[i]; + if (TYPE_OPENERS.has(char)) { + depth++; + } else if (TYPE_CLOSERS.has(char) && !isArrowTail(str, i)) { + depth--; + } + + if (depth === 0) { + const skip = onToken(i, char); + if (skip === true) { + return; + } + if (typeof skip === 'number') { + i += skip; + } + } + } +}; + +/** Format a known type as a Markdown link, or as a bare code span. */ +const formatType = (name, transformType) => { + const url = transformType(name); + return url ? `[\`<${name}>\`](${url})` : `\`<${name}>\``; +}; + +/** Resolve a sub-expression recursively, falling back to a code span. */ +const resolveOr = (part, transformType) => + parseType(part, transformType) || `\`<${part.trim()}>\``; + +/** + * Splits `str` by `separator` at depth 0. `separator` is a single + * character or the two-char string '=>'. + */ +const splitByOuterSeparator = (str, separator) => { + const isArrow = separator === '=>'; + const pieces = []; + let start = 0; + + walkAtDepthZero(str, (i, char) => { + const matches = isArrow + ? char === '=' && str[i + 1] === '>' + : char === separator; + if (!matches) { + return; + } + pieces.push(str.slice(start, i).trim()); + start = i + (isArrow ? 2 : 1); + if (isArrow) { + return 1; + } // skip the '>' + }); + + pieces.push(str.slice(start).trim()); + return pieces; +}; + +/** + * Strips redundant outer parens like `((A | B))` → `A | B`, while + * leaving `(A) | (B)` alone. + */ +const stripOuterParentheses = typeString => { + let s = typeString.trim(); + while (s.length >= 2 && s.startsWith('(') && s.endsWith(')')) { + // The outer `(` matches the outer `)` if depth doesn't hit 0 + // anywhere before the final character. + let wrapsWhole = true; + walkAtDepthZero(s.slice(0, -1), i => { + if (i > 0) { + wrapsWhole = false; + return true; // stop early + } + }); + if (!wrapsWhole) { + break; + } + s = s.slice(1, -1).trim(); + } + return s; +}; + +/** + * Finds the lowest-precedence top-level operator: `=>` beats `|` beats + * `&`. + */ +const findTopLevelOperator = str => { + let arrowIdx = -1; + let unionIdx = -1; + let intersectIdx = -1; + + walkAtDepthZero(str, (i, char) => { + if (char === '=' && str[i + 1] === '>') { + if (arrowIdx === -1) { + arrowIdx = i; + } + return 1; // skip '>' + } + if (char === '|' && unionIdx === -1) { + unionIdx = i; + } else if (char === '&' && intersectIdx === -1) { + intersectIdx = i; + } + }); + + if (arrowIdx !== -1) { + return { op: '=>', index: arrowIdx, width: 2 }; + } + + if (unionIdx !== -1) { + return { op: '|', index: unionIdx, width: 1 }; + } + + if (intersectIdx !== -1) { + return { op: '&', index: intersectIdx, width: 1 }; + } + + return null; +}; + +/** + * Parses the left side of an arrow function (e.g. `(a: string, b: number)` + * or `(x: T)`). Locates the parameter list as the last `(` that opens + * at depth 0. + */ +const parseFunctionSignature = (signature, transformType) => { + const trimmed = signature.trim(); + if (!trimmed.endsWith(')')) { + return signature; + } + + // Find the `(` that opens the outermost group ending at the final `)`. + let depth = 0; + let openIdx = -1; + for (let i = 0; i < trimmed.length; i++) { + const char = trimmed[i]; + if (depth === 0 && char === '(') { + openIdx = i; + } + if (TYPE_OPENERS.has(char)) { + depth++; + } else if (TYPE_CLOSERS.has(char) && !isArrowTail(trimmed, i)) { + depth--; + } + } + if (openIdx === -1) { + return signature; + } + + const prefix = trimmed.slice(0, openIdx); + const paramsString = trimmed.slice(openIdx + 1, -1); + if (!paramsString.trim()) { + return `${prefix}()`; + } + + const parsedArgs = splitByOuterSeparator(paramsString, ',').map(arg => { + const colonParts = splitByOuterSeparator(arg, ':'); + if (colonParts.length > 1) { + const paramName = colonParts[0]; + const paramType = colonParts.slice(1).join(':'); + return `${paramName}: ${resolveOr(paramType, transformType)}`; + } + return parseType(arg, transformType) || arg; + }); + + return `${prefix}(${parsedArgs.join(', ')})`; +}; + +/** + * Recursively parses TypeScript types into Markdown links. + * + * @param {string} typeString The type string to evaluate. + * @param {(name: string) => string | null | undefined} transformType Resolves a bare type name to a URL, or returns falsy. + * @returns {string | null} Markdown for the type, or null when the base type doesn't resolve. + */ +export const parseType = (typeString, transformType) => { + const trimmed = stripOuterParentheses(typeString); + if (!trimmed) { + return null; + } + + const op = findTopLevelOperator(trimmed); + if (op) { + if (op.op === '=>') { + const left = trimmed.slice(0, op.index).trim(); + const right = trimmed.slice(op.index + op.width).trim(); + const sig = parseFunctionSignature(left, transformType); + return `${sig} => ${resolveOr(right, transformType)}`; + } + + // Union / intersection + const parts = splitByOuterSeparator(trimmed, op.op); + const joiner = op.op === '|' ? ' | ' : ' & '; + return parts.map(p => resolveOr(p, transformType)).join(joiner); + } + + // Strip a trailing `[]` for now; reapply on the way out. + const isArray = trimmed.endsWith('[]'); + const core = isArray ? trimmed.slice(0, -2).trim() : trimmed; + const arrayTail = isArray ? '[]' : ''; + + // Generic: `Base<...>`. + const ltIdx = core.indexOf('<'); + if (ltIdx !== -1 && core.endsWith('>')) { + const baseType = core.slice(0, ltIdx).trim(); + const innerType = core.slice(ltIdx + 1, -1).trim(); + const inner = splitByOuterSeparator(innerType, ',') + .map(arg => resolveOr(arg, transformType)) + .join(', '); + return `${formatType(baseType, transformType)}<${inner}>${arrayTail}`; + } + + // Plain base type. + if (!core.length) { + return null; + } + + const url = transformType(core); + if (!url) { + return null; + } + + return `[\`<${core}>\`](${url})${arrayTail}`; +};