From 322f4bf1ba4c8fbaa1a5c369edc07a950149f355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Wed, 14 May 2025 18:11:47 -0300 Subject: [PATCH 1/3] fix: resolve generic interface extension error --- src/types.d.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/types.d.ts b/src/types.d.ts index 6519e2f..1a9e1c2 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -4,9 +4,9 @@ import type { SemVer } from 'semver'; import type { Data, Node, Parent, Position } from 'unist'; // Unist Node with typed Data, which allows better type inference -interface NodeWithData extends T { +type NodeWithData = T & { data: J; -} +}; declare global { export interface StabilityIndexMetadataEntry { From 195cc7323877993963d869a76777d45d3e90a666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Wed, 14 May 2025 19:43:14 -0300 Subject: [PATCH 2/3] refactor: create metadata generator --- bin/commands/generate.mjs | 37 +++--- bin/utils.mjs | 2 +- src/generators.mjs | 15 +-- src/generators/addon-verify/index.mjs | 2 +- src/generators/ast-js/index.mjs | 2 +- src/generators/index.mjs | 2 + src/generators/json-simple/index.mjs | 2 +- src/generators/legacy-html/index.mjs | 2 +- src/generators/legacy-json/index.mjs | 2 +- src/generators/llms-txt/index.mjs | 2 +- src/generators/man-page/index.mjs | 2 +- src/generators/metadata/index.mjs | 167 ++++++++++++++++++++++++++ src/generators/orama-db/index.mjs | 2 +- src/linter/tests/fixtures/issues.mjs | 2 - src/loaders/markdown.mjs | 4 +- src/metadata.mjs | 2 +- src/parsers/markdown.mjs | 156 ++---------------------- src/types.d.ts | 8 ++ 18 files changed, 228 insertions(+), 183 deletions(-) create mode 100644 src/generators/metadata/index.mjs diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs index 6911b7d..f8d748f 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -1,6 +1,5 @@ import { cpus } from 'node:os'; import { resolve } from 'node:path'; -import process from 'node:process'; import { coerce } from 'semver'; @@ -12,7 +11,6 @@ import createGenerator from '../../src/generators.mjs'; import { publicGenerators } from '../../src/generators/index.mjs'; import createNodeReleases from '../../src/releases.mjs'; import { loadAndParse } from '../utils.mjs'; -import { runLint } from './lint.mjs'; const availableGenerators = Object.keys(publicGenerators); @@ -125,22 +123,29 @@ export default { async action(opts) { const docs = await loadAndParse(opts.input, opts.ignore); - if (!opts.skipLint && !runLint(docs)) { - console.error('Lint failed; aborting generation.'); - process.exit(1); - } + // if (!opts.skipLint && !runLint(docs)) { + // console.error('Lint failed; aborting generation.'); + // process.exit(1); + // } - const { runGenerators } = createGenerator(docs); const { getAllMajors } = createNodeReleases(opts.changelog); - await runGenerators({ - generators: opts.target, - input: opts.input, - output: opts.output && resolve(opts.output), - version: coerce(opts.version), - releases: await getAllMajors(), - gitRef: opts.gitRef, - threads: parseInt(opts.threads, 10), - }); + const releases = await getAllMajors(); + + await Promise.all( + docs.map(async doc => { + const { runGenerators } = createGenerator(doc); + + await runGenerators({ + generators: opts.target, + input: opts.input, + output: opts.output && resolve(opts.output), + version: coerce(opts.version), + releases, + gitRef: opts.gitRef, + threads: parseInt(opts.threads, 10), + }); + }) + ); }, }; diff --git 
a/bin/utils.mjs b/bin/utils.mjs index b32a092..54ea999 100644 --- a/bin/utils.mjs +++ b/bin/utils.mjs @@ -23,7 +23,7 @@ const parser = lazy(createMarkdownParser); * Load and parse markdown API docs. * @param {string[]} input - Glob patterns for input files. * @param {string[]} [ignore] - Glob patterns to ignore. - * @returns {Promise} - Parsed documentation objects. + * @returns {Promise[]>} */ export async function loadAndParse(input, ignore) { const files = await loader().loadFiles(input, ignore); diff --git a/src/generators.mjs b/src/generators.mjs index bd058db..2b7a304 100644 --- a/src/generators.mjs +++ b/src/generators.mjs @@ -4,11 +4,6 @@ import { allGenerators } from './generators/index.mjs'; import WorkerPool from './threading/index.mjs'; /** - * @typedef {{ ast: GeneratorMetadata}} AstGenerator The AST "generator" is a facade for the AST tree and it isn't really a generator - * @typedef {AvailableGenerators & AstGenerator} AllGenerators A complete set of the available generators, including the AST one - * @param markdownInput - * @param jsInput - * * This method creates a system that allows you to register generators * and then execute them in a specific order, keeping track of the * generation process, and handling errors that may occur from the @@ -21,10 +16,12 @@ import WorkerPool from './threading/index.mjs'; * Generators can also write to files. These would usually be considered * the final generators in the chain. * - * @param {ApiDocMetadataEntry} markdownInput The parsed API doc metadata entries - * @param {Array} parsedJsFiles + * @typedef {{ ast: GeneratorMetadata}} AstGenerator The AST "generator" is a facade for the AST tree and it isn't really a generator + * @typedef {AvailableGenerators & AstGenerator} AllGenerators A complete set of the available generators, including the AST one + * + * @param {ParserOutput} input The API doc AST tree */ -const createGenerator = markdownInput => { +const createGenerator = input => { /** * We store all the registered generators to be processed * within a Record, so we can access their results at any time whenever needed @@ -32,7 +29,7 @@ const createGenerator = markdownInput => { * * @type {{ [K in keyof AllGenerators]: ReturnType }} */ - const cachedGenerators = { ast: Promise.resolve(markdownInput) }; + const cachedGenerators = { ast: Promise.resolve(input) }; const threadPool = new WorkerPool(); diff --git a/src/generators/addon-verify/index.mjs b/src/generators/addon-verify/index.mjs index 450bf96..31b9318 100644 --- a/src/generators/addon-verify/index.mjs +++ b/src/generators/addon-verify/index.mjs @@ -30,7 +30,7 @@ export default { description: 'Generates a file list from code blocks extracted from `doc/api/addons.md` to facilitate C++ compilation and JavaScript runtime validations', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates a file list from code blocks. 
diff --git a/src/generators/ast-js/index.mjs b/src/generators/ast-js/index.mjs index b31cb04..1549790 100644 --- a/src/generators/ast-js/index.mjs +++ b/src/generators/ast-js/index.mjs @@ -20,7 +20,7 @@ export default { description: 'Parses Javascript source files passed into the input.', - dependsOn: 'ast', + dependsOn: 'metadata', /** * @param {Input} _ diff --git a/src/generators/index.mjs b/src/generators/index.mjs index 7a9db52..9850869 100644 --- a/src/generators/index.mjs +++ b/src/generators/index.mjs @@ -11,8 +11,10 @@ import apiLinks from './api-links/index.mjs'; import oramaDb from './orama-db/index.mjs'; import astJs from './ast-js/index.mjs'; import llmsTxt from './llms-txt/index.mjs'; +import metadata from './metadata/index.mjs'; export const publicGenerators = { + metadata: metadata, 'json-simple': jsonSimple, 'legacy-html': legacyHtml, 'legacy-html-all': legacyHtmlAll, diff --git a/src/generators/json-simple/index.mjs b/src/generators/json-simple/index.mjs index f23395c..423223f 100644 --- a/src/generators/json-simple/index.mjs +++ b/src/generators/json-simple/index.mjs @@ -26,7 +26,7 @@ export default { description: 'Generates the simple JSON version of the API docs, and returns it as a string', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates the simplified JSON version of the API docs diff --git a/src/generators/legacy-html/index.mjs b/src/generators/legacy-html/index.mjs index 77aefb6..52f8d71 100644 --- a/src/generators/legacy-html/index.mjs +++ b/src/generators/legacy-html/index.mjs @@ -41,7 +41,7 @@ export default { description: 'Generates the legacy version of the API docs in HTML, with the assets and styles included as files', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates the legacy version of the API docs in HTML diff --git a/src/generators/legacy-json/index.mjs b/src/generators/legacy-json/index.mjs index add7ff8..e26b7e7 100644 --- a/src/generators/legacy-json/index.mjs +++ b/src/generators/legacy-json/index.mjs @@ -25,7 +25,7 @@ export default { description: 'Generates the legacy version of the JSON API docs.', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates a legacy JSON file. 
diff --git a/src/generators/llms-txt/index.mjs b/src/generators/llms-txt/index.mjs index 997fb34..b80c234 100644 --- a/src/generators/llms-txt/index.mjs +++ b/src/generators/llms-txt/index.mjs @@ -19,7 +19,7 @@ export default { description: 'Generates a llms.txt file to provide information to LLMs at inference time', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates a llms.txt file diff --git a/src/generators/man-page/index.mjs b/src/generators/man-page/index.mjs index f0bd846..b2d0bfd 100644 --- a/src/generators/man-page/index.mjs +++ b/src/generators/man-page/index.mjs @@ -25,7 +25,7 @@ export default { description: 'Generates the Node.js man-page.', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates the Node.js man-page diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs new file mode 100644 index 0000000..0a085f7 --- /dev/null +++ b/src/generators/metadata/index.mjs @@ -0,0 +1,167 @@ +'use strict'; + +import { u as createTree } from 'unist-builder'; +import { findAfter } from 'unist-util-find-after'; +import { remove } from 'unist-util-remove'; +import { selectAll } from 'unist-util-select'; +import { SKIP, visit } from 'unist-util-visit'; + +import { createNodeSlugger } from '../../utils/slugger/index.mjs'; +import { getRemark } from '../../utils/remark.mjs'; +import createQueries from '../../utils/queries/index.mjs'; +import createMetadata from '../../metadata.mjs'; + +/** + * This generator generates a flattened list of metadata entries from a API doc + * + * @typedef {ParserOutput} Input + * + * @type {GeneratorMetadata} + */ +export default { + name: 'metadata', + + version: '1.0.0', + + description: 'generates a flattened list of API doc metadata entries', + + dependsOn: 'ast', + + /** + * @param {Input} input + * @returns {Promise} + */ + async generate({ file, tree }) { + const { + setHeadingMetadata, + addYAMLMetadata, + updateMarkdownLink, + updateTypeReference, + updateLinkReference, + addStabilityMetadata, + } = createQueries(); + + // Creates an instance of the Remark processor with GFM support + // which is used for stringifying the AST tree back to Markdown + const remarkProcessor = getRemark(); + + /** + * This holds references to all the Metadata entries for a given file + * this is used so we can traverse the AST tree and keep mutating things + * and then stringify the whole api doc file at once without creating sub traversals + * + * Then once we have the whole file parsed, we can split the resulting string into sections + * and seal the Metadata Entries (`.create()`) and return the result to the caller of parae. 
+ * + * @type {Array} + */ + const metadataCollection = []; + + // Creates a new Slugger instance for the current API doc file + const nodeSlugger = createNodeSlugger(); + + // Get all Markdown Footnote definitions from the tree + const markdownDefinitions = selectAll('definition', tree); + + // Get all Markdown Heading entries from the tree + const headingNodes = selectAll('heading', tree); + + // Handles Markdown link references and updates them to be plain links + visit(tree, createQueries.UNIST.isLinkReference, node => + updateLinkReference(node, markdownDefinitions) + ); + + // Removes all the original definitions from the tree as they are not needed + // anymore, since all link references got updated to be plain links + remove(tree, markdownDefinitions); + + // Handles the normalisation URLs that reference to API doc files with .md extension + // to replace the .md into .html, since the API doc files get eventually compiled as HTML + visit(tree, createQueries.UNIST.isMarkdownUrl, node => + updateMarkdownLink(node) + ); + + // If the document has no headings but it has content, we add a fake heading to the top + // so that our parsing logic can work correctly, and generate content for the whole file + if (headingNodes.length === 0 && tree.children.length > 0) { + tree.children.unshift(createTree('heading', { depth: 1 }, [])); + } + + // Handles iterating the tree and creating subtrees for each API doc entry + // where an API doc entry is defined by a Heading Node + // (so all elements after a Heading until the next Heading) + // and then it creates and updates a Metadata entry for each API doc entry + // and then generates the final content for each API doc entry and pushes it to the collection + visit(tree, createQueries.UNIST.isHeading, (headingNode, index) => { + // Creates a new Metadata entry for the current API doc file + const apiEntryMetadata = createMetadata(nodeSlugger); + + // Adds the Metadata of the current Heading Node to the Metadata entry + setHeadingMetadata(headingNode, apiEntryMetadata); + + // We retrieve the immediate next Heading if it exists + // This is used for ensuring that we don't include items that would + // belong only to the next heading to the current Heading metadata + // Note that if there is no next heading, we use the current node as the next one + const nextHeadingNode = + findAfter(tree, index, createQueries.UNIST.isHeading) ?? headingNode; + + // This is the cutover index of the subtree that we should get + // of all the Nodes within the AST tree that belong to this section + // If `next` is equals the current heading, it means there's no next heading + // and we are reaching the end of the document, hence the cutover should be the end of + // the document itself. + const stop = + headingNode === nextHeadingNode + ? 
tree.children.length + : tree.children.indexOf(nextHeadingNode); + + // Retrieves all the nodes that should belong to the current API docs section + // `index + 1` is used to skip the current Heading Node + const subTree = createTree('root', tree.children.slice(index, stop)); + + // Visits all Stability Index nodes from the current subtree if there's any + // and then apply the Stability Index metadata to the current metadata entry + visit(subTree, createQueries.UNIST.isStabilityNode, node => + addStabilityMetadata(node, apiEntryMetadata) + ); + + // Visits all HTML nodes from the current subtree and if there's any that matches + // our YAML metadata structure, it transforms into YAML metadata + // and then apply the YAML Metadata to the current Metadata entry + visit(subTree, createQueries.UNIST.isYamlNode, node => { + // TODO: Is there always only one YAML node? + apiEntryMetadata.setYamlPosition(node.position); + addYAMLMetadata(node, apiEntryMetadata); + }); + + // Visits all Text nodes from the current subtree and if there's any that matches + // any API doc type reference and then updates the type reference to be a Markdown link + visit(subTree, createQueries.UNIST.isTextWithType, (node, _, parent) => + updateTypeReference(node, parent) + ); + + // Removes already parsed items from the subtree so that they aren't included in the final content + remove(subTree, [createQueries.UNIST.isYamlNode]); + + // Applies the AST transformations to the subtree based on the API doc entry Metadata + // Note that running the transformation on the subtree isn't costly as it is a reduced tree + // and the GFM transformations aren't that heavy + const parsedSubTree = remarkProcessor.runSync(subTree); + + // We seal and create the API doc entry Metadata and push them to the collection + const parsedApiEntryMetadata = apiEntryMetadata.create( + file, + parsedSubTree + ); + + // We push the parsed API doc entry Metadata to the collection + metadataCollection.push(parsedApiEntryMetadata); + + return SKIP; + }); + + // Returns the Metadata entries for the given API doc file + return metadataCollection; + }, +}; diff --git a/src/generators/orama-db/index.mjs b/src/generators/orama-db/index.mjs index c2210c6..bd2447a 100644 --- a/src/generators/orama-db/index.mjs +++ b/src/generators/orama-db/index.mjs @@ -21,7 +21,7 @@ export default { description: 'Generates the Orama database for the API docs.', - dependsOn: 'ast', + dependsOn: 'metadata', /** * Generates the Orama database. 
diff --git a/src/linter/tests/fixtures/issues.mjs b/src/linter/tests/fixtures/issues.mjs index 1546e8b..b52678b 100644 --- a/src/linter/tests/fixtures/issues.mjs +++ b/src/linter/tests/fixtures/issues.mjs @@ -1,5 +1,3 @@ -// @ts-check - /** * @type {import('../../types').LintIssue} */ diff --git a/src/loaders/markdown.mjs b/src/loaders/markdown.mjs index ed1073c..fbaecf9 100644 --- a/src/loaders/markdown.mjs +++ b/src/loaders/markdown.mjs @@ -15,8 +15,8 @@ const createLoader = () => { /** * Loads API Doc files and transforms it into VFiles * - * @param {string} searchPath A glob/path for API docs to be loaded - * @param {string | undefined} ignorePath A glob/path of files to ignore + * @param {string[]} searchPath A glob/path for API docs to be loaded + * @param {string[] | undefined} ignorePath A glob/path of files to ignore * The input string can be a simple path (relative or absolute) * The input string can also be any allowed glob string * diff --git a/src/metadata.mjs b/src/metadata.mjs index 6b65df9..cde808f 100644 --- a/src/metadata.mjs +++ b/src/metadata.mjs @@ -109,7 +109,7 @@ const createMetadata = slugger => { * The Navigation entries has a dedicated separate method for retrieval * as it can be manipulated outside of the scope of the generation of the content * - * @param {import('vfile').VFile} apiDoc The API doc file being parsed + * @param {{stem?: string, basename?: string}} apiDoc The API doc file being parsed * @param {ApiDocMetadataEntry['content']} section An AST tree containing the Nodes of the API doc entry section * @returns {ApiDocMetadataEntry} The locally created Metadata entries */ diff --git a/src/parsers/markdown.mjs b/src/parsers/markdown.mjs index 3617bac..4df9d4c 100644 --- a/src/parsers/markdown.mjs +++ b/src/parsers/markdown.mjs @@ -1,58 +1,25 @@ 'use strict'; -import { u as createTree } from 'unist-builder'; -import { findAfter } from 'unist-util-find-after'; -import { remove } from 'unist-util-remove'; -import { selectAll } from 'unist-util-select'; -import { SKIP, visit } from 'unist-util-visit'; - -import createMetadata from '../metadata.mjs'; import createQueries from '../utils/queries/index.mjs'; import { getRemark } from '../utils/remark.mjs'; -import { createNodeSlugger } from '../utils/slugger/index.mjs'; /** * Creates an API doc parser for a given Markdown API doc file - * - * @param {import('./linter/index.mjs').Linter | undefined} linter */ const createParser = () => { // Creates an instance of the Remark processor with GFM support - // which is used for stringifying the AST tree back to Markdown const remarkProcessor = getRemark(); - const { - setHeadingMetadata, - addYAMLMetadata, - updateMarkdownLink, - updateTypeReference, - updateLinkReference, - addStabilityMetadata, - updateStabilityPrefixToLink, - } = createQueries(); + const { updateStabilityPrefixToLink } = createQueries(); /** - * Parses a given API doc metadata file into a list of Metadata entries + * Parses a given API doc file into a AST tree * * @param {import('vfile').VFile | Promise} apiDoc + * @returns {Promise>} */ const parseApiDoc = async apiDoc => { - /** - * This holds references to all the Metadata entries for a given file - * this is used so we can traverse the AST tree and keep mutating things - * and then stringify the whole api doc file at once without creating sub traversals - * - * Then once we have the whole file parsed, we can split the resulting string into sections - * and seal the Metadata Entries (`.create()`) and return the result to the caller of parae. 
- * - * @type {Array} - */ - const metadataCollection = []; - - // Creates a new Slugger instance for the current API doc file - const nodeSlugger = createNodeSlugger(); - // We allow the API doc VFile to be a Promise of a VFile also, // hence we want to ensure that it first resolves before we pass it to the parser const resolvedApiDoc = await Promise.resolve(apiDoc); @@ -63,113 +30,13 @@ const createParser = () => { // Parses the API doc into an AST tree using `unified` and `remark` const apiDocTree = remarkProcessor.parse(resolvedApiDoc); - // Get all Markdown Footnote definitions from the tree - const markdownDefinitions = selectAll('definition', apiDocTree); - - // Get all Markdown Heading entries from the tree - const headingNodes = selectAll('heading', apiDocTree); - - // Handles Markdown link references and updates them to be plain links - visit(apiDocTree, createQueries.UNIST.isLinkReference, node => - updateLinkReference(node, markdownDefinitions) - ); - - // Removes all the original definitions from the tree as they are not needed - // anymore, since all link references got updated to be plain links - remove(apiDocTree, markdownDefinitions); - - // Handles the normalisation URLs that reference to API doc files with .md extension - // to replace the .md into .html, since the API doc files get eventually compiled as HTML - visit(apiDocTree, createQueries.UNIST.isMarkdownUrl, node => - updateMarkdownLink(node) - ); - - // If the document has no headings but it has content, we add a fake heading to the top - // so that our parsing logic can work correctly, and generate content for the whole file - if (headingNodes.length === 0 && apiDocTree.children.length > 0) { - apiDocTree.children.unshift(createTree('heading', { depth: 1 }, [])); - } - - // Handles iterating the tree and creating subtrees for each API doc entry - // where an API doc entry is defined by a Heading Node - // (so all elements after a Heading until the next Heading) - // and then it creates and updates a Metadata entry for each API doc entry - // and then generates the final content for each API doc entry and pushes it to the collection - visit(apiDocTree, createQueries.UNIST.isHeading, (headingNode, index) => { - // Creates a new Metadata entry for the current API doc file - const apiEntryMetadata = createMetadata(nodeSlugger); - - // Adds the Metadata of the current Heading Node to the Metadata entry - setHeadingMetadata(headingNode, apiEntryMetadata); - - // We retrieve the immediate next Heading if it exists - // This is used for ensuring that we don't include items that would - // belong only to the next heading to the current Heading metadata - // Note that if there is no next heading, we use the current node as the next one - const nextHeadingNode = - findAfter(apiDocTree, index, createQueries.UNIST.isHeading) ?? - headingNode; - - // This is the cutover index of the subtree that we should get - // of all the Nodes within the AST tree that belong to this section - // If `next` is equals the current heading, it means there's no next heading - // and we are reaching the end of the document, hence the cutover should be the end of - // the document itself. - const stop = - headingNode === nextHeadingNode - ? 
apiDocTree.children.length - : apiDocTree.children.indexOf(nextHeadingNode); - - // Retrieves all the nodes that should belong to the current API docs section - // `index + 1` is used to skip the current Heading Node - const subTree = createTree( - 'root', - apiDocTree.children.slice(index, stop) - ); - - // Visits all Stability Index nodes from the current subtree if there's any - // and then apply the Stability Index metadata to the current metadata entry - visit(subTree, createQueries.UNIST.isStabilityNode, node => - addStabilityMetadata(node, apiEntryMetadata) - ); - - // Visits all HTML nodes from the current subtree and if there's any that matches - // our YAML metadata structure, it transforms into YAML metadata - // and then apply the YAML Metadata to the current Metadata entry - visit(subTree, createQueries.UNIST.isYamlNode, node => { - // TODO: Is there always only one YAML node? - apiEntryMetadata.setYamlPosition(node.position); - addYAMLMetadata(node, apiEntryMetadata); - }); - - // Visits all Text nodes from the current subtree and if there's any that matches - // any API doc type reference and then updates the type reference to be a Markdown link - visit(subTree, createQueries.UNIST.isTextWithType, (node, _, parent) => - updateTypeReference(node, parent) - ); - - // Removes already parsed items from the subtree so that they aren't included in the final content - remove(subTree, [createQueries.UNIST.isYamlNode]); - - // Applies the AST transformations to the subtree based on the API doc entry Metadata - // Note that running the transformation on the subtree isn't costly as it is a reduced tree - // and the GFM transformations aren't that heavy - const parsedSubTree = remarkProcessor.runSync(subTree); - - // We seal and create the API doc entry Metadata and push them to the collection - const parsedApiEntryMetadata = apiEntryMetadata.create( - resolvedApiDoc, - parsedSubTree - ); - - // We push the parsed API doc entry Metadata to the collection - metadataCollection.push(parsedApiEntryMetadata); - - return SKIP; - }); - - // Returns the Metadata entries for the given API doc file - return metadataCollection; + return { + file: { + stem: resolvedApiDoc.stem, + basename: resolvedApiDoc.basename, + }, + tree: apiDocTree, + }; }; /** @@ -177,13 +44,14 @@ const createParser = () => { * and it simply wraps parseApiDoc with the given API docs * * @param {Array>} apiDocs List of API doc files to be parsed + * @returns {Promise[]>} */ const parseApiDocs = async apiDocs => { // We do a Promise.all, to ensure that each API doc is resolved asynchronously // but all need to be resolved first before we return the result to the caller const resolvedApiDocEntries = await Promise.all(apiDocs.map(parseApiDoc)); - return resolvedApiDocEntries.flat(); + return resolvedApiDocEntries; }; return { parseApiDocs, parseApiDoc }; diff --git a/src/types.d.ts b/src/types.d.ts index 1a9e1c2..e1f7121 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -9,6 +9,14 @@ type NodeWithData = T & { }; declare global { + export interface ParserOutput { + file: { + stem?: string; + basename?: string; + }; + tree: T; + } + export interface StabilityIndexMetadataEntry { index: number; description: string; From 37a572d0e951004cc2f6f8f975a1b93e1a08b0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Ara=C3=BAjo?= Date: Fri, 23 May 2025 10:52:35 -0300 Subject: [PATCH 3/3] fix: remove thread pool loop --- bin/commands/generate.mjs | 24 ++-- src/generators/metadata/index.mjs | 149 +----------------------- 
src/generators/metadata/utils/parse.mjs | 148 +++++++++++++++++++++++ 3 files changed, 163 insertions(+), 158 deletions(-) create mode 100644 src/generators/metadata/utils/parse.mjs diff --git a/bin/commands/generate.mjs b/bin/commands/generate.mjs index f8d748f..acb6259 100644 --- a/bin/commands/generate.mjs +++ b/bin/commands/generate.mjs @@ -132,20 +132,16 @@ export default { const releases = await getAllMajors(); - await Promise.all( - docs.map(async doc => { - const { runGenerators } = createGenerator(doc); + const { runGenerators } = createGenerator(docs); - await runGenerators({ - generators: opts.target, - input: opts.input, - output: opts.output && resolve(opts.output), - version: coerce(opts.version), - releases, - gitRef: opts.gitRef, - threads: parseInt(opts.threads, 10), - }); - }) - ); + await runGenerators({ + generators: opts.target, + input: opts.input, + output: opts.output && resolve(opts.output), + version: coerce(opts.version), + releases, + gitRef: opts.gitRef, + threads: parseInt(opts.threads, 10), + }); }, }; diff --git a/src/generators/metadata/index.mjs b/src/generators/metadata/index.mjs index 0a085f7..02fd53e 100644 --- a/src/generators/metadata/index.mjs +++ b/src/generators/metadata/index.mjs @@ -1,20 +1,11 @@ 'use strict'; -import { u as createTree } from 'unist-builder'; -import { findAfter } from 'unist-util-find-after'; -import { remove } from 'unist-util-remove'; -import { selectAll } from 'unist-util-select'; -import { SKIP, visit } from 'unist-util-visit'; - -import { createNodeSlugger } from '../../utils/slugger/index.mjs'; -import { getRemark } from '../../utils/remark.mjs'; -import createQueries from '../../utils/queries/index.mjs'; -import createMetadata from '../../metadata.mjs'; +import { parseApiDoc } from './utils/parse.mjs'; /** * This generator generates a flattened list of metadata entries from a API doc * - * @typedef {ParserOutput} Input + * @typedef {ParserOutput[]} Input * * @type {GeneratorMetadata} */ @@ -28,140 +19,10 @@ export default { dependsOn: 'ast', /** - * @param {Input} input + * @param {Input} inputs * @returns {Promise} */ - async generate({ file, tree }) { - const { - setHeadingMetadata, - addYAMLMetadata, - updateMarkdownLink, - updateTypeReference, - updateLinkReference, - addStabilityMetadata, - } = createQueries(); - - // Creates an instance of the Remark processor with GFM support - // which is used for stringifying the AST tree back to Markdown - const remarkProcessor = getRemark(); - - /** - * This holds references to all the Metadata entries for a given file - * this is used so we can traverse the AST tree and keep mutating things - * and then stringify the whole api doc file at once without creating sub traversals - * - * Then once we have the whole file parsed, we can split the resulting string into sections - * and seal the Metadata Entries (`.create()`) and return the result to the caller of parae. 
- * - * @type {Array} - */ - const metadataCollection = []; - - // Creates a new Slugger instance for the current API doc file - const nodeSlugger = createNodeSlugger(); - - // Get all Markdown Footnote definitions from the tree - const markdownDefinitions = selectAll('definition', tree); - - // Get all Markdown Heading entries from the tree - const headingNodes = selectAll('heading', tree); - - // Handles Markdown link references and updates them to be plain links - visit(tree, createQueries.UNIST.isLinkReference, node => - updateLinkReference(node, markdownDefinitions) - ); - - // Removes all the original definitions from the tree as they are not needed - // anymore, since all link references got updated to be plain links - remove(tree, markdownDefinitions); - - // Handles the normalisation URLs that reference to API doc files with .md extension - // to replace the .md into .html, since the API doc files get eventually compiled as HTML - visit(tree, createQueries.UNIST.isMarkdownUrl, node => - updateMarkdownLink(node) - ); - - // If the document has no headings but it has content, we add a fake heading to the top - // so that our parsing logic can work correctly, and generate content for the whole file - if (headingNodes.length === 0 && tree.children.length > 0) { - tree.children.unshift(createTree('heading', { depth: 1 }, [])); - } - - // Handles iterating the tree and creating subtrees for each API doc entry - // where an API doc entry is defined by a Heading Node - // (so all elements after a Heading until the next Heading) - // and then it creates and updates a Metadata entry for each API doc entry - // and then generates the final content for each API doc entry and pushes it to the collection - visit(tree, createQueries.UNIST.isHeading, (headingNode, index) => { - // Creates a new Metadata entry for the current API doc file - const apiEntryMetadata = createMetadata(nodeSlugger); - - // Adds the Metadata of the current Heading Node to the Metadata entry - setHeadingMetadata(headingNode, apiEntryMetadata); - - // We retrieve the immediate next Heading if it exists - // This is used for ensuring that we don't include items that would - // belong only to the next heading to the current Heading metadata - // Note that if there is no next heading, we use the current node as the next one - const nextHeadingNode = - findAfter(tree, index, createQueries.UNIST.isHeading) ?? headingNode; - - // This is the cutover index of the subtree that we should get - // of all the Nodes within the AST tree that belong to this section - // If `next` is equals the current heading, it means there's no next heading - // and we are reaching the end of the document, hence the cutover should be the end of - // the document itself. - const stop = - headingNode === nextHeadingNode - ? 
tree.children.length - : tree.children.indexOf(nextHeadingNode); - - // Retrieves all the nodes that should belong to the current API docs section - // `index + 1` is used to skip the current Heading Node - const subTree = createTree('root', tree.children.slice(index, stop)); - - // Visits all Stability Index nodes from the current subtree if there's any - // and then apply the Stability Index metadata to the current metadata entry - visit(subTree, createQueries.UNIST.isStabilityNode, node => - addStabilityMetadata(node, apiEntryMetadata) - ); - - // Visits all HTML nodes from the current subtree and if there's any that matches - // our YAML metadata structure, it transforms into YAML metadata - // and then apply the YAML Metadata to the current Metadata entry - visit(subTree, createQueries.UNIST.isYamlNode, node => { - // TODO: Is there always only one YAML node? - apiEntryMetadata.setYamlPosition(node.position); - addYAMLMetadata(node, apiEntryMetadata); - }); - - // Visits all Text nodes from the current subtree and if there's any that matches - // any API doc type reference and then updates the type reference to be a Markdown link - visit(subTree, createQueries.UNIST.isTextWithType, (node, _, parent) => - updateTypeReference(node, parent) - ); - - // Removes already parsed items from the subtree so that they aren't included in the final content - remove(subTree, [createQueries.UNIST.isYamlNode]); - - // Applies the AST transformations to the subtree based on the API doc entry Metadata - // Note that running the transformation on the subtree isn't costly as it is a reduced tree - // and the GFM transformations aren't that heavy - const parsedSubTree = remarkProcessor.runSync(subTree); - - // We seal and create the API doc entry Metadata and push them to the collection - const parsedApiEntryMetadata = apiEntryMetadata.create( - file, - parsedSubTree - ); - - // We push the parsed API doc entry Metadata to the collection - metadataCollection.push(parsedApiEntryMetadata); - - return SKIP; - }); - - // Returns the Metadata entries for the given API doc file - return metadataCollection; + async generate(inputs) { + return inputs.flatMap(input => parseApiDoc(input)); }, }; diff --git a/src/generators/metadata/utils/parse.mjs b/src/generators/metadata/utils/parse.mjs new file mode 100644 index 0000000..520ede8 --- /dev/null +++ b/src/generators/metadata/utils/parse.mjs @@ -0,0 +1,148 @@ +'use strict'; + +import { u as createTree } from 'unist-builder'; +import { findAfter } from 'unist-util-find-after'; +import { remove } from 'unist-util-remove'; +import { selectAll } from 'unist-util-select'; +import { SKIP, visit } from 'unist-util-visit'; + +import createQueries from '../../../utils/queries/index.mjs'; +import { getRemark } from '../../../utils/remark.mjs'; +import { createNodeSlugger } from '../../../utils/slugger/index.mjs'; +import createMetadata from '../../../metadata.mjs'; + +/** + * This generator generates a flattened list of metadata entries from a API doc + * + * @param {ParserOutput} input + * @returns {Promise} + */ +export const parseApiDoc = ({ file, tree }) => { + /** + * This holds references to all the Metadata entries for a given file + * this is used so we can traverse the AST tree and keep mutating things + * and then stringify the whole api doc file at once without creating sub traversals + * + * Then once we have the whole file parsed, we can split the resulting string into sections + * and seal the Metadata Entries (`.create()`) and return the result to the caller of 
`parseApiDoc`. + * + * @type {Array} + */ + const metadataCollection = []; + + const { + setHeadingMetadata, + addYAMLMetadata, + updateMarkdownLink, + updateTypeReference, + updateLinkReference, + addStabilityMetadata, + } = createQueries(); + + // Creates an instance of the Remark processor with GFM support + // which is used for stringifying the AST tree back to Markdown + const remarkProcessor = getRemark(); + + // Creates a new Slugger instance for the current API doc file + const nodeSlugger = createNodeSlugger(); + + // Get all Markdown Footnote definitions from the tree + const markdownDefinitions = selectAll('definition', tree); + + // Get all Markdown Heading entries from the tree + const headingNodes = selectAll('heading', tree); + + // Handles Markdown link references and updates them to be plain links + visit(tree, createQueries.UNIST.isLinkReference, node => + updateLinkReference(node, markdownDefinitions) + ); + + // Removes all the original definitions from the tree as they are not needed + // anymore, since all link references got updated to be plain links + remove(tree, markdownDefinitions); + + // Handles the normalisation of URLs that reference API doc files with the .md extension + // replacing .md with .html, since the API doc files eventually get compiled as HTML + visit(tree, createQueries.UNIST.isMarkdownUrl, node => + updateMarkdownLink(node) + ); + + // If the document has no headings but it has content, we add a fake heading to the top + // so that our parsing logic can work correctly, and generate content for the whole file + if (headingNodes.length === 0 && tree.children.length > 0) { + tree.children.unshift(createTree('heading', { depth: 1 }, [])); + } + + // Handles iterating the tree and creating subtrees for each API doc entry + // where an API doc entry is defined by a Heading Node + // (so all elements after a Heading until the next Heading) + // and then it creates and updates a Metadata entry for each API doc entry + // and then generates the final content for each API doc entry and pushes it to the collection + visit(tree, createQueries.UNIST.isHeading, (headingNode, index) => { + // Creates a new Metadata entry for the current API doc file + const apiEntryMetadata = createMetadata(nodeSlugger); + + // Adds the Metadata of the current Heading Node to the Metadata entry + setHeadingMetadata(headingNode, apiEntryMetadata); + + // We retrieve the immediate next Heading if it exists + // This is used for ensuring that we don't include items that would + // belong only to the next heading in the current Heading metadata + // Note that if there is no next heading, we use the current node as the next one + const nextHeadingNode = + findAfter(tree, index, createQueries.UNIST.isHeading) ?? headingNode; + + // This is the cutover index of the subtree that we should get + // of all the Nodes within the AST tree that belong to this section + // If `next` equals the current heading, it means there's no next heading + // and we are reaching the end of the document, hence the cutover should be the end of + // the document itself. + const stop = + headingNode === nextHeadingNode + ?
tree.children.length + : tree.children.indexOf(nextHeadingNode); + + // Retrieves all the nodes that should belong to the current API docs section + // The slice starts at the current Heading Node and stops right before the next Heading + const subTree = createTree('root', tree.children.slice(index, stop)); + + // Visits all Stability Index nodes from the current subtree, if there are any, + // and then applies the Stability Index metadata to the current metadata entry + visit(subTree, createQueries.UNIST.isStabilityNode, node => + addStabilityMetadata(node, apiEntryMetadata) + ); + + // Visits all HTML nodes from the current subtree and, if any matches + // our YAML metadata structure, transforms it into YAML metadata + // and then applies the YAML Metadata to the current Metadata entry + visit(subTree, createQueries.UNIST.isYamlNode, node => { + // TODO: Is there always only one YAML node? + apiEntryMetadata.setYamlPosition(node.position); + addYAMLMetadata(node, apiEntryMetadata); + }); + + // Visits all Text nodes from the current subtree and, if any matches + // an API doc type reference, updates the type reference to be a Markdown link + visit(subTree, createQueries.UNIST.isTextWithType, (node, _, parent) => + updateTypeReference(node, parent) + ); + + // Removes already parsed items from the subtree so that they aren't included in the final content + remove(subTree, [createQueries.UNIST.isYamlNode]); + + // Applies the AST transformations to the subtree based on the API doc entry Metadata + // Note that running the transformation on the subtree isn't costly as it is a reduced tree + // and the GFM transformations aren't that heavy + const parsedSubTree = remarkProcessor.runSync(subTree); + + // We seal and create the API doc entry Metadata + const parsedApiEntryMetadata = apiEntryMetadata.create(file, parsedSubTree); + + // We push the parsed API doc entry Metadata to the collection + metadataCollection.push(parsedApiEntryMetadata); + + return SKIP; + }); + + return metadataCollection; +};
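After this series, the pipeline is wired as follows: the Markdown parser only emits one ParserOutput ({ file, tree }) per API doc file, createGenerator() seeds its 'ast' cache with that whole array, and the new 'metadata' generator flattens it into the ApiDocMetadataEntry list that the remaining generators consume via dependsOn: 'metadata'. Below is a minimal usage sketch of that flow, mirroring bin/commands/generate.mjs; the glob, changelog path, target generators and option values are illustrative, not taken from the patches.

import { resolve } from 'node:path';
import { coerce } from 'semver';

import createGenerator from '../../src/generators.mjs';
import createNodeReleases from '../../src/releases.mjs';
import { loadAndParse } from '../utils.mjs';

// loadAndParse() now resolves to ParserOutput[] ({ file, tree } per API doc file)
const docs = await loadAndParse(['doc/api/*.md']);

// The full ParserOutput[] becomes the 'ast' cache entry; the 'metadata'
// generator turns it into ApiDocMetadataEntry[] for its dependants.
const { runGenerators } = createGenerator(docs);

const { getAllMajors } = createNodeReleases('CHANGELOG.md');

await runGenerators({
  generators: ['legacy-html', 'legacy-json'], // resolved through their 'metadata' dependency
  input: ['doc/api/*.md'],
  output: resolve('out'),
  version: coerce('24.0.0'),
  releases: await getAllMajors(),
  gitRef: 'HEAD',
  threads: 4,
});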