Skip to content

Commit 543ecbd

Browse files
Use unified to parse markdown
1 parent 75c12a8 commit 543ecbd

File tree

3 files changed

+1420
-101
lines changed

3 files changed

+1420
-101
lines changed

node-zerox/src/openAI.ts

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,76 @@
11
import { CompletionArgs, CompletionResponse } from "./types";
22
import { convertKeysToSnakeCase, encodeImageToBase64 } from "./utils";
33
import axios from "axios";
4+
import { nanoid } from "nanoid";
5+
6+
/**
 * Parses a Markdown string and flattens its top-level nodes into a list of
 * JSON records, each linked to the heading it sits under.
 *
 * Every top-level node is passed to `processNode` together with the id of the
 * nearest preceding heading that is still "open". A heading is closed as soon
 * as a later heading of the same or a shallower depth is encountered.
 *
 * @param markdownString Raw Markdown source.
 * @returns One record per top-level node, in document order.
 */
const markdownToJson = async (markdownString: string) => {
  /**
   * Bypassing typescript transpiler using eval to use dynamic imports
   * (keeps the compiler from rewriting `import()` when emitting the module).
   *
   * Source: https://stackoverflow.com/a/70546326
   */
  const { unified } = await eval(`import('unified')`);
  const { default: remarkParse } = await eval(`import('remark-parse')`);
  // remark-gfm exposes the plugin as its default export only. Destructuring a
  // named `remarkGfm` yields `undefined`, and `.use(undefined)` is a silent
  // no-op, so GFM syntax (tables, strikethrough, ...) would never be parsed.
  const { default: remarkGfm } = await eval(`import('remark-gfm')`);

  const parsedMd = unified()
    .use(remarkParse) // Parse Markdown to AST
    .use(remarkGfm)
    .parse(markdownString);

  // Stack of headings that are still open, outermost first. Id and depth are
  // kept together so the two can never drift out of sync.
  const openHeadings: { id: string; depth: number }[] = [];

  const jsonObj = parsedMd.children.map((node: any) => {
    const isHeading = node.type === "heading";

    if (isHeading) {
      // Close every heading that is at the same level or deeper than the new
      // one, not just the most recent: going from an h3 straight back to an
      // h1 must close the h3, the h2 and the previous h1.
      let top = openHeadings[openHeadings.length - 1];
      while (top !== undefined && top.depth >= node.depth) {
        openHeadings.pop();
        top = openHeadings[openHeadings.length - 1];
      }
    }

    const parent = openHeadings[openHeadings.length - 1];
    const processedNode = processNode(node, parent?.id);

    if (isHeading) {
      openHeadings.push({ id: processedNode.id, depth: node.depth });
    }

    return processedNode;
  });

  return jsonObj;
};
44+
45+
/**
 * Lookup table from a Markdown AST node type to the `type` label written to
 * the JSON output. Node types that are not listed resolve to `undefined`, so
 * callers can fall back to a default label.
 *
 * The table is built on a null-prototype object: it is indexed with arbitrary
 * node-type strings, and a plain object literal would resolve keys such as
 * "toString" or "constructor" to inherited `Object.prototype` members instead
 * of `undefined`.
 */
const type: Record<string, string> = Object.assign(Object.create(null), {
  heading: "heading",
  text: "text",
});
49+
50+
/**
 * Converts a single Markdown AST node into a flat JSON record.
 *
 * @param node Markdown AST node (heading, paragraph, code, ...).
 * @param parentId Id of the heading this node belongs to, if any.
 * @returns A record with a freshly generated `id`, the given `parentId`, a
 *   `type` label looked up in the `type` table (defaulting to `type.text`),
 *   and the node's text as `value`.
 */
const processNode = (node: any, parentId?: string) => {
  // Nodes with children (heading, paragraph, ...) contribute the text of their
  // children; leaf nodes (code, html, ...) contribute their own value.
  // Children are concatenated without a separator: text nodes already carry
  // the whitespace around inline markup, so joining with " " would turn
  // "Hello **x**!" into "Hello  x !".
  const value = Array.isArray(node.children)
    ? node.children.map((childNode: any) => processText(childNode)).join("")
    : processText(node);

  return {
    id: nanoid(),
    parentId,
    type: type[node.type as string] ?? type.text,
    value,
  };
};
70+
71+
/**
 * Extracts the plain text carried by a Markdown AST node.
 *
 * Literal nodes (text, inlineCode, code, ...) return their `value`. Container
 * nodes (strong, emphasis, link, ...) have no `value` of their own, so the
 * text of their children is collected recursively and concatenated. Nodes
 * with neither (e.g. a line break) yield an empty string.
 *
 * @param node Markdown AST node.
 * @returns The node's text content; never `undefined`.
 */
const processText = (node: any): string => {
  if (node == null) return "";

  if (typeof node.value === "string") return node.value;

  if (Array.isArray(node.children)) {
    return node.children
      .map((childNode: any) => processText(childNode))
      .join("");
  }

  return "";
};
474

575
export const getCompletion = async ({
676
apiKey,
@@ -58,6 +128,9 @@ export const getCompletion = async ({
58128

59129
const data = response.data;
60130

131+
// const jsonOutput = await markdownToJson(data.choices[0].message.content);
132+
// console.log("====>>>>", JSON.stringify(jsonOutput, null, 2));
133+
61134
return {
62135
content: data.choices[0].message.content,
63136
inputTokens: data.usage.prompt_tokens,

0 commit comments

Comments
 (0)