Skip to content

feat: parse astro monthly blogs #231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions astro.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ export default defineConfig({
},
experimental: {
serverIslands: true,
contentLayer: true,
contentIntellisense: true,
},
integrations: [
starlight({
Expand Down
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@
"@types/mdast": "4.0.4",
"@types/node": "20.16.3",
"@types/semver": "7.5.8",
"mdast-util-from-markdown": "^2.0.1",
"mdast-util-mdx": "3.0.0",
"mdast-util-to-markdown": "2.1.0",
"micromark-extension-mdxjs": "^3.0.0",
"prettier": "3.3.3",
"prettier-plugin-astro": "0.14.1",
"unist-util-select": "^5.1.0",
"wrangler": "3.73.0"
}
}
56 changes: 39 additions & 17 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions src/components/Tags.astro
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ interface Props {
tags?: string[];
}
const allResources = await getCollection("resources");
// Entries of the "automatedresources" collection.
// The debug `console.log` of the whole collection was removed: it would print
// every entry each time this component renders.
// NOTE(review): `allExtracted` is not consumed in the visible part of this
// component — confirm how it should be merged with `allResources`.
const allExtracted = await getCollection("automatedresources");

const referer = Astro.request.headers.get("referer");
const tagsParams =
Expand Down
21 changes: 21 additions & 0 deletions src/content/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { defineCollection, z } from 'astro:content';
import { docsSchema } from '@astrojs/starlight/schema';
import { minVersion, outside, validRange } from 'semver';
import pkg from '../../package.json';
import { astroMonthlyBlogResourceLoader } from '../utils/index.js';

const astroVersion = minVersion(pkg.dependencies.astro)?.version;

Expand Down Expand Up @@ -36,7 +37,27 @@ const resourcesSchema = defineCollection({
}),
});

/**
 * Collection filled by `astroMonthlyBlogResourceLoader` from the listed
 * "What's new" blog posts of the astro.build repository.
 *
 * `exclude` patterns are tested against every extracted link URL; a link
 * matching any of them is not stored.
 */
const automatedresources = defineCollection({
  loader: astroMonthlyBlogResourceLoader({
    urls: [
      'https://raw.githubusercontent.com/withastro/astro.build/main/src/content/blog/whats-new-june-2024.mdx',
      'https://raw.githubusercontent.com/withastro/astro.build/main/src/content/blog/whats-new-july-2024.mdx',
      'https://raw.githubusercontent.com/withastro/astro.build/main/src/content/blog/whats-new-august-2024.mdx',
    ],
    exclude: [
      // Dots are escaped so they only match a literal "." (an unescaped dot
      // matches any character, e.g. /astro.build/ also matches "astro-build").
      /github\.com/,
      /astro\.build/,
      // Require a host boundary ("//" or a subdomain dot) before "x.com";
      // otherwise hosts such as "wix.com" or "netflix.com" are excluded too.
      /(?:\/\/|\.)x\.com/,
      /2023\.stateofjs\.com/,
      /astrolicious\.dev/,
      /astro-tips\.dev/,
      /reddit\.com\/r\/withastro\/$/,
    ],
  }),
});

// Every content collection exposed to Astro, keyed by the name used to query it.
export const collections = {
  docs: starlightSchema,
  resources: resourcesSchema,
  automatedresources,
};
119 changes: 119 additions & 0 deletions src/utils/astro-monthly-blog-loader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import type { Loader } from 'astro/loaders';

import { AstroError } from 'astro/errors';
import { z } from 'astro/zod';
import { fromMarkdown } from 'mdast-util-from-markdown';
import { mdxFromMarkdown } from 'mdast-util-mdx';
import { mdxjs } from 'micromark-extension-mdxjs';
import { selectAll } from 'unist-util-select';

/**
 * Validation schema for the options handed to the loader.
 *
 * - `urls`: list of URL strings; each one is converted to a `URL` instance
 *   once it has passed validation.
 * - `exclude`: list of `RegExp` instances.
 */
const USER_CONFIG_SCHEMA = z.object({
  urls: z.array(
    z
      .string()
      .url()
      .transform((rawUrl) => new URL(rawUrl))
  ),
  exclude: z.array(z.instanceof(RegExp)),
});

/** Options as written by the caller, i.e. before the `urls` transform runs (plain strings). */
type USER_CONFIG_TYPE = z.input<typeof USER_CONFIG_SCHEMA>;

/**
 * Shape of one entry produced by the loader: a link title, the link URL and,
 * optionally, the URL of the page the link was taken from.
 */
const SCHEMA = z.object({
  title: z.string(),
  url: z.string().url(),
  source_url: z.string().url().optional(),
});

/** One link harvested from a blog post. */
type ExtractedLink = { url: string; title: string; source_url: string };

/** Minimal structural view of a syntax-tree node: just the fields that can carry text. */
type TextBearingNode = { value?: unknown; alt?: unknown; children?: unknown };

/** Minimal structural view of an object literal inside an MDX attribute expression. */
type VideoObjectExpression = { properties?: { value?: { value?: unknown } }[] };

/** Minimal structural view of a `<YouTubeGrid>` JSX node and its first attribute. */
type YouTubeGridNode = {
  attributes?: {
    value?:
      | null
      | string
      | {
          data?: {
            estree?: {
              body?: { expression?: { elements?: (VideoObjectExpression | null)[] } }[];
            };
          };
        };
  }[];
};

/** Concatenates the text of a node and all of its descendants (image nodes contribute their `alt`). */
function collectNodeText(node: unknown): string {
  if (typeof node !== 'object' || node === null) return '';
  const { value, alt, children } = node as TextBearingNode;
  if (typeof value === 'string') return value;
  if (typeof alt === 'string') return alt;
  if (Array.isArray(children)) return children.map(collectNodeText).join('');
  return '';
}

/**
 * Downloads one blog post (MDX source) and extracts the links it contains.
 *
 * Two kinds of links are collected:
 * - every Markdown link in the document;
 * - the entries of the array passed as first attribute of each `<YouTubeGrid>`
 *   element, where the first property of an entry is read as the URL and the
 *   second as the title.
 *
 * @param params.url Location of the raw `.mdx` file.
 * @returns The extracted links, each tagged with the public blog URL
 *   (`https://astro.build/blog/<file name without extension>/`) as `source_url`.
 * @throws AstroError when the HTTP request does not succeed.
 */
async function fetchResources(params: { url: URL }): Promise<ExtractedLink[]> {
  const segments = params.url.pathname.split('/');
  // Strip the extension only when it is the suffix of the file name.
  const path_segment = (segments[segments.length - 1] ?? '').replace(/\.mdx?$/, '');
  const source_url = `https://astro.build/blog/${path_segment}/`;

  const res = await fetch(params.url);
  if (!res.ok || !res.body) {
    throw new AstroError(`Failed to fetch the blog post from ${params.url.toString()}`);
  }
  const markdown = await res.text();

  const tree = fromMarkdown(markdown, {
    extensions: [mdxjs({})],
    mdastExtensions: [mdxFromMarkdown()],
  });

  const extractedLinks: ExtractedLink[] = [];

  const linkNodes = selectAll('link', tree) as unknown[] as { url: string }[];
  for (const link of linkNodes) {
    // A link label may be empty or wrap its text in emphasis/strong/code, so
    // gather the text of the whole subtree instead of reading the first child.
    const text = collectNodeText(link);
    extractedLinks.push({
      url: link.url,
      // Fall back to the URL so a label-less link still yields a usable title.
      title: text.trim() === '' ? link.url : text,
      source_url,
    });
  }

  const videoGrids = selectAll('[name=YouTubeGrid]', tree) as unknown[] as YouTubeGridNode[];
  for (const grid of videoGrids) {
    const attributeValue = grid.attributes?.[0]?.value;
    // Only an expression attribute (`videos={[...]}`) carries a parsed program.
    if (attributeValue == null || typeof attributeValue === 'string') continue;

    const elements = attributeValue.data?.estree?.body?.[0]?.expression?.elements ?? [];
    for (const element of elements) {
      const url = element?.properties?.[0]?.value?.value;
      const title = element?.properties?.[1]?.value?.value;
      // Skip entries that are not `{ <url>: '...', <title>: '...' }` string literals.
      if (typeof url !== 'string' || typeof title !== 'string') continue;
      extractedLinks.push({ url, title, source_url });
    }
  }

  return extractedLinks;
}

/**
 * Tells whether `value` matches at least one of `patterns`.
 *
 * `RegExp.prototype.test` resumes from `lastIndex` for patterns carrying the
 * `g` or `y` flag, so the index is reset first to make every check start at
 * the beginning of the string.
 */
function matchesAnyPattern(value: string, patterns: readonly RegExp[]): boolean {
  return patterns.some((pattern) => {
    pattern.lastIndex = 0;
    return pattern.test(value);
  });
}

/**
 * Creates a content loader that stores the links found in the configured blog
 * posts, one entry per link, using the link URL as entry id.
 *
 * @param config `urls` lists the raw blog post files to read; links whose URL
 *   matches one of the `exclude` patterns are not stored.
 * @returns A loader object (`name`, `schema`, `load`).
 * @throws AstroError when `config` does not satisfy `USER_CONFIG_SCHEMA`.
 */
export function astroMonthlyBlogResourceLoader(config: USER_CONFIG_TYPE): Loader {
  const parsedConfig = USER_CONFIG_SCHEMA.safeParse(config);

  if (!parsedConfig.success) {
    throw new AstroError(
      `The provided configuration for the Astro Monthly Blog Post loader is invalid.\n${parsedConfig.error.issues.map((issue) => issue.message).join('\n')}`
    );
  }

  const { urls, exclude } = parsedConfig.data;

  return {
    name: 'astro-monthly-blog-links-loader',
    schema: SCHEMA,
    async load({ store, logger, parseData, generateDigest }) {
      // The posts are independent of each other, so download them concurrently.
      // `Promise.all` keeps the results in the order of `urls`.
      const linksPerPost = await Promise.all(
        urls.map((url) => {
          logger.info(`Getting all links from ${url.toString()}`);
          return fetchResources({ url });
        })
      );

      const keptLinks = linksPerPost
        .flat()
        .filter((extractedLink) => !matchesAnyPattern(extractedLink.url, exclude));

      const entries = await Promise.all(
        keptLinks.map(async (extractedLink) => {
          const data = await parseData({
            id: extractedLink.url,
            data: {
              title: extractedLink.title,
              url: extractedLink.url,
              source_url: extractedLink.source_url,
            },
          });
          return { id: extractedLink.url, data, digest: generateDigest(data) };
        })
      );

      // Replace the previous content only once every post was fetched and every
      // entry validated, so a failure above does not leave the store empty or
      // partially filled.
      store.clear();
      // NOTE(review): entries share the link URL as id, so a link appearing in
      // several posts presumably ends up stored once, with the values of the
      // last post listed — confirm against the store's `set` semantics.
      for (const entry of entries) {
        store.set(entry);
      }
    },
  };
}
1 change: 1 addition & 0 deletions src/utils/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { astroMonthlyBlogResourceLoader } from './astro-monthly-blog-loader.ts';