Skip to content

Commit 7775513

Browse files
committed
Enhance documentation merging script to fix internal links and improve image path handling
- Added functionality to fix internal links by inserting 'pages/' before relative links.
- Updated regex patterns to handle markdown and HTML links, ensuring proper path adjustments for relative links while excluding images.
- Improved comments for clarity on the changes made to image and link paths.
1 parent 81b4d26 commit 7775513

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

scripts/merge-docs.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ def clean_content(lines):
377377
- Converting MDX Callout blocks into Markdown note blocks.
378378
- Removing other MDX component blocks.
379379
- Fixing markdown and HTML image paths (inserting 'public/' before /images/).
380+
- Fixing internal links (inserting 'pages/' before relative links).
380381
"""
381382
cleaned = []
382383
in_code_block = False
@@ -387,6 +388,10 @@ def clean_content(lines):
387388
md_image_pattern = re.compile(r"(!\[[^\]]*\]\()(/images/)", re.IGNORECASE)
388389
# Regex for HTML image tags: <img ... src="/images/...
389390
html_img_pattern = re.compile(r'(<img\s+[^>]*src=["\'])(/images/)', re.IGNORECASE)
391+
# Regex for markdown links: [text](/relative-path) but not [text](http...) or [text](#anchor) or [text](/images/...) or [text](public/images/...)
392+
md_link_pattern = re.compile(r'(\[[^\]]*\]\()(/(?!images/|http|#)[^)]+)', re.IGNORECASE)
393+
# Regex for HTML links: <a href="/relative-path"> but not <a href="http..."> or <a href="#anchor"> or <a href="/images/..."> or <a href="public/images/...">
394+
html_link_pattern = re.compile(r'(<a\s+[^>]*href=["\'])(/(?!images/|http|#)[^"\']+)', re.IGNORECASE)
390395

391396
for line in lines:
392397
if line.strip().startswith("```"):
@@ -430,10 +435,14 @@ def clean_content(lines):
430435
in_component_block = True
431436
continue
432437

433-
# Fix markdown image paths.
434-
line = md_image_pattern.sub(r"\1/public/images/", line)
435-
# Fix HTML image paths.
436-
line = html_img_pattern.sub(r"\1public/images/", line)
438+
# Fix markdown image paths (add public before /images/).
439+
line = md_image_pattern.sub(r"\1public\2", line)
440+
# Fix HTML image paths (add public before /images/).
441+
line = html_img_pattern.sub(r"\1public\2", line)
442+
# Fix markdown links (add /pages/ before relative links, but exclude images).
443+
line = md_link_pattern.sub(r"\1/pages\2", line)
444+
# Fix HTML links (add /pages/ before relative links, but exclude images).
445+
line = html_link_pattern.sub(r"\1/pages\2", line)
437446

438447
cleaned.append(line)
439448
return cleaned

0 commit comments

Comments
 (0)