Skip to content

Commit 7d45fe8

Browse files
committed
Refactor documentation merging script to streamline meta description handling and remove unused functions
- Simplified the assignment of `meta_description` in the `process_dir` function.
- Removed obsolete functions related to page collection and URL path resolution to improve code clarity and maintainability.
- Updated the README and llms-full.txt generation logic so that both files are written directly from the same merged content.
1 parent 134827d commit 7d45fe8

File tree

1 file changed

+20
-109
lines changed

1 file changed

+20
-109
lines changed

scripts/merge-docs.py

Lines changed: 20 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,7 @@ def process_dir(dir_path, skip_index=False):
188188
"title": page_title,
189189
"path": resolved,
190190
"heading": heading_text,
191-
"meta_description": meta_description, # Only from _meta.json
191+
"meta_description": meta_description,
192192
}
193193
)
194194
elif os.path.isdir(resolved):
@@ -210,14 +210,10 @@ def process_dir(dir_path, skip_index=False):
210210
if index_node:
211211
group_node["index_path"] = index_node["path"]
212212
group_node["heading"] = index_node.get("heading", group_title)
213-
group_node["meta_description"] = (
214-
meta_description # Only from _meta.json
215-
)
213+
group_node["meta_description"] = meta_description
216214
else:
217215
group_node["heading"] = group_title
218-
group_node["meta_description"] = (
219-
meta_description # Only from _meta.json
220-
)
216+
group_node["meta_description"] = meta_description
221217
nodes.append(group_node)
222218
return nodes
223219

@@ -307,69 +303,6 @@ def generate_llms_toc(nodes, base_url="https://docs.ensembleui.com"):
307303
return lines
308304

309305

310-
def collect_all_pages(nodes):
311-
"""Collect all pages from the structure for full content generation."""
312-
pages = []
313-
314-
for node in nodes:
315-
if "children" in node:
316-
# Add index page if it exists
317-
if node.get("index_path"):
318-
pages.append(
319-
{
320-
"title": node.get("heading", node["title"]),
321-
"path": node["index_path"],
322-
"url_path": get_url_path(node["index_path"]),
323-
}
324-
)
325-
326-
# Add child pages
327-
pages.extend(collect_all_pages(node["children"]))
328-
else:
329-
# This is a standalone page
330-
pages.append(
331-
{
332-
"title": node["title"],
333-
"path": node["path"],
334-
"url_path": get_url_path(node["path"]),
335-
}
336-
)
337-
338-
return pages
339-
340-
341-
def get_url_path(file_path):
342-
"""Convert file path to URL path."""
343-
rel_path = os.path.relpath(file_path, "pages")
344-
url_path = rel_path.replace("\\", "/").replace(".mdx", "").replace(".md", "")
345-
if url_path == "index":
346-
url_path = ""
347-
elif url_path.endswith("/index"):
348-
url_path = url_path[:-6]
349-
return url_path
350-
351-
352-
def generate_full_docs(pages, base_url="https://docs.ensembleui.com"):
353-
"""Generate full documentation content in llms-full.txt format."""
354-
content_blocks = []
355-
356-
for page in pages:
357-
title = page["title"]
358-
file_path = page["path"]
359-
url_path = page["url_path"]
360-
361-
url = f"{base_url}/{url_path}" if url_path else base_url
362-
363-
# Get full content
364-
full_content = get_full_content(file_path)
365-
366-
# Format as does: # Title \n Source: URL \n Content
367-
block = f"# {title}\nSource: {url}\n\n{full_content}\n"
368-
content_blocks.append(block)
369-
370-
return content_blocks
371-
372-
373306
def add_extension_to_link(match):
374307
"""Convert page links to anchor links if content exists in merged docs, otherwise keep external links."""
375308
prefix = match.group(1) # [text](
@@ -590,15 +523,6 @@ def collect_content(nodes, level=1):
590523
return lines
591524

592525

593-
def resolve_entry_path_custom(dir_path, name):
594-
"""Helper to resolve an index entry from the given dir."""
595-
for candidate in [name, name + ".md", name + ".mdx"]:
596-
path = os.path.join(dir_path, candidate)
597-
if os.path.exists(path):
598-
return path
599-
return None
600-
601-
602526
# Base directory settings
603527
repo_root = os.getcwd()
604528
pages_dir = os.path.join(repo_root, "pages")
@@ -607,16 +531,19 @@ def resolve_entry_path_custom(dir_path, name):
607531
# Ensure public directory exists
608532
os.makedirs(public_dir, exist_ok=True)
609533

610-
# Process the pages directory.
534+
# Process the pages directory
611535
structure = process_dir(pages_dir, skip_index=True)
612536

613-
# Read the root index.mdx content to place it at the beginning.
614-
index_path = resolve_entry_path_custom(pages_dir, "index")
537+
# Read the root index.mdx content
538+
index_path = os.path.join(pages_dir, "index.mdx")
539+
if not os.path.exists(index_path):
540+
index_path = os.path.join(pages_dir, "index.md")
541+
615542
index_lines = []
616543
main_title = "Ensemble"
617544
main_description = "Documentation for the Ensemble platform"
618545

619-
if index_path and os.path.isfile(index_path):
546+
if os.path.exists(index_path):
620547
with open(index_path, "r", encoding="utf-8") as f:
621548
raw_index = f.read().splitlines()
622549
index_lines = clean_content(raw_index)
@@ -632,25 +559,27 @@ def resolve_entry_path_custom(dir_path, name):
632559
main_description = description_content
633560

634561
# Generate README.md
635-
# Assemble the final README content.
636562
output_lines = []
637563
if index_lines:
638564
output_lines += index_lines
639565
if output_lines and output_lines[-1] != "":
640566
output_lines.append("")
641-
# Generate the Table of Contents from the sidebar structure.
567+
568+
# Generate the Table of Contents from the sidebar structure
642569
toc = generate_toc(structure)
643570
if toc:
644571
output_lines.append("## Table of Contents")
645572
output_lines.append("")
646573
output_lines += toc
647574
output_lines.append("")
648-
# Append the remaining content in the defined order.
575+
576+
# Append the remaining content in the defined order
649577
output_lines += collect_content(structure)
650578

651-
# Write the merged content to README.md.
579+
# Write the merged content to README.md
580+
readme_content = "\n".join(output_lines)
652581
with open("README.md", "w", encoding="utf-8") as out_file:
653-
out_file.write("\n".join(output_lines))
582+
out_file.write(readme_content)
654583

655584
print("Merged documentation written to README.md")
656585

@@ -674,36 +603,18 @@ def resolve_entry_path_custom(dir_path, name):
674603
toc_lines.append("- [Website](https://ensembleui.com/)")
675604
toc_lines.append("- [Ensemble Studio](https://studio.ensembleui.com/)")
676605
toc_lines.append("- [Chat with us on Discord](https://discord.gg/cEHkJTmn75)")
677-
toc_lines.append(
678-
"- [Join our office hours](https://discord.gg/eJrUWhnRHS?event=1218554330765066310)"
679-
)
606+
toc_lines.append("- [Join our office hours](https://discord.gg/eJrUWhnRHS?event=1218554330765066310)")
680607
toc_lines.append("- [Drop us an email](mailto:[email protected])")
681608

682609
# Write llms.txt
683610
llms_txt_path = os.path.join(public_dir, "llms.txt")
684611
with open(llms_txt_path, "w", encoding="utf-8") as f:
685612
f.write("\n".join(toc_lines))
686613

687-
# Generate llms-full.txt (full content)
688-
all_pages = collect_all_pages(structure)
689-
full_content_blocks = generate_full_docs(all_pages)
690-
691-
# Prepend only the title and description section to llms-full.txt (not the full TOC)
692-
header_section = []
693-
header_section.append(f"# {main_title}")
694-
header_section.append("")
695-
header_section.append(f"{main_description}")
696-
header_section.append("")
697-
header_section.append("---") # Add separator before full content
698-
header_section.append("")
699-
700-
# Combine header with full content
701-
full_content_with_header = header_section + full_content_blocks
702-
614+
# Generate llms-full.txt using the README content directly
703615
llms_full_txt_path = os.path.join(public_dir, "llms-full.txt")
704616
with open(llms_full_txt_path, "w", encoding="utf-8") as f:
705-
f.write("\n".join(full_content_with_header))
617+
f.write(readme_content)
706618

707619
print(f"Generated {llms_txt_path} successfully!")
708620
print(f"Generated {llms_full_txt_path} successfully!")
709-
print(f"Total pages in full docs: {len(all_pages)}")

0 commit comments

Comments
 (0)