diff --git a/scripts/README.md b/scripts/README.md index a2c2b6790ba459..419de1dc166e36 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -13,13 +13,13 @@ This section contains a summary of the scripts available in this directory. For - [build.sh](build.sh) script builds the ZIP archives of the `pages` directory. - [build-index.sh](build-index.sh) script builds the index of available pages. - [check-pr.sh](check-pr.sh) script checks the page's syntax and performs various checks on the PR. +- [check-page-title.py](check-page-title.py) is a Python script to check the consistency between the filenames and the page title. - [deploy.sh](deploy.sh) script deploys the ZIP and PDF archives to the static website repository. - [send-to-bot.py](send-to-bot.py) is a Python script that sends the build or test output to tldr-bot. - [set-alias-page.py](set-alias-page.py) is a Python script to generate or update alias pages. - [set-more-info-link.py](set-more-info-link.py) is a Python script to generate or update more information links across pages. - [set-page-title.py](set-page-title.py) is a Python script to update the title across pages. - [test.sh](test.sh) script runs some basic tests on every PR/commit to ensure the pages are valid and the code is formatted correctly. -- [wrong-filename.sh](wrong-filename.sh) script checks the consistency between the filenames and the page title. - [update-command.py](update-command.py) is a Python script to update the common contents of a command example across all languages. 
def get_page_title(path: Path) -> str:
    """
    Read the title of a page, i.e. the text of its first line after the leading "#".

    Parameters:
    path (Path): Path to a page

    Returns:
    str: "" if the path is not an existing file or the page does not have a title
         (its first line does not start with "#" or nothing follows the "#"),
         otherwise the page title without surrounding whitespace.
    """

    # is_file() is False for a missing path and for a directory, so neither
    # reaches open() below.
    if not path.is_file():
        return ""
    with path.open(encoding="utf-8") as f:
        first_line = f.readline().strip()

    # A first line without a leading "#" is not a title. Returning that line
    # unchanged would make an untitled page look like a titled one.
    if not first_line.startswith("#"):
        return ""

    # Only the first "#" is dropped, so any further "#" stays part of the title.
    return first_line[1:].strip()


def test_get_page_title():
    root = Path("test_root")

    shutil.rmtree(root, True)

    root.mkdir(exist_ok=True)

    # Test valid title
    valid_path = root / "test.md"
    valid_path.write_text("# Git Clone\nSome content", encoding="utf-8")
    assert get_page_title(valid_path) == "Git Clone"

    # Test title with multiple hashes
    hash_path = root / "multiple_hash.md"
    hash_path.write_text("# Git ### Clone\nSome content", encoding="utf-8")
    assert get_page_title(hash_path) == "Git ### Clone"

    # Test empty title
    empty_path = root / "empty.md"
    empty_path.write_text("#\nSome content", encoding="utf-8")
    assert get_page_title(empty_path) == ""

    # Test first line that is not a title
    untitled_path = root / "untitled.md"
    untitled_path.write_text("Git Clone\nSome content", encoding="utf-8")
    assert get_page_title(untitled_path) == ""

    # Test non-existent file
    nonexistent_path = root / "nonexistent.md"
    assert get_page_title(nonexistent_path) == ""

    # Test directory instead of a page
    assert get_page_title(root) == ""

    # Test title with leading/trailing spaces
    spaces_path = root / "spaces.md"
    spaces_path.write_text("#   Git Clone   \nSome content", encoding="utf-8")
    assert get_page_title(spaces_path) == "Git Clone"

    # Remove the scratch directory so the test leaves nothing behind.
    shutil.rmtree(root, True)
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT

"""
A Python script to check if all page titles match with their filename.

Usage:
    python3 scripts/check-page-title.py [-p PAGE] [-l LANGUAGE]

Options:
    -p, --page PAGE
        Specify the page in the format "platform/command". This option allows checking the title for a specific page.
    -l, --language LANGUAGE
        Specify the language, a POSIX Locale Name in the form of "ll" or "ll_CC" (e.g. "fr" or "pt_BR").
        Use "en" for the English pages. Without this option every language is checked.

Examples:
    1. Check the page title for a specific page:
       python3 scripts/check-page-title.py -p common/tar
       python3 scripts/check-page-title.py --page common/tar

    2. Check the page titles for Brazilian Portuguese pages only:
       python3 scripts/check-page-title.py -l pt_BR
       python3 scripts/check-page-title.py --language pt_BR

    3. Check the page titles of all pages in all languages:
       python3 scripts/check-page-title.py
"""

from pathlib import Path
import re
import argparse
from _common import (
    IGNORE_FILES,
    Colors,
    get_tldr_root,
    get_pages_dir,
    get_target_paths,
    get_page_title,
    create_colored_line,
)

# Normalized page titles that are allowed to differ from their filename.
IGNORE_LIST = [
    "exclamation mark",
    "caret",
    "dollar sign",
    "tilde",
    "percent sign",
    "curly brace",
    "history expansion",
    "acme.sh dns",
    "pacman d",
    "pacman database",
    "pacman deptest",
    "pacman f",
    "pacman files",
    "pacman q",
    "pacman query",
    "pacman r",
    "pacman remove",
    "pacman s",
    "pacman sync",
    "pacman t",
    "pacman u",
    "pacman upgrade",
    "qm move disk",
    "rename",
    "umount",
]

# Markers removed from a name before comparing; compiled once because
# normalize_command_name() runs twice for every page.
_NAME_MARKERS = re.compile(r"\.fish|\.js|\.1")


def normalize_command_name(name: str) -> str:
    """
    Reduce a filename stem or a page title to a comparable form.

    The name is lowercased, "nix3" becomes "nix", every ".fish", ".js" and ".1"
    is removed, dashes become spaces and surrounding whitespace is stripped.

    Parameters:
    name (str): Filename stem or page title

    Returns:
    str: The normalized name.
    """
    name = name.lower().replace("nix3", "nix")
    name = _NAME_MARKERS.sub("", name)
    return name.replace("-", " ").strip()


def check_page_title(path: Path) -> str:
    """
    Check if the title of a page matches the filename.

    Parameters:
    path (Path): Path to a page

    Returns:
    str: "" if the normalized title equals the normalized filename stem or the
         normalized title is listed in IGNORE_LIST, otherwise a message
         describing the mismatch.
    """
    command_name_file = normalize_command_name(path.stem)
    command_name_page = normalize_command_name(get_page_title(path))

    if command_name_file == command_name_page or command_name_page in IGNORE_LIST:
        return ""

    return f"title should be {command_name_file} instead of {command_name_page}"


def _locale_of(pages_dir: Path) -> str:
    """Return the locale of a pages directory: "pages.fr" -> "fr", "pages" -> "en"."""
    _, _, locale = pages_dir.name.partition(".")
    return locale or "en"


def _report(path: Path) -> None:
    """Print a red "<language dir>/<platform>/<page>: <problem>" line if the page title is wrong."""
    status = check_page_title(path)
    if status:
        rel_path = "/".join(path.parts[-3:])
        print(create_colored_line(Colors.RED, f"{rel_path}: {status}"))


def main():
    """
    Parse the command line and report every page whose title does not match its filename.

    With '--page' only that page is checked (in every language, or in the one
    given by '--language'). Otherwise every page of every pages directory is
    checked, optionally restricted to the directory of one language.
    """
    parser = argparse.ArgumentParser(
        description="Check if all page titles match with their filename."
    )
    parser.add_argument(
        "-p",
        "--page",
        type=str,
        default="",
        help='page name in the format "platform/command"',
    )
    parser.add_argument(
        "-l",
        "--language",
        type=str,
        default="",
        help='language in the format "ll" or "ll_CC" (e.g. "fr" or "pt_BR")',
    )

    args = parser.parse_args()

    root = get_tldr_root()
    pages_dirs = get_pages_dir(root)

    # Use '--page' option
    if args.page != "":
        for path in get_target_paths(args.page, pages_dirs, args.language):
            _report(path)
        return

    # Walk the pages directories themselves, so the English "pages" directory
    # is checked too and "-l en" selects it, matching get_target_paths().
    for pages_dir in sorted(pages_dirs):
        if args.language and _locale_of(pages_dir) != args.language:
            continue

        for platform_dir in sorted(pages_dir.iterdir()):
            # Skip ignored entries and stray files next to the platform folders.
            if platform_dir.name in IGNORE_FILES or not platform_dir.is_dir():
                continue

            for page in sorted(platform_dir.iterdir()):
                if page.name not in IGNORE_FILES:
                    _report(page)


if __name__ == "__main__":
    main()
- """ - - if not path.exists(): - return "" - with path.open(encoding="utf-8") as f: - first_line = f.readline().strip() - - return first_line.split("#", 1)[-1].strip() - - def sync( root: Path, pages_dirs: list[Path], diff --git a/scripts/wrong-filename.sh b/scripts/wrong-filename.sh deleted file mode 100755 index 31a96f5b4185cf..00000000000000 --- a/scripts/wrong-filename.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# SPDX-License-Identifier: MIT - -# This script checks consistency between the filenames and the page title. -# Usage: ./scripts/wrong-filename.sh - -# Output file for recording inconsistencies -OUTPUT_FILE="inconsistent-filenames.txt" -# Remove existing output file (if any) -rm -f "$OUTPUT_FILE" -touch "$OUTPUT_FILE" - -IGNORE_LIST=("exclamation mark" "caret" "dollar sign" "tilde" "percent sign" "curly brace" "history expansion" "qm move disk" "umount" "rename" "pacman d" "pacman f" "pacman q" "pacman r" "pacman s" "pacman t" "pacman u") - -set -e - -# Iterate through all Markdown files in the 'pages' directories -find pages* -name '*.md' -type f | while read -r path; do - # Extract the expected command name from the filename - COMMAND_NAME_FILE=$(basename "$path" | head -c-4 | sed 's/nix3/nix/' | sed 's/\.fish//' | sed 's/\.js//' | sed 's/\.1//' | tr '-' ' ' | tr '[:upper:]' '[:lower:]') - - # Extract the command name from the first line of the Markdown file - COMMAND_NAME_PAGE=$(head -n1 "$path" | tail -c+3 | sed 's/--//' | tr '-' ' ' | tr '[:upper:]' '[:lower:]') - - # Check if there is a mismatch between filename and content command names - if [[ "$COMMAND_NAME_FILE" != "$COMMAND_NAME_PAGE" && ! ${IGNORE_LIST[*]} =~ $COMMAND_NAME_PAGE ]]; then - echo "Inconsistency found in file: $path: $COMMAND_NAME_PAGE should be $COMMAND_NAME_FILE" >> "$OUTPUT_FILE" - fi -done