diff --git a/.github/scripts/generate_meta_skill.py b/.github/scripts/generate_meta_skill.py index e6e0d2ec5..7759f1d62 100644 --- a/.github/scripts/generate_meta_skill.py +++ b/.github/scripts/generate_meta_skill.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Generate cli-hub-skill/SKILL.md from registry.json and public_registry.json.""" +"""Generate cli-hub-skill/SKILL.md from registry.json, public_registry.json, and matrix_registry.json.""" import json from pathlib import Path from collections import defaultdict @@ -8,6 +8,7 @@ def main(): repo_root = Path(__file__).parent.parent.parent registry_path = repo_root / 'registry.json' public_registry_path = repo_root / 'public_registry.json' + matrix_registry_path = repo_root / 'matrix_registry.json' output_path = repo_root / 'cli-hub-skill' / 'SKILL.md' with open(registry_path) as f: @@ -19,6 +20,12 @@ def main(): public_data = json.load(f) public_clis = public_data.get('clis', []) + matrices = [] + if matrix_registry_path.exists(): + with open(matrix_registry_path) as f: + matrix_data = json.load(f) + matrices = matrix_data.get('matrices', []) + total_count = len(data['clis']) + len(public_clis) # Group harness CLIs by category @@ -65,6 +72,24 @@ def main(): "cli-hub launch [args...]", "```", "", + "## CLI Matrices", + "", + f"`cli-hub` also ships {len(matrices)} curated cross-tool matrices: install one name to pull in a whole workflow kit and read its dedicated SKILL.md.", + "", + "```bash", + "# Browse curated matrices", + "cli-hub matrix list", + "", + "# Inspect one matrix", + "cli-hub matrix info video-creation", + "", + "# Check which providers are available locally", + "cli-hub matrix preflight video-creation --json", + "", + "# Install the whole matrix", + "cli-hub matrix install video-creation", + "```", + "", "## CLI-Anything Harness CLIs", "", f"Stateful, agent-native wrappers for {len(data['clis'])} GUI applications. 
All support `--json` output, REPL mode, and undo/redo.", @@ -97,15 +122,36 @@ def main(): clis = public_by_category[category] lines.append(f"### {category.title()}") lines.append("") - lines.append("| Name | Description | Entry Point | Install |") - lines.append("|------|-------------|-------------|---------|") + lines.append("| Name | Description | Entry Point | Install | Skill |") + lines.append("|------|-------------|-------------|---------|-------|") for cli in sorted(clis, key=lambda x: x['name']): name = cli['display_name'] desc = cli['description'] entry = f"`{cli['entry_point']}`" install = f"`cli-hub install {cli['name']}`" - lines.append(f"| **{name}** | {desc} | {entry} | {install} |") + skill = cli.get('skill_md') or '—' + skill_cell = f"`{skill}`" if not str(skill).startswith("http") else skill + lines.append(f"| **{name}** | {desc} | {entry} | {install} | {skill_cell} |") + + lines.append("") + + if matrices: + lines.extend([ + "## Curated Matrices", + "", + "Each matrix is a curated multi-CLI workflow pulled from the CLI Matrix. 
Installing a matrix installs all member CLIs and points you at a matrix-specific SKILL.md.", + "", + "| Matrix | Description | CLIs | Install | Skill |", + "|--------|-------------|------|---------|-------|", + ]) + + for matrix in sorted(matrices, key=lambda x: x['name']): + skill = matrix.get('skill_md') or '—' + install = f"`cli-hub matrix install {matrix['name']}`" + lines.append( + f"| **{matrix['display_name']}** | {matrix['description']} | {len(matrix.get('clis', []))} | {install} | `{skill}` |" + ) lines.append("") @@ -119,6 +165,7 @@ def main(): "- **uv CLIs**: installed via `uv tool install`", "- **brew/script CLIs**: installed via the tool's native installer", "- **bundled CLIs**: detected from PATH (pre-installed with the host app)", + "- **Matrices**: install a curated set of harness and public CLIs in one command", "", "## Harness CLI Usage Pattern", "", @@ -152,7 +199,10 @@ def main(): output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text('\n'.join(lines) + '\n') - print(f"Generated meta-skill with {len(data['clis'])} harness CLIs + {len(public_clis)} public CLIs ({total_count} total) at {output_path}") + print( + f"Generated meta-skill with {len(data['clis'])} harness CLIs + " + f"{len(public_clis)} public CLIs + {len(matrices)} matrices at {output_path}" + ) if __name__ == '__main__': main() diff --git a/.github/workflows/deploy-pages.yml b/.github/workflows/deploy-pages.yml index b57c00bc7..8e55a3edc 100644 --- a/.github/workflows/deploy-pages.yml +++ b/.github/workflows/deploy-pages.yml @@ -8,7 +8,9 @@ on: - '*/agent-harness/**' - 'registry.json' - 'public_registry.json' + - 'matrix_registry.json' - 'cli-hub/**' + - 'cli-hub-matrix/**' - 'docs/hub/**' - '.github/workflows/deploy-pages.yml' - '.github/scripts/update_registry_dates.py' @@ -63,12 +65,25 @@ jobs: cp registry.json docs/hub/registry.json cp public_registry.json docs/hub/public_registry.json + - name: Copy matrix_registry.json to hub for cli-hub access + run: cp 
matrix_registry.json docs/hub/matrix_registry.json + - name: Build with Jekyll uses: actions/jekyll-build-pages@v1 with: source: ./docs/hub destination: ./docs/_site + # Copied after the Jekyll build so SKILL.md front matter is served + # verbatim instead of being converted to HTML. Published at + # https://hkuds.github.io/CLI-Anything/matrix/<matrix-name>/{SKILL.md,references/,scripts/} + # (consumed by cli_hub.matrix_skill's published-URL fallback). + - name: Copy matrix skill content into site + run: | + sudo mkdir -p docs/_site/matrix + sudo rsync -a --exclude='__pycache__' --exclude='*.pyc' --exclude='*.pyo' \ + cli-hub-matrix/ docs/_site/matrix/ + - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: diff --git a/.gitignore b/.gitignore index 8c199271e..bab309f16 100644 --- a/.gitignore +++ b/.gitignore @@ -326,18 +326,14 @@ assets/gen_typing_gif.py !/registry.json !/public_registry.json !/matrix_registry.json -!/docs/ -/docs/* -!/docs/PREVIEW_PROTOCOL.md -!/docs/PREVIEW_PROGRESS.md -!/docs/FREECAD_VIDEO_REFERENCE.md -!/docs/PREVIEW_MECHANISM_PROGRESS.md -!/docs/scripts/ -!/docs/scripts/** -/docs/scripts/__pycache__/ -/docs/scripts/**/*.pyc -!/docs/hub/ -/docs/hub/registry-dates.json +# docs/* is always ignored — working documents stay local and are never committed. +# (Previously-tracked files under docs/ remain in the index until explicitly removed.) +/docs/ +# CLI-Matrix is unshipped/confidential — never track anything under docs/cli-matrix/. 
+/docs/cli-matrix/ + +# Build-time vendored matrix skill content (generated by cli-hub setup.py build_py/sdist) +/cli-hub/cli_hub/_matrix_data/ !/notebooklm/ /notebooklm/* /notebooklm/.* diff --git a/audacity/agent-harness/cli_anything/audacity/audacity_cli.py b/audacity/agent-harness/cli_anything/audacity/audacity_cli.py index 84f4e9328..ec0bd14ae 100644 --- a/audacity/agent-harness/cli_anything/audacity/audacity_cli.py +++ b/audacity/agent-harness/cli_anything/audacity/audacity_cli.py @@ -40,6 +40,7 @@ _session: Optional[Session] = None _json_output = False _repl_mode = False +_dry_run = False def get_session() -> Session: @@ -63,6 +64,15 @@ def output(data, message: str = ""): click.echo(str(data)) +def autosave_session_if_needed() -> None: + """Persist one-shot mutations immediately when working from a project file.""" + if _repl_mode or _dry_run: + return + sess = get_session() + if sess.has_project() and sess._modified and sess.project_path: + sess.save_session() + + def _print_dict(d: dict, indent: int = 0): prefix = " " * indent for k, v in d.items(): @@ -129,8 +139,9 @@ def cli(ctx, use_json, project_path, dry_run): Run without a subcommand to enter interactive REPL mode. 
""" - global _json_output + global _json_output, _dry_run _json_output = use_json + _dry_run = dry_run if project_path: sess = get_session() @@ -229,6 +240,7 @@ def project_settings(sample_rate, bit_depth, channels): sess.snapshot("Change settings") result = proj_mod.set_settings(proj, sample_rate, bit_depth, channels) output(result, "Settings updated:") + autosave_session_if_needed() else: output(proj.get("settings", {}), "Project settings:") @@ -264,6 +276,7 @@ def track_add(name, track_type, volume, pan): volume=volume, pan=pan, ) output(result, f"Added track: {result['name']}") + autosave_session_if_needed() @track.command("remove") @@ -275,6 +288,7 @@ def track_remove(index): sess.snapshot(f"Remove track {index}") removed = track_mod.remove_track(sess.get_project(), index) output(removed, f"Removed track: {removed.get('name', '')}") + autosave_session_if_needed() @track.command("list") @@ -298,6 +312,7 @@ def track_set(index, prop, value): result = track_mod.set_track_property(sess.get_project(), index, prop, value) output({"track": index, "property": prop, "value": value}, f"Set track {index} {prop} = {value}") + autosave_session_if_needed() # -- Clip Commands --------------------------------------------------------- @@ -336,6 +351,7 @@ def clip_add(track_index, source, name, start, end, trim_start, trim_end, volume trim_start=trim_start, trim_end=trim_end, volume=volume, ) output(result, f"Added clip: {result['name']}") + autosave_session_if_needed() @clip.command("remove") @@ -348,6 +364,7 @@ def clip_remove(track_index, clip_index): sess.snapshot(f"Remove clip {clip_index} from track {track_index}") removed = clip_mod.remove_clip(sess.get_project(), track_index, clip_index) output(removed, f"Removed clip: {removed.get('name', '')}") + autosave_session_if_needed() @clip.command("trim") @@ -365,6 +382,7 @@ def clip_trim(track_index, clip_index, trim_start, trim_end): trim_start=trim_start, trim_end=trim_end, ) output(result, "Clip trimmed") + 
autosave_session_if_needed() @clip.command("split") @@ -380,6 +398,7 @@ def clip_split(track_index, clip_index, split_time): sess.get_project(), track_index, clip_index, split_time, ) output(result, f"Split clip into 2 parts at {split_time}s") + autosave_session_if_needed() @clip.command("move") @@ -395,6 +414,7 @@ def clip_move(track_index, clip_index, new_start): sess.get_project(), track_index, clip_index, new_start, ) output(result, f"Moved clip to {new_start}s") + autosave_session_if_needed() @clip.command("list") @@ -455,6 +475,7 @@ def effect_add(name, track_index, param): sess.snapshot(f"Add effect {name} to track {track_index}") result = fx_mod.add_effect(sess.get_project(), name, track_index, params) output(result, f"Added effect: {name}") + autosave_session_if_needed() @effect_group.command("remove") @@ -467,6 +488,7 @@ def effect_remove(effect_index, track_index): sess.snapshot(f"Remove effect {effect_index} from track {track_index}") result = fx_mod.remove_effect(sess.get_project(), effect_index, track_index) output(result, f"Removed effect {effect_index}") + autosave_session_if_needed() @effect_group.command("set") @@ -486,6 +508,7 @@ def effect_set(effect_index, param, value, track_index): fx_mod.set_effect_param(sess.get_project(), effect_index, param, value, track_index) output({"effect": effect_index, "param": param, "value": value}, f"Set effect {effect_index} {param} = {value}") + autosave_session_if_needed() @effect_group.command("list") @@ -514,6 +537,7 @@ def selection_set(start, end): sess = get_session() result = sel_mod.set_selection(sess.get_project(), start, end) output(result, f"Selection: {start}s - {end}s") + autosave_session_if_needed() @selection.command("all") @@ -523,6 +547,7 @@ def selection_all(): sess = get_session() result = sel_mod.select_all(sess.get_project()) output(result, "Selected all") + autosave_session_if_needed() @selection.command("none") @@ -532,6 +557,7 @@ def selection_none(): sess = get_session() result = 
sel_mod.select_none(sess.get_project()) output(result, "Selection cleared") + autosave_session_if_needed() @selection.command("info") @@ -561,6 +587,7 @@ def label_add(start, end, text): sess.snapshot(f"Add label at {start}") result = label_mod.add_label(sess.get_project(), start, end, text) output(result, f"Added label: {text or f'at {start}s'}") + autosave_session_if_needed() @label.command("remove") @@ -572,6 +599,7 @@ def label_remove(index): sess.snapshot(f"Remove label {index}") removed = label_mod.remove_label(sess.get_project(), index) output(removed, f"Removed label: {removed.get('text', '')}") + autosave_session_if_needed() @label.command("list") @@ -671,6 +699,7 @@ def session_undo(): sess = get_session() desc = sess.undo() output({"undone": desc}, f"Undone: {desc}") + autosave_session_if_needed() @session_group.command("redo") @@ -680,6 +709,7 @@ def session_redo(): sess = get_session() desc = sess.redo() output({"redone": desc}, f"Redone: {desc}") + autosave_session_if_needed() @session_group.command("history") diff --git a/audacity/agent-harness/cli_anything/audacity/tests/test_full_e2e.py b/audacity/agent-harness/cli_anything/audacity/tests/test_full_e2e.py index cafc23ade..6339f2971 100644 --- a/audacity/agent-harness/cli_anything/audacity/tests/test_full_e2e.py +++ b/audacity/agent-harness/cli_anything/audacity/tests/test_full_e2e.py @@ -707,6 +707,26 @@ def test_cli_export_presets(self): assert result.returncode == 0 assert "wav" in result.stdout.lower() + def test_cli_project_mutations_persist_to_disk(self, tmp_dir, sine_wav): + project_path = os.path.join(tmp_dir, "persist.json") + + result = self._run_cli(["project", "new", "--name", "Persist", "-o", project_path]) + assert result.returncode == 0 + assert os.path.exists(project_path) + + result = self._run_cli(["--project", project_path, "track", "add", "--name", "Music"]) + assert result.returncode == 0 + with open(project_path, "r", encoding="utf-8") as handle: + payload = json.load(handle) + 
assert len(payload["tracks"]) == 1 + assert payload["tracks"][0]["name"] == "Music" + + result = self._run_cli(["--project", project_path, "clip", "add", "0", sine_wav]) + assert result.returncode == 0 + with open(project_path, "r", encoding="utf-8") as handle: + payload = json.load(handle) + assert len(payload["tracks"][0]["clips"]) == 1 + # ── True Backend E2E Tests (requires SoX installed) ────────────── diff --git a/cli-hub-matrix/3d-cad/SKILL.md b/cli-hub-matrix/3d-cad/SKILL.md new file mode 100644 index 000000000..04791fc43 --- /dev/null +++ b/cli-hub-matrix/3d-cad/SKILL.md @@ -0,0 +1,232 @@ +--- +name: cli-hub-matrix-3d-cad +description: >- + Capability-based multi-tool matrix for 3D modeling, CAD, point clouds, rendering, + GPU debugging, and fabrication. Covers mesh/parametric/photogrammetry and the + path from idea to printed part or game-ready asset. +--- + +# 3D & CAD Matrix (S3 — v2 capability-based) + +Scenario **S3**. Strongest technical scenario in the matrix by depth — Blender + FreeCAD + CloudCompare + RenderDoc is rare coverage. Structural gaps: **texturing** and **fabrication slicing**. + +Schema: [`docs/cli-matrix/matrix_registry.schema.md`](../../docs/cli-matrix/matrix_registry.schema.md). Matrix plan: [`docs/cli-matrix/cli-matrix-plan.md`](../../docs/cli-matrix/cli-matrix-plan.md). + +## Install + +```bash +cli-hub matrix install 3d-cad +cli-hub matrix info 3d-cad +cli-hub matrix preflight 3d-cad --json +``` + +--- + +## Provider selection constraints + +1. Use preflight as an availability report, not as a provider selector. +2. Treat provider order as documentation order only. +3. Choose from user requirements, output quality bar, offline needs, credential state, install cost, and provider notes. +4. Escalate to paid or metered APIs only when credentials are already present or the user explicitly consents. + +Offline context? Most of this matrix is offline-first; rendering and photogrammetry are the exceptions. 
+ +--- + +## Preflight + +Run `cli-hub matrix preflight 3d-cad --json` first. Use the manual block below for extra probes or older `cli-hub` versions. + +```bash +cli-hub list --json +python - <<'PY' +import importlib.util +for m in ("trimesh","pymeshlab","open3d","cadquery","build123d","solid", + "pygltflib","laspy","pyvista","bpy","rawpy","pycam"): + print(m, importlib.util.find_spec(m) is not None) +PY +for b in blender freecad meshroom colmap openmvg openmvs prusaslicer slic3r \ + cura CuraEngine apitrace vulkaninfo renderdoc OpenSCAD; do + command -v "$b" >/dev/null && echo "$b: yes" || echo "$b: no" +done +for e in POLYCAM_API_KEY LUMA_API_KEY ONSHAPE_ACCESS_KEY FUSION360_TOKEN \ + OCTANE_TOKEN; do + [ -n "${!e}" ] && echo "$e: set" || echo "$e: unset" +done +``` + +--- + +## Suggest-to-user template + +``` +To enable <capability> via <provider>, please set <ENV_VAR>. + Cost: <cost> + Quality: <quality> +Reply 'skip' to fall back to <fallback>. +``` + +Example: *To enable cloud photogrammetry via Luma AI, please set `LUMA_API_KEY`. Cost: metered per-scan. Quality: sota. 
Reply 'skip' to fall back to local `Meshroom` (slower, free).* + +--- + +## Capabilities + +### `model.mesh` — mesh modeling / editing + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-blender` | harness-cli | Blender installed | free | sota | yes | +| `trimesh` | python | pkg | free | high | yes | +| `pymeshlab` | python | pkg | free | high | yes | +| `open3d` | python | pkg | free | high | yes | +| `pygalmesh` | python | pkg + CGAL | free | high | yes | + +### `model.parametric` — parametric CAD + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-freecad` | harness-cli | FreeCAD installed | free | sota | yes | +| `cadquery` | python | pkg | free | high | yes | +| `build123d` | python | pkg | free | high | yes | +| `OpenSCAD` | native | binary | free | high | yes | +| `SolidPython` | python | pkg + OpenSCAD | free | good | yes | +| Onshape API | api | `ONSHAPE_ACCESS_KEY` | paid | sota | no | +| Fusion 360 API | api | `FUSION360_TOKEN` | paid | sota | no | + +### `model.sculpt` — digital sculpting + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-blender` (sculpt mode, scripted) | harness-cli | Blender | free | good | yes | +| ZBrush / Nomad | — | — | — | — | — | + +Known gap — agent-driven sculpting is impractical beyond Blender scripts. 
+ +### `pointcloud.process` — point cloud ingest / clean / register + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-cloudcompare` | harness-cli | CloudCompare | free | sota | yes | +| `cli-anything-cloudanalyzer` | harness-cli | harness installed | free | high | yes | +| `open3d` | python | pkg | free | high | yes | +| `laspy` | python | pkg | free | high | yes | +| `pdal` | native | binary | free | sota | yes | +| `pyvista` | python | pkg | free | high | yes | + +### `photogrammetry.reconstruct` — images → 3D mesh + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `Meshroom` | native | binary | free | high | yes | +| `colmap` | native | binary | free | high | yes | +| `OpenMVG` + `OpenMVS` | native | binaries | free | high | yes | +| RealityCapture | native | commercial license | paid | sota | yes | +| Polycam API | api | `POLYCAM_API_KEY` | metered | sota | no | +| Luma AI | api | `LUMA_API_KEY` | metered | sota | no | + +### `material.texture` — PBR texturing + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-blender` (shader nodes, scripted) | harness-cli | Blender | free | good | yes | +| `bpy` PBR synthesis | python | pkg | free | good | yes | +| `Pillow` procedural maps | python | pkg | free | basic | yes | +| Substance 3D / Polycam Materials / Adobe Sampler | — | — | — | — | — | + +**Known gap** — no agent-native path approaches Substance quality. 
+ +### `render.preview` — fast viewport renders + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-blender` (Eevee) | harness-cli | Blender | free | high | yes | +| `cli-anything-godot` (viewport screenshot) | harness-cli | Godot | free | good | yes | + +### `render.offline` — path-traced / final renders + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-blender` (Cycles) | harness-cli | Blender + GPU | free | sota | yes | +| `bpy` + Cycles | python | pkg + GPU | free | sota | yes | +| Octane Cloud | api | `OCTANE_TOKEN` | paid | sota | no | +| RenderMan on offline farm | — | license + farm | paid | sota | depends | + +### `gpu.debug` — capture / inspect GPU frames + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-renderdoc` | harness-cli | RenderDoc installed | free | sota | yes | +| `apitrace` | native | binary | free | high | yes | +| `vulkan-tools` | native | binary | free | good | yes | + +### `fabricate.slice` — 3D print slicer (mesh → gcode) + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `PrusaSlicer` CLI | native | binary | free | sota | yes | +| `CuraEngine` | native | binary | free | high | yes | +| `slic3r` | native | binary | free | good | yes | +| Bambu Studio / OrcaSlicer (API-driven) | — | — | — | — | — | + +**Known gap** — no agent-native harness, though the natives are scriptable. 
+ +### `fabricate.cam` — CAM / CNC toolpaths + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-freecad` (Path workbench, scripted) | harness-cli | FreeCAD | free | good | yes | +| `pycam` | python | pkg | free | good | yes | +| `kiri:moto` | native | binary | free | good | yes | +| Fusion 360 CAM / Onshape CAM | api | tokens | paid | sota | no | + +### `export.engine` — handoff to game / viz engine + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-godot` | harness-cli | Godot | free | high | yes | +| `cli-anything-blender` (glTF/FBX export) | harness-cli | Blender | free | sota | yes | +| `pygltflib` | python | pkg | free | high | yes | +| `trimesh` glTF writer | python | pkg | free | good | yes | +| Unity Asset Pipeline / Unreal Datasmith | — | — | — | — | — | + +--- + +## Recipes + +- **`printable-part`** — parametric CAD → mesh → slice → gcode. + Uses: `model.parametric`, `model.mesh`, `fabricate.slice`. + +- **`game-asset`** — model → texture → export to engine. + Uses: `model.mesh`, `material.texture`, `export.engine`, (optional `render.preview`). + +- **`reality-scan`** — photos → photogrammetry → clean mesh → export. + Uses: `photogrammetry.reconstruct`, `pointcloud.process`, `model.mesh`, `export.engine`. + +- **`cnc-part`** — parametric part → CAM toolpath → gcode. + Uses: `model.parametric`, `fabricate.cam`. + +- **`product-viz-still`** — CAD → import → scene → offline render. + Uses: `model.parametric`, `model.mesh`, `material.texture`, `render.offline`. + +- **`gpu-bug-repro`** — capture frame → analyze → patch shader. + Uses: `gpu.debug`, `render.preview`. + +--- + +## Known gaps + +- **`material.texture`** — no open equivalent to Substance; Blender shader nodes are the current ceiling. +- **`fabricate.slice`** — PrusaSlicer/Cura natives work but want a harness-level wrapper. +- **`model.sculpt`** — impractical for agents today. 
+- **Unity / Unreal export** — only Godot covered on the harness side. + +--- + +## Agent guidance + +- **Parametric first, mesh later.** Start in `model.parametric` when the object has dimensions; `model.mesh` when it's organic. +- **Always bake before export.** When handing off to `export.engine`, apply modifiers and bake materials — runtime engines don't read Blender shader graphs. +- **Check manifold before slicing.** `fabricate.slice` fails silently on non-manifold meshes; use `trimesh.Trimesh.is_volume` or pymeshlab's manifold check first. +- **Offline-capable baseline.** This matrix is highly offline-capable — API escalation is rarely needed except for photogrammetry-at-scale. +- **Workspace discipline.** Keep `.FCStd` / `.blend` / `.ply` / `.stl` / `.gcode` in one project folder so the chain of transformations is reproducible. diff --git a/cli-hub-matrix/game-development/SKILL.md b/cli-hub-matrix/game-development/SKILL.md new file mode 100644 index 000000000..01fa37088 --- /dev/null +++ b/cli-hub-matrix/game-development/SKILL.md @@ -0,0 +1,213 @@ +--- +name: cli-hub-matrix-game-development +description: >- + Capability-based multi-tool matrix for game development: engine, 3D/2D/audio assets, + AI-generated assets, agent playtesting, packaging, store publishing, and telemetry. +--- + +# Game Development Matrix (S4 — v2 capability-based) + +Scenario **S4**. All asset-creation capabilities covered via the S3/S5 stacks; the structural gap is **distribution** (Steam/itch/console stores) and alternative engines (Unity/Unreal). + +Schema: [`docs/cli-matrix/matrix_registry.schema.md`](../../docs/cli-matrix/matrix_registry.schema.md). Matrix plan: [`docs/cli-matrix/cli-matrix-plan.md`](../../docs/cli-matrix/cli-matrix-plan.md). + +## Install + +```bash +cli-hub matrix install game-development +cli-hub matrix info game-development +cli-hub matrix preflight game-development --json +``` + +--- + +## Provider selection constraints + +1. 
Use preflight as an availability report, not as a provider selector. +2. Treat provider order as documentation order only. +3. Choose from user requirements, output quality bar, offline needs, credential state, install cost, and provider notes. +4. Escalate to paid or metered APIs only when credentials are already present or the user explicitly consents. + +--- + +## Preflight + +Run `cli-hub matrix preflight game-development --json` first. Use the manual block below for extra probes or older `cli-hub` versions. + +```bash +cli-hub list --json +python - <<'PY' +import importlib.util +for m in ("pygame","arcade","panda3d","ursina","trimesh","pygltflib","PIL", + "pydub","librosa","music21","mido","abjad","diffusers","replicate", + "posthog","gymnasium"): + print(m, importlib.util.find_spec(m) is not None) +PY +for b in godot blender aseprite butler steamcmd ffmpeg pyinstaller; do + command -v "$b" >/dev/null && echo "$b: yes" || echo "$b: no" +done +for e in STEAM_USERNAME STEAM_PASSWORD ITCH_API_KEY EPIC_DEV_TOKEN \ + PLAY_CONSOLE_KEY APP_STORE_CONNECT_KEY \ + REPLICATE_API_TOKEN OPENAI_API_KEY STABILITY_API_KEY IDEOGRAM_API_KEY \ + POSTHOG_API_KEY GAMEANALYTICS_KEY PLAYFAB_TITLE_KEY SENTRY_DSN; do + [ -n "${!e}" ] && echo "$e: set" || echo "$e: unset" +done +``` + +--- + +## Suggest-to-user template + +``` +To enable <capability> via <provider>, please set <ENV_VAR>. + Cost: <cost> + Quality: <quality> +Reply 'skip' to fall back to <fallback>. +``` + +Example: *To publish to Steam, please install `steamcmd` and set `STEAM_USERNAME` + `STEAM_PASSWORD` (use a dedicated build account to limit credential risk). Cost: one-time $100 Steamworks fee. 
Reply 'skip' to fall back to itch.io via `butler` which only needs `ITCH_API_KEY`.* + +--- + +## Capabilities + +### `game.engine` — engine authoring / project manipulation + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-godot` | harness-cli | Godot installed | free | sota | yes | +| `pygame` | python | pkg | free | good | yes | +| `arcade` | python | pkg | free | good | yes | +| `panda3d` | python | pkg | free | high | yes | +| `ursina` | python | pkg | free | good | yes | +| Unity / Unreal | — | — | — | — | — | + +### `asset.3d` — 3D game assets + +See S3 matrix. Primary: `cli-anything-blender`, `cli-anything-freecad`, `trimesh`, `pygltflib`. + +### `asset.2d` — sprites, tiles, UI art + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-krita` | harness-cli | Krita installed | free | sota | yes | +| `cli-anything-inkscape` | harness-cli | Inkscape installed | free | sota | yes | +| `Pillow` | python | pkg | free | good | yes | +| `aseprite` | native | binary (commercial) | paid | sota | yes | +| `pyxel` | python | pkg | free | good | yes | +| OpenAI GPT-Image-1 | api | `OPENAI_API_KEY` | metered | sota | no | +| Scenario GG | api | `SCENARIO_API_KEY` | paid | sota | no | +| Leonardo AI | api | `LEONARDO_API_KEY` | paid | high | no | + +### `asset.audio` — sfx and gameplay audio + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-audacity` | harness-cli | Audacity | free | high | yes | +| `pydub` / `librosa` / `soundfile` | python | pkg | free | good | yes | +| `sfxr` / `jsfxr` | native | binary | free | good | yes | +| ElevenLabs SFX | api | `ELEVENLABS_API_KEY` | paid | sota | no | +| Mubert | api | `MUBERT_API_KEY` | metered | high | no | + +### `asset.notation` — music notation / score export + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| 
`cli-anything-musescore` | harness-cli | MuseScore installed | free | sota | yes | +| `music21` | python | pkg | free | high | yes | +| `mido` | python | pkg | free | good | yes | +| `abjad` | python | pkg | free | high | yes | + +### `ai.gen-asset` — AI-generated art/audio for games + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-comfyui` | harness-cli | ComfyUI installed | free | high | yes | +| `jimeng` | public-cli | bin | metered | high | no | +| `diffusers` | python | pkg + weights | free | good | yes | +| `replicate` | python | `REPLICATE_API_TOKEN` | metered | high | no | +| Scenario GG | api | `SCENARIO_API_KEY` | paid | sota | no | +| Leonardo AI | api | `LEONARDO_API_KEY` | paid | high | no | +| OpenAI GPT-Image-1 | api | `OPENAI_API_KEY` | metered | sota | no | +| Stability | api | `STABILITY_API_KEY` | metered | high | no | + +### `playtest.agent` — agent-driven play / testing + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-slay-the-spire-ii` | harness-cli | StS II + bridge mod | free | sota | yes | +| `gymnasium` envs | python | pkg | free | high | yes | +| Headless game + scripted input (e.g. Godot `--headless` with GUT) | harness-cli | Godot | free | good | yes | + +Distinctive capability — very few agent stacks can drive a live GUI game loop. 
+ +### `build.package` — package a game binary / bundle + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-godot` (export presets) | harness-cli | Godot export templates | free | sota | yes | +| `pyinstaller` (pygame/arcade) | python | pkg | free | good | yes | +| `butler` (itch packager) | native | binary | free | high | yes | + +### `publish.store` — upload a build to a store + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `steamcmd` | native | binary + Steam account + app ID | paid (fee) | sota | no | +| `butler` (itch.io) | native | binary + `ITCH_API_KEY` | free | high | no | +| Epic Games Services | api | `EPIC_DEV_TOKEN` | paid | high | no | +| Google Play Developer | api | `PLAY_CONSOLE_KEY` + service account | paid | high | no | +| App Store Connect | api | `APP_STORE_CONNECT_KEY` | paid | high | no | + +**Known gap** — no harness-level abstraction; escalate via the suggest-to-user template with platform-specific preflight. + +### `telemetry.ingest` — gameplay analytics / crash reports + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `sentry` | public-cli | bin + `SENTRY_DSN` | free-paid | high | no | +| `posthog` | python | pkg + `POSTHOG_API_KEY` | free-paid | high | no | +| GameAnalytics | api | `GAMEANALYTICS_KEY` | free | good | no | +| Unity Analytics | api | Unity project token | metered | high | no | +| PlayFab | api | `PLAYFAB_TITLE_KEY` | metered | high | no | + +--- + +## Recipes + +- **`godot-jam-game`** — minimal end-to-end for a game jam. + Uses: `game.engine`, `asset.2d`, `asset.audio`, `build.package`, (optional `publish.store`). + +- **`agent-bot`** — train / evaluate an agent against an existing game. + Uses: `playtest.agent`, `telemetry.ingest`. + +- **`ai-indie-short`** — AI-generated art + simple engine loop. + Uses: `ai.gen-asset`, `asset.2d`, `game.engine`, `asset.audio`, `build.package`. 
+ +- **`steam-release`** — ship to Steam with crash reporting. + Uses: `build.package`, `publish.store`, `telemetry.ingest`. + +- **`music-driven-game`** — composed score integrated with gameplay. + Uses: `asset.notation`, `asset.audio`, `game.engine`. + +- **`reality-to-game`** — photogrammetry asset → import → ship. + Uses: (S3 `photogrammetry.reconstruct`), `asset.3d`, `game.engine`, `build.package`. + +--- + +## Known gaps + +- **`publish.store`** — Steam/itch/console wrappers are native-only; escalate with explicit credentials. +- **`game.engine` for Unity/Unreal** — only Godot harness exists. +- **`telemetry.ingest`** — Sentry works; no first-party for GameAnalytics/PlayFab. +- **`asset.notation`** — uniquely strong (MuseScore) but rarely used in practice; keep as a differentiator. + +--- + +## Agent guidance + +- **Start in Godot** unless the user has explicitly asked for Unity/Unreal — it's the only engine we harness. +- **Keep asset pipelines deterministic** — re-runnable seeds for `ai.gen-asset`, versioned source `.aseprite`/`.krita` files, scriptable Blender bakes. +- **`playtest.agent` needs a bridge.** Most commercial games lack API access; agents should ask the user if modding is permitted before attempting. +- **Version builds immediately.** `build.package` output goes into `builds/<version>/` with a manifest so `publish.store` has deterministic uploads. +- **Don't auto-publish.** `publish.store` must always be gated behind explicit user confirmation — uploading a broken build is expensive to roll back. diff --git a/cli-hub-matrix/image-design/SKILL.md b/cli-hub-matrix/image-design/SKILL.md new file mode 100644 index 000000000..5e8931a72 --- /dev/null +++ b/cli-hub-matrix/image-design/SKILL.md @@ -0,0 +1,218 @@ +--- +name: cli-hub-matrix-image-design +description: >- + Capability-based multi-tool matrix for image and graphic design: AI generation, + raster/vector editing, UI mockups, diagrams, upscaling, photo library, and publishing.
+--- + +# Image & Graphic Design Matrix (S5 — v2 capability-based) + +Scenario **S5**. Deepest creation coverage after S2 — raster (GIMP + Krita), vector (Inkscape), AI (ComfyUI + Jimeng + 7 APIs), diagram (drawio + mermaid). **Figma** is the headline gap (also affects S7). + +Schema: [`docs/cli-matrix/matrix_registry.schema.md`](../../docs/cli-matrix/matrix_registry.schema.md). Matrix plan: [`docs/cli-matrix/cli-matrix-plan.md`](../../docs/cli-matrix/cli-matrix-plan.md). + +## Install + +```bash +cli-hub matrix install image-design +cli-hub matrix info image-design +cli-hub matrix preflight image-design --json +``` + +--- + +## Provider selection constraints + +1. Use preflight as an availability report, not as a provider selector. +2. Treat provider order as documentation order only. +3. Choose from user requirements, output quality bar, offline needs, credential state, install cost, and provider notes. +4. Escalate to paid or metered APIs only when credentials are already present or the user explicitly consents. AI image is paid-API-diverse, so ask which provider the user wants when several credentialed choices are available. + +Offline context? GIMP, Krita, Inkscape, and local `diffusers` cover almost everything offline. + +--- + +## Preflight + +Run `cli-hub matrix preflight image-design --json` first. Use the manual block below for extra probes or older `cli-hub` versions. 
+ +```bash +cli-hub list --json +python - <<'PY' +import importlib.util +for m in ("PIL","cv2","skimage","rembg","svgwrite","cairosvg","svgpathtools","vpype", + "diffusers","replicate","rawpy"): + print(m, importlib.util.find_spec(m) is not None) +PY +for b in inkscape gimp krita drawio magick convert exiftool upscayl darktable-cli \ + rawtherapee-cli graphviz dot mermaid plantuml hugo mkdocs; do + command -v "$b" >/dev/null && echo "$b: yes" || echo "$b: no" +done +for e in OPENAI_API_KEY STABILITY_API_KEY IDEOGRAM_API_KEY RECRAFT_API_KEY \ + GOOGLE_API_KEY MIDJOURNEY_TOKEN FIGMA_TOKEN CANVA_API_KEY PENPOT_TOKEN \ + REPLICATE_API_TOKEN TOPAZ_API_KEY REMOVE_BG_API_KEY FIREFLY_TOKEN; do + [ -n "${!e}" ] && echo "$e: set" || echo "$e: unset" +done +``` + +--- + +## Suggest-to-user template + +``` +To enable <capability> via <provider>, please set <ENV_VAR>. + Cost: <cost> + Quality: <quality> +Reply 'skip' to fall back to <fallback>. +``` + +Examples: +- *To enable sota AI image generation via OpenAI GPT-Image-1, please set `OPENAI_API_KEY`. Cost: metered per-image. Quality: sota for photoreal + text rendering. Reply 'skip' to fall back to local `comfyui` (FLUX/SDXL).* +- *To round-trip UI mockups with Figma, please set `FIGMA_TOKEN`. Cost: free tier works for most users. Quality: sota (design system aware).
Reply 'skip' to fall back to `sketch` or `drawio` wireframes.* + +--- + +## Capabilities + +### `visual.generate` — AI image generation + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-comfyui` | harness-cli | ComfyUI + weights | free | high | yes | +| `jimeng` | public-cli | `dreamina` bin | metered | high | no | +| `diffusers` (SD/SDXL/FLUX) | python | pkg + weights + GPU | free | good-high | yes | +| `replicate` | python | `REPLICATE_API_TOKEN` | metered | high | no | +| OpenAI GPT-Image-1 | api | `OPENAI_API_KEY` | metered | sota | no | +| Ideogram | api | `IDEOGRAM_API_KEY` | metered | sota | no | +| Stability AI | api | `STABILITY_API_KEY` | metered | high | no | +| Recraft | api | `RECRAFT_API_KEY` | paid | sota | no | +| Google Imagen / Nano Banana | api | `GOOGLE_API_KEY` | metered | high | no | +| Midjourney (Discord bridge) | api | `MIDJOURNEY_TOKEN` | paid | sota | no | +| Adobe Firefly | api | `FIREFLY_TOKEN` | paid | high | no | + +### `visual.edit.raster` — raster editing + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-gimp` | harness-cli | GIMP installed | free | sota | yes | +| `cli-anything-krita` | harness-cli | Krita installed | free | sota | yes | +| `Pillow` | python | pkg | free | good | yes | +| `opencv-python` | python | pkg | free | high | yes | +| `scikit-image` | python | pkg | free | high | yes | +| `rembg` (background removal) | python | pkg | free | high | yes | +| `remove.bg` | api | `REMOVE_BG_API_KEY` | metered | sota | no | +| Adobe Firefly | api | `FIREFLY_TOKEN` | paid | sota | no | +| Cutout.pro | api | token | metered | high | no | + +### `visual.edit.vector` — vector editing + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-inkscape` | harness-cli | Inkscape installed | free | sota | yes | +| `svgwrite` | python | pkg | free | good | yes | +| `cairosvg` | 
python | pkg | free | good | yes | +| `svgpathtools` | python | pkg | free | high | yes | +| `vpype` (vector post-processing) | python | pkg | free | high | yes | + +### `visual.mockup` — UI / mockup / wireframe + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-sketch` | harness-cli | Sketch installed (macOS) | paid app | sota | yes | +| `cli-anything-drawio` | harness-cli | drawio installed | free | high | yes | +| `drawio-desktop` | native | binary | free | high | yes | +| `penpot` | native | binary / self-host | free | high | yes | +| Figma API | api | `FIGMA_TOKEN` | free tier | sota | no | +| Canva API | api | `CANVA_API_KEY` | paid | high | no | +| Penpot API | api | `PENPOT_TOKEN` | free | high | no | + +**Known gap** — Figma is the cross-scenario gap (S5 and S7). + +### `visual.diagram` — diagrams / flowcharts + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-drawio` | harness-cli | drawio | free | sota | yes | +| `cli-anything-mermaid` | harness-cli | mermaid-cli | free | high | yes | +| `graphviz` (`dot`) | native | binary | free | high | yes | +| `plantuml` | native | binary + java | free | high | yes | + +### `visual.upscale` — super-resolution + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `Real-ESRGAN` | python | pkg + weights | free | high | yes | +| `GFPGAN` (faces) | python | pkg + weights | free | high | yes | +| `upscayl` | native | binary | free | high | yes | +| Topaz | api | `TOPAZ_API_KEY` | paid | sota | no | +| Replicate upscalers | python | `REPLICATE_API_TOKEN` | metered | high | no | + +### `photo.library` — DAM / photo organization + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `exiftool` + `Pillow` | native+python | binary+pkg | free | high | yes | +| `digikam` | native | binary | free | high | yes | +| `photoprism` | native | binary / 
self-host | free | high | yes | +| Google Photos API | api | OAuth | free tier | high | no | + +### `photo.develop` — RAW processing + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `rawpy` | python | pkg | free | high | yes | +| `darktable-cli` | native | binary | free | sota | yes | +| `rawtherapee-cli` | native | binary | free | sota | yes | +| Adobe Lightroom API | api | Adobe creds | paid | sota | no | + +### `publish.cms` — publish images / articles to a CMS + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `contentful` | public-cli | bin + token | metered | high | no | +| `sanity` | public-cli | bin + token | metered | high | no | +| `hugo` / `mkdocs` | native/python | binary/pkg | free | high | yes | +| WordPress REST | api | app password | free | high | no | + +--- + +## Recipes + +- **`social-card`** — generate + compose a social sharing image. + Uses: `visual.generate`, `visual.edit.raster`, `publish.cms`. + +- **`logo-set`** — design a logo + size variants. + Uses: `visual.edit.vector`, `visual.edit.raster` (rasterize), batch export. + +- **`ui-wireframe`** — mockup a UI flow. + Uses: `visual.mockup`, optional `visual.edit.vector` polish. + +- **`photo-batch`** — RAW ingest → develop → edit → library → publish. + Uses: `photo.develop`, `visual.edit.raster`, `photo.library`, `publish.cms`. + +- **`icon-pack`** — systematic icon production. + Uses: `visual.edit.vector`, batch rasterize via `cairosvg`. + +- **`thumbnail-set`** — thumbnails for a video/article series. + Uses: `visual.generate` (hero imagery), `visual.edit.raster` (layout, type), batch variant export. + +- **`ai-concept-art`** — generate → upscale → touch up. + Uses: `visual.generate`, `visual.upscale`, `visual.edit.raster`. + +--- + +## Known gaps + +- **Figma** — cross-scenario gap (UI/UX loop for S5 and S7). API escalation works but we lack a native harness. 
+- **`photo.library` / `photo.develop`** — strong native CLIs, no harness. +- **Midjourney** — Discord bridge only; no clean API. + +--- + +## Agent guidance + +- **Match generation provider to intent.** GPT-Image-1 for text-in-image and photoreal; Ideogram for poster/typography; FLUX for artistic; Midjourney for stylized beauty shots; Stability for control+speed. +- **Vector-first for design systems** — logos, icons, illustrations start in `visual.edit.vector`, rasterize last. +- **Preserve color profiles.** `exiftool` + `Pillow` handles ICC; when moving between `rawpy` and `Pillow`, convert to sRGB at the boundary. +- **AI + edit chain.** Agents should plan: generate → background removal (`rembg`) → composite (`visual.edit.raster`) → export. Skipping cleanup is the most common visible defect. +- **Workspace discipline.** Keep generation seeds + prompts in a manifest alongside output PNGs so variations are reproducible. diff --git a/cli-hub-matrix/knowledge-research/SKILL.md b/cli-hub-matrix/knowledge-research/SKILL.md new file mode 100644 index 000000000..688f997be --- /dev/null +++ b/cli-hub-matrix/knowledge-research/SKILL.md @@ -0,0 +1,241 @@ +--- +name: cli-hub-matrix-knowledge-research +description: >- + Capability-based multi-tool matrix for research, note-taking, document authoring, + and publishing. Agents compose providers (CLI-Anything harnesses, public CLIs, + Python libs, native binaries, cloud APIs) per capability. +--- + +# Knowledge / Office / Research Matrix (S2 — v2 capability-based) + +Scenario **S2**. Strongest coverage in the current ecosystem: search, reference management, notes, synthesis, documents, and diagrams all have first-party harnesses; Python/native fallbacks fill the rest. + +Schema: [`docs/cli-matrix/matrix_registry.schema.md`](../../docs/cli-matrix/matrix_registry.schema.md). Matrix plan: [`docs/cli-matrix/cli-matrix-plan.md`](../../docs/cli-matrix/cli-matrix-plan.md). 
+ +## Install + +```bash +cli-hub matrix install knowledge-research +cli-hub matrix info knowledge-research +cli-hub matrix preflight knowledge-research --json +``` + +--- + +## Provider selection constraints + +1. Use preflight as an availability report, not as a provider selector. +2. Treat provider order as documentation order only. +3. Choose from user requirements, output quality bar, offline needs, credential state, install cost, and provider notes. +4. Escalate to paid or metered APIs only when credentials are already present or the user explicitly consents. + +Offline context? Filter to `offline: true` providers. + +--- + +## Preflight + +Run `cli-hub matrix preflight knowledge-research --json` first. Use the manual block below for extra probes or older `cli-hub` versions. + +```bash +cli-hub list --json +python - <<'PY' +import importlib.util +for m in ("pyzotero","bibtexparser","trafilatura","readability","httpx","playwright", + "python_docx","openpyxl","pptx","reportlab","pypdf","pdfplumber","weasyprint", + "langchain","llama_index","haystack"): + print(m, importlib.util.find_spec(m) is not None) +PY +for b in pandoc qpdf pdftk exiftool latexmk tex dot mermaid plantuml hugo mkdocs jekyll; do + command -v "$b" >/dev/null && echo "$b: yes" || echo "$b: no" +done +for e in OPENAI_API_KEY ANTHROPIC_API_KEY GOOGLE_API_KEY PERPLEXITY_API_KEY TAVILY_API_KEY \ + SERPAPI_API_KEY BRAVE_API_KEY FIRECRAWL_API_KEY \ + NOTION_API_KEY SEMANTIC_SCHOLAR_API_KEY WORDPRESS_TOKEN GHOST_ADMIN_KEY; do + [ -n "${!e}" ] && echo "$e: set" || echo "$e: unset" +done +``` + +--- + +## Suggest-to-user template + +``` +To enable <capability> via <provider>, please set <ENV_VAR>. + Cost: <cost> + Quality: <quality> +Reply 'skip' to fall back to <fallback>. +``` + +Example: *To enable live Notion sync via the Notion API, please set `NOTION_API_KEY`. Cost: free tier suffices for most users. Quality: sota (round-trips block/page IDs).
Reply 'skip' to fall back to local Obsidian vault + manual paste.* + +--- + +## Capabilities + +### `research.search` — web / literature search + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-exa` | harness-cli | `EXA_API_KEY` | metered | sota | no | +| `cli-anything-browser` / `safari` / `clibrowser` | harness-cli | browser installed | free | high | no | +| `duckduckgo-search` | python | pkg | free | good | no | +| `googlesearch-python` | python | pkg | free | good | no | +| Perplexity API | api | `PERPLEXITY_API_KEY` | paid | sota | no | +| Tavily | api | `TAVILY_API_KEY` | metered | high | no | +| Brave Search | api | `BRAVE_API_KEY` | metered | good | no | +| Google Custom Search | api | `GOOGLE_API_KEY` | metered | high | no | + +### `research.retrieve` — fetch article text / scrape pages + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-browser` / `safari` / `clibrowser` | harness-cli | harness installed | free | high | no | +| `trafilatura` | python | pkg | free | high | yes | +| `readability-lxml` | python | pkg | free | good | yes | +| `httpx` + manual parse | python | pkg | free | basic | yes | +| `playwright` | python | pkg + browsers | free | sota | no | +| Firecrawl | api | `FIRECRAWL_API_KEY` | metered | sota | no | +| ScrapingBee | api | `SCRAPINGBEE_API_KEY` | paid | high | no | + +### `research.reference` — citations, BibTeX, references + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-zotero` | harness-cli | Zotero installed | free | sota | yes | +| `pyzotero` | python | pkg + Zotero key | free | high | no | +| `habanero` (Crossref) | python | pkg | free | high | no | +| `bibtexparser` | python | pkg | free | good | yes | +| Crossref REST | api | none | free | high | no | +| Semantic Scholar | api | `SEMANTIC_SCHOLAR_API_KEY` (optional) | free | high | no | + +### `knowledge.note` — 
note-taking / PKM + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-obsidian` | harness-cli | Obsidian installed | free | sota | yes | +| `cli-anything-mubu` | harness-cli | Mubu installed | free | high | no | +| `obsidian-cli` | public-cli | bin + vault path | free | high | yes | +| raw markdown + `git` | native | `git` | free | good | yes | +| `mdformat` | python | pkg | free | good | yes | +| Notion API | api | `NOTION_API_KEY` | free | sota | no | +| Roam / Logseq sync | — | — | — | — | — | + +### `knowledge.outline` — outlining + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-mubu` | harness-cli | Mubu installed | free | high | no | +| OPML via `lxml` | python | pkg | free | good | yes | +| YAML outlines + `yq` | native | binary | free | good | yes | + +### `knowledge.synthesize` — multi-doc synthesis / summarization + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-notebooklm` | harness-cli | harness installed | metered | sota | no | +| `cli-anything-intelwatch` | harness-cli | harness installed | free-metered | high | partial | +| `langchain` / `llama_index` + local LLM | python | pkg + ollama | free | high | yes | +| `haystack` | python | pkg | free | good | partial | +| Claude / OpenAI / Gemini (long context) | api | model key | metered | sota | no | + +### `document.author` — write docs (DOCX, XLSX, PPTX, PDF) + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-libreoffice` | harness-cli | LibreOffice installed | free | high | yes | +| `cli-anything-anygen` | harness-cli | harness installed | free | high | yes | +| `python-docx` / `openpyxl` / `python-pptx` / `reportlab` | python | pkg | free | good | yes | +| Google Docs API | api | OAuth token | metered | sota | no | +| Microsoft Graph | api | Azure app + token | metered | sota | no | + 
+### `document.format` — convert / reformat + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `pandoc` | native | binary | free | sota | yes | +| `cli-anything-libreoffice` (headless convert) | harness-cli | LibreOffice | free | high | yes | +| `docx2pdf` | python | pkg + Word/LO | free | good | yes | +| `weasyprint` | python | pkg | free | high | yes | +| CloudConvert | api | `CLOUDCONVERT_API_KEY` | metered | sota | no | + +### `document.pdf` — manipulate PDFs (split, merge, OCR, extract) + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `qpdf` / `pdftk` | native | binary | free | sota | yes | +| `pypdf` | python | pkg | free | high | yes | +| `pdfplumber` | python | pkg | free | high | yes | +| `pdf2image` | python | pkg + poppler | free | good | yes | +| `ocrmypdf` | native | binary + tesseract | free | high | yes | +| Adobe PDF Services | api | Adobe creds | paid | sota | no | + +### `diagram.create` — diagrams, flowcharts, architecture + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-drawio` | harness-cli | drawio installed | free | sota | yes | +| `cli-anything-mermaid` | harness-cli | mermaid-cli | free | high | yes | +| `graphviz` (`dot`) | native | binary | free | high | yes | +| `plantuml` | native | binary | free | high | yes | + +### `publish.web` — publish articles / blog posts + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `contentful` | public-cli | bin + space token | metered | high | no | +| `sanity` | public-cli | bin + project token | metered | high | no | +| `hugo` / `mkdocs` / `jekyll` / `pelican` | native/python | binary/pkg | free | high | yes | +| WordPress REST | api | app password | free | high | no | +| Ghost Admin | api | `GHOST_ADMIN_KEY` | free | high | no | +| Medium (legacy) | api | token | free | good | no | + +### `publish.latex` — build PDFs from 
LaTeX + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `latexmk` + `texlive` | native | binaries | free | sota | yes | +| `pylatex` | python | pkg + texlive | free | high | yes | +| Overleaf API | api | `OVERLEAF_API_KEY` | paid | sota | no | +| Tectonic | native | binary | free | high | yes | + +--- + +## Recipes + +- **`literature-review`** — survey a topic, extract key papers, synthesize. + Uses: `research.search`, `research.retrieve`, `research.reference`, `knowledge.synthesize`, `document.author`. + +- **`meeting-to-doc`** — turn a recorded meeting into a structured document. + Uses: `text.transcribe` (via S1 matrix), `knowledge.synthesize`, `document.author`, `document.format`. + +- **`blog-post`** — research → write → publish. + Uses: `research.search`, `research.retrieve`, `knowledge.synthesize`, `document.author`, `publish.web`. + +- **`paper-draft`** — research + citations + LaTeX. + Uses: `research.reference`, `knowledge.synthesize`, `document.author`, `publish.latex`. + +- **`competitive-brief`** — fetch competitor pages, extract positioning, produce a briefing deck. + Uses: `research.retrieve`, `knowledge.synthesize`, `document.author` (PPTX). + +- **`pdf-book-digest`** — ingest a PDF book, OCR if needed, produce chapter summaries + quote file. + Uses: `document.pdf`, `knowledge.synthesize`, `knowledge.note`. + +--- + +## Known gaps + +- **Notion** — Notion API exists as an escalation, but there is no harness for local-first Notion workflows. Recurs in S11 (team comms). +- **Google Docs / Microsoft 365 live editing** — API path only. +- **LaTeX harness** — `latexmk` + Tectonic cover the native path, but no agent-native wrapper yet. +- **Translation** — no first-party CLI; Python fallback and APIs only. + +--- + +## Agent guidance + +- **Run preflight once per session.** Many capabilities have strong Python or native paths that bypass the need to install anything. 
+- **Prefer local** for sensitive docs (legal, health, internal): use Obsidian + local LLM + `whisper` rather than cloud APIs. +- **Citations are load-bearing** — for academic work always route through `research.reference` so bibliographies stay consistent. +- **Chain `document.format` at the end**, not the middle: author in the richest source format (markdown or LaTeX) and convert last. +- **Workspace discipline** — one directory per project; keep `refs.bib`, source markdown, figures, and output PDFs colocated. diff --git a/cli-hub-matrix/video-creation/SKILL.md b/cli-hub-matrix/video-creation/SKILL.md new file mode 100644 index 000000000..26f14b7a1 --- /dev/null +++ b/cli-hub-matrix/video-creation/SKILL.md @@ -0,0 +1,536 @@ +--- +name: cli-hub-matrix-video-creation +description: >- + Capability-based multi-tool matrix for video production. Agents pick providers + (CLI-Anything harnesses, public CLIs, Python libs, native binaries, cloud APIs) + per capability rather than marching through fixed stages, including storyboard + planning, story/audio direction, source triage, internet video/music + search/download, capture/generation, analysis, sound design, high-end caption + design, NLE/render doctor investigation, review, and packaging. +--- + +# Video Creation Matrix (v3 — capability-based) + +This matrix describes **capabilities** the agent can compose on demand — not a fixed pipeline. A "video creation" workflow picks a *recipe* (which capabilities it needs) and, per capability, picks a *provider* from the task requirements and preflight facts below. + +Schema: [`docs/cli-matrix/matrix_registry.schema.md`](../../docs/cli-matrix/matrix_registry.schema.md). 
+ +## Install (installable portion) + +```bash +cli-hub matrix install video-creation # installs registered matrix CLIs only +cli-hub matrix info video-creation # inspect providers & recipes +cli-hub matrix preflight video-creation # check available providers in this environment +``` + +Not everything in this matrix is installed by `cli-hub matrix install`. Cloud APIs, Python packages, native binaries, third-party public CLIs, and external skills are first-class providers too, but install them only after the task actually needs that provider. +Do not hand-write `pip install ...#subdirectory=...` for CLI-Anything matrix members; install the supported harnesses through `cli-hub matrix install video-creation`, then use preflight to see what else is already available. + +--- + +## Provider selection constraints (agent: evaluate per capability) + +1. Use preflight as an availability report, not as a provider selector. +2. Choose providers from the user's goal, quality bar, budget, offline needs, credential state, install cost, and requested workflow. +3. Treat registry/provider order as documentation order only; do not assume the first provider is the correct one. +4. Install Python libs, native binaries, harness CLIs, public CLIs, or agent skills only when they fit the task constraints. +5. Escalate to paid or metered APIs only when the user has supplied credentials or explicitly consents. Never silently call a paid API. +6. If a task falls into a genre with a mandatory provider rule, the provider is not optional. Do not substitute a cheaper/easier fallback just because it is locally convenient. + +Offline context? Filter to `offline: true` providers only. 
+ +--- + +## Preflight (run once per session, cache the result) + +Run the built-in matrix preflight first: + +```bash +cli-hub matrix preflight video-creation --json +cli-hub matrix preflight video-creation --capability composite.assemble +cli-hub matrix preflight video-creation --offline +``` + +If you need raw checks or are running without the latest `cli-hub`, use the manual block: + +```bash +cli-hub list --json +python - <<'PY' +import importlib.util +for m in ("moviepy","whisper","pydub","PIL","edge_tts","pysrt","pysubs2","yt_dlp","spotdl","scenedetect","paddleocr","twelvelabs"): + print(m, importlib.util.find_spec(m) is not None) +PY +for b in ffmpeg ffprobe sox convert magick screencapture yt-dlp spotdl scdl bandcamp-dl you-get lux BBDown scenedetect mediainfo ffmpeg-quality-metrics paddleocr hyperframes; do command -v "$b" >/dev/null && echo "$b: yes" || echo "$b: no"; done +for e in RUNWAY_API_KEY KLING_API_KEY PIKA_API_KEY SEEDANCE_API_KEY \ + ELEVENLABS_API_KEY MINIMAX_API_KEY OPENAI_API_KEY GOOGLE_CLOUD_PROJECT \ + ASSEMBLYAI_API_KEY DEEPGRAM_API_KEY \ + SUNO_API_KEY UDIO_API_KEY IDEOGRAM_API_KEY STABILITY_API_KEY \ + TWELVELABS_API_KEY GOOGLE_APPLICATION_CREDENTIALS; do + [ -n "${!e}" ] && echo "$e: set" || echo "$e: unset" +done +``` + +--- + +## Suggest-to-user template (agent uses verbatim when escalating) + +``` +To enable <capability> via <provider>, please set <ENV_VAR>. + Cost: <cost> + Quality: <quality> +Reply 'skip' to fall back to <fallback>. +``` + +Examples: + +- *To enable cinematic AI video via Runway Gen-4, please set `RUNWAY_API_KEY`. Cost: ~$0.05/sec as of 2026-04. Quality: sota. Reply 'skip' to fall back to `generate-veo-video` or `jimeng` if configured.* +- *To enable ByteDance Seedance video generation, please set `SEEDANCE_API_KEY`. Cost: metered per-clip. Quality: sota for realistic motion.
Reply 'skip' to fall back to `jimeng` (Dreamina) which shares the ByteDance model family.* + +--- + +## Capabilities + +### `script.storyboard` — brief to creative direction, script, shot list, timing, and asset plan + +Use this before generation, search, capture, or assembly when the user gives a vague concept or asks for a complete video. By default, do the planning directly as agent work: produce a structured brief, global creative direction, narrative/emotional arc, audio arc, script/narration, shot list, timing map, asset requirements, and reviewable storyboard before spending time on downloads or generation. + +For any non-trivial video, read [`references/story-structure-audio.md`](references/story-structure-audio.md) and save `creative_direction.md` before final assembly. This is mandatory for trailers, sports/music montages, film commentary, found-footage edits, product launch videos, and any output where flat random clips or boring music would fail the brief. + +Hard gate: do not begin final assembly for a non-trivial video until `creative_direction.md` exists. `plan.md` is not a substitute. The file must include target duration or requested duration range, output language, a shot-role table with time, beat, source/shot role, audio event, caption/title role, and failure risk; reject plans where the ending/final act has no payoff, climax, reveal, useful recap, or deliberate unresolved hook appropriate to the genre. Language rule: if the user specifies an output language, use it for all agent-authored viewer-facing content; otherwise use the language the user is using in the conversation. 
+ +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| Agent-native planning | agent-native | none | free | high | yes | +| `storyboard-creation` skill | agent-skill | installed skill | free | high | yes | +| `remotion-best-practices` skill | agent-skill | installed skill | free | high for code-driven motion video | yes | + +Selection: + +- Start with agent-native planning for normal scripts, shot lists, timing maps, and asset plans. +- Include `creative_direction.md` with the one-sentence promise, story arc, emotional curve, audio arc, cut-density curve, visual motif, source roles, and no-flatness guardrails. +- Use `storyboard-creation` only when you need explicit storyboard-panel conventions, camera-angle grammar, continuity checks, or animatic planning. +- Use `remotion-best-practices` only when the storyboard will be implemented as Remotion/React motion-video code. + +### `video.search` — discover candidate internet footage + +Use this before `video.download` when the user asks for found footage, B-roll, public-domain clips, stock footage, YouTube/Bilibili material, or named movie/TV/game/anime moments. Prefer free/open sources first and record source URL, license, creator, and attribution requirement before editing. + +For found-footage deliverables or platform-origin claims, read [`references/source-triage.md`](references/source-triage.md) and classify each candidate as direct platform source, verified platform-origin transport, or weak mirror. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| Web search + source filters | web-search | online search access | free | good | no | + +Search discipline: + +- For reusable/commercial-safe B-roll, start with Wikimedia Commons, YouTube Creative Commons, or other browser-searchable stock/public-domain pages and keep attribution metadata beside every downloaded file. 
+- For Bilibili/YouTube reference or fan-edit workflows, search specific scene names rather than generic terms. Add quality modifiers such as `1080p`, `4K`, `HD`, `BD`, `蓝光`, or `高清`. +- For Bilibili, use targeted web search such as `site:bilibili.com "Game of Thrones" S3E09 BV`; standalone `/video/BV...` uploads are usually easier to process than geo-restricted bangumi URLs. +- Do not treat downloadability as permission. If license or user authorization is unclear, ask before using the footage in a deliverable. + +### `video.download` — download/import web video into the workspace + +Use this after `video.search` identifies candidate URLs, or directly when the user gives URLs. Keep all raw downloads in one `sources/` directory, save `sources.json` with URL/license/creator/provenance, and normalize filenames before downstream editing. + +For internet footage, `sources.json` must record the platform URL, transport URL if different, command, cookie-file path if used, local file, probe summary, selected ranges, source role, license/rights notes, and quality caveats. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `yt-dlp` | public-cli | `yt-dlp` + `ffmpeg` | free | high | no | +| `you-get` | public-cli | `you-get` bin | free | good | no | +| `lux` | public-cli | `lux` + `ffmpeg` | free | good | no | +| `BBDown` | public-cli | `BBDown` + `ffmpeg` | free | high for Bilibili | no | + +Operational notes: + +- Prefer `yt-dlp -f "bestvideo[height>=1080]+bestaudio/best" --merge-output-format mp4` for YouTube/Bilibili URLs when quality matters. Use cookies only when the user has authorized access to the content. +- For Bilibili audio-only extraction, avoid `yt-dlp -x --audio-format mp3`; download the raw m4a and convert with `ffmpeg`, then verify volume before using it. +- Use `BBDown` when Bilibili-specific metadata, subtitles, danmaku, playlists, or high-quality member streams are central to the task. 
+ +### `music.search` — discover existing songs, BGM, or clean audio sources + +Use this before `music.download` when the user wants an existing song, soundtrack cue, royalty-free track, platform audio, or a specific cover/version. Keep the search about the requested music, not just any available audio. Record title, artist, platform URL, uploader, source type, version notes, rights/licensing, and attribution in `music_sources.json`. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| Web search + source filters | web-search | online search access | free | good | no | +| `yt-dlp` search extractors | public-cli | `yt-dlp` | free | good | no | +| `spotdl` metadata/search | public-cli | `spotdl` | free | good for Spotify-linked songs | no | + +Search discipline: + +- For clean audio, look for the artist's official upload, label upload, official audio/MV, verified lyric video, or a standalone audio upload with no extra descriptors. +- If the user asks for a specific version such as `女生版`, duet, piano, acoustic, live, DJ, karaoke, or instrumental, verify the title/uploader/metadata names that exact version before committing. +- Reject candidate titles that imply the wrong source: `remix`, `cover`, `fan edit`, `AMV`, `MAD`, `mashup`, compilation, trailer mix, or unrelated soundtrack/OST unless the user explicitly asked for that variant. +- For tie-in songs and promo tracks, assume dialogue/voiceover/SFX bleed is possible. Search alternatives with `{artist} {song} 纯音乐`, `{song} 无对白`, `{song} 歌词版`, `official audio`, or `lyric video` before asking the user to accept a risky source. +- Do not treat downloadability as permission. If rights, license, or user authorization is unclear, ask before using the music in a deliverable. + +### `music.download` — download/import existing music into the workspace + +Use this after `music.search` identifies a candidate or directly when the user supplies a music URL/local file. 
Keep raw downloads in `sources/music/`, save `music_sources.json`, and create a normalized working file for editing. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `yt-dlp` audio download | public-cli | `yt-dlp` + `ffmpeg` | free | high | no | +| `spotdl` | public-cli | `spotdl` + `ffmpeg` | free | good for Spotify-linked metadata | no | +| `scdl` | public-cli | `scdl` | free | good for SoundCloud | no | +| `bandcamp-dl` | public-cli | `bandcamp-dl` | free | good for Bandcamp | no | +| local file import + `ffmpeg` | native | `ffmpeg` | free | high | yes | + +Operational notes: + +- Use only music the user is authorized to use or that is clearly licensed for the deliverable. Do not bypass DRM, paywalls, or access controls. +- For general audio URLs, download source audio first, then convert explicitly: + +```bash +yt-dlp -f "bestaudio[ext=m4a]/bestaudio" -o "sources/music/audio_raw.%(ext)s" "URL" +ffmpeg -i sources/music/audio_raw.m4a -vn -c:a libmp3lame -q:a 0 sources/music/audio.mp3 +``` + +- For Bilibili audio-only downloads, do **not** use `yt-dlp -x --audio-format mp3`; it can produce a valid-looking but nearly silent MP3. Download the raw m4a, convert with `ffmpeg`, then verify volume: + +```bash +yt-dlp -f 30280 -o "sources/music/audio_raw.%(ext)s" "BILIBILI_URL" +ffmpeg -i sources/music/audio_raw.m4a -c:a libmp3lame -q:a 0 sources/music/audio.mp3 +ffmpeg -i sources/music/audio.mp3 -af volumedetect -f null - 2>&1 | grep mean_volume +``` + +Reject files with mean volume below roughly `-40dB` unless silence is expected. + +- Sample-listen at least three points before editing, especially for promo/OST/tie-in songs: + +```bash +ffplay -ss 30 -t 5 -autoexit sources/music/audio.mp3 +ffplay -ss 90 -t 5 -autoexit sources/music/audio.mp3 +ffplay -ss 150 -t 5 -autoexit sources/music/audio.mp3 +``` + +- Normalize or loudness-match only after verifying the source is the right song/version and has no dialogue/SFX bleed. 
+ +### `visual.capture` — record screen / webcam / window + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-openscreen` | harness-cli | harness installed | free | high | yes | +| `cli-anything-obs-studio` | harness-cli | OBS installed | free | high | yes | +| `ffmpeg -f x11grab` / `avfoundation` | native | `ffmpeg` | free | high | yes | +| `screencapture` | native | macOS | free | high | yes | +| `mss` / `pyautogui` + `cv2` | python | pkgs | free | good | yes | + +### `visual.generate` — produce a video clip from prompt/reference + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `generate-veo-video` | public-cli | `generate-veo` bin + Google creds | metered | high | no | +| `jimeng` | public-cli | `dreamina` bin + Dreamina login | metered | high | no | +| Runway Gen-4 | api | `RUNWAY_API_KEY` | paid | sota | no | +| Kling | api | `KLING_API_KEY` | paid | high | no | +| Pika | api | `PIKA_API_KEY` | paid | good | no | +| Seedance | api | `SEEDANCE_API_KEY` | paid | sota | no | + +### `audio.capture` — record and clean audio tracks + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-audacity` | harness-cli | Audacity installed | free | high | yes | +| `sox` / `ffmpeg` | native | binary | free | high | yes | +| `pydub` / `soundfile` / `librosa` / `noisereduce` | python | pkgs | free | good | yes | + +### `audio.synthesize` — text-to-speech / voice + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `minimax-cli` | public-cli | bin + MiniMax key | metered | high | no | +| `elevenlabs` | public-cli | bin + `ELEVENLABS_API_KEY` | paid | sota | no | +| OpenAI TTS | api | `OPENAI_API_KEY` | metered | high | no | +| Google Cloud TTS | api | `GOOGLE_CLOUD_PROJECT` | metered | high | no | +| `edge-tts` | python | pkg | free | good | no | + +### `music.generate` — generated music / BGM + +| 
Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `suno` | public-cli | bin + Suno account | metered | sota | no | +| `minimax-cli` | public-cli | bin + MiniMax key | metered | high | no | +| Udio | api | `UDIO_API_KEY` | paid | sota | no | + +Use `music.search` + `music.download` instead when the user asks for an existing song, official upload, platform audio, soundtrack cue, royalty-free track, or a specific cover/version. + +Music and SFX must follow the story/audio arc in `creative_direction.md`. For polished 60+ second videos, choose a real main music strategy first: either AI-generated music from a music provider, downloaded relevant/authorized music via `music.search` + `music.download`, or strong source ambience when the genre is documentary/ambient. Avoid one flat loop from start to finish; plan section changes such as intro, buildup, drop, dip, final lift, source-audio reveal, or final resolve. + +### `sound.design` — hits, risers, score sections, mix dynamics, and final audio arc + +Use this when a video needs trailer hits, whooshes, risers, drones, heartbeat gaps, sub drops, crowd/source accents, or locally generated score elements. Do not hide sound design inside `music.generate`; a music bed is not a designed mix. 
+ +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| Agent-native sound plan | agent-native | `references/sound-design.md` | free | high | yes | +| `ffmpeg` / `sox` procedural stems | native | binary | free | good | yes | +| `pydub` / `numpy` procedural stems | python | pkgs | free | good | yes | +| Generated music provider | public-cli/api | chosen `music.generate` provider | metered/paid | high | no | +| Downloaded/authorized music | public-cli/skill | `music.search` + `music.download` or `music-downloader` skill | varies | high when relevant | no | + +Deliverables for polished edits: `sound_design.md` with stems, cue times, story function, ducking notes, and section loudness targets; separate WAV stems when generated locally; and per-section loudness checks. The ending/final act should have intentional audio shape such as escalation, silence/hold, hit/drop, source-audio reveal, or resolve when the genre calls for it. Read [`references/sound-design.md`](references/sound-design.md) for sports, commentary, trailer, and final-act patterns. + +Source-audio gate: before mixing downloaded/captured clip audio with new music or narration, classify every used range as `silent_or_mute`, `ambience_keep`, `dialogue_keep`, `music_only`, `mixed_music_speech`, or `needs_separation`. Keep one foreground voice and one intentional music bed at a time. If source speech/music overlaps new narration/music, mute, duck, make the source foreground, run separation, or reject the range; do not hide the conflict behind "source texture." + +Procedural-audio gate: locally generated audio is acceptable for short SFX, UI ticks, impacts, pulses, and risers, but it must not become the default main music bed for polished 60+ second videos. Prefer AI-generated music or downloaded relevant/authorized music for the main bed. Locally generated noise, risers, and whooshes must be filtered, enveloped, gain-staged, and sample-reviewed. 
Do not use raw Gaussian/full-band noise as a music bed or repeated transition effect; hiss/sizzle in the promoted final is a critical issue even if `silencedetect` looks normal. + +### `media.analyze` — segment, label, OCR, and search footage + +Use this after download/capture and before edit planning when there are many clips or when the edit depends on finding specific shots. Output should be a scene library with time ranges, keyframes, visible text, people/objects/actions where available, usability notes, and searchable tags. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| PySceneDetect `scenedetect` | public-cli | `scenedetect` + `ffmpeg` | free | high | yes | +| Google Cloud Video Intelligence | api | GCP creds | metered | sota | no | +| TwelveLabs video search/index | api | `TWELVELABS_API_KEY` + `twelvelabs` pkg | metered | sota | no | +| PaddleOCR on sampled keyframes | public-cli/python | `paddleocr` pkg or bin | free | good overall; high for visible text | yes | + +Default path: + +- Use PySceneDetect for general local cut detection and keyframe extraction. +- Use PaddleOCR only on sampled keyframes or subtitle regions; it complements scene detection rather than replacing it. +- Use Google Video Intelligence or TwelveLabs when you need high-quality object/person/action/text labels or semantic search over a larger footage library and the user accepts cloud processing/cost. + +Found-footage gate: + +- Build `scene_library.json` before cutting, with source file, start/end, resolution, visual description, shot role, motion level, faces/action/objects, source text/watermarks/subtitles, quality notes, and rejection reason when skipped. +- Create contact sheets for each source and selected ranges. Scan for countdown cards, credits, hardcoded captions, watermarks, large ranking numbers, and source title cards. +- Do not use a source just because it downloaded. 
Reject or crop/mask ranges that are low-resolution, static, duplicated, credits/title-card-heavy, subtitle-dominated, or off-theme. Use [`references/source-triage.md`](references/source-triage.md) for the rejection checklist. + +### `text.transcribe` — speech → text / subtitles + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-videocaptioner` | harness-cli | harness installed | free | high | yes | +| `openai-whisper` | python | pkg + model download | free | high | yes | +| `stable-ts` / `faster-whisper` | python | pkg + model download | free | high | yes | +| AssemblyAI | api | `ASSEMBLYAI_API_KEY` | paid | sota | no | +| Deepgram | api | `DEEPGRAM_API_KEY` | paid | sota | no | +| Google Speech-to-Text | api | GCP creds | metered | high | no | + +Local ASR notes: + +- Preflight package checks do not prove Whisper model weights are already cached or that runtime will fit the task. +- Prefer small/base CPU models for quick local drafts; use larger models only when the user accepts the time/resource tradeoff or the machine is known to handle it. +- Use paid/cloud ASR when long recordings, diarization, timestamps, or quality requirements make local model setup a poor fit. +- Do not use Whisper `.en` models unless the user explicitly says the audio is English; `.en` models are English-only, so they mis-transcribe non-English speech or force it into unreliable English text instead of transcribing the source language. + +### `text.caption` — design, time, render, and investigate visible captions + +Use this after `text.transcribe` or agent-written script/narration timing, and before `composite.overlay` / `package.encode`. This is for viewer-facing subtitles, creator captions, trailer title hits, lyrics/karaoke, lower-third text, and other timed text that must look high-end. It is not just "burn SRT at the end."
+ +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| Captions reference module | agent-native | `references/captions.md` | free | high | yes | +| ASS + `ffmpeg subtitles` | native | `ffmpeg` + fonts | free | high | yes | +| HyperFrames captions workflow | agent-skill | `npx skills add heygen-com/hyperframes --skill hyperframes`; Node.js + `ffmpeg` | free | high for kinetic/digital captions | yes | +| `pysubs2` ASS authoring | python | `pysubs2` pkg + `ffmpeg` | free | high | yes | +| MoviePy/Pillow transparent overlays | python | `moviepy` + `PIL` | free | good when custom layout is required | yes | + +Caption discipline: + +- Read [`references/captions.md`](references/captions.md) for any polished caption/subtitle/lyric/lower-third work. It defines the lifecycle, genre presets, typography, safe-zone rules, and caption doctor review. +- Keep transcription (`text.transcribe`) separate from caption design (`text.caption`). A raw SRT is source material, not a finished caption package. +- Keep caption roles separate: narration subtitle, story-beat label, chapter/title card, source translation, and lyric/karaoke. Do not turn every story beat into a narration subtitle. +- Produce `captions.source.json`, `captions_style.md`, `captions.ass` or equivalent render source, caption-heavy preview frames/contact sheet, and `captions_qc.md` for non-trivial videos. +- Prefer ASS + `ffmpeg` for deterministic subtitle burn-in on edited footage. Use the installed HyperFrames skill when captions are part of a mandatory HyperFrames workflow: digital product/site/app launch videos, UI-heavy presentations, HTML/CSS/GSAP motion compositions, karaoke, or audio-reactive typography. +- If captions are generated after an NLE master, burn them onto the exact final master and regenerate review frames from the exact promoted final path. 
+- Ban persistent debug-like widgets unless explicitly part of the design, such as `MISSION SUBTITLE`, `MISSION LOG`, or fixed label tags that appear on every caption. +- For voice captions, sync to voice/audio timing, not shot timing. If a narration track exists, derive subtitles from the narration transcript or ASR/forced alignment; manually timed summary captions are title cards, not voice subtitles. Verify narration duration versus voice-caption coverage. +- Honor the output-language rule across narration, subtitles, title cards, callouts, UI labels created by the agent, and CTA text. Source-language subtitles may be translated or avoided, but do not let source or instruction language leak into agent-authored viewer-facing text. +- For ASS burn-in, set `PlayResX`/`PlayResY` to the actual final render resolution or prove the scaling keeps text readable. A 1080p ASS design burned into 720p without resizing is a failure. +- If source footage has hardcoded captions, broadcast tickers, watermarks, or lower thirds, choose a safe zone, crop/mask, or replace the range. Do not stack authored subtitles over source subtitles. +- Fail the render if captions are clipped, stale, off-sync, too small, low-contrast, covering faces/action/source subtitles, missing CJK glyphs, visually mismatched to the genre, or if the video only makes sense by reading labels. + +### `composite.assemble` — timeline, cuts, transitions, export + +For non-trivial videos, do not assemble a final timeline until `script.storyboard` has produced `creative_direction.md`. Timeline order should follow the story/audio arc and source roles from [`references/story-structure-audio.md`](references/story-structure-audio.md), not source-file order or arbitrary clip variety. 
+ +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-kdenlive` | harness-cli | Kdenlive installed | free | high | yes | +| `cli-anything-shotcut` | harness-cli | Shotcut installed | free | high | yes | +| `moviepy` | python | pkg + `ffmpeg` | free | good | yes | +| `ffmpeg-python` | python | pkg + `ffmpeg` | free | high | yes | +| `ffmpeg concat/filter_complex` | native | `ffmpeg` | free | high | yes | +| HyperFrames skill/CLI | agent-skill | installed `hyperframes` skill; Node.js + `ffmpeg` | free | high for digital/UI launch videos | yes | + +HyperFrames hard gate: for digital product launches, website/product-page videos, SaaS/app demos, UI-heavy product presentations, animated feature reels, HTML/CSS/GSAP-driven motion videos, and product videos where interface motion is the main storytelling surface, load and use the installed `hyperframes` skill as the primary `composite.assemble` authoring workflow. A "HyperFrames-style" MoviePy/Pillow/ffmpeg/browser imitation is not an acceptable substitute when this gate applies. + +When the HyperFrames gate applies: + +- Read the installed HyperFrames skill before implementation and follow its workflow. +- Produce a real HyperFrames project/render artifact, not only a static HTML reference. +- Prove the video topic and value proposition in the first 5-8 seconds, then follow a product arc: user problem, real UI action, feature reveal, proof, outcome, and CTA/payoff. +- Use `ffmpeg`, MoviePy, or NLE tools only for source preparation, final muxing, caption burn-in when explicitly needed, or doctor investigation; they cannot replace HyperFrames as the main authoring surface. +- If HyperFrames cannot run, stop and report the blocker or ask the user to change the requirement. Do not silently fall back. + +Use `remotion-best-practices` instead only when the user explicitly requires Remotion/React as the implementation surface. 
+ +For Shotcut/Kdenlive harness workflows, use the provider-boundary pattern in [`references/nle-shotcut-kdenlive.md`](references/nle-shotcut-kdenlive.md): `ffmpeg` for stable mezzanine clips, crops, speed ramps, and text-card segments when needed; the NLE harness for project/timeline/tracks/transitions/render; then `ffmpeg` for ASS burn-in if captions are post-NLE, fps/SAR/DAR/profile/color normalization, final muxing, and doctor investigation. + +Assembly discipline: + +- Every selected shot needs a job: setup, reveal, escalation, contrast, proof, impact, or payoff. +- For most edited pieces, introduce meaningful changes in tension, density, audio texture, title system, setting, character focus, or edit pattern often enough that the structure does not feel accidental or flat. Slow/ambient/real-time genres may justify longer holds. +- Chapter cards and title hits must advance the story beat; they cannot be decorative separators for random clips. +- Transitions and motion graphics need a story job. Repeated wipes, random kinetic text, and generic animations are failures when the viewer cannot tell the topic or why the transition happened. +- Do not assume a clean mezzanine segment contains captions, chapter cards, or later overlay stages. Choose and document one caption stage before render. +- For HyperFrames-gated genres, fail the assembly if the final render was produced by an imitation workflow instead of the installed HyperFrames skill. +- For data, UI, charts, maps, captions, and explainers, keep the frame stable unless camera motion has a documented visual purpose. Global sinusoidal crop drift, jitter, shake, or fake handheld motion over readable data/text is a critical issue requiring revision or a documented reason. +- After NLE render, check actual duration versus intended timeline duration, stream fps versus project fps, SAR/DAR, partial MP4 or missing moov, video coverage through the audio tail, and displayed content speed versus audio. 
+- If a render is technically valid but feels like flat montage, revise `creative_direction.md` and rebuild the timeline instead of adding more filters. + +### `composite.overlay` — composite captions, watermark, picture-in-picture + +Use this to apply already-designed caption/subtitle assets, watermarks, or picture-in-picture layers. For user-visible subtitles/captions, run `text.caption` first so the overlay step receives a deliberate ASS/HTML/PNG/NLE caption package instead of an ugly default SRT. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `ffmpeg -vf subtitles=...` | native | `ffmpeg` | free | high | yes | +| `moviepy` (CompositeVideoClip) | python | pkg | free | good | yes | + +### `package.thumbnail` — thumbnail / social card + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `cli-anything-gimp` / `krita` / `inkscape` | harness-cli | installed | free | high | yes | +| `Pillow` | python | pkg | free | good | yes | +| `cairosvg` / `html2image` | python | pkg | free | good | yes | +| OpenAI GPT-Image-1 | api | `OPENAI_API_KEY` | metered | sota | no | +| Google Nano Banana | api | GCP creds | metered | high | no | +| Ideogram | api | `IDEOGRAM_API_KEY` | metered | high | no | +| Stability AI | api | `STABILITY_API_KEY` | metered | high | no | +| `ffmpeg -ss ... -frames:v 1` | native | `ffmpeg` | free | basic | yes | +| `convert` / `magick` | native | ImageMagick | free | good | yes | + +### `package.encode` — final mux, codec, container + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `ffmpeg` | native | binary | free | sota | yes | + +### `quality.review` — technical and editorial investigation before delivery + +Run this after every render and before presenting a final file. 
The goal is not a binary test suite; it is an evidence-gathering pass that helps the agent inspect the exact file, understand suspicious signals, and make a contextual editorial decision. + +| Provider | Kind | Requires | Cost | Quality | Offline | +|---|---|---|---|---|---| +| `ffmpeg` / `ffprobe` investigation filters | native | `ffmpeg` + `ffprobe` | free | high | yes | +| MediaInfo CLI | native | `mediainfo` | free | high | yes | +| `ffmpeg-quality-metrics` / VMAF | public-cli | `ffmpeg-quality-metrics` + `ffmpeg` | free | high with reference | yes | +| Video doctor helper | bundled-script | `scripts/video_doctor.py` + `ffmpeg`/`ffprobe` | free | high for investigation | yes | + +Investigation checks: + +- Use `scripts/video_doctor.py` to produce probe, caption, source, frame, tail, audio, and lint reports. Doctor reports are evidence and investigation signals, not pass/fail verdicts. +- Compare final duration against the user brief and `creative_direction.md`; if it differs, inspect whether the plan or final render is wrong before promotion. +- Verify the final viewer-facing language against the output-language rule: narration, authored captions, title cards, charts, labels, and CTAs must use the requested language, or the conversation language when no language was specified. +- Run the relevant FFmpeg detection filters on the final file: `blackdetect` (black frames), `silencedetect` (silent gaps), `freezedetect` (frozen frames), `cropdetect` (letterboxing or wrong crops), and `ebur128` / `loudnorm` (loudness measurement). +- Extract first/mid/last frames, 2s contact sheets, subtitle-heavy frames, and final-10s tail sheets; inspect visually or via OCR for text overflow, watermarks, black frames, wrong crops, frozen tails, wrong-speed tails, and audio-only tails. +- For narration videos, run the caption doctor against the narration audio and media dimensions; inspect signals about caption role, voice alignment, planned language coverage, captions extending beyond narration, mismatched ASS PlayRes, small effective font size, generic styling, or source-subtitle collision risk.
+- For generated or heavily processed audio, run `scripts/video_doctor.py audio` with `--scan-path` on the task directory, listen to exported snippets, and review high-frequency/low-frequency/loudness signals. `silencedetect` and `volumedetect` passing does not mean the mix is listenable. +- When source audio is used, also pass `--sources-manifest`, `--music-manifest`, and `--sound-design` to the audio doctor so it can flag missing audio roles and export snippets around source-audio/new-music/new-narration overlap windows. +- For data/UI/chart sections, inspect short frame strips around animation segments for unintended global shake or drifting crops. +- Use VMAF/SSIM/PSNR only when a reference video exists; they are not general no-reference quality checks. +- For non-trivial videos, run the story/audio review rubric in [`references/story-structure-audio.md`](references/story-structure-audio.md): premise clarity, escalation, turning point, high point, source roles, music sections, audio events, and final payoff. +- For polished videos, run the art-direction gates in [`references/art-direction-review.md`](references/art-direction-review.md): aesthetics, captions-muted readability, final-act payoff/hook/resolve, justified card time, sparse labels, synchronized audio hits, and intentional shot repetition. +- For HyperFrames or other linter-based motion workflows, run `scripts/video_doctor.py lint` on lint logs and record whether warnings were fixed, accepted with reason, or left as blockers. +- For every promoted final, follow [`references/render-doctor.md`](references/render-doctor.md): hash the candidate/default, regenerate frames from the promoted path, update reports, and scan reports for stale paths or superseded source names. + +The bundled helper is `scripts/video_doctor.py`. It has subcommands for `probe`, `captions`, `sources`, `frames`, `tail`, `audio`, and `lint`; agents must read the generated signals and then inspect the referenced evidence. 
+ +### `publish.upload` — deliver to a platform + +Currently a **known gap** (see below). Agents should surface this to the user. + +--- + +## Recipes + +Recipes declare *which capabilities a workflow needs* — not the order. Choose providers per capability from the task constraints and preflight facts. + +- **`ai-short`** — fully-generative social short. + Uses: `script.storyboard`, `visual.generate`, `audio.synthesize`, `music.generate` or `music.search` / `music.download`, `sound.design` for trailer/social impact edits, `text.caption` when captions/title hits are part of the brief, `composite.assemble`, `composite.overlay`, `package.thumbnail`, `quality.review`, `package.encode`. + +- **`screencast-tutorial`** — walkthrough with narration + subs. + Uses: `script.storyboard`, `visual.capture`, `audio.capture`, `text.transcribe`, `text.caption`, `composite.overlay`, `package.thumbnail`, `quality.review`, `package.encode`. + +- **`talking-head-explainer`** — webcam + b-roll + captions. + Uses: `script.storyboard`, `visual.capture`, `video.search` / `video.download` or `visual.generate` (b-roll), `music.search` / `music.download` or `music.generate`, `sound.design` when chapter changes or emphasis hits matter, `media.analyze`, `audio.capture`, `text.transcribe`, `text.caption`, `composite.assemble`, `composite.overlay`, `package.thumbnail`, `quality.review`, `package.encode`. + +- **`podcast-to-video`** — audio-first, visualize + caption. + Uses: `script.storyboard`, `audio.capture`, `text.transcribe`, `text.caption`, `package.thumbnail`, `composite.overlay`, `composite.assemble`, `quality.review`, `package.encode`. + +- **`found-footage-montage`** — source internet clips, curate, then edit. 
+ Uses: `script.storyboard`, `video.search`, `video.download`, `media.analyze`, `sound.design`, `text.transcribe` (optional), `text.caption` when captions/title hits are part of the edit, `composite.assemble`, `composite.overlay`, `package.thumbnail`, `quality.review`, `package.encode`. + +- **`existing-song-music-video`** — build an edit around a user-specified or discovered song. + Uses: `music.search`, `music.download`, `script.storyboard`, `video.search`, `video.download`, `media.analyze`, `sound.design` for hits/source accents that do not fight the song, `text.caption` (optional lyrics/karaoke), `composite.assemble`, `composite.overlay`, `package.thumbnail`, `quality.review`, `package.encode`. + +- **`digital-product-launch`** — product/site-driven launch video with animated UI, typography, and motion graphics. + Uses: `script.storyboard`, `visual.capture`, `audio.synthesize` (optional), `music.generate` or `music.search` / `music.download`, `sound.design` for launches with cue hits or section shifts, `text.caption` for brand-safe kinetic captions/title hits, `composite.assemble` via mandatory installed HyperFrames skill, `composite.overlay`, `package.thumbnail`, `package.encode`, `quality.review`. + +--- + +## Known gaps + +- **`publish.upload`** — no first-party or public CLI for YouTube/TikTok/Bilibili/Instagram yet. *Workaround:* instruct the user to upload manually via the web UI, or escalate to a custom script using each platform's official upload API (for example, the YouTube Data API v3) with an OAuth token the user supplies. +- **`visual.generate` — top-tier "cinematic"** — available only via paid APIs (Runway, Kling, Seedance); local GPU/weights deployment is intentionally not part of this matrix. +- **`rights.provenance`** — no automated license/TOS/provenance verifier. *Workaround:* save `sources.json` / `music_sources.json` with URL, creator, license, intended use, attribution text, transport evidence, and quality caveats; ask the user before using unclear or restricted media.
Use [`references/source-triage.md`](references/source-triage.md) for evidence levels. +- **`agent-skill.preflight`** — external agent skills may appear in this matrix before they are installed locally. *Workaround:* use the source/install table below and load the external `SKILL.md` only when that workflow is actually needed. + +--- + +## Reference modules, external skills, and tool sources + +Consult these only when the task needs the focused workflow; keep this matrix as the router and load the reference, external skill, or tool only if its workflow matches. + +| Reference/tool | Use when | Source or install | +|---|---|---| +| `advanced-video-downloader` | User supplies YouTube/Bilibili/TikTok/etc. URLs and needs download, playlist handling, music/audio extraction, cookies, or transcription | `npx skills add https://github.com/jst-well-dan/skill-box --skill advanced-video-downloader` | +| `music-downloader` | Need to find/download real music, BGM, soundtrack cues, platform audio, or authorized music rather than faking a full music bed procedurally | `npx skills add https://github.com/nymbo/skills --skill music-downloader` | +| `references/story-structure-audio.md` | Non-trivial video needs a global arc, internal logic, source roles, story ups/downs, music/audio sections, or flat-montage prevention | Local reference module: [`references/story-structure-audio.md`](references/story-structure-audio.md) | +| `references/captions.md` | Any polished visible captions, subtitles, lyrics, karaoke, lower thirds, trailer title hits, or caption investigation | Local reference module: [`references/captions.md`](references/captions.md) | +| `references/source-triage.md` | Found-footage source selection, platform-origin evidence, rights/provenance fields, source rejection, and contact-sheet requirements | Local reference module: [`references/source-triage.md`](references/source-triage.md) | +| `references/nle-shotcut-kdenlive.md` | Shotcut/Kdenlive timeline work, provider 
boundaries, mezzanine conventions, render resilience, and known NLE failure modes | Local reference module: [`references/nle-shotcut-kdenlive.md`](references/nle-shotcut-kdenlive.md) | +| `references/sound-design.md` | Trailer hits, risers, sports accents, commentary mix changes, final-act audio shape, and section loudness review | Local reference module: [`references/sound-design.md`](references/sound-design.md) | +| `references/art-direction-review.md` | Genre-specific naive-output traps, contact-sheet review, captions-muted review, and art gates before promotion | Local reference module: [`references/art-direction-review.md`](references/art-direction-review.md) | +| `references/render-doctor.md` | Final-path doctor workflow, probe/frame/tail/caption/source investigation, promotion discipline, and stale-report checks | Local reference module: [`references/render-doctor.md`](references/render-doctor.md) | +| `scripts/video_doctor.py` | Non-binary investigation helper for media facts, review frames, tail signals, sources, captions, audio listenability signals, and procedural-audio evidence | Local script: [`scripts/video_doctor.py`](scripts/video_doctor.py) | +| `storyboard-creation` | Need shot grammar, camera angles, storyboard panels, continuity, or animatic planning | `pnpm dlx add-skill https://github.com/inference-sh/skills/tree/HEAD/guides/video/storyboard-creation` | +| `remotion-best-practices` | Need to implement the storyboard as Remotion code | `npx skills add https://github.com/remotion-dev/skills --skill remotion-best-practices`; SKILL.md: | +| `hyperframes` | Mandatory for website/product-page videos, digital product launches, SaaS/app demos, UI-heavy product presentations, animated feature reels, and whole-video HTML/CSS/GSAP motion compositions | Installed local skill. If missing in a future environment, install it with `npx skills add heygen-com/hyperframes --skill hyperframes`; do not substitute another renderer when this skill is mandatory. 
Captions reference: | +| `ffmpeg-quality-metrics` | Need VMAF/SSIM/PSNR against a reference video | `pipx install ffmpeg-quality-metrics`; source: | + +--- + +## Agent guidance + +- **Run the preflight block once**, then consult the cached result when picking providers. +- **For non-trivial videos, start with `creative_direction.md`.** Use the story/audio reference module to define the whole arc before acquisition and assembly: premise, emotional curve, audio arc, cut-density curve, source roles, turning point, climax, and payoff. +- **Language rule:** if the user specifies a video language, use it. Otherwise use the conversation language for all agent-authored narration, captions, titles, labels, and CTAs; source-language text must be translated, avoided, or explained when it matters. +- **For internet footage, run `video.search` before `video.download`** unless the user already supplied URLs. Keep provenance metadata with every source file. +- **For found-footage edits, build `scene_library.json` before cutting.** Use source triage and contact sheets to reject weak, static, card-heavy, subtitle-dominated, or off-theme footage. +- **For existing music, run `music.search` before `music.download`** unless the user already supplied a URL/local file. Verify the song version and listen for dialogue/SFX bleed before beat analysis or editing. +- **For polished music beds, prefer real music.** Use AI-generated music or downloaded relevant/authorized music as the main bed unless the brief calls for ambient/source-audio-led work. Procedural audio is for SFX/accent stems, not a cheap replacement for a full soundtrack. +- **For high-energy edits, run `sound.design` as a separate pass.** A music bed alone is not a designed mix; create cue times, stems, ducking notes, and section loudness targets. +- **For visible captions, use `text.caption` as its own design pass.** Do not hand off raw SRT to `composite.overlay` and call it done. 
Use the captions reference module for genre style, grouping, safe placement, font/glyph checks, hard exits, and caption-heavy preview frames. +- **For narrated work, captions must follow the voice.** Generate or align subtitles from the final narration/audio, then run `scripts/video_doctor.py captions` against the exact final dimensions and narration file. Read the doctor signals and inspect frames/audio before deciding what to revise. Summary captions belong in a separate title/callout style. +- **For subtitle/caption localization, translate directly as agent work** unless the user explicitly requires a specific translation service, offline translation, or human/legal review. Preserve timestamps, maintain names/terms consistently, and verify CJK glyph rendering in overlays. +- **For non-trivial work, do `script.storyboard` before acquisition/generation**, usually as agent-native planning. Load an external storyboard skill only when its focused format is worth the extra install/context. Do not replace the global story/audio plan with a list of clips. +- **For digital product/site/app launch videos, use HyperFrames under `composite.assemble`.** When the genre is a UI-heavy product launch, SaaS/app demo, product presentation, animated feature reel, or whole-video HTML/CSS/GSAP composition, HyperFrames is mandatory. Do not use MoviePy/Pillow/ffmpeg, a static HTML mock, or a "HyperFrames-style" imitation as the primary authoring surface. +- **Using the right tool is not enough.** The final must still satisfy brief compliance, topic clarity, duration, story arc, caption sync, audio listenability, and stable-motion review. +- **For final audio, listen and run the audio doctor.** Use `scripts/video_doctor.py audio --scan-path <workspace-dir>` for polished edits, then inspect the snippets. A technically valid track can still fail if it sounds like hiss, test tones, synthetic pulse wallpaper, or repeated boom hits. 
+- **Use Remotion guidance only for code-driven motion graphics.** Do not install it for ordinary cuts, overlays, or NLE work. +- **Choose Shotcut, Kdenlive, MoviePy, or raw `ffmpeg` by edit complexity and availability.** NLE harnesses fit timeline-heavy edits; MoviePy/ffmpeg fit deterministic programmatic cuts and overlays. +- **When using Shotcut/Kdenlive, keep provider boundaries explicit.** Preprocess stable mezzanine/source clips with `ffmpeg`, author the timeline in the NLE, then use `ffmpeg` for post-NLE captions, muxing, normalization, and doctor evidence as needed. +- **Prefer `--json`** for harness CLI output when chaining tools. +- **Promote only verified final paths.** Probe the exact file being delivered, regenerate review frames from it, hash it, and make reports refer to that same path. +- **Doctor helpers are not verdicts.** A clean doctor report can still hide an editorial failure, and a strong doctor signal can be acceptable with context. Read the evidence and document the judgment. +- **Independent review must seek defects.** A review that only repeats ffprobe/black/freeze/silence checks is incomplete; it must cite visual/caption/audio/story risks from exact-final evidence before giving a verdict. +- **Escalate explicitly.** When a paid API would materially improve quality, use the suggest-to-user template. Do not silently burn credits. +- **Recipes ≠ order.** A recipe says what's needed; pick a sensible order for the specific task. Most videos should transcribe *after* the final cut, not before; screencasts often capture audio + video simultaneously. +- **Workspace discipline.** Keep all intermediate assets under one directory so cross-tool references stay stable. 
diff --git a/cli-hub-matrix/video-creation/references/art-direction-review.md b/cli-hub-matrix/video-creation/references/art-direction-review.md new file mode 100644 index 000000000..068c763d9 --- /dev/null +++ b/cli-hub-matrix/video-creation/references/art-direction-review.md @@ -0,0 +1,92 @@ +# Art Direction Review + +Use this when a video can be technically valid but still feel naive, flat, over-labeled, or assembled from unrelated clips. This is mandatory for high-quality trailers, sports/music montages, film commentary, found-footage edits, and final deliverables after human critique. + +## Review Assets + +Generate review frames from the exact final path: + +- First 10 seconds contact sheet. +- Every-2-seconds contact sheet. +- Dense final-act sheet. The default final-act sample can be the last 20%, but judge it as an ending/payoff sample, not a fixed rule. +- Caption-heavy sheet. +- First, middle, and last single frames. + +Then review in five passes: + +1. Normal pass: does it feel intentional without reading reports? +2. Captions-muted pass: does story and emotion still read without labels? +3. No-text mental pass: if title cards and labels vanished, would the edit still have visual cause/effect? +4. Brief-compliance pass: does the final duration, topic, tool choice, language, and genre match the user request and `creative_direction.md`? +5. Aesthetic pass (`美学&审美`): do composition, typography, color, motion rhythm, and restraint feel designed for the genre rather than default, random, or template-like? + +## Aesthetic Bar + +- Composition has hierarchy: the eye knows what to read/watch first, second, and third. +- Typography is intentional: font, size, weight, line breaks, and caption plates match the genre. +- Motion has taste: every zoom, shake, wipe, reveal, or bounce has a story or rhythm purpose. +- Color and contrast are controlled; avoid one-note palettes, muddy overlays, and default black-outline subtitles. 
+- Repetition is either a callback/motif or removed. Similar frames across a contact sheet are a warning, not proof of stability. +- A technically valid render that looks naive, generic, or confusing is a failed creative review. + +## Genre Failure Modes + +Sports hype: + +- Stock-montage feel with no athlete pressure or emotional stakes. +- Odd color grade that makes real footage look artificial. +- Source ranking/countdown cards, platform UI, or low-quality inserts that distract from the sports moment. +- Final moment lacks a signature impact. + +Film commentary: + +- Generic recap instead of an argument or interpretive arc. +- Source order dictates the video rather than the thesis. +- Opening caption/hook is truncated or too late. +- Cards replace narration structure instead of supporting it. + +Sci-fi/game/mecha trailer: + +- Too many title cards, chapter cards, or HUD/debug labels. +- No continuity of protagonist, threat, or objective. +- Reused setup/dialogue shots that feel accidental. +- Final image is not cinematic or memorable. +- Labels explain stakes that the edit never visualizes. + +Digital/product launch: + +- Decorative kinetic text hides the actual product. +- UI states are too cropped, blurry, or fast to inspect. +- Sound hits and visual changes are unsynchronized. +- The product/topic is not clear in the first 5-8 seconds. +- The edit is a sequence of generic animations instead of a product arc: problem, real UI action, feature reveal, proof, outcome, CTA. +- Repeated wipes/transitions happen without story function. + +Data/news/science explainer: + +- Global shake, crop drift, fake handheld motion, or jitter moves charts, maps, labels, or subtitles. +- Chart text is too small to read at final resolution. +- Debug-like labels remain visible, such as renderer/composition labels that are not intended for the viewer. +- Footage, charts, and narration do not explain each other. 
+ +## Art Gates Before Promotion + +Fail or revise if: + +- The final duration is outside the requested range or materially different from the plan without approval. +- The ending/final act is not a climax, payoff, useful recap, hook, or deliberate unresolved ending appropriate to the genre. +- Title cards consume more than roughly 15% of runtime unless the genre explicitly needs more. +- Labels appear so often that the output reads like a storyboard, debug overlay, or pitch deck. +- Repeated shots are not deliberate callbacks. +- Important action is hidden by captions, source subtitles, watermarks, or cards. +- Voice captions do not match the spoken narration. +- Data/UI/chart/caption sections shake or drift without a deliberate, documented reason. +- Audio has no section changes or visible hit synchronization. +- The last frame is black, throwaway, or unrelated to the promised premise. +- The independent review cannot name real weaknesses or residual risks from exact-final frames, captions, audio, or story structure. + +## Response To Review Defects + +When a human or fresh-context reviewer flags a visible defect, reproduce it with screenshots or contact sheets before editing reports. Treat "technically passes but feels naive" as a real failure: revise `creative_direction.md`, scene selection, sound design, or timeline structure instead of adding more labels and effects. + +Independent review stance: review to find defects first. A PASS is acceptable only after listing the strongest risks checked, the evidence inspected, and why those risks are tolerable for the brief. diff --git a/cli-hub-matrix/video-creation/references/captions.md b/cli-hub-matrix/video-creation/references/captions.md new file mode 100644 index 000000000..5c0d439d7 --- /dev/null +++ b/cli-hub-matrix/video-creation/references/captions.md @@ -0,0 +1,206 @@ +# Video Captions + +Use this reference when captions are part of the viewer-facing video design, not just an accessibility sidecar. 
In the video-creation matrix, it sits after `text.transcribe` or agent-written script work and before `composite.overlay`, `package.encode`, and `quality.review`. + +Default subtitle output is not acceptable for polished videos. Captions must feel designed for the video genre, synchronized to the edit, readable on the target platform, and visually integrated with the footage. + +## Workflow + +1. **Define the caption job.** Record aspect ratio, delivery platform, language(s), transcript source, whether word timings are available, and caption role: accessibility subtitles, creator captions, film commentary, trailer title hits, lyrics/karaoke, tutorial labels, or product launch typography. +2. **Build a clean timed source.** Produce `captions.source.json` with text, start, end, optional words, speaker, role, and emphasis. Use `.srt` only as an interchange format; keep JSON/ASS as the design source of truth. If a narration track exists, build narration subtitles from the narration audio/transcript or ASR/forced alignment, not from hand-timed story summaries. +3. **Select a style preset.** Pick from the genre presets below, then adapt to the footage palette, subject position, and music/edit energy. Do not use generic white text with a black stroke unless the user explicitly asks for plain subtitles. +4. **Design safe placement.** Extract representative frames for caption-heavy moments, mark face/action/logo/source-subtitle regions, and choose one or two safe zones. Captions must not cover faces, important action, hardcoded subtitles, UI controls, or source watermarks. +5. **Render with the right authoring path.** Use ASS+ffmpeg for deterministic subtitles, the installed HyperFrames skill for kinetic captions inside HyperFrames-gated digital/UI videos, NLE overlay tracks for timeline-heavy edits, or transparent PNG/MoviePy overlays only when custom layout is necessary and no mandatory authoring provider applies. +6. 
**Investigate before delivery.** Save `captions_doctor.json`, a caption review note, and preview frames/contact sheets. Use the doctor signals to decide what to inspect next; do not treat the helper as a binary verdict. + +## Transcript Rules + +- Never use Whisper `.en` models unless the user explicitly says the audio is English. `.en` models translate non-English audio into English instead of transcribing it. +- Treat prompt language and deliverable language as separate fields. If the user specifies a video language, use it for all authored subtitles, title cards, callouts, and narrator captions. If no output language is specified, use the language the user is using in the conversation. +- If the user gives a script, time captions against the final narration/audio, not against the draft text. +- When using TTS, keep the generated audio and any word/subtitle sidecar. If the TTS provider cannot emit word timings, run ASR/forced alignment on the final narration file before treating the captions as subtitles. +- Summary captions, chapter labels, and data callouts must use separate roles/styles from narration subtitles. They may support the edit, but they do not satisfy a "subtitles must match the voice" requirement. +- If translating/localizing captions, do the translation as agent work unless the user requires a specific service. Preserve timestamps, names, terms, tone, and line breaks; then review glyph rendering. +- Keep one caption group visible at a time for spoken captions. Trailer title hits and lower thirds may coexist only if they occupy distinct zones and do not compete for reading priority. +- Break groups on sentence boundaries, semantic phrases, beat hits, or pauses longer than about 150 ms. + +## Design Recipe + +Use this before writing ASS/HTML styles; do not leave default subtitle styling in a polished final. 
+ +- Pick an intentional font: `Inter`, `Aptos`, `IBM Plex Sans`, `Source Sans 3`, or a brand font for Latin; `Noto Sans CJK` / Source Han Sans for CJK. `Arial`/`DejaVu Sans` are fallback fonts, not a design choice unless documented. +- Use two to four named styles: `Narration`, `Keyword`, `Chapter`, `SourceTranslation`, `DataLabel`. Avoid one anonymous `Caption` style for everything. +- Spoken captions should be readable but not oversized: roughly 38-52 px at 720p, 48-76 px at 1080p, and larger for vertical social. Title hits can be larger. +- Prefer a subtle translucent plate, soft shadow, or tuned glow over a thick default black outline. Match the plate radius/opacity to the genre: quiet documentary plates, sharp product chips, bold social bars, cinematic thin accents. +- Group text by meaning, not by raw subtitle chunks. Aim for 3-8 words per group for kinetic/social/product captions, or one short semantic phrase per line for documentary narration. +- Add hierarchy: one accent color for names, numbers, verbs, or product terms; keep body text neutral. Do not rainbow-highlight every word. +- Place captions where the image has negative space. Recheck first/middle/last caption-heavy frames after burn-in; move, crop, or replace source ranges if captions fight faces, UI, source subtitles, or watermarks. +- Motion should have a job: reveal a keyword, land on a beat, follow a cursor/action, or mark a chapter. Random bounce/typewriter effects usually make captions look cheap. + +ASS static-caption starter patterns: + +```text +Narration: clean font, 44-58px at 1080p equivalent, BorderStyle=3 translucent back plate, Outline=0-1, Shadow=0-2, Alignment=2/8 based on safe zone. +Keyword: same family, bold, one accent color, short duration, placed near the base caption or relevant object. +Chapter: larger display weight, no paragraph text, appears only at real story turns. 
+``` + +## Deliverables + +Every captioned video should keep these files near the render: + +- `captions.source.json` — timed text source with role/emphasis metadata. +- `captions_style.md` — selected preset, font, palette, placement, animation, and deviations. +- `captions.ass` or equivalent render source; optionally `captions.srt` for accessibility/export. +- `captions_preview_frames/` or `review_frames/` — first/middle/last and caption-dense samples. +- `captions_doctor.json` — output from `scripts/video_doctor.py captions`. +- `captions_review.md` — readability, sync, overflow, safe-zone, font, style review, final resolution, narration duration when relevant, source-subtitle collision notes, and how doctor signals were interpreted. + +## Style Presets + +### Sports Hype / Music Montage + +- **Use for:** dunk reels, football/soccer hype, workout edits, esports frag montages. +- **Text density:** 1-3 words per hit; avoid paragraph subtitles. +- **Typography:** heavy condensed sans, 800-900 weight; CJK needs a bold high-legibility font such as Noto Sans CJK/Source Han Sans. +- **Look:** dark or neutral caption plate, one aggressive accent, subtle glow or shadow, no thick default outline. +- **Motion:** beat-locked slam, scale-pop, wipe, quick flash, or kinetic word replacement. Use fast exits and hard kills. +- **Placement:** lower third for readable captions; center or upper-center only for short title hits between action beats. + +### Film Commentary / Recap + +- **Use for:** movie explanation, anime recap, documentary narration, story analysis. +- **Text density:** 6-14 Chinese characters per line or 28-40 Latin characters per line; max two lines. +- **Typography:** elegant sans or restrained serif for chapter cards; normal captions should stay highly readable. +- **Look:** cinematic neutral/warm palette, soft shadow, thin accent line or small chapter label, no loud bouncing. 
+- **Motion:** gentle fade/slide for narration; stronger title cards only at chapter boundaries. +- **Placement:** bottom safe zone unless source subtitles or important faces occupy it; use upper safe zone only when necessary. + +### Sci-Fi / Game / Mecha Trailer + +- **Use for:** CG trailers, game launch edits, anime/mecha/sci-fi hype. +- **Text density:** short bilingual title hits, faction labels, mission-style captions, 2-4 word bursts. +- **Typography:** square/tech sans, condensed bold, or clean mono for data-like labels; keep letter spacing at 0 for body captions. +- **Look:** high-contrast white/near-white plus one neon accent; restrained glow, scanline, bracket, or HUD framing. +- **Motion:** glitch, scan reveal, mask wipe, chromatic nudge, or hard cut on impact. Avoid random particles and cheap typewriter spam. +- **Placement:** title hits can use center frame during low-action beats; subtitles stay bottom/side safe and never cover spectacle. + +### Product / Digital Launch + +- **Use for:** website-to-video, app demos, SaaS launches, Remotion/HyperFrames motion pieces. +- **Text density:** short product claims, feature labels, metric callouts, step captions. +- **Typography:** match `design.md` or product brand fonts. If missing, use a modern sans with consistent weights. +- **Look:** brand palette, clean contrast, precise spacing, polished cards/chips only when the product UI style supports them. +- **Motion:** layout-first kinetic type, marker sweeps, reveal masks, scroll/pointer sync, audio-reactive emphasis if it supports the beat. +- **Implementation:** HyperFrames is mandatory when the video is a product/site/app launch, UI-heavy presentation, animated feature reel, or whole-video HTML/CSS/GSAP motion composition. Use the installed `hyperframes` skill and read its captions guidance before authoring kinetic synced text; do not substitute plain HTML capture, MoviePy/Pillow, ffmpeg filters, or NLE-only captions when this gate applies. 
+ +### Tutorial / Explainer + +- **Use for:** screencasts, app walkthroughs, educational clips, code demos. +- **Text density:** clear phrases; prefer fewer captions when screen text already carries meaning. +- **Typography:** clean sans; mono only for code terms. Use exact UI labels and command names. +- **Look:** quiet, high contrast, no decorative motion that distracts from the action. +- **Motion:** quick fade/slide, pointer-aligned callouts, occasional highlight boxes. +- **Placement:** avoid covering cursor targets, terminal prompts, menus, code, or UI labels. + +### Lyrics / Karaoke + +- **Use for:** music videos, lyric edits, singalong, rhythm shorts. +- **Text density:** phrase or word-level timing; lyrics should follow musical phrasing, not sentence grammar. +- **Typography:** genre-matched display font for hooks, readable sans for verses. +- **Look:** one active-word treatment plus one base caption style. Avoid rainbow karaoke unless the user asks for it. +- **Motion:** per-word color fill, underline sweep, scale emphasis, or mask reveal. Keep sync tighter than normal speech captions. +- **Placement:** bottom or center-lower; make sure fast cuts do not leave stale lyrics from the previous phrase. + +### Vertical Social / Talking Head + +- **Use for:** shorts, Reels, TikTok-style hooks, selfie explainers. +- **Text density:** 2-5 words per group; emphasize hooks, numbers, names, and claims. +- **Typography:** bold rounded or bold grotesk, large enough for phone viewing. +- **Look:** strong foreground/background contrast, restrained pill/plate, one accent color for keywords. +- **Motion:** pop/slide per group, occasional keyword bounce; avoid constant elastic motion. +- **Placement:** lower-middle or mid-lower safe zone, but never over mouth/face. Leave platform UI margins. + +## Typography And Layout + +- Size for final pixels, not editor preview. For 16:9 1080p, spoken captions usually land around 48-76 px; hype/title hits can be 80-130 px. 
For vertical 1080x1920, spoken groups usually need 64-104 px. +- For 720p landscape, spoken captions below about 36 px are usually too small unless the delivery context is large-screen only and review frames prove readability. +- Set ASS `PlayResX` and `PlayResY` to the actual final render resolution. If you author at 1920x1080 and deliver 1280x720, resize the style values or prove the effective font size remains readable. +- Use max width: about 70-82% of landscape width, 78-88% of portrait width. Reduce width further when words scale above 1.0. +- Set explicit line height around 1.05-1.18. Do not rely on default browser or ASS line spacing. +- Use real safe margins: at least 5% from frame edges, more for vertical social platform UI. +- Verify fonts render all glyphs. CJK tofu boxes, missing punctuation, broken emoji, or fallback font jumps are critical issues to revise or justify. +- Prefer shadow, blur, backing plate, or glow tuned to the footage over a thick black outline. If using ASS outlines, keep them intentional and proportional. +- Keep body caption letter spacing at 0. Avoid negative tracking. Display title cards may use deliberate tracking only if it improves the genre look. + +## Source Subtitle Collisions + +Found footage often contains hardcoded subtitles, tickers, logos, and broadcast lower thirds. Before burn-in, review caption-heavy frames and mark occupied zones. + +- Prefer upper/side safe zones when the source already uses bottom subtitles. +- Crop, blur, or mask nonessential source subtitles only when it does not harm the footage. +- Replace ranges where source text makes authored subtitles unreadable. +- Do not stack authored narration subtitles over source-language subtitles and call the result complete. + +## Authoring Paths + +### ASS + FFmpeg + +Use when the final output is normal edited footage and captions need deterministic burn-in. 
+ +```bash +ffmpeg -i input.mp4 -vf "subtitles=captions.ass:fontsdir=fonts" -c:a copy output_captioned.mp4 +``` + +ASS is the preferred interchange for styled subtitles because it supports font, size, outline, shadow, position, and per-event timing. Keep `captions.ass` readable and style-named (`Narration`, `Keyword`, `Chapter`, `LyricActive`) instead of generating anonymous styles. + +### HyperFrames / HTML Captions + +Use when captions are part of a digital motion composition, product launch, audio-reactive typography, karaoke, or per-word kinetic text system. + +- Install/load only when the workflow needs it: `npx skills add heygen-com/hyperframes --skill hyperframes`. +- Build end-state layout first, then animate into/out of that layout. +- Use deterministic timelines, no random/time-based logic, and no infinite repeats. +- Use fit-to-width logic for dynamic text and a hard timeline kill at each group end so old captions cannot remain visible. + +### NLE Overlay Tracks + +Use Shotcut/Kdenlive when the whole edit already lives in an NLE timeline. For complex captions, pre-render transparent overlays or ASS-burned intermediate clips, then bring them into the NLE to avoid fragile text filter behavior. + +### Python / MoviePy / Pillow + +Use only when the project already uses a Python render path and needs custom layout that ASS cannot express. Render transparent text layers at final resolution, inspect frames, and avoid rebuilding a low-end subtitle engine from scratch. + +## Doctor Review + +Run the caption doctor to gather evidence, then inspect the relevant frames and audio. These signals usually require revision or a written justification: + +- Captions feel like generic subtitles pasted on top of the video. +- Voice captions use a default-looking font/style with no genre fit, hierarchy, safe-zone reasoning, or caption-heavy frame review. +- Any caption is clipped, outside safe margins, too small on the target platform, or unreadable against the footage. 
+- Captions cover faces, important action, source subtitles, UI controls, or watermarks without a documented reason. +- Text lingers past its end time, overlaps the next caption group unintentionally, or appears before the spoken line. +- Voice captions are hand-timed story summaries instead of timed narration text. +- Voice-caption coverage extends far beyond the narration audio, or leaves large narrated sections without subtitles. +- The planned caption role is narrative coverage, but the final has a large unreviewed gap between authored narration/caption coverage and media duration. +- ASS PlayRes does not match the final video and the effective font size is too small. +- CJK/Latin font fallback is inconsistent, missing glyphs appear, or punctuation wraps badly. +- Style conflicts with genre: bouncy social captions on serious film commentary, plain subtitles on a hype montage, loud karaoke on a tutorial, etc. +- The final render was delivered without caption-heavy sample frames or a written caption review note. + +Minimum investigation steps: + +```bash +mkdir -p caption_review_frames +python cli-hub-matrix/video-creation/scripts/video_doctor.py captions captions.ass \ + --media final.mp4 \ + --narration narration.mp3 \ + --output-language "" \ + --json > captions_doctor.json +python cli-hub-matrix/video-creation/scripts/video_doctor.py frames final.mp4 caption_review_frames \ + --json > caption_review_frames/frames_doctor.json +``` + +Also extract exact frames around first caption, densest caption section, last caption, and any style transition. Inspect them visually before calling the video done. + +If the brief expects narration or intertitles to carry the full story, add `--expect-authored-coverage` so the doctor emits coverage-gap signals. Do not write "caption doctor passed." Write what signals appeared, which frames/audio were inspected, and why the final caption treatment is acceptable or what was revised. 
diff --git a/cli-hub-matrix/video-creation/references/nle-shotcut-kdenlive.md b/cli-hub-matrix/video-creation/references/nle-shotcut-kdenlive.md new file mode 100644 index 000000000..003caaa88 --- /dev/null +++ b/cli-hub-matrix/video-creation/references/nle-shotcut-kdenlive.md @@ -0,0 +1,82 @@ +# Shotcut And Kdenlive NLE Workflow + +Use this when an edit benefits from a real timeline: multiple tracks, transitions, reusable project files, source crops, long audio beds, or harness-controlled NLE rendering. + +## When To Choose An NLE + +Prefer Shotcut or Kdenlive when: + +- The timeline has many clips, transitions, or layered tracks. +- The user may want a project file they can reopen. +- Manual review or future editability matters. +- MoviePy/ffmpeg filtergraphs are becoming brittle. + +Prefer MoviePy or raw `ffmpeg` when: + +- The edit is deterministic and simple. +- The task is mostly concat, burn-in, resize, trim, mux, or thumbnail extraction. +- The environment cannot run long NLE jobs reliably. + +## Provider Boundary Pattern + +Use a clear boundary between tools: + +1. `ffmpeg`: create stable mezzanine clips, source crops, speed-ramp snippets, and title-card video segments when NLE text filters are risky. +2. NLE harness: create the project, import media, arrange tracks, add transitions/filters, and render a master. +3. `ffmpeg`: burn ASS captions if the caption pass is post-NLE, normalize fps/SAR/DAR/profile/color tags, mux final audio, and run doctor investigation. + +Do not assume a clean mezzanine segment contains later captions, chapter cards, watermarks, or overlays. Choose one caption stage and document it before rendering. + +## Mezzanine Conventions + +Use boring, stable clip properties: + +- Constant frame rate matching the final project, usually 24, 25, 30, or 60 fps. +- `yuv420p`, square pixels, known dimensions, and normalized rotation. +- Clean crop/scale decisions made before NLE import. 
+- Audio-less video snippets unless audio from that source is intentionally used. +- Short filenames with no spaces when harnesses or MLT paths are fragile. + +Example: + +```bash +ffmpeg -y -i source.mp4 \ + -ss 12.3 -t 5.7 \ + -vf "scale=1920:1080:force_original_aspect_ratio=increase,crop=1920:1080,fps=30,setsar=1" \ + -an -c:v libx264 -crf 16 -preset medium -pix_fmt yuv420p mezz/shot_001.mp4 +``` + +## Render Resilience + +For long renders: + +- Write progress logs from the harness or render command. +- Keep the previous good output until the new candidate has been investigated and accepted. +- Probe the rendered file before promotion. +- Full-decode the candidate to catch missing `moov`, corrupt packets, and partial MP4s. +- If the environment kills long renders, render shorter chunks and assemble with `ffmpeg`. +- Never update reports before the exact promoted final file exists. + +Checks after every NLE render: + +- Actual duration versus intended timeline duration. +- Video stream fps versus project fps. +- SAR/DAR and final dimensions. +- Video track coverage through the audio tail. +- Whether content speed visually matches audio after any normalization. +- Whether captions/chapter cards survived the selected caption stage. + +## Known Failure Modes + +| Symptom | Likely cause | Response | +|---|---|---| +| Black or frozen tail | Video track shorter than audio or render ended on empty track. | Extend/fill video track, trim audio, rerender, run `video_doctor.py tail` and inspect the tail contact sheet. | +| Valid MP4 with bad playback | Partial render, missing `moov`, bad timestamps. | Full-decode, rerender or remux from a healthy master. | +| Wrong speed after normalization | FPS/timebase changed after NLE render. | Compare duration/fps before and after normalization; avoid filters that rewrite timing unintentionally. | +| Captions disappeared | Clean mezzanine was rendered before caption overlay stage. 
| Burn captions after NLE master or include captions inside NLE by design. | +| Source cards or hardcoded subs dominate | Source triage skipped or ranges were selected from overview cards. | Rebuild scene library and reject/crop those ranges. | +| PNG overlays render with black backing | Alpha/format handling issue in MLT path. | Pre-render cards as video with alpha-safe settings or use ffmpeg overlay. | + +## Promotion + +Promote only after `video_doctor.py probe`, `video_doctor.py frames`, and relevant tail/caption/source doctor reports have been read and accepted in context. Hash the candidate and final path, regenerate review assets from the final path, and update reports last. Do not write "doctor passed"; record which signals were investigated and why the final is acceptable. diff --git a/cli-hub-matrix/video-creation/references/render-doctor.md b/cli-hub-matrix/video-creation/references/render-doctor.md new file mode 100644 index 000000000..35b6c3e94 --- /dev/null +++ b/cli-hub-matrix/video-creation/references/render-doctor.md @@ -0,0 +1,116 @@ +# Render Doctor And Final Promotion + +Use this after every render and before presenting a final MP4. Technical validity is not enough, but a binary QC script is also not enough. The render doctor gathers evidence from the exact file being delivered so the agent can investigate issues in context. + +## Doctor Principle + +`scripts/video_doctor.py` reports facts and investigation signals. It does not decide pass/fail. A nonzero exit means the doctor could not run. A zero exit means the report was produced, not that the video is good. + +Agents must read the report, open the referenced frames/logs/audio where relevant, and decide whether the signal is acceptable for the brief. 
+ +## Baseline Doctor Commands + +```bash +mkdir -p doctor +python cli-hub-matrix/video-creation/scripts/video_doctor.py probe final.mp4 --json > doctor/probe.json +python cli-hub-matrix/video-creation/scripts/video_doctor.py frames final.mp4 doctor/review_frames --json > doctor/frames.json +python cli-hub-matrix/video-creation/scripts/video_doctor.py tail final.mp4 doctor/tail --json > doctor/tail.json +python cli-hub-matrix/video-creation/scripts/video_doctor.py audio final.mp4 doctor/audio --scan-path . --json > doctor/audio.json +``` + +For captioned narration: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py captions captions.ass \ + --media final.mp4 \ + --narration narration.mp3 \ + --output-language "<language>" \ + --json > doctor/captions.json +``` + +When the brief expects narration or intertitles to carry the full story, add `--expect-authored-coverage` to surface coverage-gap signals. Omit it for spot captions, music videos, ambient edits, source-audio-led scenes, or videos where captions are intentionally partial. + +For source manifests: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py sources sources.json \ + --root . 
\ + --json > doctor/sources.json +``` + +For HyperFrames or other linter-backed motion projects: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py lint render_lint.log \ + --disposition lint_disposition.md \ + --json > doctor/lint.json +``` + +Optional low-level probes are still useful when a signal needs deeper inspection: + +```bash +ffmpeg -hide_banner -i final.mp4 -vf blackdetect=d=0.15:pic_th=0.98 -an -f null - 2> doctor/blackdetect.log +ffmpeg -hide_banner -i final.mp4 -af silencedetect=n=-45dB:d=0.5 -vn -f null - 2> doctor/silencedetect.log +ffmpeg -hide_banner -i final.mp4 -vf freezedetect=n=0.003:d=1.0 -an -f null - 2> doctor/freezedetect.log +ffmpeg -v error -i final.mp4 -f null - 2> doctor/decode_errors.log +``` + +## What To Read + +Read doctor reports in this order: + +1. `probe`: duration, dimensions, fps, audio/video streams, aspect ratio clues. +2. `frames`: first/middle/last frames, every-2s contact sheet, and a dense final-act sheet. The default final-act sample is the last 20% only as an inspection heuristic. +3. `tail`: black/freeze/static-tail signals and tail contact sheet. +4. `captions`: PlayRes versus final dimensions, effective font size, generic style, caption role, voice-caption span versus narration/media, repeated/debug text signals, and optional authored-coverage checks when the brief needs them. +5. `sources`: provenance fields, source existence, selected ranges, story roles, source-text/watermark risks, quality caveats. +6. `audio`: loudness, high/low-band clues, exported listening snippets, and procedural-audio artifact scan. +7. `lint`: linter warnings/errors and whether the agent fixed, accepted, or blocked on them. + +## Investigation Signals + +Strong signals deserve manual inspection, not automatic rejection: + +- Final duration differs from the requested range or `creative_direction.md`. +- First review frames do not reveal the topic/product/value proposition. 
+- Tail report points to black/frozen/static frames. +- Captions appear to extend far beyond narration, are authored at a different resolution, or are too small at final size. +- Voice/narration captions end long before the media ends when the brief expects authored narration or intertitles to carry the full story. +- Frame doctor reports very low visual diversity, especially in the final-act sample. +- Source ranges contain hardcoded subtitles, broadcast graphics, watermarks, or platform UI without crop/mask/safe-zone mitigation. +- Linter warnings are present without a written disposition. +- Source manifest lacks selected ranges, story roles, platform evidence, or rights notes. +- Generated or processed audio has harsh high-frequency hiss/sizzle, even if silence/volume scans look normal. +- Audio doctor finds procedural-audio artifacts where a long polished edit should have AI-generated music, downloaded relevant/authorized music, or source-audio-led ambience. +- Low-frequency pulse or repeated impact hits dominate the mix and make the soundtrack feel like test tones instead of music. +- Data/UI/chart/map/caption regions visibly shake or drift without a documented purpose. +- Reports point to a path other than the promoted final. + +For each signal, write what you inspected and the decision: accepted with reason, revised, or escalated. + +## Promotion Discipline + +1. Keep the previous known-good output. +2. Run doctor reports on the candidate. +3. Inspect referenced frames/logs/audio directly. +4. If revising, render a new candidate and rerun the relevant doctor command. +5. After choosing a promoted final, hash it: + +```bash +sha256sum final.mp4 > doctor/final_sha256.txt +``` + +6. Regenerate `probe`, `frames`, and any relevant caption/tail/source reports from the final path. +7. Scan reports for stale source names, old final paths, old hashes, and superseded versions. + +## Reporting + +A final review note should say: + +- Which doctor commands were run. 
+- Which signals appeared. +- Which frames/logs/audio segments were inspected. +- What was revised or accepted with context. +- For independent reviews, the top visible risks or weaknesses with frame/timestamp evidence; a review that only repeats technical checks is incomplete. + +Do not write "doctor passed." Write what the doctor helped you learn. diff --git a/cli-hub-matrix/video-creation/references/sound-design.md b/cli-hub-matrix/video-creation/references/sound-design.md new file mode 100644 index 000000000..d89242f1f --- /dev/null +++ b/cli-hub-matrix/video-creation/references/sound-design.md @@ -0,0 +1,137 @@ +# Sound Design For Video Edits + +Use this when a video needs more than a single music bed: trailer hits, risers, whooshes, drones, heartbeats, crowd/source accents, silence gaps, or narration-aware ducking. + +## Required Deliverable + +For polished edits, create `sound_design.md` with: + +- Section map: time ranges, emotion, music role, and loudness target. +- Main music strategy: AI-generated music, downloaded relevant/authorized music, source ambience-led, or user-supplied music. +- Cue list: exact time, stem/file, story function, and visual sync point. +- Source Audio Policy when any downloaded/captured clip audio is used: time range, source file, `audio_role`, keep reason, overlap policy, processing, and review snippet. +- Ducking notes for narration, dialogue, source audio, or captions that need clarity. +- Generated-stem notes, including tools and parameters. +- Review notes for section loudness, true peak, final-act shape, and a listening pass that explicitly checks hiss, sizzle, clipping, and narration intelligibility. + +Generated procedural stems should be separate WAV files named by role, for example `pulse.wav`, `riser_01.wav`, `impact_03.wav`, `drone_low.wav`, `heartbeat.wav`, or `whoosh_fast.wav`. Mark them as generated in `music_sources.json` or `sound_design.md`. 
+ +## Procedural Audio Guardrails + +Procedural audio is useful for short UI hits, pulses, and risers, but raw noise is easy to make unlistenable. + +- For polished videos around 60 seconds or longer, do not make a NumPy/ffmpeg/sox procedural bed the default main soundtrack. Use AI-generated music or downloaded relevant/authorized music as the main bed unless the brief explicitly asks for procedural/generative sound or the piece is intentionally source-ambience-led. +- Treat procedural audio as SFX/accent stems first: UI ticks, soft impacts, short risers, transition whooshes, drones, pulses, or a brief final hit. +- Do not use unfiltered Gaussian/full-band noise as a music bed, riser, or repeated whoosh. +- Shape noise with a short envelope, band-limit it, keep it low in the mix, and reserve it for transitional moments. +- Prefer tonal UI clicks, soft impacts, filtered sweeps, and real ambience over constant hiss. +- Export stems separately before mixing so a noisy stem can be muted or replaced without rebuilding the edit. +- If the user reports sizzling/noise, treat it as a critical issue even when `silencedetect`, `volumedetect`, or decode checks look normal. + +Bad smell examples: + +- `np.sin` low drones plus `rng.normal` risers as the entire music bed. +- Repeating sub hits every few seconds to fake pacing. +- "Procedural score" in `music_sources.json` with no real music, AI-generated music, or source-audio-led rationale. +- Source ambience mixed so low that it is only texture while synthetic pulses dominate the edit. + +## Audio Roles + +| Role | Purpose | +|---|---| +| Music bed | Continuity, tone, pacing. | +| Pulse | Gives cuts a motor without overwhelming narration. | +| Riser | Builds into a reveal, chapter change, or final hit. | +| Impact/sub drop | Marks a beat, dunk, reveal, title, or scene turn. | +| Whoosh | Motivates motion graphics, fast pans, or chapter transitions. | +| Drone | Adds dread, scale, or unresolved tension. 
| +| Heartbeat/silence | Creates a hold before a payoff. | +| Source audio | Proves authenticity or gives a scene a human edge. | +| Narration | Leads story logic; music should move around it. | + +## Source Audio Overlap Rules + +Classify every source-audio range before mixing it. Do not use vague labels such as "source texture" unless the range has been checked and is truly ambience. + +| Audio role | Use | +|---|---| +| `silent_or_mute` | Source audio is unused, irrelevant, noisy, copyrighted music, platform intro, or would conflict with the designed mix. | +| `ambience_keep` | Natural room/machine/crowd/market sound with no dominant music or speech; may sit quietly under narration/music. | +| `dialogue_keep` | Source speech is story-critical; make it foreground and subtitle/translate it instead of talking over it. | +| `music_only` | Source music is the intended bed for that window, or mute it when adding a separate music bed. | +| `mixed_music_speech` | Speech and music are tangled; do not stack new narration/music without separation, replacement, or a source-only decision. | +| `needs_separation` | Use a separation step such as Demucs/Spleeter/UVR or choose a cleaner range before final mix. | + +Keep one foreground voice at a time: agent narration or source dialogue, not both. Keep one intentional music bed at a time: source music or added music, not both. If a source clip has music and you add a new bed, mute the source audio by default unless the written policy says source-only, ducked, or separated. If a source clip has commentary and you add narration, mute/replace the source voice or let the source voice lead with subtitles. 
+ +Suggested `sound_design.md` table: + +| Time | Source audio | Audio role | Keep reason | Overlap policy | Processing | Review snippet | +|---|---|---|---|---|---|---| +| 12-18s | `sources/clip.mp4` | `ambience_keep` | real machine room tone | low under music, no speech/music detected | high-pass, low-pass, duck under narration | `qc/audio/source_overlap_01.wav` | + +## Patterns + +Trailer final act: + +- Quiet hold or drop-out. +- Riser into denser cuts. +- Several synchronized hits. +- One impact or silence gap. +- Unresolved sting, final image, or hard resolve. + +Sports hype: + +- Cut on downbeats and visible impacts. +- Use crowd/source accents sparingly to increase authenticity. +- Include one energy dip before the signature final hit. +- Avoid score loops that ignore athletic motion. + +Film commentary: + +- Narration leads the mix. +- Chapter score changes signal argument turns. +- Source audio reveals should be short and meaningful. +- Do not let effects fight dialogue or subtitles. + +Digital/product launch: + +- Small UI whooshes and tactile hits work better than oversized trailer booms. +- Sync audio changes to product state changes, not decorative text. + +## Mix Review + +Run whole-file and section-level loudness checks: + +```bash +ffmpeg -hide_banner -i final.mp4 -af ebur128 -f null - +ffmpeg -hide_banner -i final.mp4 -af volumedetect -f null - +``` + +Review the opening, a dense middle section, and the ending/final act. Revise or justify the mix if the ending has no intentional shape for the genre, if narration is buried, if hits are visually unsynchronized, or if the same loop runs unchanged from start to finish without a deliberate calm/ambient reason. + +For hiss/sizzle checks, also isolate the high band and listen or measure: + +```bash +ffmpeg -hide_banner -i final.mp4 -af highpass=f=6000,volumedetect -f null - +``` + +A high-band measurement is only a clue; the final decision is the listening pass. 
+ +Use the bundled audio doctor to collect these clues and export listening snippets: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py audio final.mp4 qc/audio --scan-path . +``` + +When source audio is used, pass the manifests and sound design file so the doctor can export overlap snippets: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py audio final.mp4 qc/audio \ + --scan-path . \ + --sources-manifest sources.json \ + --music-manifest music_sources.json \ + --sound-design sound_design.md +``` + +Read the signals in context. Revise or justify the mix if the doctor finds procedural-audio artifacts, dominant low-frequency pulse, strong high-frequency hiss/sizzle, clipped peaks, flat loudness range, missing source-audio roles, or source audio layered under music/narration without a clear mute/duck/source-only/separation policy. For polished long edits, the normal fix is to replace the main bed with AI-generated music or downloaded relevant/authorized music, keep procedural stems only as low-volume accents, and keep source audio only where its role is explicit. diff --git a/cli-hub-matrix/video-creation/references/source-triage.md b/cli-hub-matrix/video-creation/references/source-triage.md new file mode 100644 index 000000000..4b92eadf0 --- /dev/null +++ b/cli-hub-matrix/video-creation/references/source-triage.md @@ -0,0 +1,123 @@ +# Source Triage For Found-Footage Video + +Use this when a video depends on internet footage, public-domain clips, platform-origin media, or named scenes. The goal is to avoid cutting with weak sources just because they downloaded. + +## Evidence Levels + +Classify every source before editing: + +| Level | Meaning | Use | +|---|---|---| +| Direct platform source | Downloaded from the original or intended platform URL. | Preferred when user supplied the URL or the platform source is authorized and reachable. 
| +| Verified platform-origin transport | Downloaded through another host that preserves credible source metadata, such as a Wikimedia transcode with source information or a `ytarchive:` capture. | Accept when direct platform access fails but the transport proves origin well enough for the brief. | +| Weak mirror | Reupload, compilation, fan edit, generic mirror, or unverifiable clip. | Use only when the user accepts the caveat and the final deliverable is appropriate for that risk. | + +Downloadability is not permission. Do not bypass DRM, paywalls, login restrictions, or access controls. Use cookies only when the user has authorized access to the content. + +## Required Manifest Fields + +For `sources.json`, include one object per source: + +```json +{ + "id": "source_short_name", + "platform_url": "https://...", + "transport_url": "https://...", + "evidence_level": "direct-platform|verified-transport|weak-mirror", + "download_command": "yt-dlp ...", + "cookie_file": "path or null", + "local_file": "sources/source_short_name.mp4", + "probe": { + "duration": 123.45, + "width": 1920, + "height": 1080, + "fps": "30000/1001", + "video_bitrate": 8000000, + "audio_bitrate": 160000 + }, + "selected_ranges": [ + { + "start": 12.3, + "end": 18.8, + "role": "setup|reveal|impact|contrast|proof|payoff", + "audio_role": "silent_or_mute|ambience_keep|dialogue_keep|music_only|mixed_music_speech|needs_separation", + "source_music_present": "none|light|dominant|unknown", + "speech_needed": false, + "overlap_policy": "mute_source|keep_ambience_low|source_dialogue_foreground|source_music_only|duck_new_music|separate_vocals|reject_range", + "separation_tool": "none|demucs|spleeter|uvr|api", + "quality_notes": "why this range survives triage", + "risk": "watermark/source subtitle/soft crop/etc." 
+ } + ], + "creator": "name if known", + "license": "license or unknown", + "rights_notes": "authorization/attribution/caveat", + "quality_caveat": "none or specific caveat" +} +``` + +For music, keep equivalent details in `music_sources.json`. + +Audio fields are required when a selected range has an audio stream and the final uses any source sound. Classify source audio before editing: + +- `silent_or_mute`: irrelevant audio, platform intro, pure source BGM that would fight added music, or source commentary that would fight new narration. +- `ambience_keep`: natural ambience with no dominant music or speech. +- `dialogue_keep`: source speech is needed; it must become foreground with subtitles/translation, not background under new narration. +- `music_only`: source music is the intentional bed for that window; do not add another full music bed there. +- `mixed_music_speech` / `needs_separation`: use Demucs, Spleeter, UVR, an approved API, or reject the range before adding new music or narration. + +If the source audio is only used for authenticity, prove it is ambience. Do not label speech/music bleed as "texture." + +## Triage Workflow + +1. Probe every source before watching it in detail: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py probe sources/source.mp4 +``` + +2. Make a source overview contact sheet: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py frames sources/source.mp4 review/source_name +``` + +3. After drafting `sources.json`, run the source doctor: + +```bash +python cli-hub-matrix/video-creation/scripts/video_doctor.py sources sources.json --root . +``` + +4. Read the doctor signals for missing provenance, stale files, weak ranges, low-quality sources, missing audio roles, and risky source-audio overlap policies. The doctor is not a rights verifier; it is a prompt for investigation. +5. Mark likely usable ranges with a story role. A range without a role is not selected footage. +6. 
Make contact sheets for selected ranges or dense action sections. +7. Reject bad ranges before timeline work; do not push the problem into color, crop, captions, or NLE effects. + +When source footage has hardcoded subtitles, broadcast graphics, watermarks, or platform UI, record the risk and mitigation in the selected range: safe-zone placement, crop, mask/blur, replacement, or a written reason it is acceptable. The source doctor will flag text-heavy risks that lack mitigation notes. + +## Rejection Checklist + +Reject or crop/mask ranges that have: + +- Too-low resolution for the final format, unless the low-fi look is intentional. +- Static shots that do not support the beat. +- Countdown cards, ranking cards, source title cards, end credits, sponsor cards, or large hardcoded numbers. +- Hardcoded captions/subtitles that fight the final captions. +- Watermarks or platform UI that cannot be justified or safely cropped. +- Repeated content from another selected range unless it is a deliberate callback. +- Off-theme action, wrong character/team/object, or generic stock feel. +- Weak platform evidence when the brief requires real YouTube/Bilibili/source provenance. +- Audio with dialogue/SFX bleed when the source is supposed to be clean music. +- Source music stacked under a new music bed without a source-only, ducking, mute, or separation decision. +- Source commentary stacked under new narration; choose one foreground voice or separate/replace the source audio. + +## Contact Sheets To Keep + +For found-footage edits, keep these under `review/` or equivalent: + +- One overview sheet per raw source. +- One sheet of all selected ranges. +- One final used-ranges sheet after assembly. +- One dense final-act sheet for trailer/sports/music edits, using the final section size that fits the genre. + +Reports must name the exact final source files and ranges used. If a source is replaced after review, regenerate the sheets and update the manifest. 
diff --git a/cli-hub-matrix/video-creation/references/story-structure-audio.md b/cli-hub-matrix/video-creation/references/story-structure-audio.md new file mode 100644 index 000000000..d1cd1a0c5 --- /dev/null +++ b/cli-hub-matrix/video-creation/references/story-structure-audio.md @@ -0,0 +1,117 @@ +# Story Structure And Audio Direction + +Use this reference for non-trivial videos: trailers, sports hype edits, film commentary, found-footage montage, product launch videos, music videos, documentary shorts, and anything longer than a simple clip trim. The goal is to prevent flat montage: clips with no internal logic, no emotional rise/fall, and a dull continuous audio bed. + +Before editing, write `creative_direction.md`. Do not start final assembly until it has enough detail that another agent could understand the whole video without seeing your timeline. + +## Required Creative Direction + +`creative_direction.md` must include: + +- **One-sentence promise:** what the viewer should feel or understand by the end. +- **Audience/platform:** who watches it, where it plays, and expected energy/density. +- **Genre contract:** trailer, sports hype, movie recap, tutorial, launch film, lyric edit, etc. +- **Story arc:** beginning, escalation, turning point, climax, resolution. +- **Emotional curve:** e.g. curiosity -> tension -> threat -> release -> triumph. +- **Audio arc:** main music strategy, music sections, narration/source-audio role, silence/breaks, hits, risers, drops, and final resolve. +- **Cut-density curve:** where cuts are slow, where they accelerate, where the edit breathes. +- **Visual motif:** repeated image idea, color/shape/title system, or source pattern that ties the video together. +- **Source roles:** which clips/sources establish world, conflict, evidence, spectacle, payoff, or texture. +- **No-flatness guardrails:** explicit choices that prevent random clip order, monotone music, repeated footage, and title cards with no narrative function. 
+ +## Planning Order + +1. **Define the spine.** Pick a clear premise and a final payoff before selecting shots. +2. **Choose the arc shape.** Use one of the patterns below, then adapt it. +3. **Map audio first.** Choose whether the main bed is AI-generated music, downloaded relevant/authorized music, user-supplied music, or source ambience. Then mark music/narration sections and major beat hits. The edit should respond to audio structure, not just play under it. +4. **Assign shot jobs.** Every selected segment needs a narrative job: setup, reveal, escalation, contrast, proof, impact, or payoff. +5. **Only then assemble.** Build rough cut by story beats, not source order. + +## Arc Patterns + +### Trailer / Sci-Fi / Game / Mecha + +- **0-10% Hook:** emergency, question, impossible image, or cold-open impact. +- **10-30% World + threat:** establish where we are and what is wrong. +- **30-55% Escalation:** stakes rise; shots become shorter; sound design grows. +- **55-75% Drop / reversal:** major title hit, combat drop, reveal, or turning point. +- **75-92% Final run:** fastest density, strongest music section, clear image progression. +- **92-100% Lockup:** final line/title/logo, short breathe-out, no abrupt ending. + +Audio: avoid a single flat loop. Use intro pulse, riser, drop, breakdown or half-time bridge, final lift, and final tail. Layer source audio, impacts, whooshes, short silence, and low hits where appropriate. + +### Sports Hype / Action Montage + +- **Hook:** strongest visual in first 3-5 seconds, not a slow intro. +- **Identity:** show athlete/team/sport vocabulary. +- **Pressure:** misses, defense, conflict, preparation, crowd, rivalry, or fatigue. +- **Breakthrough:** first clear win/impact/drop. +- **Run:** dense sequence of best moments with increasing cut density. +- **Signature finish:** final highlight, title, or crowd reaction. + +Audio: map hard cuts to kicks/snares/downbeats. Use crowd/source hits selectively. 
Include at least one energy dip so the final section feels earned. + +### Film Commentary / Recap + +- **Cold hook:** what makes the film/scene worth watching. +- **Setup:** characters, context, conflict. +- **Complication:** why the situation worsens. +- **Turn:** decision, reveal, or emotional pivot. +- **Payoff:** consequence and interpretation. +- **Closing thought:** short final thesis, not just "the end." + +Audio: narration leads. Music should support chapters, not fight speech. Use small score changes at chapter boundaries, emotional dips for turning points, and source audio only when it clarifies a moment. + +### Product / Digital Launch + +- **Problem:** pain or opportunity. +- **Reveal:** product/name/visual identity. +- **Proof:** feature demo, metric, workflow, or before/after. +- **Momentum:** multiple capabilities or use cases, increasingly fast. +- **Outcome:** user benefit, launch line, CTA/title lockup. + +Audio: match UI motion to music phrases. Use clicks, swipes, risers, and soft impacts only when they reinforce product action. + +### Tutorial / Explainer + +- **Promise:** what the viewer will learn. +- **Map:** the steps or mental model. +- **Action:** demonstrate steps with clear cause/effect. +- **Check:** show result or common failure. +- **Wrap:** concise recap. + +Audio: keep music low and simple. Rhythm comes from edits, cursor movement, callouts, and narration pacing, not big trailer hits. + +## Audio Direction Rules + +- Do not use one unchanging music bed from start to finish unless the genre explicitly demands calm continuity. +- Mark at least three audio events in any polished 60+ second video: hit, riser, drop, pause, texture change, source-audio reveal, or final resolve. +- Use silence as a design element. A 200-500 ms dip before a drop can make the next shot feel stronger than another constant hit. +- Narration, dialogue, source audio, music, and SFX must have roles. If everything is loud all the time, nothing has impact. 
+- Check loudness by section, not only whole-file mean volume. Flat mean volume can hide a boring mix. +- For polished 60+ second videos, do not default to a procedural tone/noise bed as the main soundtrack. Prefer AI-generated music, downloaded relevant/authorized music, user-supplied music, or source-ambience-led design. +- If generating procedural hits/risers/pulses, write the intended structure first and keep them as stems or accents. Do not generate a single tone/noise loop and hope it creates drama. + +## Assembly Rules + +- Source order is not story order. Reorder clips around the arc unless the task is explicitly chronological. +- Repeated footage is allowed only as a deliberate callback, escalation, or motif. Accidental repetition is a failure. +- Chapter cards must change the story beat; they cannot be decorative separators for random clips. +- Every 10-20 seconds, something should change: setting, tension, density, audio texture, title system, character focus, or edit pattern. +- The ending must resolve the premise, intentionally withhold resolution, or hand off to a next action. It should not stop only because the timeline reached the target duration. + +## Review Rubric + +Fail and revise if any answer is "no": + +- Can a viewer explain the premise after the first 10 seconds? +- Does the middle develop or escalate instead of continuing the same pattern? +- Is there a clear high point or turning point? +- Does the audio have sections, not just volume? +- Is the main music/ambience strategy credible for the genre, rather than a synthetic test-tone/noise bed? +- Do music hits, source audio, titles, and cuts reinforce the same moments? +- Do the chosen sources have assigned narrative roles? +- Would removing the chapter cards still leave an understandable arc? +- Does the ending feel like a payoff rather than a cutoff? 
+ +If the video is technically valid but fails this rubric, return to `creative_direction.md`, revise the arc/audio plan, and rebuild the timeline. diff --git a/cli-hub-matrix/video-creation/scripts/video_doctor.py b/cli-hub-matrix/video-creation/scripts/video_doctor.py new file mode 100644 index 000000000..79b70efe2 --- /dev/null +++ b/cli-hub-matrix/video-creation/scripts/video_doctor.py @@ -0,0 +1,2580 @@ +#!/usr/bin/env python3 +"""Investigate video-creation artifacts and report evidence for agent review. + +This helper is intentionally non-binary. It reports facts and investigation +signals; it does not decide whether a video passes. A nonzero exit means the +doctor could not run, not that the artifact is bad. +""" + +from __future__ import annotations + +import argparse +import json +import re +import shutil +import subprocess +import sys +from pathlib import Path +from typing import Any + + +DEFAULT_FLAG_TERMS = [ + "MISSION SUBTITLE", + "MISSION LOG", + "DEBUG", + "TODO", +] + +SOURCE_TEXT_TERMS = [ + "hardcoded subtitle", + "hardcoded subtitles", + "source subtitle", + "source subtitles", + "burned subtitle", + "burned subtitles", + "broadcast graphic", + "broadcast graphics", + "lower third", + "ticker", + "watermark", + "platform ui", + "logo bug", + "chinese subtitle", + "caption collision", +] + +SOURCE_TEXT_MITIGATION_TERMS = [ + "safe zone", + "safe-zone", + "crop", + "cropped", + "mask", + "masked", + "blur", + "blurred", + "replace", + "replaced", + "avoid", + "avoided", + "upper", + "side", + "acceptable", + "intentional", +] + +AUDIO_ROLE_VALUES = [ + "silent_or_mute", + "ambience_keep", + "dialogue_keep", + "music_only", + "mixed_music_speech", + "needs_separation", + "unknown", +] + +AUDIO_ROLE_FIELDS = [ + "audio_role", + "audio_class", + "source_audio_role", + "source_audio_class", +] + +AUDIO_OVERLAP_POLICY_FIELDS = [ + "overlap_policy", + "new_music_overlap_policy", + "source_audio_policy", + "narration_overlap_policy", + "audio_policy", +] 
# Phrases in manifest notes that suggest source audio is layered under new
# music or narration.
SOURCE_AUDIO_CONFLICT_TERMS = [
    "under the music",
    "under music",
    "under the score",
    "under score",
    "under narration",
    "under the narration",
    "source texture",
    "source-audio texture",
    "source audio texture",
    "retain all source audio",
    "retained source audio",
    "mixed audibly",
    "audible under",
]

# Phrases in manifest notes that indicate a mitigation for overlapping audio.
SOURCE_AUDIO_RESOLUTION_TERMS = [
    "mute",
    "muted",
    "duck",
    "ducked",
    "sidechain",
    "side-chain",
    "separate",
    "separated",
    "separation",
    "demucs",
    "spleeter",
    "uvr",
    "isolate",
    "vocal",
    "vocals",
    "accompaniment",
    "instrumental",
    "keep source only",
    "source-only",
    "no new music",
]

# SubRip timing line, e.g. "00:00:01,000 --> 00:00:02,500". The named groups
# "start" and "end" are read by parse_srt().
SRT_TIME = re.compile(
    r"(?P<start>\d\d:\d\d:\d\d[,.]\d{3})\s*-->\s*(?P<end>\d\d:\d\d:\d\d[,.]\d{3})"
)


def as_float(value: Any) -> float | None:
    """Return ``float(value)``, or None when the value cannot be converted."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def as_int(value: Any) -> int | None:
    """Return ``int(value)``, or None when the value cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def parse_rate(value: str | None) -> float | None:
    """Parse a rate string such as ``"30000/1001"`` or ``"25"`` into a float.

    Returns None for empty input, ``"0/0"``, a zero denominator, or text that
    is not numeric.
    """
    if not value or value == "0/0":
        return None
    if "/" in value:
        num, den = value.split("/", 1)
        try:
            den_f = float(den)
            return float(num) / den_f if den_f else None
        except ValueError:
            return None
    return as_float(value)


def ensure_tool(name: str) -> None:
    """Raise RuntimeError when executable *name* is not found on PATH."""
    if shutil.which(name) is None:
        raise RuntimeError(f"{name} not found on PATH")


def run(cmd: list[str], *, capture: bool = True) -> subprocess.CompletedProcess[str]:
    """Run *cmd* in text mode without raising on a nonzero exit status."""
    return subprocess.run(cmd, text=True, capture_output=capture, check=False)


def run_bytes(cmd: list[str]) -> subprocess.CompletedProcess[bytes]:
    """Run *cmd* capturing raw bytes, without raising on a nonzero exit status."""
    return subprocess.run(cmd, capture_output=True, check=False)


def run_checked(cmd: list[str]) -> subprocess.CompletedProcess[str]:
    """Run *cmd* and raise RuntimeError with its stderr/stdout on failure."""
    proc = run(cmd)
    if proc.returncode != 0:
        detail = proc.stderr.strip() or proc.stdout.strip() or "command failed"
        raise RuntimeError(detail)
    return proc


def run_ffprobe(path: Path) -> dict[str, Any]:
    """Return ffprobe's JSON description (format and streams) of *path*.

    Raises RuntimeError when ffprobe is missing or exits with an error.
    """
    ensure_tool("ffprobe")
    proc = run_checked(
        [
            "ffprobe",
            "-v",
            "error",
            "-print_format",
            "json",
            "-show_format",
            "-show_streams",
            str(path),
        ]
    )
    return json.loads(proc.stdout)


def media_summary(path: Path, raw: dict[str, Any]) -> dict[str, Any]:
    """Condense raw ffprobe output into file, video-stream and audio-stream facts."""
    fmt = raw.get("format", {})
    video_streams: list[dict[str, Any]] = []
    audio_streams: list[dict[str, Any]] = []
    for stream in raw.get("streams", []):
        if stream.get("codec_type") == "video":
            video_streams.append(
                {
                    "index": stream.get("index"),
                    "codec": stream.get("codec_name"),
                    "profile": stream.get("profile"),
                    "width": stream.get("width"),
                    "height": stream.get("height"),
                    "pix_fmt": stream.get("pix_fmt"),
                    "duration": as_float(stream.get("duration")),
                    "nb_frames": stream.get("nb_frames"),
                    "avg_frame_rate": stream.get("avg_frame_rate"),
                    "avg_fps": parse_rate(stream.get("avg_frame_rate")),
                    "r_frame_rate": stream.get("r_frame_rate"),
                    "sample_aspect_ratio": stream.get("sample_aspect_ratio"),
                    "display_aspect_ratio": stream.get("display_aspect_ratio"),
                    "field_order": stream.get("field_order"),
                    "color_range": stream.get("color_range"),
                    "color_space": stream.get("color_space"),
                    "color_transfer": stream.get("color_transfer"),
                    "color_primaries": stream.get("color_primaries"),
                    "bit_rate": as_float(stream.get("bit_rate")),
                }
            )
        elif stream.get("codec_type") == "audio":
            audio_streams.append(
                {
                    "index": stream.get("index"),
                    "codec": stream.get("codec_name"),
                    "profile": stream.get("profile"),
                    "sample_rate": as_float(stream.get("sample_rate")),
                    "channels": stream.get("channels"),
                    "channel_layout": stream.get("channel_layout"),
                    "duration": as_float(stream.get("duration")),
                    "bit_rate": as_float(stream.get("bit_rate")),
                }
            )
    return {
        "file": str(path),
        "size_bytes": path.stat().st_size,
        "format": fmt.get("format_name"),
        "duration": as_float(fmt.get("duration")),
        "bit_rate": as_float(fmt.get("bit_rate")),
        "video_streams": video_streams,
        "audio_streams": audio_streams,
    }


def compact_media(path: Path | None) -> dict[str, Any] | None:
    """Probe *path* and return a short summary based on its first video stream.

    Returns None when *path* is None.
    """
    if path is None:
        return None
    raw = run_ffprobe(path)
    summary = media_summary(path, raw)
    video = summary["video_streams"][0] if summary["video_streams"] else {}
    return {
        "path": str(path),
        "duration": summary.get("duration"),
        "width": video.get("width"),
        "height": video.get("height"),
        "fps": video.get("avg_fps"),
        "audio_streams": len(summary["audio_streams"]),
        "video_streams": len(summary["video_streams"]),
    }


def signal(
    topic: str,
    message: str,
    *,
    level: str = "review",
    evidence: dict[str, Any] | None = None,
    investigate: str | None = None,
) -> dict[str, Any]:
    """Build one investigation-signal record.

    ``evidence`` and ``investigate`` are included only when truthy.
    """
    item: dict[str, Any] = {"level": level, "topic": topic, "message": message}
    if evidence:
        item["evidence"] = evidence
    if investigate:
        item["investigate"] = investigate
    return item


def language_counts(text: str) -> dict[str, int]:
    """Count ASCII Latin letters, CJK-range characters and non-space characters."""
    latin = len(re.findall(r"[A-Za-z]", text))
    cjk = len(re.findall(r"[\u3400-\u9fff\u3040-\u30ff\uac00-\ud7af]", text))
    return {
        "latin_letters": latin,
        "cjk_chars": cjk,
        "visible_chars": len([char for char in text if not char.isspace()]),
    }


def merge_intervals(intervals: list[tuple[float, float]], duration: float | None = None) -> list[tuple[float, float]]:
    """Sort and merge intervals, optionally clipping them to ``[0, duration]``.

    Empty or inverted intervals are dropped. Intervals separated by a gap of
    at most 0.05 are merged into one.
    """
    clipped: list[tuple[float, float]] = []
    for start, end in intervals:
        if duration is not None:
            start = max(0.0, min(duration, start))
            end = max(0.0, min(duration, end))
        if end > start:
            clipped.append((start, end))
    clipped.sort()
    merged: list[tuple[float, float]] = []
    for start, end in clipped:
        if not merged or start > merged[-1][1] + 0.05:
            merged.append((start, end))
        else:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
    return merged


def coverage_seconds(intervals: list[tuple[float, float]], duration: float | None = None) -> float:
    """Return the total length covered by *intervals* after merging."""
    return sum(end - start for start, end in merge_intervals(intervals, duration))


def sampled_frame_hashes(
    media: Path,
    *,
    start: float = 0.0,
    length: float | None = None,
    fps: str = "1/2",
    width: int = 32,
    height: int = 18,
) -> list[str]:
    """Sample grayscale frames with ffmpeg and return one hex hash per frame.

    Each frame is scaled to ``width`` x ``height``; every pixel contributes
    one bit (1 when the pixel is >= the frame average). Trailing partial
    frames are ignored. Raises RuntimeError when ffmpeg is missing or fails.
    """
    ensure_tool("ffmpeg")
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
        "-ss",
        f"{start:.3f}",
        "-i",
        str(media),
    ]
    if length is not None:
        cmd += ["-t", f"{length:.3f}"]
    cmd += [
        "-an",
        "-vf",
        f"fps={fps},scale={width}:{height}:flags=fast_bilinear,format=gray",
        "-f",
        "rawvideo",
        "-",
    ]
    proc = run_bytes(cmd)
    if proc.returncode != 0:
        detail = proc.stderr.decode("utf-8", errors="replace").strip() or "ffmpeg raw frame sampling failed"
        raise RuntimeError(detail)
    frame_size = width * height
    hashes: list[str] = []
    for offset in range(0, len(proc.stdout), frame_size):
        frame = proc.stdout[offset : offset + frame_size]
        if len(frame) != frame_size:
            continue
        avg = sum(frame) / frame_size
        value = 0
        for pix in frame:
            value = (value << 1) | int(pix >= avg)
        hashes.append(f"{value:0{frame_size // 4}x}")
    return hashes


def hamming_distance_hex(left: str, right: str) -> int:
    """Return the number of differing bits between two hex-encoded integers."""
    # bin().count("1") instead of int.bit_count() so this also runs on
    # interpreters older than Python 3.10.
    return bin(int(left, 16) ^ int(right, 16)).count("1")


def visual_diversity_summary(hashes: list[str], threshold: int = 36) -> dict[str, Any]:
    """Greedily group near-duplicate frame hashes and summarize the diversity.

    A hash starts a new group when its Hamming distance to every existing
    group representative exceeds *threshold*.
    """
    groups: list[str] = []
    for item in hashes:
        if not any(hamming_distance_hex(item, existing) <= threshold for existing in groups):
            groups.append(item)
    samples = len(hashes)
    unique_groups = len(groups)
    return {
        "samples": samples,
        "unique_groups": unique_groups,
        "unique_ratio": round(unique_groups / samples, 3) if samples else None,
        "near_duplicate_threshold": threshold,
    }


def parse_time(value: str) -> float:
    """Convert ``H:MM:SS.mmm``, ``MM:SS.mmm`` or plain seconds into seconds.

    A comma decimal separator is accepted. Raises ValueError on non-numeric
    input.
    """
    value = value.strip().replace(",", ".")
    parts = value.split(":")
    if len(parts) == 3:
        hours, minutes, seconds = parts
        return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
    if len(parts) == 2:
        minutes, seconds = parts
        return int(minutes) * 60 + float(seconds)
    return float(value)


def clean_text(text: str) -> str:
    """Strip ASS override blocks and markup tags, then collapse whitespace."""
    text = re.sub(r"\{\\.*?\}", "", text)
    text = re.sub(r"<[^>]+>", "", text)
    text = text.replace("\\N", " ").replace("\\n", " ")
    text = re.sub(r"\s+", " ", text)
    return text.strip()


def parse_srt(text: str) -> list[dict[str, Any]]:
    """Parse SubRip text into entries with start, end, text and empty style.

    Blocks without a recognizable timing line are skipped.
    """
    entries: list[dict[str, Any]] = []
    blocks = re.split(r"\n\s*\n", text.strip(), flags=re.MULTILINE)
    for block in blocks:
        lines = [line.strip("\ufeff") for line in block.splitlines() if line.strip()]
        time_index = next((i for i, line in enumerate(lines) if SRT_TIME.search(line)), None)
        if time_index is None:
            continue
        match = SRT_TIME.search(lines[time_index])
        if not match:
            continue
        entries.append(
            {
                "start": parse_time(match.group("start")),
                "end": parse_time(match.group("end")),
                "text": clean_text(" ".join(lines[time_index + 1 :])),
                "style": "",
            }
        )
    return entries


def parse_ass(text: str) -> list[dict[str, Any]]:
    """Parse ``Dialogue:`` rows from the ``[Events]`` section of ASS/SSA text.

    Rows are mapped through the section's ``Format:`` line. Rows that appear
    before a format line, have the wrong field count, or carry unparseable
    times are skipped.
    """
    entries: list[dict[str, Any]] = []
    in_events = False
    fields: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if line.lower() == "[events]":
            in_events = True
            continue
        if in_events and line.startswith("[") and line.endswith("]"):
            in_events = False
        if not in_events:
            continue
        if line.lower().startswith("format:"):
            fields = [part.strip().lower() for part in line.split(":", 1)[1].split(",")]
            continue
        if line.lower().startswith("dialogue:"):
            payload = line.split(":", 1)[1].lstrip()
            if not fields:
                continue
            # The last field (normally the text) may itself contain commas.
            parts = payload.split(",", maxsplit=len(fields) - 1)
            if len(parts) != len(fields):
                continue
            row = dict(zip(fields, parts))
            try:
                start = parse_time(row["start"])
                end = parse_time(row["end"])
            except (KeyError, ValueError):
                continue
            entries.append(
                {
                    "start": start,
                    "end": end,
                    "text": clean_text(row.get("text", "")),
                    "style": row.get("style") or "",
                }
            )
    return entries


def parse_ass_metadata(text: str) -> dict[str, Any]:
    """Extract PlayResX/PlayResY and the ``[V4+ Styles]`` table from ASS text.

    Missing values stay None, and ``styles`` stays empty when no style rows
    are found.
    """
    metadata: dict[str, Any] = {"playres_x": None, "playres_y": None, "styles": {}}
    in_styles = False
    style_fields: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        lower = line.lower()
        if lower.startswith("playresx:"):
            metadata["playres_x"] = as_int(as_float(line.split(":", 1)[1].strip()))
        elif lower.startswith("playresy:"):
            metadata["playres_y"] = as_int(as_float(line.split(":", 1)[1].strip()))
        if lower == "[v4+ styles]":
            in_styles = True
            continue
        if in_styles and line.startswith("[") and line.endswith("]"):
            in_styles = False
        if not in_styles:
            continue
        if lower.startswith("format:"):
            style_fields = [part.strip().lower() for part in line.split(":", 1)[1].split(",")]
            continue
        if lower.startswith("style:") and style_fields:
            payload = line.split(":", 1)[1].lstrip()
            parts = payload.split(",", maxsplit=len(style_fields) - 1)
            if len(parts) != len(style_fields):
                continue
            row = dict(zip(style_fields, parts))
            name = row.get("name")
            if not name:
                continue
            metadata["styles"][name] = {
                "fontname": row.get("fontname"),
                "fontsize": as_float(row.get("fontsize")),
                "bold": row.get("bold"),
                "alignment": row.get("alignment"),
                "borderstyle": row.get("borderstyle"),
                "outline": as_float(row.get("outline")),
                "shadow": as_float(row.get("shadow")),
                "primary_colour": row.get("primarycolour"),
                "back_colour": row.get("backcolour"),
                "margin_l": row.get("marginl"),
                "margin_r": row.get("marginr"),
                "margin_v": row.get("marginv"),
            }
    return metadata


def parse_captions(path: Path, text: str) -> list[dict[str, Any]]:
    """Parse caption *text* using the format implied by *path*'s suffix.

    Unknown suffixes are tried as SRT first, then as ASS.
    """
    suffix = path.suffix.lower()
    if suffix == ".srt":
        return parse_srt(text)
    if suffix in {".ass", ".ssa"}:
        return parse_ass(text)
    srt_entries = parse_srt(text)
    return srt_entries if srt_entries else parse_ass(text)


def caption_doctor(args: argparse.Namespace) -> dict[str, Any]:
    """Inspect a caption file and report facts plus investigation signals.

    Reads these attributes from *args*: ``captions``, ``media``, ``narration``,
    ``voice_style``, ``output_language``, ``duration``, ``flag_term``,
    ``expect_authored_coverage``, ``post_caption_tail_signal``,
    ``voice_media_coverage_signal``, ``narration_media_tail_signal``,
    ``voice_after_narration_signal`` and ``review_dir``.

    Returns a report dict with ``doctor``, ``artifact``, ``summary``,
    ``signals`` and ``agent_instruction``. The report is not a pass/fail
    verdict.
    """
    text = args.captions.read_text(encoding="utf-8-sig")
    entries = parse_captions(args.captions, text)
    metadata = parse_ass_metadata(text)
    media = compact_media(args.media) if args.media else None
    narration = compact_media(args.narration) if args.narration else None
    voice_styles = {style.lower() for style in args.voice_style}
    output_language = (args.output_language or "").strip().lower()
    signals: list[dict[str, Any]] = []

    if not entries:
        signals.append(
            signal(
                "caption_parse",
                "No timed caption entries were parsed.",
                level="strong_signal",
                investigate="Check file format, encoding, and whether captions are generated elsewhere.",
            )
        )

    all_caption_text = " ".join(str(entry.get("text") or "") for entry in entries)
    caption_language = language_counts(all_caption_text)
    if output_language in {"en", "eng", "english"} and caption_language["cjk_chars"] > 0:
        signals.append(
            signal(
                "caption_language",
                "Caption text includes CJK characters while English output was requested.",
                evidence=caption_language,
                investigate="Confirm whether these are intentional names/source translations or leaked source/instruction language.",
            )
        )

    # An explicit duration wins; otherwise fall back to the probed media.
    duration = as_float(args.duration)
    if duration is None and media:
        duration = as_float(media.get("duration"))

    playres_x = metadata.get("playres_x")
    playres_y = metadata.get("playres_y")
    media_w = media.get("width") if media else None
    media_h = media.get("height") if media else None
    if media_w and media_h and playres_x and playres_y:
        if playres_x != media_w or playres_y != media_h:
            signals.append(
                signal(
                    "caption_resolution",
                    "ASS PlayRes differs from the final media dimensions.",
                    level="strong_signal",
                    evidence={
                        "playres": f"{playres_x}x{playres_y}",
                        "media": f"{media_w}x{media_h}",
                    },
                    investigate=(
                        "Inspect caption-heavy frames at final size; resize styles or justify "
                        "why effective text remains readable."
                    ),
                )
            )
        for style_name, style in metadata.get("styles", {}).items():
            font_size = as_float(style.get("fontsize"))
            if not font_size:
                continue
            # Font size scaled from script resolution to the media height.
            effective = font_size * float(media_h) / float(playres_y)
            if style_name.lower() in voice_styles and effective < 32:
                signals.append(
                    signal(
                        "caption_readability",
                        "A voice-caption style scales to a small effective font size.",
                        evidence={"style": style_name, "effective_px": round(effective, 1)},
                        investigate="Review the final frame on the target display size.",
                    )
                )
            font_name = str(style.get("fontname") or "").strip().lower()
            outline = as_float(style.get("outline")) or 0.0
            borderstyle = str(style.get("borderstyle") or "")
            if style_name.lower() in voice_styles and font_name in {"arial", "dejavu sans", "sans-serif"}:
                signals.append(
                    signal(
                        "caption_style",
                        "A voice-caption style uses a generic fallback-looking font.",
                        evidence={
                            "style": style_name,
                            "font": style.get("fontname"),
                            "borderstyle": borderstyle,
                            "outline": outline,
                        },
                        investigate=(
                            "Inspect caption-heavy frames for genre fit; choose an intentional "
                            "font/style or document why this fallback is acceptable."
                        ),
                    )
                )
            if style_name.lower() in voice_styles and borderstyle != "3" and outline >= 2.0:
                signals.append(
                    signal(
                        "caption_style",
                        "A voice-caption style appears to rely on a thick outline instead of a designed plate/shadow.",
                        evidence={"style": style_name, "outline": outline, "borderstyle": borderstyle},
                        investigate="Check whether captions look like default subtitles pasted over the video.",
                    )
                )

    previous_end = -1.0
    overlap_count = 0
    end_after_media: list[int] = []
    empty_count = 0
    flagged_terms: list[dict[str, Any]] = []
    normalized_counts: dict[str, int] = {}
    voice_entries: list[dict[str, Any]] = []

    for idx, entry in enumerate(entries):
        start = float(entry["start"])
        end = float(entry["end"])
        text_value = str(entry.get("text") or "")
        if end <= start:
            signals.append(
                signal(
                    "caption_timing",
                    "Caption entry has non-positive duration.",
                    level="strong_signal",
                    evidence={"entry": idx, "start": start, "end": end},
                    investigate="Fix or regenerate the timed-caption source.",
                )
            )
        if start < previous_end - 0.05:
            overlap_count += 1
        previous_end = max(previous_end, end)
        if duration is not None and end > duration + 0.25:
            end_after_media.append(idx)
        if not text_value:
            empty_count += 1
        upper = text_value.upper()
        for term in args.flag_term:
            if term.upper() in upper:
                flagged_terms.append({"entry": idx, "term": term, "text": text_value[:80]})
        normalized = re.sub(r"[^A-Z0-9]+", " ", upper).strip()
        if normalized:
            normalized_counts[normalized] = normalized_counts.get(normalized, 0) + 1
        style = str(entry.get("style") or "")
        # Entries without a style (e.g. SRT) count as voice captions.
        if not style or style.lower() in voice_styles:
            voice_entries.append(entry)

    if overlap_count:
        signals.append(
            signal(
                "caption_timing",
                "Some caption entries overlap previous entries.",
                evidence={"overlap_count": overlap_count},
                investigate="Decide whether overlaps are intentional title/callout layering or stale subtitles.",
            )
        )
    if end_after_media:
        signals.append(
            signal(
                "caption_timing",
                "Some captions extend beyond the media duration.",
                level="strong_signal",
                evidence={"entries": end_after_media[:10], "count": len(end_after_media)},
                investigate="Check whether captions were authored against a different master.",
            )
        )
    if empty_count:
        signals.append(
            signal(
                "caption_text",
                "Some timed entries have no visible text after cleanup.",
                evidence={"count": empty_count},
                investigate="Inspect ASS overrides or blank placeholder entries.",
            )
        )
    if flagged_terms:
        signals.append(
            signal(
                "caption_text",
                "Potential debug or placeholder terms appear in captions.",
                evidence={"matches": flagged_terms[:10], "count": len(flagged_terms)},
                investigate="Confirm whether these terms are intended viewer-facing copy.",
            )
        )
    repeated = [
        {"text": key, "count": count}
        for key, count in normalized_counts.items()
        if count >= 3 and count / max(len(entries), 1) >= 0.5
    ]
    if repeated:
        signals.append(
            signal(
                "caption_text",
                "Persistent repeated caption text may indicate a stuck label.",
                evidence={"repeated": repeated[:10]},
                investigate="Inspect contact sheets for stale subtitles or debug labels.",
            )
        )

    voice_summary: dict[str, Any] | None = None
    if voice_entries:
        voice_first = min(float(entry["start"]) for entry in voice_entries)
        voice_last = max(float(entry["end"]) for entry in voice_entries)
        voice_intervals = [(float(entry["start"]), float(entry["end"])) for entry in voice_entries]
        voice_media_coverage = None
        post_caption_tail = None
        if duration is not None and duration > 0:
            covered = coverage_seconds(voice_intervals, duration)
            voice_media_coverage = covered / duration
            post_caption_tail = max(0.0, duration - voice_last)
        voice_summary = {
            "entries": len(voice_entries),
            "first_start": voice_first,
            "last_end": voice_last,
            "span": voice_last - voice_first,
            "media_coverage_ratio": round(voice_media_coverage, 3) if voice_media_coverage is not None else None,
            "post_caption_tail_seconds": round(post_caption_tail, 3) if post_caption_tail is not None else None,
        }
        if args.expect_authored_coverage and duration is not None and post_caption_tail is not None:
            tail_threshold = max(args.post_caption_tail_signal, duration * 0.12)
            if post_caption_tail > tail_threshold:
                signals.append(
                    signal(
                        "caption_media_coverage",
                        "The planned authored-caption coverage leaves a large gap before media end.",
                        level="strong_signal",
                        evidence={
                            "media_duration": round(duration, 3),
                            "voice_last_end": round(voice_last, 3),
                            "post_caption_tail_seconds": round(post_caption_tail, 3),
                            "media_coverage_ratio": round(voice_media_coverage or 0.0, 3),
                        },
                        investigate=(
                            "Because authored coverage was requested for this doctor run, inspect the tail; "
                            "add authored language coverage or shorten/restructure the ending."
                        ),
                    )
                )
        if (
            voice_media_coverage is not None
            and duration >= 30
            and voice_media_coverage < args.voice_media_coverage_signal
        ):
            signals.append(
                signal(
                    "caption_media_coverage",
                    "Voice-style caption coverage is low relative to the final media duration.",
                    evidence={
                        "media_duration": round(duration, 3),
                        "media_coverage_ratio": round(voice_media_coverage, 3),
                        "threshold": args.voice_media_coverage_signal,
                    },
                    investigate="Check whether the video relies on source-only content where the brief expects authored narration/intertitles.",
                )
            )
        narration_duration = as_float(narration.get("duration")) if narration else None
        if narration_duration is not None:
            if (
                args.expect_authored_coverage
                and duration is not None
                and duration - narration_duration > args.narration_media_tail_signal
            ):
                signals.append(
                    signal(
                        "narration_media_coverage",
                        "Narration audio coverage leaves a large gap before media end in an authored-coverage review.",
                        evidence={
                            "media_duration": round(duration, 3),
                            "narration_duration": round(narration_duration, 3),
                            "post_narration_tail_seconds": round(duration - narration_duration, 3),
                        },
                        investigate="Inspect whether the ending has authored intertitles, intended source-only material, or an unintended coverage gap.",
                    )
                )
            if voice_last > narration_duration + args.voice_after_narration_signal:
                signals.append(
                    signal(
                        "caption_voice_alignment",
                        "Voice-style captions extend well beyond the narration audio.",
                        level="strong_signal",
                        evidence={
                            "voice_last_end": round(voice_last, 3),
                            "narration_duration": round(narration_duration, 3),
                        },
                        investigate=(
                            "Check whether these are narration subtitles or hand-timed story "
                            "summaries; align against the final voice track."
                        ),
                    )
                )
            if narration_duration > 10 and (voice_last - voice_first) < narration_duration * 0.45:
                signals.append(
                    signal(
                        "caption_voice_alignment",
                        "Voice-style caption coverage is much shorter than narration.",
                        evidence={
                            "voice_span": round(voice_last - voice_first, 3),
                            "narration_duration": round(narration_duration, 3),
                        },
                        investigate="Check for missing subtitles or wrong voice-style names.",
                    )
                )
    elif narration:
        signals.append(
            signal(
                "caption_voice_alignment",
                "Narration audio was supplied, but no voice-style caption entries were found.",
                evidence={"voice_styles": sorted(args.voice_style)},
                investigate="Confirm style naming or whether subtitles were authored in another layer.",
            )
        )

    review_dir_summary = None
    if args.review_dir:
        images = []
        if args.review_dir.exists():
            images = [
                item.name
                for item in args.review_dir.iterdir()
                if item.suffix.lower() in {".jpg", ".jpeg", ".png", ".webp"}
            ]
        review_dir_summary = {
            "path": str(args.review_dir),
            "exists": args.review_dir.exists(),
            "images": len(images),
        }
        if not args.review_dir.exists() or not images:
            signals.append(
                signal(
                    "review_assets",
                    "Review-frame directory is missing or has no image frames.",
                    evidence=review_dir_summary,
                    investigate="Generate caption-heavy frames from the exact promoted final.",
                )
            )

    return {
        "doctor": "captions",
        "artifact": str(args.captions),
        "summary": {
            "entries": len(entries),
            "first_start": entries[0]["start"] if entries else None,
            "last_end": entries[-1]["end"] if entries else None,
            "ass_metadata": metadata,
            "media": media,
            "narration": narration,
            "voice_styles": sorted(args.voice_style),
            "output_language": args.output_language,
            "caption_language_counts": caption_language,
            "voice_caption_summary": voice_summary,
            "review_dir": review_dir_summary,
        },
        "signals": signals,
        "agent_instruction": (
            "Use these signals to choose what to inspect next. Do not treat this "
            "report as a pass/fail verdict. Use --expect-authored-coverage only "
            "when the brief expects narration or intertitles to carry the story."
        ),
    }


def load_manifest(path: Path) -> list[dict[str, Any]]:
    """Load a JSON manifest and return its list of source entries.

    Accepts a top-level list, or a dict holding a list under ``sources``,
    ``music_sources`` or ``items`` (checked in that order). Raises ValueError
    for any other shape.
    """
    data = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        for key in ("sources", "music_sources", "items"):
            if isinstance(data.get(key), list):
                return data[key]
    raise ValueError("manifest must be a list or contain sources/music_sources/items")


def get_source_path(item: dict[str, Any]) -> str | None:
    """Return the first non-empty string among the entry's file/path fields."""
    for key in ("local_file", "file", "path", "filename"):
        value = item.get(key)
        if isinstance(value, str) and value:
            return value
    return None


def get_ranges(item: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict members of the entry's first list-valued range field."""
    for key in ("selected_ranges", "used_ranges", "ranges", "clips"):
        value = item.get(key)
        if isinstance(value, list):
            return [v for v in value if isinstance(v, dict)]
    return []


def resolve_path(value: str, root: Path, manifest_dir: Path) -> Path:
    """Resolve *value* against *root*, falling back to the manifest directory.

    Absolute paths are returned unchanged. A relative path is returned under
    *root* when it exists there, otherwise under *manifest_dir* whether or
    not that file exists.
    """
    path = Path(value)
    if path.is_absolute():
        return path
    root_candidate = root / path
    if root_candidate.exists():
        return root_candidate
    return manifest_dir / path

+def range_value(item: dict[str, Any], *names: str) -> float | None: + for name in names: + if name in item: + return as_float(item[name]) + return None + + +def manifest_text_blob(*items: Any) -> str: + parts: list[str] = [] + for item in items: + if item is None: + continue + if isinstance(item, dict): + parts.append(manifest_text_blob(*item.values())) + elif isinstance(item, list): + parts.append(manifest_text_blob(*item)) + else: + parts.append(str(item)) + return " ".join(part for part in parts if part).lower() + + +def has_any_term(text: str, terms: list[str]) -> bool: + return any(term in text for term in terms) + + +def first_manifest_value(item: dict[str, Any], names: list[str]) -> Any: + for name in names: + value = item.get(name) + if value not in (None, ""): + return value + return None + + +def normalize_audio_role(value: Any) -> str | None: + if value is None: + return None + text = str(value).strip().lower().replace("-", "_").replace(" ", "_") + aliases = { + "mute": "silent_or_mute", + "muted": "silent_or_mute", + "silent": "silent_or_mute", + "no_audio": "silent_or_mute", + "ambience": "ambience_keep", + "ambient": "ambience_keep", + "nat_sound": "ambience_keep", + "natural_sound": "ambience_keep", + "dialogue": "dialogue_keep", + "speech": "dialogue_keep", + "voice": "dialogue_keep", + "source_music": "music_only", + "music": "music_only", + "mixed": "mixed_music_speech", + "mixed_speech_music": "mixed_music_speech", + "speech_music": "mixed_music_speech", + } + return aliases.get(text, text) + + +def source_audio_metadata(item: dict[str, Any], selected: dict[str, Any] | None = None) -> dict[str, Any]: + selected = selected or {} + role = normalize_audio_role( + first_manifest_value(selected, AUDIO_ROLE_FIELDS) + or first_manifest_value(item, AUDIO_ROLE_FIELDS) + ) + policy = ( + first_manifest_value(selected, AUDIO_OVERLAP_POLICY_FIELDS) + or first_manifest_value(item, AUDIO_OVERLAP_POLICY_FIELDS) + ) + speech_needed = 
first_manifest_value(selected, ["speech_needed", "dialogue_needed", "voice_needed"]) + source_music_present = first_manifest_value( + selected, + ["source_music_present", "music_present", "bgm_present"], + ) or first_manifest_value(item, ["source_music_present", "music_present", "bgm_present"]) + separation_tool = first_manifest_value(selected, ["separation_tool", "source_separation_tool"]) or first_manifest_value( + item, + ["separation_tool", "source_separation_tool"], + ) + return { + "audio_role": role, + "overlap_policy": str(policy).strip() if policy not in (None, "") else None, + "speech_needed": speech_needed, + "source_music_present": source_music_present, + "separation_tool": separation_tool, + } + + +def parse_time_ranges_from_text(text: str) -> list[tuple[float, float]]: + ranges: list[tuple[float, float]] = [] + for match in re.finditer( + r"(? start: + ranges.append((start, end)) + return ranges + + +def selected_timeline_range(selected: dict[str, Any]) -> tuple[float, float] | None: + start = range_value( + selected, + "timeline_start", + "final_start", + "edit_start", + "out_start", + "placement_start", + ) + end = range_value( + selected, + "timeline_end", + "final_end", + "edit_end", + "out_end", + "placement_end", + ) + if start is not None and end is not None and end > start: + return start, end + start = range_value(selected, "timeline_start", "final_start", "edit_start", "out_start", "placement_start") + duration = range_value(selected, "duration", "timeline_duration", "edit_duration") + if start is not None and duration is not None and duration > 0: + return start, start + duration + return None + + +def sources_doctor(args: argparse.Namespace) -> dict[str, Any]: + items = load_manifest(args.manifest) + entries: list[dict[str, Any]] = [] + signals: list[dict[str, Any]] = [] + + for idx, item in enumerate(items): + source_id = str(item.get("id") or item.get("name") or f"source_{idx}") + local_value = get_source_path(item) + entry: dict[str, 
Any] = {"id": source_id} + if not local_value: + signals.append( + signal( + "source_manifest", + "Source entry has no local file/path field.", + level="strong_signal", + evidence={"id": source_id}, + investigate="Record the downloaded or generated file used by the edit.", + ) + ) + entries.append(entry) + continue + + path = resolve_path(local_value, args.root, args.manifest.parent) + entry["path"] = str(path) + if not path.exists(): + signals.append( + signal( + "source_manifest", + "Referenced source file does not exist.", + level="strong_signal", + evidence={"id": source_id, "path": str(path)}, + investigate="Check stale paths or whether the file was generated elsewhere.", + ) + ) + entries.append(entry) + continue + + try: + compact = compact_media(path) + except Exception as exc: # noqa: BLE001 - diagnostic report + compact = {"probe_error": str(exc)} + signals.append( + signal( + "source_probe", + "Source media probe did not complete.", + evidence={"id": source_id, "error": str(exc)}, + investigate="Probe manually with ffprobe and check file integrity.", + ) + ) + if compact: + entry.update(compact) + width = compact.get("width") + height = compact.get("height") + if width is not None and height is not None and (width < 1280 or height < 720): + signals.append( + signal( + "source_quality", + "Source resolution is below 720p.", + evidence={"id": source_id, "resolution": f"{width}x{height}"}, + investigate="Inspect whether this source can carry the intended shot role.", + ) + ) + + if not (item.get("platform_url") or item.get("url") or item.get("source_url")): + signals.append( + signal( + "source_provenance", + "Source entry lacks platform/source URL evidence.", + evidence={"id": source_id}, + investigate="Record origin URL or explain why the asset is local/generated.", + ) + ) + if not item.get("evidence_level"): + signals.append( + signal( + "source_provenance", + "Source entry lacks evidence_level.", + evidence={"id": source_id}, + investigate="Classify 
direct platform, verified transport, weak mirror, or generated/local.", + ) + ) + if not (item.get("license") or item.get("rights_notes")): + signals.append( + signal( + "source_rights", + "Source entry lacks license or rights notes.", + evidence={"id": source_id}, + investigate="Add usable rights notes before promotion or ask the user.", + ) + ) + + ranges = get_ranges(item) + entry["selected_ranges"] = len(ranges) + entry["audio_streams"] = entry.get("audio_streams", 0) + if not ranges: + signals.append( + signal( + "source_selection", + "Source entry has no selected ranges.", + evidence={"id": source_id}, + investigate="Record which ranges are actually used and their story roles.", + ) + ) + duration = as_float(entry.get("duration")) + missing_risk_note_ranges: list[dict[str, Any]] = [] + unmitigated_source_text_ranges: list[dict[str, Any]] = [] + missing_audio_role_ranges: list[dict[str, Any]] = [] + risky_audio_policy_ranges: list[dict[str, Any]] = [] + for ridx, selected in enumerate(ranges): + start = range_value(selected, "start", "in", "start_time") + end = range_value(selected, "end", "out", "end_time") + if start is None or end is None: + signals.append( + signal( + "source_selection", + "Selected range lacks numeric start/end.", + evidence={"id": source_id, "range": ridx}, + investigate="Normalize selected range metadata.", + ) + ) + continue + if end <= start: + signals.append( + signal( + "source_selection", + "Selected range has end <= start.", + level="strong_signal", + evidence={"id": source_id, "range": ridx, "start": start, "end": end}, + investigate="Fix range timing before using this source.", + ) + ) + if duration is not None and end > duration + 0.25: + signals.append( + signal( + "source_selection", + "Selected range ends after source duration.", + level="strong_signal", + evidence={ + "id": source_id, + "range": ridx, + "end": end, + "duration": duration, + }, + investigate="Check whether the manifest references the wrong source file.", + ) 
+ ) + if not (selected.get("role") or selected.get("shot_role")): + signals.append( + signal( + "source_selection", + "Selected range lacks a story role.", + evidence={"id": source_id, "range": ridx}, + investigate="Explain why this shot belongs in the edit.", + ) + ) + if not any( + key in selected + for key in ( + "risk", + "risks", + "quality_notes", + "source_text", + "visible_text", + "watermark", + "subtitles", + ) + ): + missing_risk_note_ranges.append({"range": ridx, "start": start, "end": end}) + risk_text = manifest_text_blob( + selected.get("risk"), + selected.get("risks"), + selected.get("quality_notes"), + selected.get("source_text"), + selected.get("watermark"), + selected.get("subtitles"), + selected.get("visible_text"), + item.get("quality_caveat"), + item.get("source_text"), + item.get("watermark"), + item.get("subtitles"), + item.get("visible_text"), + ) + if has_any_term(risk_text, SOURCE_TEXT_TERMS) and not has_any_term( + risk_text, SOURCE_TEXT_MITIGATION_TERMS + ): + unmitigated_source_text_ranges.append({"range": ridx, "start": start, "end": end}) + audio_meta = source_audio_metadata(item, selected) + audio_role = audio_meta["audio_role"] + policy_text = manifest_text_blob(audio_meta["overlap_policy"], selected, item) + if entry.get("audio_streams") and not audio_role: + missing_audio_role_ranges.append({"range": ridx, "start": start, "end": end}) + elif audio_role and audio_role not in AUDIO_ROLE_VALUES: + signals.append( + signal( + "source_audio_role", + "Selected range uses a non-standard audio role.", + evidence={"id": source_id, "range": ridx, "audio_role": audio_role}, + investigate=( + "Normalize to silent_or_mute, ambience_keep, dialogue_keep, music_only, " + "mixed_music_speech, needs_separation, or unknown." 
+ ), + ) + ) + if audio_role in {"dialogue_keep", "music_only", "mixed_music_speech", "needs_separation"} and not audio_meta[ + "overlap_policy" + ]: + risky_audio_policy_ranges.append( + {"range": ridx, "start": start, "end": end, "audio_role": audio_role} + ) + if audio_role in {"mixed_music_speech", "needs_separation"} and not ( + audio_meta["separation_tool"] or has_any_term(policy_text, SOURCE_AUDIO_RESOLUTION_TERMS) + ): + signals.append( + signal( + "source_audio_overlap", + "Mixed speech/music source audio lacks a separation or replacement plan.", + level="strong_signal", + evidence={"id": source_id, "range": ridx, "audio_role": audio_role}, + investigate=( + "Use source separation, keep the source speech as foreground, mute the source, " + "or choose a cleaner range before adding narration or a new music bed." + ), + ) + ) + if has_any_term(policy_text, SOURCE_AUDIO_CONFLICT_TERMS) and not has_any_term( + policy_text, SOURCE_AUDIO_RESOLUTION_TERMS + ): + signals.append( + signal( + "source_audio_overlap", + "Source-audio text suggests overlap with music or narration without a clear mitigation.", + evidence={"id": source_id, "range": ridx}, + investigate=( + "Decide whether source audio is muted, ducked, isolated, source-only, or truly ambience-only." + ), + ) + ) + if missing_risk_note_ranges: + signals.append( + signal( + "source_range_review", + "Some selected ranges lack source-text/watermark/quality risk notes.", + evidence={ + "id": source_id, + "count": len(missing_risk_note_ranges), + "sample": missing_risk_note_ranges[:8], + }, + investigate=( + "Review selected-range contact sheets for hardcoded subtitles, broadcast graphics, " + "watermarks, platform UI, and caption-safe-zone risks." 
+ ), + ) + ) + if unmitigated_source_text_ranges: + signals.append( + signal( + "source_text_occupancy", + "Some selected ranges mention source subtitles, broadcast graphics, watermark, or platform UI without mitigation notes.", + evidence={ + "id": source_id, + "count": len(unmitigated_source_text_ranges), + "sample": unmitigated_source_text_ranges[:8], + }, + investigate="Record safe zone, crop/mask/blur, replacement, or reason the source text is acceptable.", + ) + ) + if missing_audio_role_ranges: + signals.append( + signal( + "source_audio_role", + "Some selected ranges with audio streams lack source-audio role metadata.", + evidence={ + "id": source_id, + "count": len(missing_audio_role_ranges), + "sample": missing_audio_role_ranges[:8], + }, + investigate=( + "Classify each selected range as silent_or_mute, ambience_keep, dialogue_keep, " + "music_only, mixed_music_speech, or needs_separation before mixing." + ), + ) + ) + if risky_audio_policy_ranges: + signals.append( + signal( + "source_audio_overlap", + "Some selected ranges have foreground speech/music roles but no overlap policy.", + evidence={ + "id": source_id, + "count": len(risky_audio_policy_ranges), + "sample": risky_audio_policy_ranges[:8], + }, + investigate=( + "Record whether new narration/music is absent, ducked, source-only, muted, or separated in those windows." + ), + ) + ) + entries.append(entry) + + return { + "doctor": "sources", + "artifact": str(args.manifest), + "summary": {"entries": entries, "root": str(args.root)}, + "signals": signals, + "agent_instruction": ( + "Use this as a provenance and source-selection investigation, not as a license verdict." 
+ ), + } + + +def calc_time(duration: float, expr: str) -> float: + if expr == "first": + return 0.5 if duration > 1 else 0 + if expr == "middle": + return duration / 2 + if expr == "last": + return duration - 0.25 if duration > 0.5 else 0 + if expr == "tail_start": + return max(duration - 10, 0) + if expr == "dense_start": + return duration * 0.8 + if expr == "tail_len": + return 10 if duration > 10 else duration + if expr == "dense_len": + return duration * 0.2 if duration > 5 else duration + return 0 + + +def frames_doctor(args: argparse.Namespace) -> dict[str, Any]: + ensure_tool("ffmpeg") + ensure_tool("ffprobe") + args.out_dir.mkdir(parents=True, exist_ok=True) + media = compact_media(args.media) + duration = as_float(media.get("duration")) if media else None + if duration is None or duration <= 0: + raise RuntimeError("could not read positive media duration") + + times = { + "first": calc_time(duration, "first"), + "middle": calc_time(duration, "middle"), + "last": calc_time(duration, "last"), + "tail_start": calc_time(duration, "tail_start"), + "tail_len": calc_time(duration, "tail_len"), + "dense_start": calc_time(duration, "dense_start"), + "dense_len": calc_time(duration, "dense_len"), + } + + outputs: dict[str, str] = {} + for name in ("first", "middle", "last"): + out = args.out_dir / f"{name}.jpg" + run_checked( + [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + "-ss", + f"{times[name]:.3f}", + "-i", + str(args.media), + "-frames:v", + "1", + "-q:v", + "2", + str(out), + ] + ) + outputs[name] = str(out) + + contact = args.out_dir / f"contact_every_{args.interval}s.jpg" + run_checked( + [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + "-i", + str(args.media), + "-vf", + f"fps=1/{args.interval},scale=320:-1:flags=lanczos,tile=5x5", + "-frames:v", + "1", + str(contact), + ] + ) + outputs["contact"] = str(contact) + + tail = args.out_dir / "tail_10s_contact.jpg" + run_checked( + [ + "ffmpeg", + "-hide_banner", + 
"-loglevel", + "error", + "-y", + "-ss", + f"{times['tail_start']:.3f}", + "-i", + str(args.media), + "-t", + f"{times['tail_len']:.3f}", + "-vf", + "fps=1,scale=320:-1:flags=lanczos,tile=5x5", + "-frames:v", + "1", + str(tail), + ] + ) + outputs["tail_10s"] = str(tail) + + final_act_fraction = max(0.05, min(0.5, float(args.final_act_fraction))) + times["dense_start"] = duration * (1.0 - final_act_fraction) + times["dense_len"] = duration * final_act_fraction + + dense = args.out_dir / "final_act_dense.jpg" + run_checked( + [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + "-ss", + f"{times['dense_start']:.3f}", + "-i", + str(args.media), + "-t", + f"{times['dense_len']:.3f}", + "-vf", + "fps=2,scale=320:-1:flags=lanczos,tile=5x5", + "-frames:v", + "1", + str(dense), + ] + ) + outputs["final_act_dense"] = str(dense) + + diversity: dict[str, Any] = {} + signals = [ + signal( + "manual_review", + "Review frames were generated from the exact media path.", + level="info", + investigate=( + "Inspect these images for topic clarity, subtitle collisions, unreadable text, " + "black/frozen frames, wrong crops, and final-act payoff." + ), + ), + signal( + "final_act_payoff", + "A dense final-act contact sheet was generated for ending/payoff review.", + level="info", + evidence={ + "image": str(dense), + "start": round(times["dense_start"], 3), + "fraction": final_act_fraction, + }, + investigate=( + "Decide whether the ending is a climax, payoff, useful recap, hook, or deliberate unresolved ending " + "appropriate to the genre; motion alone is not enough." 
+ ), + ), + ] + try: + overview_hashes = sampled_frame_hashes( + args.media, + start=0.0, + length=duration, + fps=f"1/{args.interval}", + ) + final_hashes = sampled_frame_hashes( + args.media, + start=times["dense_start"], + length=times["dense_len"], + fps="2", + ) + overview_diversity = visual_diversity_summary(overview_hashes) + final_diversity = visual_diversity_summary(final_hashes) + diversity = {"overview": overview_diversity, "final_act": final_diversity} + if ( + overview_diversity["samples"] >= 8 + and overview_diversity["unique_ratio"] is not None + and overview_diversity["unique_ratio"] < 0.45 + ): + signals.append( + signal( + "visual_diversity", + "Sampled frames have low perceptual diversity.", + evidence=overview_diversity, + investigate="Inspect the contact sheet for repeated screens/cards or accidental static structure.", + ) + ) + if ( + final_diversity["samples"] >= 8 + and final_diversity["unique_ratio"] is not None + and final_diversity["unique_ratio"] < 0.45 + ): + signals.append( + signal( + "final_act_visual_diversity", + "The final-act sample has low perceptual diversity.", + evidence=final_diversity, + investigate="Check whether the ending is an intentional hold/payoff or accidental repetition.", + ) + ) + except Exception as exc: # noqa: BLE001 - diagnostic signal only + signals.append( + signal( + "visual_diversity", + "Perceptual diversity sampling did not complete.", + evidence={"error": str(exc)}, + investigate="Use the generated contact sheets for manual repetition review.", + ) + ) + + return { + "doctor": "frames", + "artifact": str(args.media), + "summary": { + "media": media, + "out_dir": str(args.out_dir), + "times": {key: round(value, 3) for key, value in times.items()}, + "outputs": outputs, + "visual_diversity": diversity, + }, + "signals": signals, + "agent_instruction": "The frames are evidence for visual review; inspect them directly.", + } + + +def tail_doctor(args: argparse.Namespace) -> dict[str, Any]: + 
ensure_tool("ffmpeg") + ensure_tool("ffprobe") + args.out_dir.mkdir(parents=True, exist_ok=True) + media = compact_media(args.media) + duration = as_float(media.get("duration")) if media else None + if duration is None or duration <= 0: + raise RuntimeError("could not read positive media duration") + start = max(duration - args.tail_seconds, 0) + length = duration - start + + black_log = args.out_dir / "blackdetect.log" + freeze_log = args.out_dir / "freezedetect.log" + framemd5 = args.out_dir / "tail_framemd5.txt" + contact = args.out_dir / "tail_contact.jpg" + + proc = run( + [ + "ffmpeg", + "-hide_banner", + "-ss", + f"{start:.3f}", + "-i", + str(args.media), + "-t", + f"{length:.3f}", + "-vf", + "blackdetect=d=0.15:pic_th=0.98", + "-an", + "-f", + "null", + "-", + ] + ) + black_log.write_text(proc.stdout + proc.stderr, encoding="utf-8") + + proc = run( + [ + "ffmpeg", + "-hide_banner", + "-ss", + f"{start:.3f}", + "-i", + str(args.media), + "-t", + f"{length:.3f}", + "-vf", + "freezedetect=n=0.003:d=1.0", + "-an", + "-f", + "null", + "-", + ] + ) + freeze_log.write_text(proc.stdout + proc.stderr, encoding="utf-8") + + run_checked( + [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-ss", + f"{start:.3f}", + "-i", + str(args.media), + "-t", + f"{length:.3f}", + "-an", + "-vf", + "fps=1", + "-f", + "framemd5", + str(framemd5), + ] + ) + run_checked( + [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + "-ss", + f"{start:.3f}", + "-i", + str(args.media), + "-t", + f"{length:.3f}", + "-vf", + "fps=1,scale=320:-1:flags=lanczos,tile=5x5", + "-frames:v", + "1", + str(contact), + ] + ) + + hashes = [ + line.split(",")[-1].strip() + for line in framemd5.read_text(encoding="utf-8").splitlines() + if line and not line.startswith("#") + ] + signals: list[dict[str, Any]] = [] + black_text = black_log.read_text(encoding="utf-8", errors="replace") + freeze_text = freeze_log.read_text(encoding="utf-8", errors="replace") + if "black_start" in 
black_text: + signals.append( + signal( + "tail_visual", + "Blackdetect reported black frames in the inspected tail.", + evidence={"log": str(black_log)}, + investigate="Open the tail contact sheet and decide whether the black segment is intentional.", + ) + ) + if "freeze_start" in freeze_text: + signals.append( + signal( + "tail_visual", + "Freezedetect reported a frozen segment in the inspected tail.", + evidence={"log": str(freeze_log)}, + investigate="Check whether the final shot is intentionally held or an accidental freeze.", + ) + ) + if len(hashes) >= 3 and len(set(hashes)) <= 1: + signals.append( + signal( + "tail_visual", + "Sampled tail frame hashes are static.", + evidence={"sampled_frames": len(hashes), "unique_hashes": len(set(hashes))}, + investigate="Inspect whether the video has a frozen or audio-only ending.", + ) + ) + + return { + "doctor": "tail", + "artifact": str(args.media), + "summary": { + "media": media, + "tail_start": round(start, 3), + "tail_length": round(length, 3), + "sampled_frames": len(hashes), + "unique_frame_hashes": len(set(hashes)), + "logs": { + "blackdetect": str(black_log), + "freezedetect": str(freeze_log), + "framemd5": str(framemd5), + "contact": str(contact), + }, + }, + "signals": signals, + "agent_instruction": "Use tail signals to guide visual/audio inspection; do not use them as verdicts.", + } + + +def lint_doctor(args: argparse.Namespace) -> dict[str, Any]: + signals: list[dict[str, Any]] = [] + logs: list[dict[str, Any]] = [] + total_warnings = 0 + total_errors = 0 + warning_lines: list[str] = [] + error_lines: list[str] = [] + + for path in args.logs: + text = path.read_text(encoding="utf-8", errors="replace") + lower = text.lower() + lines = text.splitlines() + warning_sample = [ + line.strip() + for line in lines + if ("⚠" in line or "warning" in line.lower()) + and not ( + "⚠" not in line + and re.search(r"\b\d+\s+warning(?:s|\(s\))?\b", line.lower()) + ) + ] + error_sample = [ + line.strip() + for line in 
lines + if ("✖" in line or "error" in line.lower()) + and not ( + "✖" not in line + and re.search(r"\b\d+\s+error(?:s|\(s\))?\b", line.lower()) + ) + ] + warnings = len(warning_sample) + errors = len(error_sample) + total_warnings += warnings + total_errors += errors + warning_lines.extend(warning_sample) + error_lines.extend(error_sample) + logs.append({"path": str(path), "warnings": warnings, "errors": errors}) + if "continuing render despite lint issues" in lower: + signals.append( + signal( + "lint_disposition", + "Render continued despite lint issues.", + level="strong_signal", + evidence={"log": str(path)}, + investigate="Record why continuing was acceptable, or revise/split/fix the composition and rerun lint.", + ) + ) + for term in ("composition_file_too_large", "timeline_track_too_dense"): + if term in lower: + signals.append( + signal( + "lint_structure", + f"Lint reported {term}.", + evidence={"log": str(path), "term": term}, + investigate="Split dense compositions or document why the warning is acceptable for this render.", + ) + ) + + disposition = None + if args.disposition: + if args.disposition.exists(): + disposition_text = args.disposition.read_text(encoding="utf-8", errors="replace").lower() + disposition = {"path": str(args.disposition), "exists": True} + if not any(term in disposition_text for term in ("fixed", "revised", "accepted", "intentional", "blocked")): + signals.append( + signal( + "lint_disposition", + "Lint disposition file exists but does not clearly say fixed, revised, accepted, intentional, or blocked.", + evidence=disposition, + investigate="Write a concise warning-by-warning disposition before promotion.", + ) + ) + else: + disposition = {"path": str(args.disposition), "exists": False} + if (total_warnings or total_errors) and (not disposition or not disposition.get("exists")): + signals.append( + signal( + "lint_disposition", + "Lint warnings/errors exist without a disposition file.", + level="strong_signal", + 
evidence={"warnings": total_warnings, "errors": total_errors}, + investigate="Fix the lint issues or save a warning disposition before promoting the final.", + ) + ) + if total_errors: + signals.append( + signal( + "lint_errors", + "Lint reported errors.", + level="strong_signal", + evidence={"errors": total_errors, "sample": error_lines[:8]}, + investigate="Treat lint errors as blockers unless the renderer/linter bug is documented.", + ) + ) + elif total_warnings: + signals.append( + signal( + "lint_warnings", + "Lint reported warnings.", + evidence={"warnings": total_warnings, "sample": warning_lines[:8]}, + investigate="Warnings are not automatic failures, but they must be inspected and dispositioned.", + ) + ) + + return { + "doctor": "lint", + "artifact": ", ".join(str(path) for path in args.logs), + "summary": { + "logs": logs, + "total_warnings": total_warnings, + "total_errors": total_errors, + "disposition": disposition, + }, + "signals": signals, + "agent_instruction": ( + "Use lint signals to decide whether to fix, split, accept with reason, or block; " + "do not silently continue from lint warnings." 
+ ), + } + + +def parse_volumedetect(text: str) -> dict[str, float | None]: + result: dict[str, float | None] = {"mean_volume_db": None, "max_volume_db": None} + mean = re.search(r"mean_volume:\s*(-?\d+(?:\.\d+)?)\s*dB", text) + peak = re.search(r"max_volume:\s*(-?\d+(?:\.\d+)?)\s*dB", text) + if mean: + result["mean_volume_db"] = float(mean.group(1)) + if peak: + result["max_volume_db"] = float(peak.group(1)) + return result + + +def parse_ebur128(text: str) -> dict[str, float | None]: + result: dict[str, float | None] = { + "integrated_lufs": None, + "lra_lu": None, + "true_peak_dbfs": None, + } + summaries = [match.start() for match in re.finditer(r"Integrated loudness:", text)] + if summaries: + tail = text[summaries[-1] :] + else: + tail = text + integrated = re.search(r"I:\s*(-?\d+(?:\.\d+)?)\s*LUFS", tail) + lra = re.search(r"LRA:\s*(-?\d+(?:\.\d+)?)\s*LU", tail) + true_peak = re.search(r"Peak:\s*(-?\d+(?:\.\d+)?)\s*dBFS", tail) + if integrated: + result["integrated_lufs"] = float(integrated.group(1)) + if lra: + result["lra_lu"] = float(lra.group(1)) + if true_peak: + result["true_peak_dbfs"] = float(true_peak.group(1)) + return result + + +PROCEDURAL_AUDIO_PATTERNS = [ + (r"\bnp\.sin\b|\bnumpy\b", "numpy_synthesis"), + (r"\brng\.normal\b|\bstandard_normal\b|\brandom\.normal\b", "random_noise"), + (r"\bAudioArrayClip\b", "audio_array_clip"), + (r"\bwave\.open\b", "wave_writer"), + (r"\banoisesrc\b|\baevalsrc\b", "ffmpeg_synthetic_source"), + (r"\bprocedural (?:score|music|audio|bed|stem)", "procedural_claim"), + (r"\briser\b|\bsub drop\b|\bimpact hit\b|\bui tick\b", "sfx_claim"), +] + + +def scan_audio_artifacts(paths: list[Path], *, max_bytes: int = 512_000) -> dict[str, Any]: + files: list[dict[str, Any]] = [] + totals: dict[str, int] = {} + suffixes = {".py", ".md", ".json", ".html", ".txt", ".log", ".mlt"} + expanded: list[Path] = [] + for path in paths: + if path.is_dir(): + for child in path.rglob("*"): + if child.is_file() and child.suffix.lower() in 
suffixes: + expanded.append(child) + elif path.is_file(): + expanded.append(path) + for path in sorted(set(expanded)): + try: + if path.stat().st_size > max_bytes: + continue + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + matches: dict[str, int] = {} + for pattern, name in PROCEDURAL_AUDIO_PATTERNS: + count = len(re.findall(pattern, text, flags=re.IGNORECASE)) + if count: + matches[name] = count + totals[name] = totals.get(name, 0) + count + if matches: + files.append({"path": str(path), "matches": matches}) + return {"files": files[:80], "total_files_with_matches": len(files), "totals": totals} + + +def ffmpeg_filter_report(media: Path, audio_filter: str) -> subprocess.CompletedProcess[str]: + return run( + [ + "ffmpeg", + "-hide_banner", + "-i", + str(media), + "-vn", + "-af", + audio_filter, + "-f", + "null", + "-", + ] + ) + + +def export_audio_snippet(media: Path, out: Path, start: float, duration: float) -> None: + run_checked( + [ + "ffmpeg", + "-hide_banner", + "-loglevel", + "error", + "-y", + "-ss", + f"{max(0.0, start):.3f}", + "-i", + str(media), + "-t", + f"{max(0.1, duration):.3f}", + "-vn", + "-ac", + "2", + "-ar", + "48000", + "-c:a", + "pcm_s16le", + str(out), + ] + ) + + +def manifest_audio_overlap_review( + manifests: list[Path], + *, + root: Path, +) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[tuple[float, float]]]: + signals: list[dict[str, Any]] = [] + entries: list[dict[str, Any]] = [] + snippet_ranges: list[tuple[float, float]] = [] + + for manifest in manifests: + try: + items = load_manifest(manifest) + except Exception as exc: # noqa: BLE001 - diagnostic report + signals.append( + signal( + "source_audio_manifest", + "Audio overlap manifest could not be read.", + evidence={"manifest": str(manifest), "error": str(exc)}, + investigate="Fix JSON shape or pass the intended sources/music manifest.", + ) + ) + continue + + for idx, item in enumerate(items): + source_id = 
str(item.get("id") or item.get("name") or f"source_{idx}") + role_value = str(item.get("role") or item.get("type") or "").lower() + role_text = manifest_text_blob(item.get("role"), item.get("type")) + local_value = get_source_path(item) + compact: dict[str, Any] | None = None + if local_value: + path = resolve_path(local_value, root, manifest.parent) + if path.exists(): + try: + compact = compact_media(path) + except Exception: # noqa: BLE001 - optional evidence only + compact = None + + ranges = get_ranges(item) + if not ranges: + ranges = [{}] + for ridx, selected in enumerate(ranges): + audio_meta = source_audio_metadata(item, selected) + text_blob = manifest_text_blob(item, selected) + timeline = selected_timeline_range(selected) + if timeline is None: + parsed = parse_time_ranges_from_text(text_blob) + timeline = parsed[0] if parsed else None + + is_source_audio = ( + role_value.startswith("source") + or "source_audio" in role_text + or "ambience" in role_text + or "dialogue" in role_text + or bool(audio_meta["audio_role"]) + ) + has_overlap_smell = has_any_term(text_blob, SOURCE_AUDIO_CONFLICT_TERMS) + if is_source_audio or has_overlap_smell: + entries.append( + { + "manifest": str(manifest), + "id": source_id, + "range": ridx if selected else None, + "role": item.get("role"), + "audio_role": audio_meta["audio_role"], + "overlap_policy": audio_meta["overlap_policy"], + "timeline_range": list(timeline) if timeline else None, + "audio_streams": compact.get("audio_streams") if compact else None, + } + ) + if timeline and (is_source_audio or has_overlap_smell): + snippet_ranges.append(timeline) + if compact and compact.get("audio_streams") and is_source_audio and not audio_meta["audio_role"]: + signals.append( + signal( + "source_audio_role", + "Manifest source-audio entry lacks audio_role metadata.", + evidence={"manifest": str(manifest), "id": source_id}, + investigate=( + "Classify it before mixing: silent_or_mute, ambience_keep, dialogue_keep, " + 
"music_only, mixed_music_speech, or needs_separation." + ), + ) + ) + + if audio_meta["audio_role"] in {"music_only", "dialogue_keep", "mixed_music_speech", "needs_separation"} and not audio_meta[ + "overlap_policy" + ]: + signals.append( + signal( + "source_audio_overlap", + "Source audio has a foreground speech/music role but no overlap policy.", + evidence={ + "manifest": str(manifest), + "id": source_id, + "audio_role": audio_meta["audio_role"], + "timeline_range": list(timeline) if timeline else None, + }, + investigate=( + "Decide whether source audio is source-only, muted, ducked, separated, " + "or kept as foreground while new narration/music exits." + ), + ) + ) + + if has_overlap_smell and not has_any_term( + text_blob, + SOURCE_AUDIO_RESOLUTION_TERMS, + ): + signals.append( + signal( + "source_audio_overlap", + "Manifest text suggests source audio is mixed under music or narration without a mitigation.", + evidence={ + "manifest": str(manifest), + "id": source_id, + "timeline_range": list(timeline) if timeline else None, + }, + investigate=( + "Verify this is true ambience with no music/speech, or revise to mute, duck, separate, " + "or keep source audio as the only foreground audio." 
+ ), + ) + ) + + return entries, signals, snippet_ranges + + +def sound_design_overlap_review(paths: list[Path]) -> tuple[list[dict[str, Any]], list[tuple[float, float]]]: + signals: list[dict[str, Any]] = [] + snippet_ranges: list[tuple[float, float]] = [] + for path in paths: + if not path.exists(): + signals.append( + signal( + "sound_design", + "Referenced sound design file does not exist.", + evidence={"path": str(path)}, + investigate="Pass the current sound_design.md for audio-overlap review.", + ) + ) + continue + text = path.read_text(encoding="utf-8", errors="replace") + lower = text.lower() + for line in lower.splitlines(): + if has_any_term(line, SOURCE_AUDIO_CONFLICT_TERMS) or ( + "source audio" in line and ("music" in line or "narration" in line or "dialogue" in line) + ): + snippet_ranges.extend(parse_time_ranges_from_text(line)) + if "source audio policy" not in lower and ("source audio" in lower or "source-audio" in lower): + signals.append( + signal( + "source_audio_policy", + "Sound design mentions source audio but lacks a clear Source Audio Policy section/table.", + evidence={"path": str(path)}, + investigate=( + "Add a table with time range, source file, audio_role, keep reason, overlap policy, processing, and review snippet." + ), + ) + ) + if has_any_term(lower, SOURCE_AUDIO_CONFLICT_TERMS) and not has_any_term(lower, SOURCE_AUDIO_RESOLUTION_TERMS): + signals.append( + signal( + "source_audio_overlap", + "Sound design suggests source audio is layered under music or narration without clear mitigation.", + evidence={"path": str(path)}, + investigate=( + "Do not stack source music/speech with new music/narration. Mute, duck, isolate, or make source audio foreground." 
+ ), + ) + ) + return signals, snippet_ranges + + +def audio_doctor(args: argparse.Namespace) -> dict[str, Any]: + ensure_tool("ffmpeg") + ensure_tool("ffprobe") + raw = run_ffprobe(args.media) + summary = media_summary(args.media, raw) + signals: list[dict[str, Any]] = [] + logs: dict[str, str] = {} + snippets: dict[str, str] = {} + audio_streams = summary.get("audio_streams") or [] + duration = as_float(summary.get("duration")) or 0.0 + if not audio_streams: + signals.append( + signal( + "audio_stream", + "No audio stream found.", + level="strong_signal", + investigate="Confirm whether the brief allowed a silent video; otherwise remux or rebuild audio.", + ) + ) + return { + "doctor": "audio", + "artifact": str(args.media), + "summary": {"media": summary, "logs": logs, "snippets": snippets}, + "signals": signals, + "agent_instruction": "No audio was available to investigate.", + } + + out_dir = args.out_dir + if out_dir: + out_dir.mkdir(parents=True, exist_ok=True) + + vol_proc = ffmpeg_filter_report(args.media, "volumedetect") + full_volume = parse_volumedetect(vol_proc.stderr + vol_proc.stdout) + if out_dir: + log = out_dir / "audio_volumedetect.log" + log.write_text(vol_proc.stdout + vol_proc.stderr, encoding="utf-8") + logs["volumedetect"] = str(log) + + high_proc = ffmpeg_filter_report(args.media, "highpass=f=6000,volumedetect") + high_volume = parse_volumedetect(high_proc.stderr + high_proc.stdout) + if out_dir: + log = out_dir / "audio_highpass_6000_volumedetect.log" + log.write_text(high_proc.stdout + high_proc.stderr, encoding="utf-8") + logs["highpass_6000_volumedetect"] = str(log) + + low_proc = ffmpeg_filter_report(args.media, "lowpass=f=140,volumedetect") + low_volume = parse_volumedetect(low_proc.stderr + low_proc.stdout) + if out_dir: + log = out_dir / "audio_lowpass_140_volumedetect.log" + log.write_text(low_proc.stdout + low_proc.stderr, encoding="utf-8") + logs["lowpass_140_volumedetect"] = str(log) + + ebu_proc = 
ffmpeg_filter_report(args.media, "ebur128=peak=true") + loudness = parse_ebur128(ebu_proc.stderr + ebu_proc.stdout) + if out_dir: + log = out_dir / "audio_ebur128.log" + log.write_text(ebu_proc.stdout + ebu_proc.stderr, encoding="utf-8") + logs["ebur128"] = str(log) + + high_delta = None + if full_volume["mean_volume_db"] is not None and high_volume["mean_volume_db"] is not None: + high_delta = high_volume["mean_volume_db"] - full_volume["mean_volume_db"] + low_delta = None + if full_volume["mean_volume_db"] is not None and low_volume["mean_volume_db"] is not None: + low_delta = low_volume["mean_volume_db"] - full_volume["mean_volume_db"] + + if full_volume["max_volume_db"] is not None and full_volume["max_volume_db"] > -1.0: + signals.append( + signal( + "audio_headroom", + "Peak level is very close to clipping.", + evidence={"max_volume_db": full_volume["max_volume_db"]}, + investigate="Listen around loud hits and consider lowering/limiting the mix with more headroom.", + ) + ) + if loudness["integrated_lufs"] is not None and not (-28 <= loudness["integrated_lufs"] <= -12): + signals.append( + signal( + "audio_loudness", + "Integrated loudness is outside a broad review range for edited video.", + evidence={"integrated_lufs": loudness["integrated_lufs"]}, + investigate="Check whether the target platform/genre justifies this level.", + ) + ) + if loudness["lra_lu"] is not None and loudness["lra_lu"] < 3.0 and duration >= 45: + signals.append( + signal( + "audio_dynamics", + "Loudness range is very low for a medium/long polished edit.", + evidence={"lra_lu": loudness["lra_lu"], "duration": duration}, + investigate="Listen for a flat bed; revise section dynamics if the genre needs a stronger arc.", + ) + ) + if high_delta is not None and high_delta > -14: + signals.append( + signal( + "audio_high_band", + "High-frequency band is relatively strong compared with the full mix.", + evidence={"highpass_6000_mean_minus_full_mean_db": round(high_delta, 2)}, + 
investigate="Listen for hiss, sizzle, harsh UI ticks, codec artifacts, or noisy risers.", + ) + ) + if low_delta is not None and low_delta > -5 and duration >= 45: + signals.append( + signal( + "audio_low_band", + "Low-frequency band dominates the mix.", + evidence={"lowpass_140_mean_minus_full_mean_db": round(low_delta, 2)}, + investigate="Listen for sub-pulse monotony or repeated boom hits replacing real music.", + ) + ) + + scan_paths = args.scan_path or [] + artifact_scan = scan_audio_artifacts(scan_paths) if scan_paths else {"files": [], "total_files_with_matches": 0, "totals": {}} + totals = artifact_scan.get("totals", {}) + if totals: + level = "strong_signal" if duration >= 45 and any(key in totals for key in ("random_noise", "procedural_claim")) else "review" + signals.append( + signal( + "procedural_audio_artifacts", + "Text artifacts suggest procedural or synthetic audio was used.", + level=level, + evidence={"totals": totals, "matched_files": artifact_scan.get("total_files_with_matches")}, + investigate=( + "Confirm whether generated stems are only short SFX/accent layers or whether they became the main music bed. " + "For polished long videos, prefer AI-generated music or downloaded relevant/authorized music as the main bed." 
+ ), + ) + ) + + source_audio_review_entries: list[dict[str, Any]] = [] + source_audio_snippet_ranges: list[tuple[float, float]] = [] + manifest_paths = list(args.sources_manifest or []) + list(args.music_manifest or []) + if manifest_paths: + root = args.root or Path.cwd() + entries, manifest_signals, ranges = manifest_audio_overlap_review(manifest_paths, root=root) + source_audio_review_entries.extend(entries) + source_audio_snippet_ranges.extend(ranges) + signals.extend(manifest_signals) + if args.sound_design: + sound_signals, ranges = sound_design_overlap_review(list(args.sound_design)) + source_audio_snippet_ranges.extend(ranges) + signals.extend(sound_signals) + + if out_dir and source_audio_snippet_ranges and duration > 0: + seen: set[tuple[int, int]] = set() + for index, (start, end) in enumerate(source_audio_snippet_ranges, 1): + start = max(0.0, min(start, max(duration - 0.1, 0.0))) + length = max(0.1, min(end - start, args.snippet_seconds, duration - start)) + key = (round(start * 10), round(length * 10)) + if key in seen: + continue + seen.add(key) + out = out_dir / f"source_overlap_{index:02d}_{start:.1f}s.wav" + export_audio_snippet(args.media, out, start, length) + snippets[f"source_overlap_{index:02d}_{start:.1f}s"] = str(out) + if seen: + signals.append( + signal( + "source_audio_overlap_listening", + "Audio snippets were exported around documented source-audio overlap windows.", + level="info", + evidence={ + key: value + for key, value in snippets.items() + if key.startswith("source_overlap_") + }, + investigate=( + "Listen for stacked music beds, source commentary under new narration, or ambience that is actually speech/music." 
+ ), + ) + ) + + if out_dir and duration > 0: + points = { + "opening": 0.0, + "middle": max(0.0, duration / 2 - args.snippet_seconds / 2), + "ending": max(0.0, duration - args.snippet_seconds), + } + if args.snippet_at: + for index, start in enumerate(args.snippet_at, 1): + points[f"custom_{index:02d}_{start:.1f}s"] = max(0.0, start) + for name, start in points.items(): + out = out_dir / f"{name}.wav" + export_audio_snippet(args.media, out, start, min(args.snippet_seconds, max(duration - start, 0.1))) + snippets[name] = str(out) + signals.append( + signal( + "listening_pass", + "Audio snippets were exported for manual listening.", + level="info", + evidence=snippets, + investigate="Listen to the snippets before promotion; technical metrics cannot judge taste or harshness.", + ) + ) + + return { + "doctor": "audio", + "artifact": str(args.media), + "summary": { + "media": summary, + "full_volume": full_volume, + "highpass_6000_volume": high_volume, + "lowpass_140_volume": low_volume, + "band_deltas_db": { + "highpass_6000_mean_minus_full_mean": high_delta, + "lowpass_140_mean_minus_full_mean": low_delta, + }, + "loudness": loudness, + "procedural_artifact_scan": artifact_scan, + "source_audio_overlap_review": source_audio_review_entries, + "logs": logs, + "snippets": snippets, + }, + "signals": signals, + "agent_instruction": ( + "Use audio signals to guide listening and mix review; do not treat this report as a pass/fail verdict. " + "If procedural audio is the main bed in a polished long video, consider AI-generated music or downloaded relevant/authorized music instead. " + "If source audio overlaps new music or narration, confirm it is intentional ambience or revise with mute/duck/source-only/separation." 
def probe_doctor(args: argparse.Namespace) -> dict[str, Any]:
    """Build the ``probe`` report for ``args.media``.

    Runs ffprobe on the media file and summarizes the result. When
    ``args.raw`` is set, the unprocessed ffprobe payload is attached to the
    summary under ``"raw_ffprobe"``. The report never carries signals.
    """
    ffprobe_output = run_ffprobe(args.media)
    facts = media_summary(args.media, ffprobe_output)
    if args.raw:
        # Only include the raw payload on request.
        facts["raw_ffprobe"] = ffprobe_output
    report: dict[str, Any] = {
        "doctor": "probe",
        "artifact": str(args.media),
        "summary": facts,
        "signals": [],
        "agent_instruction": "Compare these media facts to the brief and render reports.",
    }
    return report


def print_human(report: dict[str, Any]) -> None:
    """Print a doctor report to stdout in a line-oriented, human-readable form.

    Expects the ``doctor``, ``artifact``, ``summary`` and
    ``agent_instruction`` keys; ``signals`` is optional and may be empty or
    ``None``. Each signal is printed with its level, topic and message,
    followed by its evidence (as sorted JSON) and investigation hint when
    those are present and truthy.
    """
    print(f"doctor: {report['doctor']}")
    print(f"artifact: {report['artifact']}")
    print("summary:")
    print(json.dumps(report["summary"], indent=2, sort_keys=True))

    entries = report.get("signals") or []
    print(f"signals: {len(entries)}")
    for entry in entries:
        print(f"- [{entry.get('level')}] {entry.get('topic')}: {entry.get('message')}")
        evidence = entry.get("evidence")
        if evidence:
            print(f" evidence: {json.dumps(evidence, sort_keys=True)}")
        follow_up = entry.get("investigate")
        if follow_up:
            print(f" investigate: {follow_up}")

    print(f"agent_instruction: {report['agent_instruction']}")


def add_common(parser: argparse.ArgumentParser) -> None:
    """Register the options shared by every sub-command (currently ``--json``)."""
    parser.add_argument("--json", help="print JSON report", action="store_true")


def build_parser() -> argparse.ArgumentParser:
    """Assemble the command-line parser, one sub-command per doctor.

    Sub-commands: ``probe``, ``captions``, ``sources``, ``frames``, ``tail``,
    ``audio`` and ``lint``. The chosen sub-command is stored in
    ``args.command`` and is required. Every sub-command also receives the
    shared options from :func:`add_common`.
    """
    root = argparse.ArgumentParser(description=__doc__)
    commands = root.add_subparsers(dest="command", required=True)

    # --- probe ---------------------------------------------------------
    probe_cmd = commands.add_parser("probe", help="summarize a media file with ffprobe")
    probe_cmd.add_argument("media", type=Path)
    probe_cmd.add_argument("--raw", action="store_true", help="include raw ffprobe JSON")
    add_common(probe_cmd)

    # --- captions ------------------------------------------------------
    captions_cmd = commands.add_parser(
        "captions", help="investigate SRT/ASS caption timing and metadata"
    )
    captions_cmd.add_argument("captions", type=Path)
    captions_cmd.add_argument("--duration", type=float, help="media duration in seconds")
    captions_cmd.add_argument("--media", type=Path, help="final rendered media file")
    captions_cmd.add_argument("--narration", type=Path, help="narration audio file")
    captions_cmd.add_argument(
        "--output-language",
        help="expected authored viewer-facing language, e.g. English or Chinese",
    )
    # Repeatable string options. The default stays an empty list so main()
    # can tell "not given" apart and substitute its own defaults.
    for flag, text in (
        ("--voice-style", "ASS style name used for voice subtitles; repeatable"),
        ("--flag-term", "viewer-facing term to flag for review; repeatable"),
    ):
        captions_cmd.add_argument(flag, action="append", default=[], help=text)
    # Numeric thresholds that decide when a signal is emitted.
    for flag, threshold, text in (
        (
            "--voice-after-narration-signal",
            2.0,
            "seconds after narration before the doctor emits an investigation signal",
        ),
        (
            "--post-caption-tail-signal",
            10.0,
            "seconds of media after the last voice caption before emitting an authored-coverage signal",
        ),
        (
            "--voice-media-coverage-signal",
            0.75,
            "voice-caption coverage ratio below which the doctor emits an authored-coverage signal",
        ),
        (
            "--narration-media-tail-signal",
            12.0,
            "seconds of media after narration duration before emitting an authored-coverage signal",
        ),
    ):
        captions_cmd.add_argument(flag, type=float, default=threshold, help=text)
    captions_cmd.add_argument(
        "--expect-authored-coverage",
        action="store_true",
        help=(
            "emit media-coverage signals when the brief expects narration/intertitles "
            "to carry the story; omit for spot captions or intentionally partial captions"
        ),
    )
    captions_cmd.add_argument(
        "--review-dir", type=Path, help="directory of final-path review frames"
    )
    add_common(captions_cmd)

    # --- sources -------------------------------------------------------
    sources_cmd = commands.add_parser(
        "sources", help="investigate a sources/music_sources manifest"
    )
    sources_cmd.add_argument("manifest", type=Path)
    sources_cmd.add_argument("--root", type=Path, default=Path.cwd())
    add_common(sources_cmd)

    # --- frames --------------------------------------------------------
    frames_cmd = commands.add_parser("frames", help="generate review frames/contact sheets")
    frames_cmd.add_argument("media", type=Path)
    frames_cmd.add_argument("out_dir", type=Path)
    frames_cmd.add_argument("--interval", type=int, default=2)
    frames_cmd.add_argument(
        "--final-act-fraction",
        type=float,
        default=0.2,
        help="fraction of the ending to sample for dense final-act review; default 0.2",
    )
    add_common(frames_cmd)

    # --- tail ----------------------------------------------------------
    tail_cmd = commands.add_parser("tail", help="investigate the final video tail")
    tail_cmd.add_argument("media", type=Path)
    tail_cmd.add_argument("out_dir", type=Path)
    tail_cmd.add_argument("--tail-seconds", type=float, default=10.0)
    add_common(tail_cmd)

    # --- audio ---------------------------------------------------------
    audio_cmd = commands.add_parser("audio", help="investigate final audio quality signals")
    audio_cmd.add_argument("media", type=Path)
    audio_cmd.add_argument(
        "out_dir",
        type=Path,
        nargs="?",
        help="optional directory for logs and listening snippets",
    )
    # Repeatable path options, each collected into its own list.
    for flag, text in (
        (
            "--scan-path",
            "script/manifest/project path to scan for procedural-audio evidence; repeatable",
        ),
        (
            "--sources-manifest",
            "sources.json to inspect for source-audio roles and overlap policies; repeatable",
        ),
        (
            "--music-manifest",
            "music_sources.json to inspect for source-audio/new-music overlap; repeatable",
        ),
        (
            "--sound-design",
            "sound_design.md to inspect for source-audio policy and overlap language; repeatable",
        ),
    ):
        audio_cmd.add_argument(flag, type=Path, action="append", default=[], help=text)
    audio_cmd.add_argument(
        "--root", type=Path, default=Path.cwd(), help="root for resolving manifest paths"
    )
    audio_cmd.add_argument("--snippet-seconds", type=float, default=5.0)
    audio_cmd.add_argument(
        "--snippet-at",
        type=float,
        action="append",
        default=[],
        help="additional snippet start time in seconds; repeatable",
    )
    add_common(audio_cmd)

    # --- lint ----------------------------------------------------------
    lint_cmd = commands.add_parser(
        "lint", help="investigate renderer/linter logs and warning disposition"
    )
    lint_cmd.add_argument("logs", type=Path, nargs="+")
    lint_cmd.add_argument(
        "--disposition", type=Path, help="file documenting warning/error disposition"
    )
    add_common(lint_cmd)

    return root
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse arguments, run one doctor, print its report.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after a report was printed, ``1`` when the selected doctor
        raised, ``2`` when a required input file does not exist.
    """
    args = build_parser().parse_args(argv)

    # Check input files up front so a missing path is reported as a clean
    # "file not found" (exit 2) rather than a failure inside a doctor.
    # Order matters: the first missing path is the one reported.
    required_inputs = [getattr(args, attr, None) for attr in ("media", "captions", "manifest")]
    required_inputs.extend(getattr(args, "logs", []) or [])
    for candidate in required_inputs:
        if candidate is None:
            continue
        if not candidate.exists():
            print(f"error: file not found: {candidate}", file=sys.stderr)
            return 2

    # An empty list means the repeatable option was never given (only the
    # captions sub-command defines these), so fall back to the defaults.
    if getattr(args, "voice_style", None) == []:
        args.voice_style = ["Caption", "Narration", "Subtitle"]
    if getattr(args, "flag_term", None) == []:
        args.flag_term = DEFAULT_FLAG_TERMS.copy()

    try:
        doctors = {
            "probe": probe_doctor,
            "captions": caption_doctor,
            "sources": sources_doctor,
            "frames": frames_doctor,
            "tail": tail_doctor,
            "audio": audio_doctor,
            "lint": lint_doctor,
        }
        doctor = doctors.get(args.command)
        if doctor is None:
            raise RuntimeError(f"unknown command: {args.command}")
        report = doctor(args)
    except Exception as exc:  # noqa: BLE001 - command-line diagnostic
        # Top-level boundary: report any doctor failure as one stderr line.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    if args.json:
        print(json.dumps(report, indent=2, sort_keys=True))
    else:
        print_human(report)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
Reach for a matrix when a task spans +several tools (produce a video, design an image, build a game). + +Standard agent sequence — **preflight before you install**: + +```bash +cli-hub matrix list # browse all matrices +cli-hub can "transcribe audio" # find the capability across matrices +cli-hub matrix search "video subtitle" # search; shows the matched capability +cli-hub matrix preflight video-creation --json # what's usable here? (exit 3 = gaps) +cli-hub matrix preflight video-creation -c text.transcribe --fix-hints # one capability + install hints +cli-hub matrix install video-creation --capability text.transcribe # install ONLY what the task needs +# After install, the matrix SKILL.md renders locally with provider-selection rules — read it. +``` + +Scope every install — do not bulk-install a 14-CLI matrix for a one-capability +task. Use `--capability <id>`, `--recipe <id>`, or `--only a,b`, and +`--dry-run` to preview the plan with zero side effects. `--json` is available on +every matrix subcommand; exit codes are `0` ok · `3` partial/gaps · `1` failure · +`2` usage error. Retry failures with `cli-hub matrix install <name> --resume`, +and audit an install with `cli-hub matrix doctor <name>`. + + ## Live Catalog **URL**: [`https://reeceyang.sgp1.cdn.digitaloceanspaces.com/SKILL.md`](https://reeceyang.sgp1.cdn.digitaloceanspaces.com/SKILL.md) diff --git a/cli-hub/MANIFEST.in b/cli-hub/MANIFEST.in new file mode 100644 index 000000000..db1860206 --- /dev/null +++ b/cli-hub/MANIFEST.in @@ -0,0 +1,4 @@ +# Vendored matrix skill content (generated by setup.py from ../cli-hub-matrix +# at sdist/wheel build time; see cli_hub/matrix_skill.py lookup chain). 
+recursive-include cli_hub/_matrix_data * +global-exclude __pycache__ *.pyc *.pyo diff --git a/cli-hub/cli_hub/cli.py b/cli-hub/cli_hub/cli.py index 651dbbb1b..a2f3fba96 100644 --- a/cli-hub/cli_hub/cli.py +++ b/cli-hub/cli_hub/cli.py @@ -10,7 +10,27 @@ from cli_hub import __version__ from cli_hub.registry import fetch_all_clis, get_cli, search_clis, list_categories -from cli_hub.installer import install_cli, uninstall_cli, get_installed, update_cli +from cli_hub.matrix import ( + all_recipes, + capability_matches, + fetch_all_matrices, + get_matrix, + get_recipe, + preflight_matrix, + provider_install_hint, + search_capabilities, + search_matrices, +) +from cli_hub.matrix_skill import get_rendered_matrix_skill_path, render_matrix_skill_file +from cli_hub.installer import ( + doctor_matrix, + get_installed, + install_cli, + install_matrix, + plan_matrix_install, + uninstall_cli, + update_cli, +) from cli_hub.analytics import ( detect_invocation_context, track_first_run, @@ -33,6 +53,14 @@ ) +# Exit-code contract for the matrix command family (F2.3 / F2.4): +# 0 success · 1 failure or not-found · 2 usage error · 3 partial / gaps +EXIT_OK = 0 +EXIT_FAIL = 1 +EXIT_USAGE = 2 +EXIT_PARTIAL = 3 + + def _invocation_command(ctx, version): """Return a compact label for the current invocation.""" argv = sys.argv[1:] @@ -51,7 +79,7 @@ def _invocation_command(ctx, version): @click.option("--version", is_flag=True, help="Show version.") @click.pass_context def main(ctx, version): - """cli-hub — Download and manage CLI-Anything harnesses and public CLIs.""" + """cli-hub — Download and manage CLI-Anything CLIs, public CLIs, and curated matrices.""" track_first_run() track_visit(command=_invocation_command(ctx, version), detection=detect_invocation_context()) if version: @@ -70,6 +98,11 @@ def _source_tag(cli): return "" +def _plural(count, singular, plural=None): + """Return a compact count label with basic pluralization.""" + return f"{count} {singular if count == 1 else (plural 
@main.command("can")
@click.argument("query")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON.")
def can(query, as_json):
    """Find a capability across all matrices for a task (e.g. cli-hub can "transcribe audio")."""
    # The docstring above is Click's --help text, so it is kept verbatim.
    #
    # Exit status: EXIT_FAIL when nothing matches (in both output modes);
    # the JSON path exits explicitly with EXIT_OK on a hit, while the human
    # path simply returns.
    matches = search_capabilities(query)

    if as_json:
        document = {"query": query, "matched_capabilities": matches}
        click.echo(json_mod.dumps(document, indent=2))
        raise SystemExit(EXIT_OK if matches else EXIT_FAIL)

    if not matches:
        click.echo(f"No capability matches '{query}'.")
        click.echo(" Browse matrices: cli-hub matrix list")
        raise SystemExit(EXIT_FAIL)

    def render_chip(provider):
        # One coloured status chip per provider: agent-installable,
        # available locally, or unavailable (with whatever is missing).
        if provider.get("agent_installable"):
            return click.style(f"○ {provider['name']} (agent)", fg="bright_black")
        if provider["available"]:
            return click.style(f"✓ {provider['name']}", fg="green")
        lacking = [item for values in provider["missing"].values() for item in values]
        detail = f" (missing: {', '.join(lacking)})" if lacking else ""
        return click.style(f"✗ {provider['name']}{detail}", fg="yellow")

    for match in matches:
        heading = click.style(match["capability_id"], bold=True)
        origin = click.style(f"({match['matrix']} [{match['matrix_id']}])", fg="cyan")
        click.echo(f"\n {heading} {origin}")
        click.echo(f" {match['intent'][:88]}")

        # Only the first five providers are summarized, to keep output short.
        chips = [render_chip(provider) for provider in match["providers"][:5]]
        if chips:
            click.echo(" local: " + " · ".join(chips))
        click.echo(
            f" next: cli-hub matrix preflight {match['matrix']} -c {match['capability_id']}"
        )
@main.group(name="matrix", invoke_without_command=True)
@click.pass_context
def matrix(ctx):
    """Browse and install curated multi-CLI workflow matrices."""
    # A bare `cli-hub matrix` prints the group help instead of doing nothing.
    if ctx.invoked_subcommand is not None:
        return
    click.echo(ctx.get_help())


@matrix.command("list")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON.")
def list_matrices(as_json):
    """List all available matrices."""
    # Exits with EXIT_FAIL (1) when the matrix registry cannot be fetched.
    try:
        catalog = fetch_all_matrices()
    except Exception as exc:
        click.secho(f"Failed to fetch matrix registry: {exc}", fg="red", err=True)
        raise SystemExit(EXIT_FAIL)

    installed = get_installed()

    if as_json:
        click.echo(json_mod.dumps(catalog, indent=2))
        return

    if not catalog:
        click.echo("No matrices found.")
        return

    click.secho("\n MATRICES", fg="blue", bold=True)
    for entry in sorted(catalog, key=lambda item: item["name"]):
        members = entry.get("clis", [])
        total = len(members)
        installed_count = sum(1 for member in members if member in installed)
        # A green dot marks matrices whose member CLIs are all installed.
        fully_installed = total and installed_count == total
        marker = click.style(" ●", fg="green") if fully_installed else " "
        name = click.style(f"{entry['name']:20s}", bold=True)
        matrix_label = click.style(f"[{entry.get('matrix_id', 'matrix')}]", fg="cyan")
        click.echo(f" {marker} {name} {matrix_label} {entry['description'][:65]}")
        click.echo(f" Includes: {installed_count}/{total} CLIs installed")

    click.echo(f"\n {len(catalog)} matrices available")
@matrix.command("search")
@click.argument("query")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON.")
def matrix_search(query, as_json):
    """Search matrices by name, capabilities, providers, recipes, or gaps."""
    # Each hit is annotated with the capabilities that matched the query so
    # the suggested install command can be scoped to the first of them.
    found = search_matrices(query)
    needle = query.lower()

    if as_json:
        annotated = [
            {**entry, "matched_capabilities": capability_matches(entry, needle)}
            for entry in found
        ]
        click.echo(json_mod.dumps(annotated, indent=2))
        return

    if not found:
        click.echo(f"No matrices matching '{query}'.")
        click.echo(f" Try capability search: cli-hub can \"{query}\"")
        return

    installed = get_installed()
    for entry in found:
        members = entry.get("clis", [])
        installed_count = sum(1 for member in members if member in installed)
        total = len(members)
        name_str = click.style(entry["name"], bold=True)
        matrix_label = click.style(f"[{entry.get('matrix_id', 'matrix')}]", fg="cyan")
        click.echo(f"\n {name_str} {matrix_label} - {entry['description'][:65]}")
        click.echo(f" CLIs: {installed_count}/{total} installed")

        matched = capability_matches(entry, needle)
        for hit in matched:
            star = click.style("✦", fg="cyan")
            click.echo(
                f" {star} matched capability: {click.style(hit['capability_id'], bold=True)} "
                f"({hit['match_field']})"
            )
            click.echo(f" {hit['intent'][:80]}")
            click.echo(f" providers: {hit['providers_summary']}")

        if not matched:
            click.echo(f" Install: cli-hub matrix install {entry['name']}")
            continue
        first = matched[0]["capability_id"]
        click.echo(
            f" Install: cli-hub matrix install {entry['name']} "
            f"--capability {first}"
        )
@matrix.command("info")
@click.argument("name")
@click.option("--json", "as_json", is_flag=True, help="Output matrix metadata as JSON.")
def matrix_info(name, as_json):
    """Show details for a specific matrix."""
    # Exits with EXIT_FAIL (1) when the matrix is unknown. The JSON form is
    # the registry entry plus an "_installed" section describing local state.
    entry = get_matrix(name)
    if not entry:
        click.secho(f"Matrix '{name}' not found.", fg="red", err=True)
        raise SystemExit(EXIT_FAIL)

    installed = get_installed()
    cli_names = entry.get("clis", [])
    installed_members = [member for member in cli_names if member in installed]
    installed_count = len(installed_members)

    if as_json:
        document = dict(entry)
        document["_installed"] = {
            "count": installed_count,
            "total": len(cli_names),
            "clis": installed_members,
        }
        click.echo(json_mod.dumps(document, indent=2))
        return

    # --- header --------------------------------------------------------
    click.secho(f"\n {entry['display_name']}", bold=True)
    click.echo(f" {entry['description']}")
    click.echo(f" Matrix: {entry.get('matrix', 'N/A')} {entry.get('matrix_id', '')}".rstrip())
    if entry.get("schema_version"):
        click.echo(f" Schema: v{entry['schema_version']}")
    click.echo(f" Category: {entry.get('category', 'N/A')}")
    click.echo(f" CLIs: {len(cli_names)}")
    click.echo(f" Installed: {installed_count}/{len(cli_names)}")
    if entry.get("skill_md"):
        click.echo(f" Skill: {entry['skill_md']}")
    # The local skill path is shown only once the file has been rendered.
    rendered_skill_path = get_rendered_matrix_skill_path(entry["name"])
    if rendered_skill_path.exists():
        click.echo(f" Local skill: {rendered_skill_path}")
    if entry.get("homepage"):
        click.echo(f" Homepage: {entry['homepage']}")

    # --- members -------------------------------------------------------
    click.echo("\n Members:")
    for member in cli_names:
        if member in installed:
            status = click.style("installed", fg="green")
        else:
            status = "not installed"
        click.echo(f" - {member} ({status})")

    # --- optional sections --------------------------------------------
    stages = entry.get("stages", [])
    if stages:
        click.echo("\n Stage Coverage:")
        for stage in stages:
            members = ", ".join(stage.get("clis", []))
            goal = stage.get("goal", "")
            goal_suffix = f" -- {goal}" if goal else ""
            click.echo(f" - {stage['name']}: {members}{goal_suffix}")

    capabilities = entry.get("capabilities", [])
    if capabilities:
        click.echo("\n Capabilities:")
        for capability in capabilities:
            providers = capability.get("providers", [])
            cli_provider_count = sum(
                1 for provider in providers
                if provider.get("kind") in {"harness-cli", "public-cli"}
            )
            offline_count = sum(1 for provider in providers if provider.get("offline"))
            click.echo(
                f" - {capability['id']}: "
                f"{_plural(len(providers), 'provider')} "
                f"({_plural(cli_provider_count, 'CLI')}, {offline_count} offline)"
            )
            if capability.get("intent"):
                click.echo(f" {capability['intent']}")

    recipes = entry.get("recipes", [])
    if recipes:
        click.echo("\n Recipes:")
        for recipe in recipes:
            capability_count = len(recipe.get("capabilities_used", []))
            click.echo(f" - {recipe['id']}: {capability_count} capabilities - {recipe.get('description', '')}")

    known_gaps = entry.get("known_gaps", [])
    if known_gaps:
        click.echo("\n Known Gaps:")
        for gap in known_gaps:
            click.echo(f" - {gap.get('capability', 'unknown')}: {gap.get('reason', '')}")

    # --- next steps ----------------------------------------------------
    click.echo(f"\n Install: cli-hub matrix install {entry['name']}")
    if capabilities:
        click.echo(f" Preflight: cli-hub matrix preflight {entry['name']}")
    click.echo()
@matrix.command("preflight")
@click.argument("name")
@click.option("--capability", "-c", default=None, help="Only check one capability id.")
@click.option("--recipe", default=None, help="Only check the capabilities used by one recipe.")
@click.option("--offline", is_flag=True, help="Only consider offline-capable providers.")
@click.option("--fix-hints", is_flag=True, help="Show an install command under each missing provider.")
@click.option("--summary", "summary_only", is_flag=True, help="Print only the two-line summary.")
@click.option("--json", "as_json", is_flag=True, help="Output provider availability as JSON.")
def matrix_preflight(name, capability, recipe, offline, fix_hints, summary_only, as_json):
    """Check which matrix providers are available in the current environment.

    Exit codes: 0 all capabilities covered · 3 one or more capability gaps ·
    1 matrix not found · 2 unknown capability/recipe.
    """
    # The docstring above is Click's --help text, so it is kept verbatim.
    entry = get_matrix(name)
    if not entry:
        click.secho(f"Matrix '{name}' not found.", fg="red", err=True)
        raise SystemExit(EXIT_FAIL)

    # --- validate the requested scope -----------------------------------
    if capability and recipe:
        click.secho("Use only one of --capability or --recipe.", fg="red", err=True)
        raise SystemExit(EXIT_USAGE)

    capability_ids = None
    if recipe:
        recipe_item = get_recipe(entry, recipe)
        if recipe_item is None:
            valid = ", ".join(r.get("id", "") for r in entry.get("recipes", []))
            click.secho(f"Recipe '{recipe}' not found. Valid: {valid or '(none)'}", fg="red", err=True)
            raise SystemExit(EXIT_USAGE)
        capability_ids = recipe_item.get("capabilities_used", [])
    if capability:
        known_ids = {c.get("id") for c in entry.get("capabilities", [])}
        if capability not in known_ids:
            valid = ", ".join(c.get("id", "") for c in entry.get("capabilities", []))
            click.secho(f"Capability '{capability}' not found. Valid: {valid or '(none)'}", fg="red", err=True)
            raise SystemExit(EXIT_USAGE)

    payload = preflight_matrix(
        entry, capability_id=capability, offline=offline, capability_ids=capability_ids
    )

    if as_json:
        click.echo(json_mod.dumps(payload, indent=2))
        has_gaps = payload["summary"].get("gaps", 0)
        raise SystemExit(EXIT_PARTIAL if has_gaps else EXIT_OK)

    capabilities = payload["capabilities"]
    if not capabilities:
        target = capability or recipe or "capabilities"
        click.secho(f"No capability data found for {target}.", fg="yellow")
        raise SystemExit(EXIT_FAIL)

    # --- summary --------------------------------------------------------
    summary = payload["summary"]
    cli_names = entry.get("clis", [])
    click.secho(f"\n {payload['matrix']['display_name']} Preflight", bold=True)
    mode = "offline providers only" if offline else "all providers"
    if recipe:
        scope_suffix = f" · recipe {recipe}"
    elif capability:
        scope_suffix = f" · capability {capability}"
    else:
        scope_suffix = ""
    click.echo(
        f" {summary['covered']}/{summary['capabilities']} capabilities covered "
        f"({summary['available_providers']}/{_plural(summary['providers'], 'provider')} "
        f"available, {mode}{scope_suffix})"
    )
    if summary.get("gaps"):
        click.secho(f" {_plural(summary['gaps'], 'capability')} with no usable provider", fg="yellow")
    agent_installable = summary.get("agent_installable_providers", 0)
    if agent_installable:
        verb = "is" if agent_installable == 1 else "are"
        click.echo(
            f" {_plural(agent_installable, 'agent-installable skill provider')} "
            f"{verb} not counted as installed or missing"
        )

    # --- per-capability detail (skipped with --summary) -----------------
    if not summary_only:
        for capability_result in capabilities:
            click.echo(f"\n {capability_result['id']}")
            click.echo(f" {capability_result['intent']}")

            # Only the first four providers of each capability are listed.
            for provider in capability_result["providers"][:4]:
                if provider.get("agent_installable"):
                    marker = click.style("○", fg="bright_black")
                elif provider["available"]:
                    marker = click.style("✓", fg="green")
                else:
                    marker = click.style("·", fg="yellow")
                missing = [
                    item
                    for values in provider["missing"].values()
                    for item in values
                ]
                if provider.get("agent_installable"):
                    suffix = " agent-installable"
                elif provider["available"]:
                    suffix = ""
                else:
                    suffix = f" missing: {', '.join(missing) or 'requirements'}"
                click.echo(
                    f" {marker} {provider['name']} "
                    f"[{provider['kind']}; {provider['quality_tier']}; {provider['cost_tier']}]{suffix}"
                )
                wants_hint = (
                    fix_hints
                    and not provider["available"]
                    and not provider.get("agent_installable")
                )
                if wants_hint:
                    hint = provider_install_hint(provider, cli_names)
                    if hint:
                        click.echo(click.style(f" ↳ install: {hint}", fg="bright_black"))

    raise SystemExit(EXIT_PARTIAL if summary.get("gaps") else EXIT_OK)


# Human-readable labels for provider categories that `matrix install` does
# not manage; unknown categories are printed under their raw key.
_NOT_MANAGED_LABELS = {
    "python": "Python libraries",
    "native": "native binaries",
    "api": "cloud APIs (need keys)",
    "agent-skill": "agent skills (agent-installed)",
    "public-unmanaged": "third-party CLIs (brew/npm/pip)",
}
{matrix_item['name']} ({payload['scope_label']})", bold=True) + + skips = [p for p in payload["plan"] if p["action"] == "skip"] + installs = [p for p in payload["plan"] if p["action"] == "install"] + errors = [p for p in payload["plan"] if p["action"] == "error"] + + if skips: + names = ", ".join(p["name"] for p in skips) + click.secho(f" ✓ Already installed, will skip ({len(skips)}): {names}", fg="green") + for via in ("pip", "npm", "uv", "bundled", "command"): + group = [p for p in installs if p["via"] == via] + if group: + names = ", ".join(p["name"] for p in group) + click.echo(f" + Will install via {via} ({len(group)}): {names}") + if errors: + names = ", ".join(p["name"] for p in errors) + click.secho(f" ! Not in CLI registry ({len(errors)}): {names}", fg="yellow") + + not_managed = payload.get("not_managed", {}) + if not_managed: + click.echo(" ! Not installed by this command (use preflight + install hints):") + for category, names in not_managed.items(): + label = _NOT_MANAGED_LABELS.get(category, category) + click.echo(f" {label}: {', '.join(names)}") + + if not payload["plan"]: + click.secho(" Nothing to install for this scope via cli-hub.", fg="yellow") + click.echo(" These providers are public/native/API — see: " + f"cli-hub matrix preflight {matrix_item['name']} --fix-hints") + + click.echo(f"\n {summary['to_install']} to install, {summary['to_skip']} to skip" + + (f", {summary['unresolved']} unresolved" if summary["unresolved"] else "")) + click.echo(f" Run: cli-hub matrix install {matrix_item['name']}" + + _scope_args(payload["scope"])) + + +def _scope_args(scope): + """Reconstruct the scope flags for an echoed command line.""" + scope_type = scope.get("type") + if scope_type == "capability": + return f" --capability {scope['value']}" + if scope_type == "recipe": + return f" --recipe {scope['value']}" + if scope_type == "only": + return f" --only {','.join(scope['value'])}" + return "" + + +@matrix.command("install") +@click.argument("name") 
@matrix.command("install")
@click.argument("name")
@click.option("--capability", "-c", default=None, help="Install only the CLIs behind one capability.")
@click.option("--recipe", default=None, help="Install only the CLIs used by one recipe.")
@click.option("--only", default=None, help="Install a comma-separated subset of the matrix CLIs.")
@click.option("--dry-run", is_flag=True, help="Show the install plan without installing anything.")
@click.option("--resume", is_flag=True, help="Retry only the CLIs that failed in the last install.")
@click.option(
    "--skill-only",
    is_flag=True,
    help="Render the matrix skill (SKILL.md + references/ + scripts/) without installing member CLIs.",
)
@click.option("--json", "as_json", is_flag=True, help="Output the plan or result as JSON.")
def matrix_install(name, capability, recipe, only, dry_run, resume, skill_only, as_json):
    """Install the CLIs in a matrix (optionally scoped to a capability, recipe, or subset).

    Exit codes: 0 success · 3 partial failure · 1 total failure or not found ·
    2 usage error.
    """
    # The docstring above is Click's --help text, so it is kept verbatim.
    # Modes are checked in priority order: --skill-only, then --dry-run,
    # then the real install.

    def abort(failure):
        # Report a planner/installer error and exit: usage errors exit with
        # EXIT_USAGE, everything else with EXIT_FAIL.
        if as_json:
            click.echo(json_mod.dumps({"error": failure["error"]}, indent=2))
        else:
            click.secho(f"✗ {failure['error']}", fg="red", err=True)
        raise SystemExit(EXIT_USAGE if failure.get("arg_error") else EXIT_FAIL)

    # --- mode 1: render the skill only ----------------------------------
    if skill_only:
        entry = get_matrix(name)
        if not entry:
            click.secho(f"Matrix '{name}' not found.", fg="red", err=True)
            raise SystemExit(EXIT_FAIL)
        rendered_skill_path = render_matrix_skill_file(entry, installed=get_installed())
        click.echo(f" Local matrix skill: {rendered_skill_path}")
        click.echo(f" Install CLIs: cli-hub matrix install {entry['name']}")
        return

    # --- mode 2: show the plan ------------------------------------------
    if dry_run:
        ok, plan_payload = plan_matrix_install(
            name, capability=capability, recipe=recipe, only=only
        )
        if not ok:
            abort(plan_payload)
        if as_json:
            data = {
                key: plan_payload[key]
                for key in ("scope", "scope_label", "plan", "not_managed", "summary")
            }
            data["matrix"] = plan_payload["matrix"]["name"]
            click.echo(json_mod.dumps(data, indent=2))
        else:
            _render_dry_run(plan_payload)
        return

    # --- mode 3: install ------------------------------------------------
    success, payload = install_matrix(
        name, capability=capability, recipe=recipe, only=only, resume=resume
    )
    if payload.get("error"):
        abort(payload)

    entry = payload["matrix"]
    summary = payload["summary"]

    if as_json:
        data = {
            "matrix": entry["name"],
            "scope": payload.get("scope"),
            "results": payload["results"],
            "summary": summary,
            "rendered_skill_path": payload.get("rendered_skill_path"),
        }
        click.echo(json_mod.dumps(data, indent=2))
    else:
        if payload.get("nothing_to_resume"):
            click.secho(f" Nothing to resume — last install of {name} had no failures.", fg="green")
            return
        scope_label = payload.get("scope_label", "full matrix")
        click.echo(f"Installing matrix {name} ({scope_label})...")
        if not payload["results"]:
            click.secho(" No cli-hub-managed CLIs in this scope.", fg="yellow")
            click.echo(f" Check provider availability: cli-hub matrix preflight {name} --fix-hints")
        for result in payload["results"]:
            status = result["status"]
            succeeded = status in {"installed", "skipped"}
            prefix = "✓" if succeeded else "✗"
            color = "green" if succeeded else "red"
            # Failed entries go to stderr; everything else to stdout.
            click.secho(f" {prefix} {result['name']}: {result['message']}",
                        fg=color, err=status == "failed")

        click.echo(
            f"\n Summary: {summary['installed']} installed, "
            f"{summary['skipped']} skipped, {summary['failed']} failed"
        )
        if summary["failed"]:
            click.echo(f" Retry failures: cli-hub matrix install {entry['name']} --resume")
        if entry.get("skill_md"):
            click.echo(f" Matrix skill: {entry['skill_md']}")
        if payload.get("rendered_skill_path"):
            click.echo(f" Local matrix skill: {payload['rendered_skill_path']}")
        click.echo(f" Inspect: cli-hub matrix info {entry['name']}")

    if summary["failed"]:
        # Partial failure (some installed) vs. total failure get distinct exit codes.
        any_progress = summary["installed"] or summary["skipped"]
        raise SystemExit(EXIT_PARTIAL if any_progress else EXIT_FAIL)
+ """ + healthy, payload = doctor_matrix(name) + if payload.get("error"): + if as_json: + click.echo(json_mod.dumps({"error": payload["error"]}, indent=2)) + else: + click.secho(f"✗ {payload['error']}", fg="red", err=True) + raise SystemExit(EXIT_FAIL) + + if as_json: + data = {k: payload[k] for k in ("last_run", "checks", "summary")} + data["matrix"] = payload["matrix"]["name"] + click.echo(json_mod.dumps(data, indent=2)) + raise SystemExit(EXIT_OK if healthy else EXIT_PARTIAL) + + matrix_item = payload["matrix"] + summary = payload["summary"] + click.secho(f"\n {matrix_item['display_name']} Doctor", bold=True) + if payload.get("last_run"): + click.echo(f" Last install: {payload['last_run']}") + for check in payload["checks"]: + if check["status"] == "ok": + marker = click.style("✓", fg="green") + elif check["status"] == "broken": + marker = click.style("!", fg="yellow") + else: + marker = click.style("·", fg="bright_black") + click.echo(f" {marker} {check['name']}: {check['detail']}") + if check["fix"]: + click.echo(click.style(f" ↳ fix: {check['fix']}", fg="bright_black")) + + click.echo(f"\n Summary: {summary['ok']} ok, {summary['broken']} broken, " + f"{summary['not_installed']} not installed") + raise SystemExit(EXIT_OK if healthy else EXIT_PARTIAL) + + +@matrix.command("recipes") +@click.option("--search", "query", default=None, help="Filter recipes by id, description, or capability.") +@click.option("--json", "as_json", is_flag=True, help="Output as JSON.") +def matrix_recipes(query, as_json): + """List task-oriented recipes across all matrices (F1.4).""" + recipes = all_recipes(query) + + if as_json: + click.echo(json_mod.dumps({"query": query, "recipes": recipes}, indent=2)) + return + + if not recipes: + target = f" matching '{query}'" if query else "" + click.echo(f"No recipes found{target}.") + return + + for recipe in recipes: + name_str = click.style(recipe["id"], bold=True) + loc = click.style(f"({recipe['matrix']})", fg="cyan") + click.echo(f"\n 
def _load_matrix_state():
    """Return the persisted matrix install state, or ``{}`` when unusable.

    The state file is treated as a best-effort cache: a missing, unreadable,
    undecodable or corrupt file yields an empty dict rather than an error.
    Valid JSON that is not an object (e.g. a list) is also rejected, because
    callers index the result by matrix name with ``.get``.

    Returns:
        dict: the decoded state, or an empty dict.
    """
    try:
        raw = MATRIX_STATE_FILE.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return {}
    try:
        state = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    return state if isinstance(state, dict) else {}


def _save_matrix_state(data):
    """Persist ``data`` as the matrix install state.

    The JSON is written to a sibling temporary file and then renamed over the
    real file, so an interrupted write cannot leave a truncated state file
    (which the loader would silently read back as "no state").

    Args:
        data: JSON-serialisable state object.

    Raises:
        OSError: if the directory or file cannot be written.
    """
    MATRIX_STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = MATRIX_STATE_FILE.with_name(MATRIX_STATE_FILE.name + ".tmp")
    tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    tmp_path.replace(MATRIX_STATE_FILE)
def _scope_label(scope, capabilities=None):
    """Describe an install scope in words for state files and console output.

    ``scope`` is a dict with a ``type`` key and, for narrowed scopes, a
    ``value`` key. ``capabilities`` is accepted but not used.
    """
    kind = scope.get("type")
    if kind in ("capability", "recipe"):
        # Both render as "<type> <value>".
        return f"{kind} {scope.get('value')}"
    if kind == "only":
        names = scope.get("value", [])
        return "only " + ", ".join(names)
    # Any other (or missing) type means the whole matrix.
    return "full matrix"
def install_matrix(name, capability=None, recipe=None, only=None, resume=False):
    """Install the CLIs of matrix ``name``, optionally scoped or resumed.

    With ``resume`` set, only the CLIs recorded as ``failed`` in the previous
    run of this matrix are retried; otherwise the target list comes from
    ``resolve_install_scope``. Each run's per-CLI outcomes are written to the
    matrix state file and the local matrix skill file is re-rendered.

    Returns:
        ``(success, payload)``. ``success`` is true when no CLI failed.
        On lookup or usage errors the payload carries ``error`` (and
        ``arg_error`` for usage errors) instead of results.
    """
    matrix_item = get_matrix(name)
    if matrix_item is None:
        return False, {"error": f"Matrix '{name}' not found. Use 'cli-hub matrix list' to see available matrices."}

    def _usage_error(message):
        # Shared shape for every "bad arguments" outcome.
        return False, {"error": message, "arg_error": True, "matrix": matrix_item}

    state = _load_matrix_state()

    if not resume:
        resolved = resolve_install_scope(matrix_item, capability=capability, recipe=recipe, only=only)
        if resolved.get("error"):
            return _usage_error(resolved["error"])
        pending = resolved["cli_names"]
        scope = resolved["scope"]
    else:
        if capability or recipe or only:
            return _usage_error("Cannot combine --resume with --capability/--recipe/--only.")
        previous = state.get(name)
        if not previous:
            return _usage_error(
                f"No previous install of '{name}' to resume. "
                f"Run 'cli-hub matrix install {name}' first."
            )
        pending = [
            entry["name"]
            for entry in previous.get("results", [])
            if entry.get("status") == "failed"
        ]
        scope = previous.get("scope", {"type": "all"})
        if not pending:
            # Nothing failed last time: report success without touching state.
            return True, {
                "matrix": matrix_item,
                "results": [],
                "scope": scope,
                "scope_label": _scope_label(scope),
                "summary": {"total": 0, "installed": 0, "skipped": 0, "failed": 0},
                "resumed": True,
                "nothing_to_resume": True,
            }

    already = set(get_installed())
    results = []
    for cli_name in pending:
        cli = get_cli(cli_name)
        if cli:
            display_name, via = cli["display_name"], _install_strategy(cli)
        else:
            display_name, via = cli_name, None

        if cli_name in already:
            status, message = "skipped", "Already installed"
        elif cli is None:
            status, message = "failed", "CLI not found in registry"
        else:
            ok, message = install_cli(cli_name)
            status = "installed" if ok else "failed"
            if ok:
                already.add(cli_name)

        results.append({
            "name": cli_name,
            "display_name": display_name,
            "status": status,
            "via": via,
            "message": message,
        })

    statuses = [entry["status"] for entry in results]
    summary = {"total": len(results)}
    for status in ("installed", "skipped", "failed"):
        summary[status] = statuses.count(status)

    # Record this run so --resume and `matrix doctor` can act on it.
    state[name] = {
        "last_run": datetime.now().isoformat(timespec="seconds"),
        "scope": scope,
        "results": results,
    }
    _save_matrix_state(state)

    # Re-read the install records so the rendered skill reflects this run.
    skill_path = render_matrix_skill_file(matrix_item, installed=get_installed())
    payload = {
        "matrix": matrix_item,
        "results": results,
        "scope": scope,
        "scope_label": _scope_label(scope),
        "summary": summary,
        "resumed": resume,
        "rendered_skill_path": str(skill_path),
    }
    return summary["failed"] == 0, payload
Use 'cli-hub matrix list' to see available matrices."} + + installed = get_installed() + state = _load_matrix_state().get(name) + checks = [] + for cli_name in matrix_item.get("clis", []): + cli = get_cli(cli_name) + entry_point = cli.get("entry_point") if cli else None + record = installed.get(cli_name) + + if record is None: + status = "not_installed" + detail = "Not installed" + fix = f"cli-hub install {cli_name}" + elif entry_point and not _command_exists(entry_point): + status = "broken" + detail = f"Recorded as installed but '{entry_point}' is not on PATH" + fix = f"cli-hub install {cli_name}" + else: + status = "ok" + detail = "Installed" + fix = None + + checks.append({ + "name": cli_name, + "entry_point": entry_point, + "status": status, + "detail": detail, + "fix": fix, + }) + + summary = { + "total": len(checks), + "ok": sum(1 for check in checks if check["status"] == "ok"), + "broken": sum(1 for check in checks if check["status"] == "broken"), + "not_installed": sum(1 for check in checks if check["status"] == "not_installed"), + } + payload = { + "matrix": matrix_item, + "last_run": state.get("last_run") if state else None, + "checks": checks, + "summary": summary, + } + healthy = summary["broken"] == 0 and summary["not_installed"] == 0 + return healthy, payload diff --git a/cli-hub/cli_hub/matrix.py b/cli-hub/cli_hub/matrix.py new file mode 100644 index 000000000..865b5a6b8 --- /dev/null +++ b/cli-hub/cli_hub/matrix.py @@ -0,0 +1,537 @@ +"""Fetch, cache, and query curated CLI workflow matrices.""" + +import importlib.metadata +import importlib.util +import json +import os +import shutil +import time +from pathlib import Path + +import requests + +MATRIX_REGISTRY_URL = "https://hkuds.github.io/CLI-Anything/matrix_registry.json" +MATRIX_CACHE_FILE = Path.home() / ".cli-hub" / "matrix_registry_cache.json" +CACHE_TTL = 3600 # 1 hour + +AGENT_INSTALLABLE_KINDS = {"agent-skill"} + +# Provider kinds whose CLIs `matrix install` can manage via cli-hub / public 
def fetch_matrix_registry(force_refresh=False):
    """Fetch the matrix registry, using a local cache file with a TTL.

    Lookup order:

    1. A cache entry younger than ``CACHE_TTL`` (skipped when
       ``force_refresh`` is true).
    2. The published registry at ``MATRIX_REGISTRY_URL``.
    3. On network/parse failure: the cache regardless of age, then a
       ``matrix_registry.json`` found by ``_load_local_registry``.

    Caching is best-effort: an unreadable or wrongly-shaped cache file is
    ignored, and a failure to create the cache directory or write the cache
    never discards data that was fetched successfully.

    Args:
        force_refresh: bypass the fresh-cache shortcut and hit the network.

    Returns:
        The decoded registry document.

    Raises:
        requests.RequestException, ValueError: when the fetch fails and no
            cached or local copy is available.
    """
    if not force_refresh:
        try:
            cached = json.loads(MATRIX_CACHE_FILE.read_text())
            if time.time() - cached.get("_cached_at", 0) < CACHE_TTL:
                return cached["data"]
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            # Missing, unreadable, corrupt, or wrongly-shaped cache (e.g. a JSON
            # list, or a non-numeric timestamp): fall through and refetch.
            pass

    try:
        resp = requests.get(MATRIX_REGISTRY_URL, timeout=15)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError):
        try:
            cached_data = _load_cached_data()
        except (OSError, TypeError):
            # A wrongly-shaped cache must not mask the local-registry fallback.
            cached_data = None
        if cached_data is not None:
            return cached_data
        local_data = _load_local_registry()
        if local_data is not None:
            return local_data
        raise

    try:
        _ensure_cache_dir()
        MATRIX_CACHE_FILE.write_text(
            json.dumps({"_cached_at": time.time(), "data": data}, indent=2)
        )
    except OSError:
        # Read-only or full home directory: still return the fetched data.
        pass
    return data
def _string_values(value):
    """Yield every string found in ``value``, lowercased, depth first.

    Dict values and list items are searched recursively; dict keys and any
    other type (numbers, ``None``, tuples, ...) contribute nothing.
    """
    if isinstance(value, str):
        yield value.lower()
        return

    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, list):
        children = value
    else:
        return

    for child in children:
        yield from _string_values(child)
def check_provider_requirements(provider):
    """Report whether one provider's declared requirements are met locally.

    Providers whose kind is in ``AGENT_INSTALLABLE_KINDS`` are never probed:
    they are reported as not available but agent-installable, with empty
    ``present``/``missing`` lists. For every other kind the ``requires``
    entries are checked: ``env`` names against the environment, ``binary``
    names against PATH, and ``package`` names via ``_package_available``.

    Returns:
        dict describing the provider, its availability, and which
        requirements were found or are missing.
    """
    kind = provider.get("kind", "")
    declared = provider.get("requires") or {}

    def _report(available, agent_installable, status, present, missing):
        # One place that fixes the report's shape and key order for both paths.
        return {
            "name": provider.get("name", ""),
            "kind": kind,
            "available": available,
            "agent_installable": agent_installable,
            "status": status,
            "offline": bool(provider.get("offline")),
            "cost_tier": provider.get("cost_tier", "unknown"),
            "quality_tier": provider.get("quality_tier", "unknown"),
            "requires": declared,
            "present": present,
            "missing": missing,
            "notes": provider.get("notes", ""),
            "install_hint": provider.get("install_hint"),
        }

    if kind in AGENT_INSTALLABLE_KINDS:
        return _report(
            False, True, "agent-installable",
            {"env": [], "binary": [], "package": []},
            {"env": [], "binary": [], "package": []},
        )

    # (requirement category, probe returning a truthy value when satisfied)
    probes = (
        ("env", lambda var: os.environ.get(var)),
        ("binary", shutil.which),
        ("package", _package_available),
    )
    present, missing = {}, {}
    for category, is_satisfied in probes:
        names = _as_list(declared.get(category))
        found = [item for item in names if is_satisfied(item)]
        present[category] = found
        missing[category] = [item for item in names if item not in found]

    ok = not any(missing.values())
    return _report(ok, False, "available" if ok else "missing", present, missing)
def provider_cli_name(provider, cli_names):
    """Map a provider onto the ``clis[]`` entry that ``matrix install`` manages.

    Tried in order: the provider's explicit ``cli`` field, its ``name``, and
    its ``name`` with ``HARNESS_PREFIX`` removed. Each candidate counts only
    if it is a member of ``cli_names``.

    Returns:
        The matching CLI name, or ``None`` when the provider's kind is not in
        ``INSTALLABLE_KINDS`` or no candidate is listed in ``cli_names``.
    """
    if provider.get("kind") not in INSTALLABLE_KINDS:
        return None

    known = set(cli_names)

    explicit = provider.get("cli")  # forward-compatible with schema v2.1 (F4.2)
    if explicit and explicit in known:
        return explicit

    name = provider.get("name", "")
    if name in known:
        return name
    if not name.startswith(HARNESS_PREFIX):
        return None

    bare = name[len(HARNESS_PREFIX):]
    return bare if bare in known else None
def resolve_install_scope(matrix_item, capability=None, recipe=None, only=None):
    """Resolve install scope flags to a concrete subset of the matrix's ``clis[]``.

    At most one of ``capability``, ``recipe`` and ``only`` may be given; with
    none of them the whole matrix is in scope.

    Args:
        matrix_item: matrix registry entry (dict).
        capability: capability id to scope to (exact match).
        recipe: recipe id to scope to (matched by ``get_recipe``).
        only: comma-separated CLI names; each must appear in ``clis[]``.

    Returns:
        dict with ``error`` (``None`` on success, otherwise a usage message),
        ``scope`` (``{"type": ..., "value": ...}``), ``cli_names`` (subset of
        ``clis[]`` in registry order) and ``capabilities`` (capability ids in
        scope, when applicable). On error ``cli_names`` and ``capabilities``
        are empty.
    """
    cli_names = matrix_item.get("clis", [])
    all_capabilities = matrix_item.get("capabilities", [])
    matrix_name = matrix_item.get("name")

    def _invalid(message, scope_type):
        return {"error": message, "scope": {"type": scope_type},
                "cli_names": [], "capabilities": []}

    selectors = [kind for kind, value in
                 (("capability", capability), ("recipe", recipe), ("only", only)) if value]

    if len(selectors) > 1:
        return _invalid("Use only one of --capability, --recipe, or --only.", "invalid")

    if not selectors:
        return {"error": None, "scope": {"type": "all"}, "cli_names": list(cli_names),
                "capabilities": [c.get("id") for c in all_capabilities]}

    sel_type = selectors[0]

    if sel_type == "only":
        # dict.fromkeys drops repeated names while keeping the order typed.
        requested = list(dict.fromkeys(
            part.strip() for part in only.split(",") if part.strip()))
        if not requested:
            # e.g. --only "," would otherwise resolve to a silent empty install.
            return _invalid("--only requires at least one CLI name (comma-separated).", "only")
        known = set(cli_names)
        unknown = [name for name in requested if name not in known]
        if unknown:
            return _invalid(
                f"Not in matrix '{matrix_name}' clis[]: "
                f"{', '.join(unknown)}. Valid: {', '.join(cli_names) or '(none)'}",
                "only")
        wanted = set(requested)
        chosen = [name for name in cli_names if name in wanted]
        return {"error": None, "scope": {"type": "only", "value": requested},
                "cli_names": chosen, "capabilities": []}

    if sel_type == "capability":
        capability_item = next(
            (c for c in all_capabilities if c.get("id") == capability), None)
        if capability_item is None:
            valid = ", ".join(c.get("id", "") for c in all_capabilities)
            return _invalid(
                f"Capability '{capability}' not found in "
                f"'{matrix_name}'. Valid: {valid or '(none)'}",
                "capability")
        return {"error": None, "scope": {"type": "capability", "value": capability},
                "cli_names": _scope_clis_for_capabilities(matrix_item, [capability_item]),
                "capabilities": [capability]}

    # sel_type == "recipe"
    recipe_item = get_recipe(matrix_item, recipe)
    if recipe_item is None:
        valid = ", ".join(r.get("id", "") for r in matrix_item.get("recipes", []))
        return _invalid(
            f"Recipe '{recipe}' not found in '{matrix_name}'. "
            f"Valid: {valid or '(none)'}",
            "recipe")
    used = recipe_item.get("capabilities_used", [])
    used_set = set(used)
    in_scope = [c for c in all_capabilities if c.get("id") in used_set]
    return {"error": None, "scope": {"type": "recipe", "value": recipe},
            "cli_names": _scope_clis_for_capabilities(matrix_item, in_scope),
            "capabilities": list(used)}
def _capability_match_field(capability, query_lower):
    """Name the first capability field that contains ``query_lower``.

    Fields are tried in priority order: ``id``, ``intent``, any entry of
    ``skill_search_hints``, then any provider ``name``. Field text is
    lowercased before the substring test; the query is used as given.

    Returns:
        ``"id"``, ``"intent"``, ``"hint"`` or ``"provider"``; ``None`` when
        nothing matches.
    """
    for field in ("id", "intent"):
        if query_lower in capability.get(field, "").lower():
            return field

    for hint in capability.get("skill_search_hints", []):
        if query_lower in hint.lower():
            return "hint"

    provider_names = (
        entry.get("name", "").lower() for entry in capability.get("providers", [])
    )
    if any(query_lower in provider_name for provider_name in provider_names):
        return "provider"
    return None
def search_capabilities(query, force_refresh=False):
    """Find capabilities matching ``query`` across every matrix (powers ``cli-hub can``).

    A capability is a hit when ``_capability_match_field`` reports a matching
    field for the lowercased query. Each hit includes the result of
    ``check_provider_requirements`` for every provider of that capability.

    Returns:
        list of hit dicts, in registry order.
    """
    needle = query.lower()
    found = []
    for entry in fetch_all_matrices(force_refresh):
        matrix_name = entry.get("name", "")
        matrix_id = entry.get("matrix_id", "")
        for cap in entry.get("capabilities", []):
            matched = _capability_match_field(cap, needle)
            if matched:
                found.append({
                    "matrix": matrix_name,
                    "matrix_id": matrix_id,
                    "capability_id": cap.get("id", ""),
                    "intent": cap.get("intent", ""),
                    "match_field": matched,
                    "providers": [
                        check_provider_requirements(candidate)
                        for candidate in cap.get("providers", [])
                    ],
                })
    return found
recipe.get("capabilities_used", []), + }) + return out diff --git a/cli-hub/cli_hub/matrix_skill.py b/cli-hub/cli_hub/matrix_skill.py new file mode 100644 index 000000000..f2d9f2cf1 --- /dev/null +++ b/cli-hub/cli_hub/matrix_skill.py @@ -0,0 +1,397 @@ +"""Render local matrix skill files with resolved CLI skill paths. + +Installed layout (one directory per matrix, so the skill's relative links to +``references/*.md`` and ``scripts/*.py`` resolve): + + ~/.cli-hub/matrix//SKILL.md + ~/.cli-hub/matrix//references/... + ~/.cli-hub/matrix//scripts/... + +Skill content source lookup chain: + +1. Repo checkout: ``/cli-hub-matrix//`` (via ``skill_md``). +2. Bundled package data: ``cli_hub/_matrix_data//`` (shipped in + wheels/sdists built from a checkout; absent in editable installs, which + hit the checkout in step 1 instead). +3. Published URL: ``https://hkuds.github.io/CLI-Anything/matrix//SKILL.md`` + (SKILL.md only; references/scripts stay remote and are linked from the + rendered file). +4. Generated stub. +""" + +import shutil +import subprocess +from importlib import metadata +from pathlib import Path + +import requests + +from cli_hub.registry import get_cli + +MATRIX_SKILL_DIR = Path.home() / ".cli-hub" / "matrix" + +# Base URL where deploy-pages.yml publishes cli-hub-matrix/ content (main only). +MATRIX_CONTENT_BASE_URL = "https://hkuds.github.io/CLI-Anything/matrix" + +# Package data dir bundled into wheels/sdists by cli-hub/setup.py. +BUNDLED_MATRIX_DATA_DIR = Path(__file__).resolve().parent / "_matrix_data" + +# Asset directories co-installed beside the rendered SKILL.md. 
def _find_repo_root():
    """Find a repository root via git, falling back to parent traversal.

    First asks ``git rev-parse --show-toplevel``; if that is unavailable or
    fails, walks up from this file's directory looking for a ``.git`` entry.

    Returns:
        Path to the repository root, or ``None`` when none is found.
    """
    # NOTE(review): git runs in the process's working directory, not this
    # module's directory, so the result is whichever repo the user is in.
    # Callers check that the paths they derive exist before using them.
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True, text=True, timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        # git missing, not executable, or hung: use the filesystem walk below.
        # OSError (not just FileNotFoundError) also covers PermissionError.
        result = None

    if result is not None and result.returncode == 0:
        top_level = result.stdout.strip()
        # Guard against empty output: Path("") is "." and would pass is_dir().
        if top_level:
            root = Path(top_level)
            if root.is_dir():
                return root

    # Fallback: walk up from this file looking for .git
    here = Path(__file__).resolve().parent
    for candidate in (here, *here.parents):
        if (candidate / ".git").exists():
            return candidate

    return None
+ """ + current = MATRIX_SKILL_DIR / name / "SKILL.md" + legacy = MATRIX_SKILL_DIR / f"{name}.SKILL.md" + if not current.exists() and legacy.exists(): + return legacy + return current + + +def resolve_local_skill_path(cli): + """Resolve an installed harness CLI's local SKILL.md path if possible.""" + if cli.get("_source", "harness") != "harness": + return None + + dist_name = cli.get("dist_name") or f"cli-anything-{cli['name']}" + try: + dist = metadata.distribution(dist_name) + except metadata.PackageNotFoundError: + return _fallback_repo_skill_path(cli) + + for file in dist.files or []: + file_str = str(file).replace("\\", "/") + if file_str.endswith("/skills/SKILL.md") or file_str.endswith("skills/SKILL.md"): + return str(dist.locate_file(file).resolve()) + + return _fallback_repo_skill_path(cli) + + +def _fallback_repo_skill_path(cli): + """Use the repo-relative skill path when available in the current checkout.""" + skill_ref = cli.get("skill_md") + if not skill_ref or "://" in skill_ref or skill_ref.startswith("npx "): + return None + + repo_root = _find_repo_root() + if repo_root is None: + return None + candidate = repo_root / skill_ref + if candidate.exists(): + return str(candidate.resolve()) + return None + + +def render_matrix_skill_file(matrix_item, installed=None): + """Write a local matrix SKILL.md with resolved member skill paths. + + Renders into ``MATRIX_SKILL_DIR//SKILL.md`` and co-installs the + matrix content directory's ``references/`` and ``scripts/`` beside it so + the skill's relative links resolve. Re-rendering is idempotent: asset + directories are replaced wholesale on each render. 
+ """ + name = matrix_item["name"] + output_dir = MATRIX_SKILL_DIR / name + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / "SKILL.md" + + template_path, content_dir = _resolve_matrix_content_source(matrix_item) + base_content = _load_matrix_skill_template(matrix_item, template_path).rstrip() + copied = _copy_matrix_assets(content_dir, output_dir) if content_dir else [] + + extra = "" + if not copied: + extra = ( + "\n\n## Reference Modules\n\n" + "No local copy of this matrix's `references/` and `scripts/` was " + "found; relative links above will not resolve locally. The " + "published copies live under " + f"{MATRIX_CONTENT_BASE_URL}/{name}/ (e.g. " + f"{MATRIX_CONTENT_BASE_URL}/{name}/SKILL.md)." + ) + + injected_section = _render_injected_section(matrix_item, installed or {}) + output_path.write_text( + f"{base_content}\n\n\n\n{injected_section}{extra}\n\n\n", + encoding="utf-8", + ) + return output_path + + +def _resolve_matrix_content_source(matrix_item): + """Locate the matrix skill source as ``(template_path, content_dir)``. + + Tries the repo checkout first, then bundled package data. Either element + may be ``None`` when nothing local is available (the template then falls + back to the published URL or a stub). + """ + skill_ref = matrix_item.get("skill_md") + if skill_ref and "://" not in skill_ref and not skill_ref.startswith("npx "): + repo_root = _find_repo_root() + if repo_root is not None: + candidate = repo_root / skill_ref + if candidate.exists(): + return candidate, candidate.parent + + bundled = BUNDLED_MATRIX_DATA_DIR / matrix_item["name"] / "SKILL.md" + if bundled.exists(): + return bundled, bundled.parent + + return None, None + + +def _copy_matrix_assets(content_dir, output_dir): + """Copy references/ and scripts/ beside the rendered SKILL.md. + + Existing asset directories are removed first so re-installs are clean and + stale files do not linger. ``__pycache__`` and ``*.pyc`` are excluded. 
+ Returns the list of subdirectories that were copied. + """ + copied = [] + for subdir in MATRIX_ASSET_SUBDIRS: + source = content_dir / subdir + destination = output_dir / subdir + if destination.exists(): + shutil.rmtree(destination) + if source.is_dir(): + shutil.copytree(source, destination, ignore=_COPY_IGNORE) + copied.append(subdir) + return copied + + +def _load_matrix_skill_template(matrix_item, template_path=None): + """Load the matrix skill template via the lookup chain. + + Order: local source file (checkout or bundled data) -> published URL -> + generated stub. + """ + if template_path is None: + template_path, _ = _resolve_matrix_content_source(matrix_item) + if template_path is not None: + return template_path.read_text(encoding="utf-8") + + published = _fetch_published_matrix_skill(matrix_item["name"]) + if published is not None: + return published + + title = matrix_item.get("display_name", matrix_item["name"]) + description = matrix_item.get("description", "") + return ( + f"# {title}\n\n" + f"{description}\n\n" + f"Install with `cli-hub matrix install {matrix_item['name']}`.\n\n" + f"Full skill content (when published): " + f"{MATRIX_CONTENT_BASE_URL}/{matrix_item['name']}/SKILL.md" + ) + + +def _fetch_published_matrix_skill(name): + """Fetch the published SKILL.md for a matrix, or None on any failure.""" + url = f"{MATRIX_CONTENT_BASE_URL}/{name}/SKILL.md" + try: + resp = requests.get(url, timeout=10) + except requests.RequestException: + return None + if resp.status_code != 200 or not resp.text.strip(): + return None + return resp.text + + +def _render_injected_section(matrix_item, installed): + """Render the injected skill reference section.""" + lines = [ + "## Installed CLI Skills", + "", + "Generated by `cli-hub matrix install` from the current local environment.", + "", + "| CLI | Entry Point | Canonical Skill | Local Skill | Status |", + "|---|---|---|---|---|", + ] + + for cli_name in matrix_item.get("clis", []): + cli = 
get_cli(cli_name) or {"name": cli_name, "entry_point": cli_name} + canonical_skill = cli.get("skill_md") or "—" + local_skill = resolve_local_skill_path(cli) or "—" + status = "installed" if cli_name in installed else "not installed" + lines.append( + f"| `{cli_name}` | `{cli.get('entry_point', '—')}` | " + f"{canonical_skill} | {local_skill} | {status} |" + ) + + capability_tooling = _render_capability_tooling(matrix_item, installed) + if capability_tooling: + lines.append("") + lines.append(capability_tooling) + + stage_tooling = _render_stage_tooling(matrix_item, installed) + if stage_tooling: + lines.append("") + lines.append(stage_tooling) + + discovery = _render_discovery_section(matrix_item) + if discovery: + lines.append("") + lines.append(discovery) + + return "\n".join(lines) + + +def _provider_installed(provider, installed): + """Return whether a CLI provider appears installed by cli-hub name.""" + if provider.get("kind") not in {"harness-cli", "public-cli"}: + return False + name = provider.get("name", "") + aliases = {name} + if name.startswith("cli-anything-"): + aliases.add(name.removeprefix("cli-anything-")) + return any(alias in installed for alias in aliases) + + +def _format_requires(provider): + requires = provider.get("requires") or {} + parts = [] + for key in ("binary", "env", "package"): + values = requires.get(key) or [] + if isinstance(values, str): + values = [values] + if values: + parts.append(f"{key}: {', '.join(values)}") + return "; ".join(parts) if parts else "none" + + +def _render_capability_tooling(matrix_item, installed): + """Render v2 capability/provider guidance for local matrix skills.""" + capabilities = matrix_item.get("capabilities", []) + if not capabilities: + return "" + + lines = [ + "## Capability Provider Overview", + "", + "Pick providers per capability from task constraints and preflight facts. 
CLI providers show cli-hub install status; non-CLI providers list their preflight requirements.", + "", + ] + + for capability in capabilities: + lines.append(f"### `{capability['id']}`") + if capability.get("intent"): + lines.append(capability["intent"]) + lines.append("") + + for provider in capability.get("providers", []): + kind = provider.get("kind", "provider") + quality = provider.get("quality_tier", "unknown") + cost = provider.get("cost_tier", "unknown") + offline = "offline" if provider.get("offline") else "online" + status = "" + if kind in {"harness-cli", "public-cli"}: + status = "installed" if _provider_installed(provider, installed) else "not installed" + status = f"; {status}" + requires = _format_requires(provider) + lines.append( + f"- `{provider.get('name', '')}` ({kind}; {quality}; {cost}; {offline}{status})" + ) + lines.append(f" - Requires: {requires}") + if provider.get("notes"): + lines.append(f" - Notes: {provider['notes']}") + + lines.append("") + + recipes = matrix_item.get("recipes", []) + if recipes: + lines.append("## Recipes") + lines.append("") + for recipe in recipes: + capabilities_used = ", ".join(f"`{item}`" for item in recipe.get("capabilities_used", [])) + lines.append(f"- `{recipe['id']}`: {recipe.get('description', '')}") + if capabilities_used: + lines.append(f" - Uses: {capabilities_used}") + lines.append("") + + known_gaps = matrix_item.get("known_gaps", []) + if known_gaps: + lines.append("## Known Gaps") + lines.append("") + for gap in known_gaps: + lines.append(f"- `{gap.get('capability', 'unknown')}`: {gap.get('reason', '')}") + if gap.get("workaround"): + lines.append(f" - Workaround: {gap['workaround']}") + lines.append("") + + return "\n".join(lines).rstrip() + + +def _render_stage_tooling(matrix_item, installed): + """Render per-stage tooling overview with goals and alternatives.""" + stages = matrix_item.get("stages", []) + has_goals = any(s.get("goal") for s in stages) + if not has_goals: + return "" + + lines = 
[ + "## Stage Tooling Overview", + "", + "What is available for each stage on this system.", + "", + ] + + for stage in stages: + goal = stage.get("goal") + if not goal: + continue + + lines.append(f"### {stage['name']}") + lines.append(f"**Goal:** {goal}") + lines.append("") + + for cli_name in stage.get("clis", []): + marker = "installed" if cli_name in installed else "not installed" + lines.append(f"- CLI: `{cli_name}` ({marker})") + + alts = stage.get("alternatives", {}) + if alts.get("python"): + lines.append(f"- Python: {', '.join(alts['python'])}") + if alts.get("api"): + lines.append(f"- APIs: {', '.join(alts['api'])}") + if alts.get("native"): + lines.append(f"- Native: {', '.join(alts['native'])}") + + lines.append("") + + return "\n".join(lines) + + +def _render_discovery_section(matrix_item): + """Registry search hints are metadata; generated skills list concrete providers.""" + return "" diff --git a/cli-hub/setup.py b/cli-hub/setup.py index 936899224..6274164ae 100644 --- a/cli-hub/setup.py +++ b/cli-hub/setup.py @@ -1,6 +1,52 @@ """cli-hub — package manager for CLI-Anything harnesses.""" +import shutil +from pathlib import Path + from setuptools import setup, find_packages +from setuptools.command.build_py import build_py as _build_py +from setuptools.command.sdist import sdist as _sdist + +HERE = Path(__file__).resolve().parent + +# Matrix skill content lives at the repo root (outside this package dir). +# It is vendored into cli_hub/_matrix_data/ at build time so wheels and +# sdists ship real matrix content for users without a repo checkout. +# Editable installs (`pip install -e`) do not need the vendored copy: the +# runtime lookup chain in cli_hub/matrix_skill.py finds the checkout first. +MATRIX_CONTENT_SOURCE = HERE.parent / "cli-hub-matrix" +MATRIX_DATA_DIR = HERE / "cli_hub" / "_matrix_data" + + +def _sync_matrix_data(): + """Vendor cli-hub-matrix/ into cli_hub/_matrix_data/ (build artifact). 
+ + No-op when building from an sdist (the data is already vendored) or when + the repo content is unavailable (runtime falls back to the published URL + or a stub). + """ + if not MATRIX_CONTENT_SOURCE.is_dir(): + return + if MATRIX_DATA_DIR.exists(): + shutil.rmtree(MATRIX_DATA_DIR) + shutil.copytree( + MATRIX_CONTENT_SOURCE, + MATRIX_DATA_DIR, + ignore=shutil.ignore_patterns("__pycache__", "*.pyc", "*.pyo"), + ) + + +class build_py(_build_py): + def run(self): + _sync_matrix_data() + super().run() + + +class sdist(_sdist): + def run(self): + _sync_matrix_data() + super().run() + setup( name="cli-anything-hub", @@ -19,6 +65,15 @@ }, license="MIT", packages=find_packages(exclude=["tests", "tests.*"]), + cmdclass={"build_py": build_py, "sdist": sdist}, + include_package_data=True, + package_data={ + "cli_hub": [ + "_matrix_data/*/SKILL.md", + "_matrix_data/*/references/*", + "_matrix_data/*/scripts/*", + ], + }, python_requires=">=3.10", install_requires=[ "click>=8.0", diff --git a/cli-hub/tests/test_cli_hub.py b/cli-hub/tests/test_cli_hub.py index ce2edc365..dba729ac4 100644 --- a/cli-hub/tests/test_cli_hub.py +++ b/cli-hub/tests/test_cli_hub.py @@ -12,6 +12,29 @@ from cli_hub import __version__ from cli_hub.registry import fetch_registry, fetch_all_clis, get_cli, search_clis, list_categories +from cli_hub.matrix import ( + _package_available, + all_recipes, + capability_matches, + check_provider_requirements, + fetch_matrix_registry, + fetch_all_matrices, + get_matrix, + preflight_matrix, + provider_cli_name, + provider_install_hint, + resolve_install_scope, + search_capabilities, + search_matrices, + unmanaged_providers, +) +from cli_hub.matrix_skill import ( + resolve_local_skill_path, + render_matrix_skill_file, + _render_capability_tooling, + _render_stage_tooling, + _render_discovery_section, +) from cli_hub.preview import ( inspect_bundle, inspect_session, @@ -23,6 +46,7 @@ ) from cli_hub.installer import ( install_cli, + install_matrix, uninstall_cli, 
get_installed, _load_installed, @@ -85,6 +109,106 @@ ], } +SAMPLE_MATRIX_REGISTRY = { + "meta": {"repo": "https://github.com/HKUDS/CLI-Anything", "description": "test matrices"}, + "matrices": [ + { + "name": "video-creation", + "display_name": "Video Creation & Editing", + "description": "Curated video workflow matrix", + "category": "video", + "matrix": "cli-matrix", + "matrix_id": "S1", + "schema_version": "2", + "skill_md": "cli-hub-matrix/video-creation/SKILL.md", + "clis": ["gimp", "blender", "audacity"], + "stages": [ + { + "name": "Thumbnail", + "clis": ["gimp"], + "goal": "Create a thumbnail image", + "alternatives": {"python": ["Pillow"], "native": ["ImageMagick convert"]}, + "skill_search_hints": ["thumbnail", "image editing"], + }, + {"name": "3D", "clis": ["blender"]}, + { + "name": "Audio", + "clis": ["audacity"], + "goal": "Edit and process audio", + "alternatives": {"python": ["pydub"], "native": ["sox"]}, + "skill_search_hints": ["audio editing"], + }, + ], + "capabilities": [ + { + "id": "package.thumbnail", + "intent": "Create a thumbnail image", + "inputs": ["concept:text"], + "outputs": ["image:path"], + "skill_search_hints": ["thumbnail", "image editing"], + "providers": [ + { + "kind": "harness-cli", + "name": "cli-anything-gimp", + "requires": {"binary": ["cli-anything-gimp"]}, + "cost_tier": "free", + "quality_tier": "high", + "offline": True, + }, + { + "kind": "python", + "name": "Pillow", + "requires": {"package": ["PIL"]}, + "cost_tier": "free", + "quality_tier": "good", + "offline": True, + }, + ], + }, + { + "id": "audio.capture", + "intent": "Edit and process audio", + "inputs": ["source:mic|file"], + "outputs": ["audio_clip:path"], + "skill_search_hints": ["audio editing"], + "providers": [ + { + "kind": "harness-cli", + "name": "cli-anything-audacity", + "requires": {"binary": ["cli-anything-audacity"]}, + "cost_tier": "free", + "quality_tier": "high", + "offline": True, + }, + { + "kind": "native", + "name": "sox", + "requires": 
{"binary": ["sox"]}, + "cost_tier": "free", + "quality_tier": "high", + "offline": True, + }, + ], + }, + ], + "recipes": [ + { + "id": "social-short", + "description": "Create a short with a thumbnail and cleaned audio.", + "capabilities_used": ["package.thumbnail", "audio.capture"], + } + ], + "known_gaps": [ + { + "capability": "publish.upload", + "reason": "No platform upload CLI yet.", + "workaround": "Ask the user to upload manually.", + } + ], + } + ], +} + def _make_preview_bundle(tmp_path: Path, *, with_trajectory: bool = False) -> Path: bundle_dir = tmp_path / "preview-bundle" @@ -338,6 +462,300 @@ def test_list_categories(self, mock_fetch): assert cats == ["3d", "audio", "image"] +class TestMatrixRegistry: + """Tests for matrix.py — fetch, cache, search, and lookup.""" + + @patch("cli_hub.matrix.requests.get") + @patch("cli_hub.matrix.MATRIX_CACHE_FILE", Path(tempfile.mktemp())) + def test_fetch_matrix_registry_from_remote(self, mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = SAMPLE_MATRIX_REGISTRY + mock_resp.raise_for_status = MagicMock() + mock_get.return_value = mock_resp + + result = fetch_matrix_registry(force_refresh=True) + assert result["matrices"][0]["name"] == "video-creation" + mock_get.assert_called_once() + + @patch("cli_hub.matrix._load_local_registry", return_value=SAMPLE_MATRIX_REGISTRY) + @patch("cli_hub.matrix.requests.get", side_effect=requests.HTTPError("not found")) + def test_fetch_matrix_registry_falls_back_to_local_checkout(self, mock_get, mock_local, tmp_path): + with patch("cli_hub.matrix.MATRIX_CACHE_FILE", tmp_path / "matrix_cache.json"): + result = fetch_matrix_registry(force_refresh=True) + assert result["matrices"][0]["name"] == "video-creation" + mock_local.assert_called_once() + + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_get_matrix_found(self, mock_fetch): + matrix_item = get_matrix("video-creation") + assert matrix_item is not None + 
assert matrix_item["display_name"] == "Video Creation & Editing" + + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_search_matrices_matches_description(self, mock_fetch): + results = search_matrices("video") + assert len(results) == 1 + assert results[0]["name"] == "video-creation" + + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_search_matrices_matches_capability_provider(self, mock_fetch): + results = search_matrices("Pillow") + assert len(results) == 1 + assert results[0]["name"] == "video-creation" + + @patch("cli_hub.matrix.importlib.util.find_spec") + @patch("cli_hub.matrix.shutil.which") + def test_check_provider_requirements(self, mock_which, mock_find_spec): + provider = { + "name": "Pillow", + "kind": "python", + "requires": {"package": ["PIL"], "binary": ["ffmpeg"], "env": ["MISSING_KEY"]}, + "cost_tier": "free", + "quality_tier": "good", + "offline": True, + } + mock_which.return_value = "/usr/bin/ffmpeg" + mock_find_spec.return_value = MagicMock() + + result = check_provider_requirements(provider) + assert result["available"] is False + assert result["present"]["binary"] == ["ffmpeg"] + assert result["present"]["package"] == ["PIL"] + assert result["missing"]["env"] == ["MISSING_KEY"] + + @patch("cli_hub.matrix.importlib.util.find_spec") + @patch("cli_hub.matrix.shutil.which") + def test_check_provider_requirements_marks_agent_skill_installable(self, mock_which, mock_find_spec): + provider = { + "name": "video-scriptwriting skill", + "kind": "agent-skill", + "requires": {"binary": ["some-skill-cli"]}, + "cost_tier": "free", + "quality_tier": "sota", + "offline": True, + } + + result = check_provider_requirements(provider) + assert result["available"] is False + assert result["agent_installable"] is True + assert result["status"] == "agent-installable" + assert result["missing"] == {"env": [], "binary": [], "package": []} + 
mock_which.assert_not_called() + mock_find_spec.assert_not_called() + + @patch("cli_hub.matrix.importlib.util.find_spec") + @patch("cli_hub.matrix.shutil.which") + def test_preflight_matrix_reports_provider_availability_without_recommendation(self, mock_which, mock_find_spec): + mock_which.side_effect = lambda binary: "/usr/bin/sox" if binary == "sox" else None + mock_find_spec.side_effect = lambda package: MagicMock() if package == "PIL" else None + + payload = preflight_matrix(SAMPLE_MATRIX_REGISTRY["matrices"][0], capability_id="package.thumbnail") + assert payload["summary"]["capabilities"] == 1 + assert payload["summary"]["available_providers"] == 1 + assert "recommended" not in payload["capabilities"][0] + assert payload["capabilities"][0]["providers"][0]["name"] == "cli-anything-gimp" + assert payload["capabilities"][0]["providers"][0]["available"] is False + assert payload["capabilities"][0]["providers"][1]["name"] == "Pillow" + assert payload["capabilities"][0]["providers"][1]["available"] is True + + @patch("cli_hub.matrix.shutil.which", return_value=None) + def test_preflight_matrix_keeps_agent_skills_out_of_available_counts(self, mock_which): + matrix_item = { + "name": "video-creation", + "display_name": "Video Creation & Editing", + "schema_version": "2", + "capabilities": [ + { + "id": "script.storyboard", + "intent": "Plan a video", + "providers": [ + { + "kind": "agent-skill", + "name": "video-scriptwriting skill", + "requires": {}, + "cost_tier": "free", + "quality_tier": "sota", + "offline": True, + }, + { + "kind": "native", + "name": "planner-cli", + "requires": {"binary": ["planner-cli"]}, + "cost_tier": "free", + "quality_tier": "good", + "offline": True, + }, + ], + } + ], + } + + payload = preflight_matrix(matrix_item, capability_id="script.storyboard") + assert payload["summary"]["available_providers"] == 0 + assert payload["summary"]["agent_installable_providers"] == 1 + assert payload["summary"]["with_agent_installable_provider"] == 1 + 
assert payload["capabilities"][0]["available_count"] == 0 + assert payload["capabilities"][0]["agent_installable_count"] == 1 + assert "recommended" not in payload["capabilities"][0] + assert payload["capabilities"][0]["providers"][0]["status"] == "agent-installable" + + +class TestPackageAvailable: + """Tests for _package_available() — import name, dist name, and error handling.""" + + def test_stdlib_import_name_detected(self): + assert _package_available("json") is True + + @patch("cli_hub.matrix.importlib.util.find_spec", return_value=None) + @patch("cli_hub.matrix.importlib.metadata.version", return_value="7.2.3") + def test_dist_name_with_dash_detected_via_metadata(self, mock_version, mock_find_spec): + assert _package_available("edge-tts") is True + mock_version.assert_called_with("edge-tts") + + @patch("cli_hub.matrix.importlib.util.find_spec") + @patch("cli_hub.matrix.importlib.metadata.version", side_effect=Exception("not found")) + def test_dash_normalized_to_underscore_detected_via_find_spec(self, mock_version, mock_find_spec): + mock_find_spec.side_effect = lambda n: MagicMock() if n == "edge_tts" else None + assert _package_available("edge-tts") is True + + @patch("cli_hub.matrix.importlib.util.find_spec", return_value=None) + @patch("cli_hub.matrix.importlib.metadata.version", side_effect=Exception("not found")) + def test_uninstalled_garbage_name_returns_false(self, mock_version, mock_find_spec): + assert _package_available("xyzzy-totally-fake-pkg-99999") is False + + @patch("cli_hub.matrix.importlib.util.find_spec", side_effect=RuntimeError("boom")) + @patch("cli_hub.matrix.importlib.metadata.version", side_effect=RuntimeError("boom")) + def test_exceptions_are_swallowed_returns_false(self, mock_version, mock_find_spec): + assert _package_available("some-pkg") is False + + @patch("cli_hub.matrix.importlib.util.find_spec", return_value=MagicMock()) + def test_plain_import_name_detected_via_find_spec(self, mock_find_spec): + assert 
_package_available("PIL") is True + mock_find_spec.assert_called_with("PIL") + + +class TestMatrixSkill: + """Tests for matrix_skill.py — local skill resolution and rendering.""" + + @patch("cli_hub.matrix_skill.metadata.distribution") + def test_resolve_local_skill_path_from_distribution(self, mock_distribution, tmp_path): + class FakeDist: + files = [Path("cli_anything/audacity/skills/SKILL.md")] + + def locate_file(self, file): + return tmp_path / file + + mock_distribution.return_value = FakeDist() + cli = {"name": "audacity", "_source": "harness"} + resolved = resolve_local_skill_path(cli) + assert resolved == str((tmp_path / "cli_anything/audacity/skills/SKILL.md").resolve()) + + @patch("cli_hub.matrix_skill.MATRIX_SKILL_DIR", Path(tempfile.mkdtemp())) + @patch("cli_hub.matrix_skill.resolve_local_skill_path") + @patch("cli_hub.matrix_skill.get_cli") + def test_render_matrix_skill_file_injects_paths(self, mock_get_cli, mock_resolve): + mock_get_cli.side_effect = lambda name: next((c for c in SAMPLE_REGISTRY["clis"] if c["name"] == name), None) + mock_resolve.side_effect = lambda cli: f"/tmp/{cli['name']}/skills/SKILL.md" if cli["name"] != "blender" else None + + rendered = render_matrix_skill_file(SAMPLE_MATRIX_REGISTRY["matrices"][0], installed={"gimp": {}, "audacity": {}}) + content = Path(rendered).read_text() + assert "## Installed CLI Skills" in content + assert "/tmp/gimp/skills/SKILL.md" in content + assert "skills/cli-anything-gimp/SKILL.md" in content + assert "## Capability Provider Overview" in content + assert "not installed" in content + + def test_render_capability_tooling_includes_providers_and_recipes(self): + matrix_item = SAMPLE_MATRIX_REGISTRY["matrices"][0] + result = _render_capability_tooling(matrix_item, installed={"gimp": {}}) + assert "## Capability Provider Overview" in result + assert "`package.thumbnail`" in result + assert "`cli-anything-gimp`" in result + assert "binary: cli-anything-gimp" in result + assert "## Recipes" in result 
+ assert "`social-short`" in result + assert "## Known Gaps" in result + + +class TestMultiApproachRendering: + """Tests for multi-approach stage rendering in matrix_skill.py.""" + + def test_render_stage_tooling_includes_goals(self): + matrix_item = SAMPLE_MATRIX_REGISTRY["matrices"][0] + result = _render_stage_tooling(matrix_item, installed={"gimp": {}}) + assert "## Stage Tooling Overview" in result + assert "Create a thumbnail image" in result + assert "Edit and process audio" in result + + def test_render_stage_tooling_includes_alternatives(self): + matrix_item = SAMPLE_MATRIX_REGISTRY["matrices"][0] + result = _render_stage_tooling(matrix_item, installed={}) + assert "Pillow" in result + assert "pydub" in result + assert "sox" in result + assert "ImageMagick convert" in result + + def test_render_stage_tooling_shows_install_status(self): + matrix_item = SAMPLE_MATRIX_REGISTRY["matrices"][0] + result = _render_stage_tooling(matrix_item, installed={"gimp": {}}) + assert "`gimp` (installed)" in result + assert "`audacity` (not installed)" in result + + def test_render_stage_tooling_omits_skill_search_hints(self): + matrix_item = SAMPLE_MATRIX_REGISTRY["matrices"][0] + result = _render_stage_tooling(matrix_item, installed={}) + assert "npx skills search" not in result + assert "Search for skills" not in result + + def test_render_stage_tooling_backward_compat_no_goal(self): + """Stages without 'goal' field are skipped gracefully.""" + matrix_no_goals = { + "name": "test", + "stages": [ + {"name": "Stage1", "clis": ["foo"]}, + ], + } + result = _render_stage_tooling(matrix_no_goals, installed={}) + assert result == "" + + def test_render_discovery_section(self): + matrix_item = SAMPLE_MATRIX_REGISTRY["matrices"][0] + result = _render_discovery_section(matrix_item) + assert result == "" + + def test_render_discovery_section_uses_capability_hints(self): + matrix_item = { + "name": "test", + "capabilities": [ + {"id": "publish.upload", "skill_search_hints": ["youtube 
upload"]}, + ], + } + result = _render_discovery_section(matrix_item) + assert result == "" + + def test_render_discovery_section_empty_when_no_hints(self): + matrix_no_hints = { + "name": "test", + "stages": [{"name": "S1", "clis": ["foo"]}], + } + result = _render_discovery_section(matrix_no_hints) + assert result == "" + + @patch("cli_hub.matrix_skill.MATRIX_SKILL_DIR", Path(tempfile.mkdtemp())) + @patch("cli_hub.matrix_skill.resolve_local_skill_path") + @patch("cli_hub.matrix_skill.get_cli") + def test_render_matrix_skill_file_includes_stage_tooling(self, mock_get_cli, mock_resolve): + mock_get_cli.side_effect = lambda name: next((c for c in SAMPLE_REGISTRY["clis"] if c["name"] == name), None) + mock_resolve.return_value = None + + rendered = render_matrix_skill_file(SAMPLE_MATRIX_REGISTRY["matrices"][0], installed={"gimp": {}}) + content = Path(rendered).read_text() + assert "## Stage Tooling Overview" in content + assert "## Skill Discovery Commands" not in content + assert "npx skills search" not in content + assert "Create a thumbnail image" in content + + class TestPreviewBundle: """Tests for preview bundle inspection and HTML rendering.""" @@ -1087,7 +1505,204 @@ def test_help(self, mock_detect, mock_visit, mock_first_run): mock_detect.return_value = self.human_detection result = self.runner.invoke(main, ["--help"]) assert "cli-hub" in result.output + assert "matrix" in result.output + assert "previews" in result.output + assert result.exit_code == 0 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + @patch("cli_hub.cli.get_installed", return_value={"gimp": {"version": "1.0.0"}}) + def test_matrix_list_command(self, mock_installed, mock_fetch_matrices, mock_detect, mock_visit, mock_first_run): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, 
["matrix", "list"]) + assert "video-creation" in result.output + assert "1/3" in result.output + assert result.exit_code == 0 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.search_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + @patch("cli_hub.cli.get_installed", return_value={"gimp": {"version": "1.0.0"}}) + def test_matrix_search_command(self, mock_installed, mock_search, mock_detect, mock_visit, mock_first_run): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "search", "video"]) + assert "video-creation" in result.output + assert "1/3" in result.output + assert result.exit_code == 0 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.search_matrices", return_value=[]) + def test_matrix_search_no_results(self, mock_search, mock_detect, mock_visit, mock_first_run): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "search", "nonexistent"]) + assert "No matrices matching" in result.output + assert result.exit_code == 0 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.get_matrix", return_value=SAMPLE_MATRIX_REGISTRY["matrices"][0]) + @patch("cli_hub.cli.get_installed", return_value={"gimp": {"version": "1.0.0"}}) + @patch("cli_hub.cli.get_rendered_matrix_skill_path", return_value=Path("/tmp/video-creation.SKILL.md")) + @patch("pathlib.Path.exists", return_value=True) + def test_matrix_info_command( + self, + mock_exists, + mock_rendered, + mock_installed, + mock_get_matrix, + mock_detect, + mock_visit, + mock_first_run, + ): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "info", "video-creation"]) + assert "Video 
Creation & Editing" in result.output + assert "cli-hub matrix install video-creation" in result.output + assert "cli-hub-matrix/video-creation/SKILL.md" in result.output + assert "Local skill: /tmp/video-creation.SKILL.md" in result.output + assert "Capabilities:" in result.output + assert "package.thumbnail" in result.output + assert "Known Gaps:" in result.output + assert result.exit_code == 0 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.get_matrix", return_value=SAMPLE_MATRIX_REGISTRY["matrices"][0]) + @patch("cli_hub.cli.preflight_matrix") + def test_matrix_preflight_command(self, mock_preflight, mock_get_matrix, mock_detect, mock_visit, mock_first_run): + mock_detect.return_value = self.human_detection + mock_preflight.return_value = { + "matrix": {"display_name": "Video Creation & Editing"}, + "offline": False, + "summary": { + "capabilities": 1, + "with_available_provider": 1, + "providers": 2, + "available_providers": 1, + "covered": 1, + "gaps": 0, + }, + "capabilities": [ + { + "id": "package.thumbnail", + "intent": "Create a thumbnail image", + "providers": [ + { + "name": "Pillow", + "kind": "python", + "available": True, + "quality_tier": "good", + "cost_tier": "free", + "missing": {"env": [], "binary": [], "package": []}, + } + ], + } + ], + } + result = self.runner.invoke(main, ["matrix", "preflight", "video-creation"]) assert result.exit_code == 0 + assert "Video Creation & Editing Preflight" in result.output + assert "Recommended:" not in result.output + assert "Pillow [python; good; free]" in result.output + mock_preflight.assert_called_once_with( + SAMPLE_MATRIX_REGISTRY["matrices"][0], + capability_id=None, + offline=False, + capability_ids=None, + ) + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.get_matrix", 
return_value=SAMPLE_MATRIX_REGISTRY["matrices"][0]) + @patch("cli_hub.cli.preflight_matrix") + def test_matrix_preflight_command_renders_agent_skills_separately( + self, + mock_preflight, + mock_get_matrix, + mock_detect, + mock_visit, + mock_first_run, + ): + mock_detect.return_value = self.human_detection + mock_preflight.return_value = { + "matrix": {"display_name": "Video Creation & Editing"}, + "offline": False, + "summary": { + "capabilities": 1, + "with_available_provider": 0, + "with_agent_installable_provider": 1, + "providers": 1, + "available_providers": 0, + "agent_installable_providers": 1, + "covered": 1, + "gaps": 0, + }, + "capabilities": [ + { + "id": "script.storyboard", + "intent": "Plan a video", + "providers": [ + { + "name": "video-scriptwriting skill", + "kind": "agent-skill", + "available": False, + "agent_installable": True, + "quality_tier": "sota", + "cost_tier": "free", + "missing": {"env": [], "binary": [], "package": []}, + } + ], + } + ], + } + + result = self.runner.invoke(main, ["matrix", "preflight", "video-creation"]) + assert result.exit_code == 0 + assert "1 agent-installable skill provider is not counted as installed or missing" in result.output + assert "Recommended:" not in result.output + assert "Agent-installable:" not in result.output + assert "video-scriptwriting skill [agent-skill; sota; free] agent-installable" in result.output + assert "missing:" not in result.output + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.install_matrix", return_value=(False, { + "matrix": SAMPLE_MATRIX_REGISTRY["matrices"][0], + "results": [ + {"name": "gimp", "status": "skipped", "message": "Already installed"}, + {"name": "blender", "status": "installed", "message": "Installed Blender"}, + {"name": "audacity", "status": "failed", "message": "Install failed"}, + ], + "summary": {"installed": 1, "skipped": 1, "failed": 1}, + 
"rendered_skill_path": "/tmp/video-creation.SKILL.md", + })) + def test_matrix_install_command_partial_failure(self, mock_install_matrix, mock_detect, mock_visit, mock_first_run): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "install", "video-creation"]) + # Partial failure (some installed, some failed) → exit code 3 per the contract. + assert result.exit_code == 3 + assert "Summary: 1 installed, 1 skipped, 1 failed" in result.output + assert "Matrix skill: cli-hub-matrix/video-creation/SKILL.md" in result.output + assert "Local matrix skill: /tmp/video-creation.SKILL.md" in result.output + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.install_matrix", return_value=(False, {"error": "Matrix 'missing' not found."})) + def test_matrix_install_command_not_found(self, mock_install_matrix, mock_detect, mock_visit, mock_first_run): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "install", "missing"]) + assert result.exit_code == 1 + assert "not found" in result.output @patch("cli_hub.cli.track_first_run") @patch("cli_hub.cli.track_visit") @@ -1228,3 +1843,179 @@ def test_launch_unknown_cli(self, mock_get, mock_detect, mock_visit, mock_first_ result = self.runner.invoke(main, ["launch", "nonexistent"]) assert result.exit_code == 1 assert "not found" in result.output + + +SAMPLE_MATRIX = SAMPLE_MATRIX_REGISTRY["matrices"][0] + + +class TestMatrixScopeHelpers: + """Pure-function tests for provider↔CLI resolution and install scoping (F2.2).""" + + def test_provider_cli_name_strips_harness_prefix(self): + gimp = SAMPLE_MATRIX["capabilities"][0]["providers"][0] + assert provider_cli_name(gimp, SAMPLE_MATRIX["clis"]) == "gimp" + + def test_provider_cli_name_none_for_non_installable(self): + pillow = SAMPLE_MATRIX["capabilities"][0]["providers"][1] # python + assert 
provider_cli_name(pillow, SAMPLE_MATRIX["clis"]) is None + + def test_provider_cli_name_explicit_field_wins(self): + provider = {"kind": "public-cli", "name": "Whatever", "cli": "blender"} + assert provider_cli_name(provider, SAMPLE_MATRIX["clis"]) == "blender" + + def test_scope_all_returns_every_cli(self): + scope = resolve_install_scope(SAMPLE_MATRIX) + assert scope["error"] is None + assert scope["cli_names"] == ["gimp", "blender", "audacity"] + assert scope["scope"]["type"] == "all" + + def test_scope_capability_maps_to_clis(self): + scope = resolve_install_scope(SAMPLE_MATRIX, capability="package.thumbnail") + assert scope["error"] is None + assert scope["cli_names"] == ["gimp"] + + def test_scope_recipe_unions_capability_clis(self): + scope = resolve_install_scope(SAMPLE_MATRIX, recipe="social-short") + assert scope["error"] is None + # social-short uses package.thumbnail (gimp) + audio.capture (audacity), in clis[] order + assert scope["cli_names"] == ["gimp", "audacity"] + + def test_scope_only_validates_membership(self): + ok = resolve_install_scope(SAMPLE_MATRIX, only="gimp,audacity") + assert ok["error"] is None + assert ok["cli_names"] == ["gimp", "audacity"] + bad = resolve_install_scope(SAMPLE_MATRIX, only="gimp,bogus") + assert bad["error"] is not None and "bogus" in bad["error"] + + def test_scope_mutually_exclusive(self): + scope = resolve_install_scope(SAMPLE_MATRIX, capability="package.thumbnail", only="gimp") + assert scope["error"] is not None + + def test_scope_unknown_capability_errors(self): + scope = resolve_install_scope(SAMPLE_MATRIX, capability="nope") + assert scope["error"] is not None and "nope" in scope["error"] + + def test_unmanaged_providers_groups_by_kind(self): + groups = unmanaged_providers(SAMPLE_MATRIX) + assert groups.get("python") == ["Pillow"] + assert groups.get("native") == ["sox"] + + def test_provider_install_hint_derives_cli_hub_command(self): + gimp = SAMPLE_MATRIX["capabilities"][0]["providers"][0] + assert 
provider_install_hint(gimp, SAMPLE_MATRIX["clis"]) == "cli-hub install gimp" + + def test_provider_install_hint_prefers_explicit(self): + provider = {"kind": "public-cli", "name": "tool", "install_hint": "brew install tool"} + assert provider_install_hint(provider, SAMPLE_MATRIX["clis"]) == "brew install tool" + + +class TestCapabilitySearch: + """Capability-level search powering matrix search matched_in and `cli-hub can` (F1.1).""" + + def test_capability_matches_by_intent(self): + matches = capability_matches(SAMPLE_MATRIX, "thumbnail") + ids = {m["capability_id"] for m in matches} + assert "package.thumbnail" in ids + hit = next(m for m in matches if m["capability_id"] == "package.thumbnail") + assert hit["match_field"] in {"id", "intent", "hint"} + assert "cli-anything-gimp" in hit["providers_summary"] + + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_search_capabilities_includes_availability(self, mock_fetch): + with patch("cli_hub.matrix.shutil.which", return_value=None), \ + patch("cli_hub.matrix._package_available", return_value=False): + hits = search_capabilities("audio") + assert hits + hit = hits[0] + assert "providers" in hit and "available" in hit["providers"][0] + + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_all_recipes_filters_by_query(self, mock_fetch): + assert {r["id"] for r in all_recipes()} == {"social-short"} + assert all_recipes("nonexistent-recipe-xyz") == [] + + +class TestMatrixF1F2Commands: + """CLI-level tests for the F1/F2 matrix commands.""" + + def setup_method(self): + self.runner = click.testing.CliRunner() + self.human_detection = {"is_agent": False, "agent": None, "source": "tty"} + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def 
test_can_command_finds_capability(self, mock_fetch, mock_detect, mock_visit, mock_first): + mock_detect.return_value = self.human_detection + with patch("cli_hub.matrix.shutil.which", return_value=None), \ + patch("cli_hub.matrix._package_available", return_value=False): + result = self.runner.invoke(main, ["can", "thumbnail"]) + assert result.exit_code == 0 + assert "package.thumbnail" in result.output + assert "cli-hub matrix preflight video-creation -c package.thumbnail" in result.output + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_can_command_no_match_exits_1(self, mock_fetch, mock_detect, mock_visit, mock_first): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["can", "zzz-no-such-capability"]) + assert result.exit_code == 1 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.matrix.fetch_all_matrices", return_value=SAMPLE_MATRIX_REGISTRY["matrices"]) + def test_recipes_command_lists_recipes(self, mock_fetch, mock_detect, mock_visit, mock_first): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "recipes"]) + assert result.exit_code == 0 + assert "social-short" in result.output + assert "preflight video-creation --recipe social-short" in result.output + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.get_installed", return_value={}) + @patch("cli_hub.installer.get_cli") + @patch("cli_hub.installer.get_matrix", return_value=SAMPLE_MATRIX) + def test_install_dry_run_no_side_effects(self, mock_get_matrix, mock_get_cli, + mock_installed, mock_detect, mock_visit, mock_first): + mock_detect.return_value = 
self.human_detection + mock_get_cli.side_effect = lambda n: {"name": n, "display_name": n.title(), + "_source": "harness", "entry_point": n} + with patch("cli_hub.installer.install_cli") as mock_install: + result = self.runner.invoke( + main, ["matrix", "install", "video-creation", "--capability", "package.thumbnail", "--dry-run"]) + mock_install.assert_not_called() + assert result.exit_code == 0 + assert "Install plan" in result.output + assert "gimp" in result.output + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.installer.get_matrix", return_value=SAMPLE_MATRIX) + def test_install_unknown_capability_exits_2(self, mock_get_matrix, mock_detect, mock_visit, mock_first): + mock_detect.return_value = self.human_detection + result = self.runner.invoke( + main, ["matrix", "install", "video-creation", "--capability", "nope", "--dry-run"]) + assert result.exit_code == 2 + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.doctor_matrix", return_value=(False, { + "matrix": SAMPLE_MATRIX, + "last_run": "2026-06-14T10:00:00", + "checks": [{"name": "gimp", "entry_point": "gimp", "status": "not_installed", + "detail": "Not installed", "fix": "cli-hub install gimp"}], + "summary": {"total": 1, "ok": 0, "broken": 0, "not_installed": 1}, + })) + def test_doctor_command_reports_gaps_exit_3(self, mock_doctor, mock_detect, mock_visit, mock_first): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "doctor", "video-creation"]) + assert result.exit_code == 3 + assert "cli-hub install gimp" in result.output diff --git a/cli-hub/tests/test_matrix_skill_dist.py b/cli-hub/tests/test_matrix_skill_dist.py new file mode 100644 index 000000000..d60b5cb07 --- /dev/null +++ b/cli-hub/tests/test_matrix_skill_dist.py @@ -0,0 +1,265 @@ +"""Tests for matrix 
skill distribution (P1-4): co-installed assets and the +content lookup chain in cli_hub/matrix_skill.py.""" + +from pathlib import Path +from unittest.mock import patch + +import pytest +import requests +import click.testing + +from cli_hub import matrix_skill +from cli_hub.matrix_skill import ( + get_rendered_matrix_skill_path, + render_matrix_skill_file, +) +from cli_hub.cli import main + + +def _make_content_dir(root, name="demo", with_pycache=False): + """Create a fake repo checkout with cli-hub-matrix// content.""" + content = root / "cli-hub-matrix" / name + (content / "references").mkdir(parents=True) + (content / "scripts").mkdir(parents=True) + (content / "SKILL.md").write_text( + "# Demo Matrix\n\nRead [`references/guide.md`](references/guide.md) " + "and run `scripts/doctor.py`.\n", + encoding="utf-8", + ) + (content / "references" / "guide.md").write_text("guide", encoding="utf-8") + (content / "scripts" / "doctor.py").write_text("print('ok')", encoding="utf-8") + if with_pycache: + pycache = content / "scripts" / "__pycache__" + pycache.mkdir() + (pycache / "doctor.cpython-310.pyc").write_bytes(b"\x00") + (content / "scripts" / "stray.pyc").write_bytes(b"\x00") + return content + + +def _demo_matrix(name="demo"): + return { + "name": name, + "display_name": "Demo Matrix", + "description": "A demo matrix.", + "skill_md": f"cli-hub-matrix/{name}/SKILL.md", + "clis": [], + } + + +class TestAssetCoInstall: + """references/ and scripts/ land beside the rendered SKILL.md.""" + + def test_references_and_scripts_copied_beside_skill(self, tmp_path, monkeypatch): + repo = tmp_path / "repo" + _make_content_dir(repo) + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: repo) + + rendered = render_matrix_skill_file(_demo_matrix(), installed={}) + + assert rendered == skill_dir / "demo" / "SKILL.md" + assert rendered.exists() + # 
Relative links in the skill resolve in the installed layout. + assert (rendered.parent / "references" / "guide.md").exists() + assert (rendered.parent / "scripts" / "doctor.py").exists() + content = rendered.read_text(encoding="utf-8") + assert "references/guide.md" in content + assert "MATRIX_SKILL_PATHS:START" in content + # Assets were found locally, so no remote-fallback note is injected. + assert "will not resolve locally" not in content + + def test_pycache_and_pyc_excluded(self, tmp_path, monkeypatch): + repo = tmp_path / "repo" + _make_content_dir(repo, with_pycache=True) + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: repo) + + rendered = render_matrix_skill_file(_demo_matrix(), installed={}) + + installed_files = [p.name for p in rendered.parent.rglob("*")] + assert "doctor.py" in installed_files + assert "__pycache__" not in installed_files + assert not any(name.endswith(".pyc") for name in installed_files) + + def test_reinstall_is_idempotent_and_removes_stale_files(self, tmp_path, monkeypatch): + repo = tmp_path / "repo" + content = _make_content_dir(repo) + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: repo) + + first = render_matrix_skill_file(_demo_matrix(), installed={}) + # Simulate drift: stale file in the installed copy, updated source. 
+ (first.parent / "references" / "stale.md").write_text("old", encoding="utf-8") + (content / "references" / "guide.md").write_text("guide v2", encoding="utf-8") + + second = render_matrix_skill_file(_demo_matrix(), installed={}) + + assert second == first + assert not (second.parent / "references" / "stale.md").exists() + assert (second.parent / "references" / "guide.md").read_text(encoding="utf-8") == "guide v2" + + +class TestLookupChain: + """Checkout -> bundled data -> published URL -> stub.""" + + def test_bundled_data_used_when_no_checkout(self, tmp_path, monkeypatch): + bundled_root = tmp_path / "bundled" + _make_content_dir(bundled_root) + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: None) + monkeypatch.setattr( + matrix_skill, "BUNDLED_MATRIX_DATA_DIR", bundled_root / "cli-hub-matrix" + ) + + rendered = render_matrix_skill_file(_demo_matrix(), installed={}) + + content = rendered.read_text(encoding="utf-8") + assert "# Demo Matrix" in content + assert (rendered.parent / "references" / "guide.md").exists() + assert (rendered.parent / "scripts" / "doctor.py").exists() + + def test_published_url_used_when_no_local_content(self, tmp_path, monkeypatch): + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: None) + monkeypatch.setattr( + matrix_skill, "BUNDLED_MATRIX_DATA_DIR", tmp_path / "missing" + ) + + class FakeResponse: + status_code = 200 + text = "# Demo Matrix (published)\n\npublished body\n" + + requested = {} + + def fake_get(url, timeout): + requested["url"] = url + return FakeResponse() + + monkeypatch.setattr(matrix_skill.requests, "get", fake_get) + + rendered = render_matrix_skill_file(_demo_matrix(), installed={}) + + content = rendered.read_text(encoding="utf-8") + assert "# 
Demo Matrix (published)" in content + assert requested["url"] == ( + f"{matrix_skill.MATRIX_CONTENT_BASE_URL}/demo/SKILL.md" + ) + # No local assets: the rendered skill points at the published copies. + assert not (rendered.parent / "references").exists() + assert "will not resolve locally" in content + assert f"{matrix_skill.MATRIX_CONTENT_BASE_URL}/demo/" in content + + def test_stub_used_when_everything_else_fails(self, tmp_path, monkeypatch): + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: None) + monkeypatch.setattr( + matrix_skill, "BUNDLED_MATRIX_DATA_DIR", tmp_path / "missing" + ) + + def fake_get(url, timeout): + raise requests.ConnectionError("offline") + + monkeypatch.setattr(matrix_skill.requests, "get", fake_get) + + rendered = render_matrix_skill_file(_demo_matrix(), installed={}) + + content = rendered.read_text(encoding="utf-8") + assert "# Demo Matrix" in content + assert "A demo matrix." in content + assert "Install with `cli-hub matrix install demo`." in content + + def test_published_url_non_200_falls_back_to_stub(self, tmp_path, monkeypatch): + skill_dir = tmp_path / "home" / ".cli-hub" / "matrix" + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", skill_dir) + monkeypatch.setattr(matrix_skill, "_find_repo_root", lambda: None) + monkeypatch.setattr( + matrix_skill, "BUNDLED_MATRIX_DATA_DIR", tmp_path / "missing" + ) + + class FakeResponse: + status_code = 404 + text = "Not Found" + + monkeypatch.setattr(matrix_skill.requests, "get", lambda url, timeout: FakeResponse()) + + rendered = render_matrix_skill_file(_demo_matrix(), installed={}) + assert "Install with `cli-hub matrix install demo`." 
in rendered.read_text(encoding="utf-8") + + +class TestRenderedPathCompat: + """get_rendered_matrix_skill_path prefers the new layout, keeps legacy.""" + + def test_prefers_directory_layout(self, tmp_path, monkeypatch): + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", tmp_path) + new_path = tmp_path / "demo" / "SKILL.md" + new_path.parent.mkdir(parents=True) + new_path.write_text("new", encoding="utf-8") + (tmp_path / "demo.SKILL.md").write_text("legacy", encoding="utf-8") + + assert get_rendered_matrix_skill_path("demo") == new_path + + def test_falls_back_to_legacy_flat_file(self, tmp_path, monkeypatch): + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", tmp_path) + legacy = tmp_path / "demo.SKILL.md" + legacy.write_text("legacy", encoding="utf-8") + + assert get_rendered_matrix_skill_path("demo") == legacy + + def test_defaults_to_directory_layout_when_nothing_exists(self, tmp_path, monkeypatch): + monkeypatch.setattr(matrix_skill, "MATRIX_SKILL_DIR", tmp_path) + assert get_rendered_matrix_skill_path("demo") == tmp_path / "demo" / "SKILL.md" + + +class TestSkillOnlyInstall: + """`cli-hub matrix install --skill-only` renders without CLI installs.""" + + def setup_method(self): + self.runner = click.testing.CliRunner() + self.human_detection = { + "is_agent": False, + "traffic_type": "human", + "category": "human", + "reason": "human", + "signals": [], + "stdin_tty": True, + "is_interactive": True, + } + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.render_matrix_skill_file") + @patch("cli_hub.cli.get_installed", return_value={}) + @patch("cli_hub.cli.get_matrix") + @patch("cli_hub.cli.install_matrix") + def test_skill_only_renders_and_skips_cli_installs( + self, mock_install_matrix, mock_get_matrix, mock_installed, + mock_render, mock_detect, mock_visit, mock_first_run, tmp_path, + ): + mock_detect.return_value = self.human_detection + 
mock_get_matrix.return_value = _demo_matrix() + mock_render.return_value = tmp_path / "demo" / "SKILL.md" + + result = self.runner.invoke(main, ["matrix", "install", "demo", "--skill-only"]) + + assert result.exit_code == 0 + assert "Local matrix skill:" in result.output + mock_render.assert_called_once() + mock_install_matrix.assert_not_called() + + @patch("cli_hub.cli.track_first_run") + @patch("cli_hub.cli.track_visit") + @patch("cli_hub.cli.detect_invocation_context") + @patch("cli_hub.cli.get_matrix", return_value=None) + def test_skill_only_unknown_matrix_fails( + self, mock_get_matrix, mock_detect, mock_visit, mock_first_run, + ): + mock_detect.return_value = self.human_detection + result = self.runner.invoke(main, ["matrix", "install", "missing", "--skill-only"]) + assert result.exit_code == 1 diff --git a/docs/hub/assets/demos/arcgis-pro-live-bridge-demo.gif b/docs/hub/assets/demos/arcgis-pro-live-bridge-demo.gif new file mode 100644 index 000000000..645a33fd2 Binary files /dev/null and b/docs/hub/assets/demos/arcgis-pro-live-bridge-demo.gif differ diff --git a/docs/hub/assets/demos/blender-orbital-relay-drone-preview-trajectory.gif b/docs/hub/assets/demos/blender-orbital-relay-drone-preview-trajectory.gif new file mode 100644 index 000000000..501b2eb2f Binary files /dev/null and b/docs/hub/assets/demos/blender-orbital-relay-drone-preview-trajectory.gif differ diff --git a/docs/hub/assets/demos/drawio-demo.gif b/docs/hub/assets/demos/drawio-demo.gif new file mode 100644 index 000000000..6a07b989e Binary files /dev/null and b/docs/hub/assets/demos/drawio-demo.gif differ diff --git a/docs/hub/assets/demos/drawio-https-handshake.png b/docs/hub/assets/demos/drawio-https-handshake.png new file mode 100644 index 000000000..83a7eb1c0 Binary files /dev/null and b/docs/hub/assets/demos/drawio-https-handshake.png differ diff --git a/docs/hub/assets/demos/freecad-curiosity-preview-trajectory.gif b/docs/hub/assets/demos/freecad-curiosity-preview-trajectory.gif new 
file mode 100644 index 000000000..65bb8eeb6 Binary files /dev/null and b/docs/hub/assets/demos/freecad-curiosity-preview-trajectory.gif differ diff --git a/docs/hub/assets/demos/slay-the-spire-ii-gameplay.gif b/docs/hub/assets/demos/slay-the-spire-ii-gameplay.gif new file mode 100644 index 000000000..dd87679dd Binary files /dev/null and b/docs/hub/assets/demos/slay-the-spire-ii-gameplay.gif differ diff --git a/docs/hub/assets/demos/videocaptioner-after.png b/docs/hub/assets/demos/videocaptioner-after.png new file mode 100644 index 000000000..fe30d9c18 Binary files /dev/null and b/docs/hub/assets/demos/videocaptioner-after.png differ diff --git a/docs/hub/assets/demos/videocaptioner-before.png b/docs/hub/assets/demos/videocaptioner-before.png new file mode 100644 index 000000000..f43628847 Binary files /dev/null and b/docs/hub/assets/demos/videocaptioner-before.png differ diff --git a/docs/hub/demos.html b/docs/hub/demos.html new file mode 100644 index 000000000..262c4bd49 --- /dev/null +++ b/docs/hub/demos.html @@ -0,0 +1,593 @@ + + + + + + + Real-World Demos — CLI-Anything Hub + + + + + + + + + + + + + + + + + + +
+
+ + + + + +
+ Real-World Demos +

Agents, doing real work

+

AI agents using generated CLIs to produce complete, useful artifacts — no GUI needed. Each run below is a real session: the agent reads state, issues commands, and ships an artifact you can open.

+
+ + +
+ + +
+
+
More CLI demos coming soon.
+

Built something with a CLI-Anything harness? Show it off.

+

Record a short run of your agent driving a generated CLI, and add it here. The Contributing Guide and PR Template walk you through it.

+ +
+
+ + + + + + + + + diff --git a/docs/hub/docs.html b/docs/hub/docs.html new file mode 100644 index 000000000..683620bd0 --- /dev/null +++ b/docs/hub/docs.html @@ -0,0 +1,718 @@ + + + + + + + Documentation — CLI-Anything Hub + + + + + + + + + + + + + + + + + + +
+
+ + + + + +
+ Documentation +

Drive any app from the command line

+

CLI-Hub is a registry and installer for agent-native CLIs. Install a tool by name, launch it, and — when a task spans several tools — pull in a capability-based Workflow Matrix. Commands are built for both humans and AI agents, with --json output on almost every command and a documented exit-code contract.

+
+ +
+ + + +
+ + +
+ Getting started +

Install

+

CLI-Hub ships as the cli-anything-hub package and installs the cli-hub command. It needs Python 3.10 or newer.

+
$ pip install cli-anything-hub
+$ cli-hub --version
+

The cli-hub command is the entry point for everything below — browsing the catalog, installing tools, and working with matrices. From here you can also hand an agent the meta-skill SKILL.md so it can discover and install CLIs autonomously.

+
+ + +
+ Two-minute tour +

Quick start

+

Find a tool, install it, and run it — or scope a whole workflow to just what your task needs.

+
# Browse and install a single CLI
+$ cli-hub search diagram
+$ cli-hub install drawio
+$ cli-hub launch drawio --help
+
+# Ask what's needed for a task across every matrix
+$ cli-hub can "transcribe audio"
+
+# Check a matrix, then install only the capability you need
+$ cli-hub matrix preflight video-creation --summary
+$ cli-hub matrix install video-creation --capability text.transcribe
+
+ +

Tip. Add --json to almost any command for machine-readable output, and chain on exit codes (see Exit codes) to make scripts and agents react without parsing text.

+
+
+ + +
+ The catalog +

Catalog commands

+

The catalog is the flat list of individual CLIs — first-party harness tools published by CLI-Anything and curated public tools. These commands discover, install, and run them.

+ +
+
cli-hub list
+
List every available CLI, grouped by category, with an installed marker.
+
+
-c, --category <name>Filter to one category.
+
-s, --source <harness|public|npm|all>Filter by source (npm is an alias for public).
+
--jsonMachine-readable output.
+
+
+ +
+
cli-hub search <query>
+
Search CLIs by name, description, or category.
+
+
--jsonMachine-readable output.
+
+
+ +
+
cli-hub info <name>
+
Show full detail for one CLI — description, category, source, version, requirements, entry point, homepage, and contributors.
+
+ +
+
cli-hub install <name>
+
Install a CLI from the registry. Harness tools install via pip; public tools use their declared strategy (npm / uv / command).
+
+ +
+
cli-hub launch <name> [args…]
+
Launch an installed CLI, passing every argument after the name straight through to it.
+
+ +
+
cli-hub update <name>  ·  cli-hub uninstall <name>
+
Update an installed CLI to the latest version, or remove it.
+
+ +
+
cli-hub previews <inspect|html|watch|open>
+
Inspect, render, watch, or open preview bundles and live sessions produced by harness CLIs (e.g. FreeCAD / Blender preview loops). watch and open serve a live, auto-refreshing page.
+
+
+ + +
+ Concepts +

Workflow matrices

+

A real task — “make a captioned video” — rarely maps to one tool. A matrix bundles a domain into capabilities (the verbs an agent calls) and, for each, several providers (concrete ways to fulfil it). You pick a path by capability, not by memorising which of fourteen packages to install.

+ +
Matrix  ── a capability bundle for one domain (e.g. video-creation)
+
+├─ capabilities[]   verbs an agent can call — visual.generate, text.transcribe …
+│   └─ providers[]   concrete ways to fulfil a capability, each tagged with a kind
+
+├─ recipes[]        named subsets of capabilities for a specific task
+├─ clis[]           the flat list that matrix install actually installs
+└─ SKILL.md         the provider-selection handbook, rendered locally on install
+ +

Provider kinds

+

Every provider declares a kind that tells you how it’s obtained and what it costs:

+
+

harness-cli

A first-party CLI you can install with cli-hub; each ships its own SKILL.md.

+

public-cli

A third-party CLI installed via brew / npm / pip.

+

python

A Python library used directly (a free fallback).

+

native

A native binary on the system, such as ffmpeg.

+

api

A cloud API — needs a key and explicit user consent.

+

agent-skill

An external agent skill the agent installs itself; not counted by preflight.

+
+ +
+ +

Preflight reports, it doesn’t choose. Registry order is documentation order, not a ranking. preflight tells you what’s available; the agent picks the provider using the rules in the matrix’s SKILL.md. Paid APIs are never used without configured keys and user consent.

+
+
+ + +
+ The matrix command family +

Matrix commands

+

All matrix operations live under cli-hub matrix, plus the top-level cli-hub can shortcut for capability search. Every subcommand supports --json.

+ +

Discover

+
+
cli-hub can <query>
+
Search capabilities across all matrices for a task, with per-provider availability on this machine. Exit 0 if there are hits, 1 if none.
+
+
--jsonEmit capability matches as JSON.
+
+
+
$ cli-hub can "download bilibili video"
+  video.download  (video-creation)
+    available here: yt-dlp · missing: BBDown, lux
+    next: cli-hub matrix preflight video-creation -c video.download
+ +
+
cli-hub matrix list
+
List all matrices with their description and how many member CLIs are installed.
+
--jsonMachine-readable output.
+
+ +
+
cli-hub matrix search <query>
+
Search matrices by name, capability, provider, recipe, or gap — with match attribution (which capability matched, and on which field).
+
--jsonIncludes matched_capabilities[].
+
+ +
+
cli-hub matrix info <name>
+
Full detail for one matrix — members and status, capabilities with provider counts, recipes, and known gaps.
+
--jsonMachine-readable output.
+
+ +
+
cli-hub matrix recipes
+
List task-oriented recipes across every matrix, each with its capabilities and a ready-to-run preflight command.
+
+
--search <text>Filter by id, description, or capability.
+
--jsonMachine-readable output.
+
+
+ +

Preflight

+

Preflight is the pivot of the whole flow: it reports which providers are actually usable in your current environment, so you (or an agent) decide what to install before touching anything.

+
+
cli-hub matrix preflight <name>
+
Check provider availability per capability. Exits 0 when every (filtered) capability has at least one usable provider, 3 when there’s a gap.
+
+
-c, --capability <id>Check just one capability.
+
--recipe <id>Check only the capabilities a recipe uses.
+
--offlineConsider only offline-capable providers.
+
--fix-hintsPrint an install command under each missing provider.
+
--summaryPrint only the two-line summary.
+
--jsonMachine-readable output.
+
+
+
# Gate an action on coverage — only proceed if the recipe is fully satisfiable
+$ cli-hub matrix preflight video-creation --recipe screencast-tutorial --json \
+    || echo "capability gap — read the missing list before installing"
+ +

Install

+
+
cli-hub matrix install <name>
+
Install a matrix’s CLIs. With no scope flag it installs every member CLI; the scope flags below install just a subset. Installation is idempotent — already-installed tools are skipped.
+
+
--dry-run — Show the install plan with zero side effects.
+
-c, --capability <id> — Install only the CLIs behind one capability.
+
--recipe <id> — Install only the CLIs a recipe uses.
+
--only <a,b,c> — Install a named comma-separated subset of the matrix’s CLIs.
+
--resume — Retry only the CLIs that failed last time.
+
--skill-only — Render the matrix skill (SKILL.md + references + scripts) without installing CLIs.
+
--json — Machine-readable plan or result.
+
+
+
$ cli-hub matrix install video-creation --dry-run
+  Plan: video-creation
+  skip (already installed): audacity, gimp, kdenlive
+  install via pip:          inkscape, krita, obs-studio, …
+  not managed here:         python libs, native binaries, cloud APIs — see preflight
+

The scope flags (--capability, --recipe, --only) are mutually exclusive. Per-matrix install state is recorded in ~/.cli-hub/matrix_state.json, which is what powers --resume.

+ +

Repair

+
+
cli-hub matrix doctor <name>
+
Audit install completeness for a matrix’s CLIs — flags anything recorded as installed but missing from PATH, and suggests a fix. (Preflight asks “is a provider usable?”; doctor asks “is what I installed still healthy?”)
+
--json — Machine-readable audit.
+
+
+ + +
+ Contract +

Exit codes

+

The matrix command family follows one exit-code contract, so scripts and agents can branch without reading text. Catalog commands use the simpler 0 / 1 success-or-failure convention.

+
+
0
Success
All checks passed, or every requested capability has at least one usable provider.
+
1
Failure
Matrix or capability not found; nothing could be installed.
+
2
Usage error
Mutually exclusive flags, or an invalid capability / recipe / --only name.
+
3
Partial
A capability gap, or some CLIs installed while others failed or are broken.
+
+
+ + +
+ For agents & scripts +

JSON & automation

+

CLI-Hub is built to be driven by AI agents as much as by people. Two design choices make that reliable:

+
    +
  • Structured output everywhere. Catalog commands and the whole matrix family accept --json — including matrix install, whose JSON carries the full plan or result. No screen-scraping required.
  • +
  • Exit codes as control flow. Combine the contract above with && / || to gate steps — e.g. preflight a recipe, and only install when it returns 0.
  • +
+

After installing a matrix, its SKILL.md is rendered to your machine. Read it: it contains the provider-selection rules, the suggest-to-user template for paid upgrades, and references — the judgement an agent needs that the registry alone doesn’t encode.

+
+ +

The meta-skill SKILL.md includes CLI Matrices and Curated Matrices sections, and the site publishes llms.txt plus the machine-readable registries — give an agent those entry points and it can go from cold start to “preflight → install by capability → JSON result” on its own.

+
+
+ + +
+ Reference +

Registries & config

+

CLI-Hub reads three published registries and caches them locally (one-hour TTL) under ~/.cli-hub/:

+
    +
  • registry.json — first-party harness CLIs.
  • +
  • public_registry.json — curated public CLIs.
  • +
  • matrix_registry.json — matrices: capabilities × providers, with preflight contracts.
  • +
+

They’re served from the project’s GitHub Pages domain (for example …/CLI-Anything/matrix_registry.json). Local state lives alongside the cache: installed.json tracks installed CLIs, and matrix_state.json records per-matrix install outcomes for --resume.

+

Environment

+
    +
  • CLI_HUB_NO_ANALYTICS=1 — opt out of anonymous, fire-and-forget usage analytics.
  • +
  • CLI-Hub auto-detects when it’s invoked by an AI agent (Claude Code, Codex, Cursor, and others) for analytics bucketing only.
  • +
  • Color is suppressed automatically when output is piped or not a TTY, keeping logs and parsers clean.
  • +
+
+ +

Analytics never block your workflow and can be turned off entirely. Matrices are the differentiator here — start from the Matrices section or run cli-hub matrix list to see what’s available.

+
+
+ +
+
+ + + + + + diff --git a/docs/hub/index.html b/docs/hub/index.html index e38ecf1de..b7e71c45f 100644 --- a/docs/hub/index.html +++ b/docs/hub/index.html @@ -1,21 +1,17 @@ - + + CLI-Anything Hub - Agent-Friendly CLI Registry - - - - - @@ -26,1881 +22,935 @@ - - + - + + + -