diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 899dae258c..5cf5514a0a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -167,7 +167,7 @@ the command templates in templates/commands/ to understand what each command invokes. Use these mapping rules: - templates/commands/X.md → the command it defines -- scripts/bash/Y.sh or scripts/powershell/Y.ps1 → every command that invokes that script (grep templates/commands/ for the script name). Also check transitive dependencies: if the changed script is sourced by other scripts (e.g., common.sh is sourced by create-new-feature.sh, check-prerequisites.sh, setup-plan.sh, update-agent-context.sh), then every command invoking those downstream scripts is also affected +- scripts/bash/Y.sh or scripts/powershell/Y.ps1 → every command that invokes that script (grep templates/commands/ for the script name). Also check transitive dependencies: if the changed script is sourced by other scripts (e.g., common.sh is sourced by create-new-feature.sh, check-prerequisites.sh, setup-plan.sh), then every command invoking those downstream scripts is also affected - templates/Z-template.md → every command that consumes that template during execution - src/specify_cli/*.py → CLI commands (`specify init`, `specify check`, `specify extension *`, `specify preset *`); test the affected CLI command and, for init/scaffolding changes, at minimum test /speckit.specify - extensions/X/commands/* → the extension command it defines diff --git a/src/specify_cli/integrations/manifest.py b/src/specify_cli/integrations/manifest.py index 23121be013..58719e1469 100644 --- a/src/specify_cli/integrations/manifest.py +++ b/src/specify_cli/integrations/manifest.py @@ -232,6 +232,30 @@ def record_existing(self, rel_path: str | Path, *, recovered: bool = False) -> N # transition. ``discard`` is a no-op when the key is absent. self._recovered_files.discard(normalized) + def remove(self, rel_path: str | Path) -> bool: + """Drop *rel_path* from the tracked file set and any recovered marker. + + Operates purely on the manifest's recorded key; it does NOT touch the + file on disk. Returns ``True`` if an entry was present and removed. + Used to keep the manifest consistent after a caller deletes a stale + managed file that the current install no longer ships. + + Input is normalized through the same lexical pipeline as + ``record_existing`` / ``is_recovered``: absolute paths and paths + containing ``..`` segments are rejected (return ``False``) — such paths + can never be canonical manifest keys, so there is nothing to remove. + """ + rel = Path(rel_path) + if rel.is_absolute() or ".." in rel.parts: + return False + try: + abs_path = _validate_rel_path(rel, self.project_root) + normalized = abs_path.relative_to(self.project_root).as_posix() + except ValueError: + return False + self._recovered_files.discard(normalized) + return self._files.pop(normalized, None) is not None + # -- Querying --------------------------------------------------------- @property diff --git a/src/specify_cli/shared_infra.py b/src/specify_cli/shared_infra.py index 0db8687058..83fa9d4205 100644 --- a/src/specify_cli/shared_infra.py +++ b/src/specify_cli/shared_infra.py @@ -304,7 +304,7 @@ def install_shared_infra( customization warning to tell the user which flag would overwrite their customizations. """ - from .integrations.manifest import _sha256 + from .integrations.manifest import _sha256, _validate_rel_path manifest = load_speckit_manifest(project_path, version=version, console=console) prior_hashes = dict(manifest.files) @@ -325,6 +325,11 @@ def _is_managed(rel: str, dst: Path) -> bool: symlinked_files: list[str] = [] planned_copies: list[tuple[Path, str, bytes, int]] = [] planned_templates: list[tuple[Path, str, str]] = [] + # Track every shared path the current bundle produces so we can detect + # manifest entries the core no longer ships (stale-script cleanup, #3076). + seen_rels: set[str] = set() + scripts_scanned = False + variant_dir = "bash" if script_type == "sh" else "powershell" def _decide_overwrite(rel: str, dst: Path) -> tuple[bool, str | None]: """Return (write, bucket) where bucket is 'skip', 'preserved', or None.""" @@ -379,7 +384,6 @@ def _ensure_or_bucket_dir(directory: Path) -> bool: if scripts_src.is_dir(): dest_scripts = project_path / ".specify" / "scripts" if _ensure_or_bucket_dir(dest_scripts): - variant_dir = "bash" if script_type == "sh" else "powershell" variant_src = scripts_src / variant_dir if variant_src.is_dir(): dest_variant = dest_scripts / variant_dir @@ -387,10 +391,18 @@ def _ensure_or_bucket_dir(directory: Path) -> bool: for src_path in variant_src.rglob("*"): if not src_path.is_file(): continue + # Mark scanned only once a real source file is seen. An + # empty (or symlink-skipped) variant keeps this False, so + # stale-cleanup is skipped — otherwise it would treat every + # tracked script as obsolete and delete it. (The safety + # hinge is this flag, not ``seen_rels``, which also holds + # template paths populated later.) + scripts_scanned = True rel_path = src_path.relative_to(variant_src) dst_path = dest_variant / rel_path rel = dst_path.relative_to(project_path).as_posix() + seen_rels.add(rel) if not _safe_dest_or_bucket(dst_path, rel, parent_must_exist=False): continue write, bucket = _decide_overwrite(rel, dst_path) @@ -442,6 +454,7 @@ def _ensure_or_bucket_dir(directory: Path) -> bool: dst = dest_templates / src.name rel = dst.relative_to(project_path).as_posix() + seen_rels.add(rel) if not _safe_dest_or_bucket(dst, rel): continue write, bucket = _decide_overwrite(rel, dst) @@ -521,5 +534,63 @@ def _ensure_or_bucket_dir(directory: Path) -> bool: if refresh_hint: console.print(refresh_hint) + # Remove stale managed scripts: paths a previous install recorded that the + # current core no longer ships — e.g. the legacy + # ``scripts//update-agent-context.sh`` superseded by the bundled + # agent-context extension. Left behind, such an orphan can crash when it + # sources a refreshed ``common.sh`` (#3076). Only run when the script source + # was actually scanned (so a missing/empty source never triggers mass + # deletion), scoped to the active variant, and only for *managed* copies — + # a user-customized file (hash diverges), a symlink, or a recovered entry is + # preserved by ``_is_managed``. + if scripts_scanned: + stale_removed: list[str] = [] + script_prefix = f".specify/scripts/{variant_dir}/" + for rel in list(prior_hashes): + if rel in seen_rels or not rel.startswith(script_prefix): + continue + # Guard corrupted/hand-edited manifest keys BEFORE any filesystem + # access: absolute, ``..``, or (on Windows) drive-relative keys such + # as ``C:tmp`` are not ``is_absolute()`` yet discard the project root + # when joined. The lexical check is a fast reject; ``_validate_rel_path`` + # resolves the join and confirms containment, catching the rest. A key + # that still escapes is *skipped*, never turned into an install-time + # hard failure. Mirrors IntegrationManifest.is_recovered / remove. + rel_path = Path(rel) + if rel_path.is_absolute() or ".." in rel_path.parts: + continue + try: + _validate_rel_path(rel_path, project_path) + except ValueError: + continue + dst = project_path / rel_path + # Already gone from disk but still tracked: drop the orphaned manifest + # entry so the manifest stays consistent (nothing to unlink). + if not dst.exists() and not dst.is_symlink(): + manifest.remove(rel) + continue + if not _is_managed(rel, dst): + continue # user-modified / symlink / recovered → preserve + # Never unlink through a symlinked ancestor (writes/deletes could + # escape the project root). The safe-destination check buckets such + # paths under ``symlinked_files`` and we leave them in place. + if not _safe_dest_or_bucket(dst, rel): + continue + try: + dst.unlink() + except OSError as exc: + console.print(f"[yellow]⚠[/yellow] could not remove stale {rel}: {exc}") + continue + manifest.remove(rel) + stale_removed.append(rel) + + if stale_removed: + console.print( + f"[yellow]⚠[/yellow] Removed {len(stale_removed)} obsolete shared " + "script(s) left by a previous install:" + ) + for path in stale_removed: + console.print(f" {path}") + manifest.save() return True diff --git a/tests/integrations/test_cli.py b/tests/integrations/test_cli.py index 30bcb015d1..21302abf5f 100644 --- a/tests/integrations/test_cli.py +++ b/tests/integrations/test_cli.py @@ -263,6 +263,206 @@ def test_shared_infra_overwrites_existing_files_with_force(self, tmp_path): assert (scripts_dir / "setup-plan.sh").exists() assert (templates_dir / "plan-template.md").exists() + def test_shared_infra_removes_stale_managed_script(self, tmp_path): + """A managed script the core no longer ships (e.g. the legacy + update-agent-context.sh, superseded by the agent-context extension) is + removed, and the manifest stops tracking it (#3076).""" + from specify_cli import _install_shared_infra + from specify_cli.integrations.manifest import IntegrationManifest + + project = tmp_path / "stale-test" + project.mkdir() + (project / ".specify").mkdir() + scripts_dir = project / ".specify" / "scripts" / "bash" + scripts_dir.mkdir(parents=True) + + # Legacy orphan the current bundle no longer ships, recorded in the + # manifest as a managed file (hash matches on disk) — a pre-refactor install. + stale_rel = ".specify/scripts/bash/update-agent-context.sh" + (scripts_dir / "update-agent-context.sh").write_text("# legacy orphan\n", encoding="utf-8") + manifest = IntegrationManifest("speckit", project, version="test") + manifest.record_existing(stale_rel) + manifest.save() + + _install_shared_infra(project, "sh", force=False) + + # The orphan is gone and the manifest no longer tracks it. + assert not (scripts_dir / "update-agent-context.sh").exists() + refreshed = IntegrationManifest.load("speckit", project) + assert stale_rel not in refreshed.files + # Scripts the core DOES ship are installed and tracked. + assert (scripts_dir / "common.sh").exists() + assert ".specify/scripts/bash/common.sh" in refreshed.files + + def test_shared_infra_preserves_modified_stale_script(self, tmp_path): + """A user-modified stale script is preserved (hash diverges from the + managed baseline), never silently deleted (#3076).""" + from specify_cli import _install_shared_infra + from specify_cli.integrations.manifest import IntegrationManifest + + project = tmp_path / "stale-modified" + project.mkdir() + (project / ".specify").mkdir() + scripts_dir = project / ".specify" / "scripts" / "bash" + scripts_dir.mkdir(parents=True) + + stale = scripts_dir / "update-agent-context.sh" + stale.write_text("# original managed\n", encoding="utf-8") + manifest = IntegrationManifest("speckit", project, version="test") + manifest.record_existing(".specify/scripts/bash/update-agent-context.sh") + manifest.save() + + # User customizes it after install → on-disk hash now diverges. + stale.write_text("# user customization\n", encoding="utf-8") + + _install_shared_infra(project, "sh", force=False) + + # Preserved: it is no longer a managed (hash-matching) copy. + assert stale.exists() + assert stale.read_text(encoding="utf-8") == "# user customization\n" + + def test_shared_infra_prunes_orphan_manifest_entry_when_file_absent(self, tmp_path): + """A stale manifest entry whose file is already gone from disk is pruned + so the manifest stays consistent, not left tracked forever (#3076 review).""" + from specify_cli import _install_shared_infra + from specify_cli.integrations.manifest import IntegrationManifest + + project = tmp_path / "orphan-entry" + project.mkdir() + (project / ".specify").mkdir() + scripts_dir = project / ".specify" / "scripts" / "bash" + scripts_dir.mkdir(parents=True) + + stale_rel = ".specify/scripts/bash/update-agent-context.sh" + stale = scripts_dir / "update-agent-context.sh" + stale.write_text("# legacy orphan\n", encoding="utf-8") + manifest = IntegrationManifest("speckit", project, version="test") + manifest.record_existing(stale_rel) + manifest.save() + # File removed out of band, but the manifest still tracks it. + stale.unlink() + + _install_shared_infra(project, "sh", force=False) + + refreshed = IntegrationManifest.load("speckit", project) + assert stale_rel not in refreshed.files + + def test_shared_infra_empty_script_source_keeps_tracked_scripts(self, tmp_path, monkeypatch): + """If the bundle's script source dir exists but is empty, stale-cleanup + must NOT run (no source files seen → can't tell what's obsolete): a + previously-tracked script is preserved, never mass-deleted (#3076 review).""" + from specify_cli import _install_shared_infra, shared_infra + from specify_cli.integrations.manifest import IntegrationManifest + + # Point the script source at an empty ``bash/`` directory. + empty_src = tmp_path / "empty-bundle" / "scripts" + (empty_src / "bash").mkdir(parents=True) + monkeypatch.setattr(shared_infra, "shared_scripts_source", lambda **kw: empty_src) + + project = tmp_path / "empty-source" + project.mkdir() + (project / ".specify").mkdir() + scripts_dir = project / ".specify" / "scripts" / "bash" + scripts_dir.mkdir(parents=True) + tracked_rel = ".specify/scripts/bash/common.sh" + (scripts_dir / "common.sh").write_text("# tracked\n", encoding="utf-8") + manifest = IntegrationManifest("speckit", project, version="test") + manifest.record_existing(tracked_rel) + manifest.save() + + _install_shared_infra(project, "sh", force=False) + + # Empty source → scripts_scanned stays False → nothing deleted. + assert (scripts_dir / "common.sh").exists() + refreshed = IntegrationManifest.load("speckit", project) + assert tracked_rel in refreshed.files + + def test_shared_infra_stale_cleanup_ignores_unsafe_manifest_keys(self, tmp_path): + """A corrupted/hand-edited manifest key with a ``..`` segment is skipped + before any filesystem access — its traversal target is never deleted + (#3076 review, containment guard).""" + import hashlib + import json + from specify_cli import _install_shared_infra + + project = tmp_path / "unsafe-key" + project.mkdir() + scripts_dir = project / ".specify" / "scripts" / "bash" + scripts_dir.mkdir(parents=True) + manifest_dir = project / ".specify" / "integrations" + manifest_dir.mkdir(parents=True) + + # A file the traversal key would resolve to (outside scripts/bash/). + victim = project / ".specify" / "scripts" / "keep-me.sh" + victim_bytes = b"# do not touch\n" + victim.write_bytes(victim_bytes) + + # Hand-crafted manifest: a key under the script prefix but with a ``..`` + # segment, with the *matching* hash so that — absent the containment guard + # — stale-cleanup would consider it managed and unlink the target. + traversal_key = ".specify/scripts/bash/../keep-me.sh" + (manifest_dir / "speckit.manifest.json").write_text( + json.dumps({ + "integration": "speckit", + "version": "test", + "files": {traversal_key: hashlib.sha256(victim_bytes).hexdigest()}, + }), + encoding="utf-8", + ) + + _install_shared_infra(project, "sh", force=False) + + # The unsafe key was skipped; its target file is untouched. + assert victim.exists() + assert victim.read_bytes() == victim_bytes + + def test_shared_infra_stale_cleanup_skips_escaping_key_without_failing( + self, tmp_path, monkeypatch + ): + """A key that passes the lexical guard but escapes containment — e.g. a + Windows drive-relative ``C:tmp`` that is not ``is_absolute()`` yet discards + the project root when joined — is skipped via ``_validate_rel_path``, never + unlinked, and never turned into an install-time hard failure (#3076 review + round 4). Simulated portably by forcing ``_validate_rel_path`` to reject the + managed key, since real drive-relative paths only escape on Windows.""" + from specify_cli import _install_shared_infra + from specify_cli.integrations import manifest as manifest_mod + from specify_cli.integrations.manifest import IntegrationManifest + + project = tmp_path / "escaping-key" + project.mkdir() + (project / ".specify").mkdir() + scripts_dir = project / ".specify" / "scripts" / "bash" + scripts_dir.mkdir(parents=True) + + # A managed stale orphan that would normally be removed. + stale_rel = ".specify/scripts/bash/update-agent-context.sh" + stale = scripts_dir / "update-agent-context.sh" + stale.write_text("# legacy orphan\n", encoding="utf-8") + manifest = IntegrationManifest("speckit", project, version="test") + manifest.record_existing(stale_rel) + manifest.save() + + # Force the containment check to reject this key, as it would for a + # drive-relative escape on Windows. The cleanup must skip it gracefully. + real_validate = manifest_mod._validate_rel_path + + def fake_validate(rel, root): + if str(rel).endswith("update-agent-context.sh"): + raise ValueError("simulated drive-relative escape") + return real_validate(rel, root) + + monkeypatch.setattr(manifest_mod, "_validate_rel_path", fake_validate) + + # Must not raise (no install-time hard failure from a corrupted key). + _install_shared_infra(project, "sh", force=False) + + # The escaping key was skipped, so its file is left untouched... + assert stale.exists() + assert stale.read_text(encoding="utf-8") == "# legacy orphan\n" + # ...yet the install otherwise completed: real scripts are installed. + assert (scripts_dir / "common.sh").exists() + def test_shared_infra_skip_warning_displayed(self, tmp_path, capsys): """Console warning is displayed when files are skipped.""" from specify_cli import _install_shared_infra diff --git a/tests/integrations/test_manifest.py b/tests/integrations/test_manifest.py index 503ee133e5..32ff6efbdf 100644 --- a/tests/integrations/test_manifest.py +++ b/tests/integrations/test_manifest.py @@ -116,6 +116,34 @@ def test_uninstall_skips_traversal_paths(self, tmp_path): assert len(removed) == 1 assert removed[0].name == "safe.txt" + def test_remove_drops_entry_and_is_noop_second_time(self, tmp_path): + (tmp_path / "f.txt").write_text("x", encoding="utf-8") + m = IntegrationManifest("test", tmp_path) + m.record_existing("f.txt") + assert "f.txt" in m.files + assert m.remove("f.txt") is True + assert "f.txt" not in m.files + assert m.remove("f.txt") is False # already gone → no-op + + def test_remove_rejects_absolute_path(self, tmp_path): + # Matches record_existing/is_recovered: an absolute key can never be a + # canonical manifest key, so remove() rejects it lexically and leaves + # the tracked entry untouched. + (tmp_path / "f.txt").write_text("x", encoding="utf-8") + m = IntegrationManifest("test", tmp_path) + m.record_existing("f.txt") + import sys + abs_input = "C:\\tmp\\f.txt" if sys.platform == "win32" else "/tmp/f.txt" + assert m.remove(abs_input) is False + assert "f.txt" in m.files + + def test_remove_rejects_parent_traversal(self, tmp_path): + (tmp_path / "f.txt").write_text("x", encoding="utf-8") + m = IntegrationManifest("test", tmp_path) + m.record_existing("f.txt") + assert m.remove("../f.txt") is False + assert "f.txt" in m.files + class TestManifestCheckModified: def test_unmodified_file(self, tmp_path):