diff --git a/.github/fixtures/api-url-compromise.json b/.github/fixtures/api-url-compromise.json new file mode 100644 index 0000000..4ac8e35 --- /dev/null +++ b/.github/fixtures/api-url-compromise.json @@ -0,0 +1,27 @@ +{ + "fixture-known-provider": { + "api": "https://attacker.invalid/provider/v1", + "models": { + "fixture-known-model": { + "provider": { + "api": "https://attacker.invalid/model/v1" + } + }, + "fixture-new-model": { + "provider": { + "api": "https://attacker.invalid/new-model/v1" + } + } + } + }, + "fixture-new-provider": { + "api": "https://attacker.invalid/new-provider/v1", + "models": { + "fixture-new-provider-model": { + "provider": { + "api": "https://attacker.invalid/new-provider-model/v1" + } + } + } + } +} diff --git a/.github/scripts/check_upstream_api_urls.py b/.github/scripts/check_upstream_api_urls.py new file mode 100644 index 0000000..be148f2 --- /dev/null +++ b/.github/scripts/check_upstream_api_urls.py @@ -0,0 +1,527 @@ +"""Gate PyPI publishes when upstream changes API URL routing. + +modelsdotdev publishes a bundled snapshot of models.dev data. If upstream is +compromised between releases, a malicious data-only change could redirect a +previously-known provider or model override to an attacker-controlled API URL +without changing this repository. This script compares the fetched upstream JSON +against the latest published wheel and emits a GitHub Actions approval signal +plus a reviewer-friendly Markdown diff. 
+""" + +from __future__ import annotations + +import argparse +import hashlib +import html +import json +import os +import sqlite3 +import sys +import tempfile +import urllib.error +import urllib.request +import uuid +import zipfile +from dataclasses import dataclass +from io import BytesIO +from pathlib import Path +from typing import Any + +PACKAGE_NAME = "modelsdotdev" +PYPI_JSON_URL = f"https://pypi.org/pypi/{PACKAGE_NAME}/json" +WHEEL_DB_PATH = "modelsdotdev/_db.sqlite" + + +@dataclass(frozen=True, slots=True) +class ApiUrlState: + """Provider and model API URL state extracted from one data source.""" + + provider_ids: frozenset[str] + provider_urls: dict[str, str | None] + model_urls: dict[tuple[str, str], str] + + +@dataclass(frozen=True, slots=True) +class Baseline: + """Previously published package version and its extracted URL state.""" + + version: str + state: ApiUrlState + + +@dataclass(frozen=True, slots=True) +class ApiUrlChange: + """One protected API URL difference requiring reviewer attention.""" + + kind: str + provider_id: str + model_id: str | None + previous_url: str | None + current_url: str | None + + +class BaselineUnavailableError(RuntimeError): + """Raised when the previous published package cannot be inspected.""" + + +def main() -> int: + """Run the API URL gate and write GitHub Actions outputs.""" + parser = argparse.ArgumentParser( + description="Detect protected upstream API URL changes.", + ) + parser.add_argument("current_json", type=Path) + parser.add_argument("--baseline-db", type=Path) + parser.add_argument("--baseline-version", default="") + parser.add_argument("--package-name", default=PACKAGE_NAME) + parser.add_argument("--digest", default="") + parser.add_argument( + "--markdown-output", + type=Path, + default=Path(".tmp/api-url-diff.md"), + ) + parser.add_argument( + "--json-output", + type=Path, + default=Path(".tmp/api-url-diff.json"), + ) + args = parser.parse_args() + + current_state = 
load_current_state(args.current_json) + fail_closed_reason = "" + try: + if args.baseline_db is None: + baseline = load_baseline_from_pypi(args.package_name) + else: + baseline = load_baseline_from_db( + args.baseline_db, + args.baseline_version or "local baseline", + ) + changes = compare_api_urls(baseline.state, current_state) + except BaselineUnavailableError as error: + # If we cannot inspect the previous release, require a human review + # rather than silently treating the publish as safe. + baseline = Baseline(version="unknown", state=_empty_state()) + changes = [] + fail_closed_reason = str(error) + + approval_required = bool(changes) or bool(fail_closed_reason) + summary = _summary(changes, fail_closed_reason) + markdown = render_markdown( + changes=changes, + baseline_version=baseline.version, + digest=args.digest, + fail_closed_reason=fail_closed_reason, + ) + payload = { + "approval_required": approval_required, + "baseline_version": baseline.version, + "change_count": len(changes), + "changes": [_change_payload(change) for change in changes], + "digest": args.digest, + "fail_closed": bool(fail_closed_reason), + "error": fail_closed_reason or None, + "summary": summary, + } + + args.markdown_output.parent.mkdir(parents=True, exist_ok=True) + args.markdown_output.write_text(markdown, encoding="utf-8") + args.json_output.parent.mkdir(parents=True, exist_ok=True) + args.json_output.write_text( + json.dumps(payload, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + _write_github_outputs( + { + "approval_required": str(approval_required).lower(), + "baseline_version": baseline.version, + "change_count": str(len(changes)), + "fail_closed": str(bool(fail_closed_reason)).lower(), + "json_path": str(args.json_output), + "markdown_path": str(args.markdown_output), + "summary": summary, + }, + ) + sys.stdout.write(f"{summary}\n") + return 0 + + +def load_current_state(path: Path) -> ApiUrlState: + """Load provider and model override URLs from fetched 
upstream JSON.""" + raw_data = json.loads(path.read_text(encoding="utf-8")) + if not isinstance(raw_data, dict): + raise TypeError("upstream JSON root must be an object") + + provider_urls: dict[str, str | None] = {} + model_urls: dict[tuple[str, str], str] = {} + for provider_id, raw_provider in raw_data.items(): + if not isinstance(provider_id, str): + raise TypeError("upstream JSON provider keys must be strings") + provider = _object(raw_provider, f"provider {provider_id}") + provider_urls[provider_id] = _optional_string( + provider.get("api"), + f"provider {provider_id}.api", + ) + models = _object( + provider.get("models", {}), + f"provider {provider_id}.models", + ) + for model_id, raw_model in models.items(): + if not isinstance(model_id, str): + raise TypeError( + f"provider {provider_id}.models keys must be strings", + ) + model = _object(raw_model, f"model {provider_id}/{model_id}") + raw_provider_override = model.get("provider") + if raw_provider_override is None: + continue + provider_override = _object( + raw_provider_override, + f"model {provider_id}/{model_id}.provider", + ) + api_url = _optional_string( + provider_override.get("api"), + f"model {provider_id}/{model_id}.provider.api", + ) + if api_url is not None: + model_urls[provider_id, model_id] = api_url + + return ApiUrlState( + provider_ids=frozenset(provider_urls), + provider_urls=provider_urls, + model_urls=model_urls, + ) + + +def load_baseline_from_pypi(package_name: str) -> Baseline: + """Download the latest PyPI wheel and load its bundled database URLs.""" + pypi_json_url = f"https://pypi.org/pypi/{package_name}/json" + try: + with urllib.request.urlopen( + _request(pypi_json_url), + timeout=60, + ) as response: + pypi_data = json.loads(response.read()) + except urllib.error.HTTPError as error: + if error.code == 404: + return Baseline(version="none", state=_empty_state()) + raise BaselineUnavailableError( + f"unable to inspect PyPI release metadata: HTTP {error.code}", + ) from error + 
except (OSError, json.JSONDecodeError) as error: + raise BaselineUnavailableError( + f"unable to inspect PyPI release metadata: {error}", + ) from error + + version = _pypi_version(pypi_data) + wheel_url = _wheel_url(pypi_data, version) + try: + with urllib.request.urlopen( + _request(wheel_url), + timeout=60, + ) as response: + wheel_payload = response.read() + except OSError as error: + raise BaselineUnavailableError( + f"unable to download baseline wheel for {package_name} {version}: " + f"{error}", + ) from error + + try: + with ( + zipfile.ZipFile(BytesIO(wheel_payload)) as wheel, + tempfile.TemporaryDirectory() as temporary_dir, + ): + db_path = Path(temporary_dir) / "_db.sqlite" + db_path.write_bytes(wheel.read(WHEEL_DB_PATH)) + return load_baseline_from_db(db_path, version) + except (KeyError, OSError, sqlite3.Error, zipfile.BadZipFile) as error: + raise BaselineUnavailableError( + f"unable to inspect baseline wheel for {package_name} {version}: " + f"{error}", + ) from error + + +def load_baseline_from_db(path: Path, version: str) -> Baseline: + """Load provider and model override URLs from a bundled SQLite DB.""" + try: + connection = sqlite3.connect(f"file:{path}?mode=ro", uri=True) + try: + provider_rows = connection.execute( + "SELECT id, api FROM providers", + ).fetchall() + model_rows = connection.execute( + """ + SELECT provider_id, id, provider_api + FROM models + WHERE provider_api IS NOT NULL + """, + ).fetchall() + finally: + connection.close() + except sqlite3.Error as error: + raise BaselineUnavailableError( + f"unable to inspect baseline database {path}: {error}", + ) from error + + provider_urls = { + str(provider_id): _optional_string( + api_url, + f"provider {provider_id}.api", + ) + for provider_id, api_url in provider_rows + } + model_urls = { + (str(provider_id), str(model_id)): _string( + api_url, + f"model {provider_id}/{model_id}.provider.api", + ) + for provider_id, model_id, api_url in model_rows + } + return Baseline( + 
version=version, + state=ApiUrlState( + provider_ids=frozenset(provider_urls), + provider_urls=provider_urls, + model_urls=model_urls, + ), + ) + + +def compare_api_urls( + baseline: ApiUrlState, + current: ApiUrlState, +) -> list[ApiUrlChange]: + """Return URL changes for existing providers and their model overrides.""" + changes: list[ApiUrlChange] = [] + # New providers are allowed to introduce URLs. The protected case is URL + # routing changing for provider IDs that consumers may already trust. + for provider_id in sorted(baseline.provider_ids): + previous_url = baseline.provider_urls.get(provider_id) + current_url = current.provider_urls.get(provider_id) + if previous_url != current_url: + changes.append( + ApiUrlChange( + kind="Provider", + provider_id=provider_id, + model_id=None, + previous_url=previous_url, + current_url=current_url, + ), + ) + + # Model provider overrides can route individual model calls differently from + # the provider default, so protect those URLs under known providers too. 
+ baseline_model_keys = { + key for key in baseline.model_urls if key[0] in baseline.provider_ids + } + current_model_keys = { + key for key in current.model_urls if key[0] in baseline.provider_ids + } + for provider_id, model_id in sorted( + baseline_model_keys | current_model_keys, + ): + previous_url = baseline.model_urls.get((provider_id, model_id)) + current_url = current.model_urls.get((provider_id, model_id)) + if previous_url != current_url: + changes.append( + ApiUrlChange( + kind="Model override", + provider_id=provider_id, + model_id=model_id, + previous_url=previous_url, + current_url=current_url, + ), + ) + + return changes + + +def render_markdown( + *, + changes: list[ApiUrlChange], + baseline_version: str, + digest: str, + fail_closed_reason: str = "", +) -> str: + """Render the workflow summary shown before environment approval.""" + lines = ["## Upstream API URL Check", ""] + lines.append(f"Baseline: `{_display_baseline(baseline_version)}`") + if digest: + lines.append(f"Current upstream JSON SHA256: `{digest}`") + lines.append("") + + if fail_closed_reason: + lines.extend( + [ + "Manual approval is required because the previous published " + "package could not be inspected.", + "", + f"Reason: `{html.escape(fail_closed_reason)}`", + "", + ], + ) + return "\n".join(lines) + + if not changes: + lines.append( + "No protected upstream API URL changes were detected for " + "previously-known providers.", + ) + lines.append("") + return "\n".join(lines) + + # Environment-approval jobs pause before steps run, so render the diff in + # the planning job summary and point the approval environment URL at it. 
+ lines.append( + f"Manual approval is required for {len(changes)} protected upstream " + "API URL change(s).", + ) + lines.append("") + lines.extend( + [ + "| Type | Provider | Model | Previous URL | Current URL |", + "| --- | --- | --- | --- | --- |", + ], + ) + for change in changes: + lines.append( + "| " + f"{_table_text(change.kind)} | " + f"{_code(change.provider_id)} | " + f"{_model_cell(change.model_id)} | " + f"{_url_cell(change.previous_url)} | " + f"{_url_cell(change.current_url)} |", + ) + lines.append("") + return "\n".join(lines) + + +def _empty_state() -> ApiUrlState: + return ApiUrlState( + provider_ids=frozenset(), + provider_urls={}, + model_urls={}, + ) + + +def _request(url: str) -> urllib.request.Request: + return urllib.request.Request( + url, + headers={"User-Agent": "modelsdotdev-python-publish"}, + ) + + +def _pypi_version(data: Any) -> str: + if not isinstance(data, dict): + raise BaselineUnavailableError("PyPI metadata root is not an object") + info = _object(data.get("info"), "PyPI metadata info") + version = _string(info.get("version"), "PyPI latest version") + if not version: + raise BaselineUnavailableError("PyPI latest version is empty") + return version + + +def _wheel_url(data: Any, version: str) -> str: + if not isinstance(data, dict): + raise BaselineUnavailableError("PyPI metadata root is not an object") + releases = _object(data.get("releases"), "PyPI metadata releases") + files = releases.get(version) + if not isinstance(files, list): + raise BaselineUnavailableError( + f"PyPI release {version} has no files", + ) + + for release_file in files: + file_info = _object(release_file, f"PyPI release {version} file") + if file_info.get("packagetype") != "bdist_wheel": + continue + return _string( + file_info.get("url"), + f"PyPI release {version} wheel URL", + ) + raise BaselineUnavailableError(f"PyPI release {version} has no wheel") + + +def _object(value: Any, context: str) -> dict[str, Any]: + if not isinstance(value, dict): + 
+ raise TypeError(f"{context} must be an object") + return value + + +def _optional_string(value: Any, context: str) -> str | None: + if value is None: + return None + return _string(value, context) + + +def _string(value: Any, context: str) -> str: + if not isinstance(value, str): + raise TypeError(f"{context} must be a string") + return value + + +def _summary(changes: list[ApiUrlChange], fail_closed_reason: str) -> str: + if fail_closed_reason: + return ( + "Manual approval required: baseline package could not be inspected" + ) + if changes: + return ( + f"Manual approval required: {len(changes)} protected upstream " + "API URL change(s) detected" + ) + return "No protected upstream API URL changes detected" + + +def _change_payload(change: ApiUrlChange) -> dict[str, str | None]: + return { + "type": change.kind, + "provider": change.provider_id, + "model": change.model_id, + "previous_url": change.previous_url, + "current_url": change.current_url, + } + + +def _write_github_outputs(outputs: dict[str, str]) -> None: + output_path = os.environ.get("GITHUB_OUTPUT") + if not output_path: + return + + with Path(output_path).open("a", encoding="utf-8") as output_file: + for name, value in outputs.items(): + if "\n" not in value: + output_file.write(f"{name}={value}\n") + continue + delimiter = f"EOF_{hashlib.sha256(uuid.uuid4().bytes).hexdigest()}" + output_file.write(f"{name}<<{delimiter}\n{value}\n{delimiter}\n") + + +def _display_baseline(version: str) -> str: + if version == "none": + return "no published baseline" + return f"modelsdotdev {version}" + + +def _table_text(value: str) -> str: + return html.escape(value).replace("|", "&#124;") + + +def _code(value: str) -> str: + return f"<code>{_table_text(value)}</code>" + + +def _model_cell(model_id: str | None) -> str: + if model_id is None: + return "provider default" + return _code(model_id) + + +def _url_cell(url: str | None) -> str: + if url is None: + return "not set" + return _code(url) + + +if __name__ == "__main__": + raise 
SystemExit(main()) diff --git a/.github/scripts/create_api_url_dry_run_baseline.py b/.github/scripts/create_api_url_dry_run_baseline.py new file mode 100644 index 0000000..cfd397e --- /dev/null +++ b/.github/scripts/create_api_url_dry_run_baseline.py @@ -0,0 +1,65 @@ +"""Create a tiny baseline database for approval-gate dry runs. + +The dry-run workflow must exercise the same URL checker and protected +environment approval path as the real publish workflow, but it must not depend +on the latest PyPI wheel or any real provider data. This script creates the +minimal SQLite tables the checker reads, with one previously-known provider and +one previously-known model override. +""" + +from __future__ import annotations + +import argparse +import sqlite3 +from pathlib import Path + + +def main() -> int: + """Write the deterministic dry-run baseline SQLite database.""" + parser = argparse.ArgumentParser( + description="Create an API URL approval dry-run baseline database.", + ) + parser.add_argument("output", type=Path) + args = parser.parse_args() + + output = args.output + output.parent.mkdir(parents=True, exist_ok=True) + if output.exists(): + output.unlink() + + with sqlite3.connect(output) as connection: + connection.execute( + "CREATE TABLE providers (id TEXT PRIMARY KEY, api TEXT)", + ) + connection.execute( + """ + CREATE TABLE models ( + provider_id TEXT NOT NULL, + id TEXT NOT NULL, + provider_api TEXT + ) + """, + ) + connection.execute( + "INSERT INTO providers (id, api) VALUES (?, ?)", + ( + "fixture-known-provider", + "https://api.fixture-provider.invalid/v1", + ), + ) + connection.execute( + """ + INSERT INTO models (provider_id, id, provider_api) + VALUES (?, ?, ?) 
+ """, + ( + "fixture-known-provider", + "fixture-known-model", + "https://api.fixture-model.invalid/v1", + ), + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/workflows/publish-dry-run.yml b/.github/workflows/publish-dry-run.yml new file mode 100644 index 0000000..63b990c --- /dev/null +++ b/.github/workflows/publish-dry-run.yml @@ -0,0 +1,97 @@ +name: Publish Approval Dry Run + +on: + workflow_dispatch: + +permissions: + contents: read + +jobs: + plan: + name: Plan dry run + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + api_url_approval_required: ${{ steps.api-url-check.outputs.approval_required }} + api_url_change_count: ${{ steps.api-url-check.outputs.change_count }} + api_url_diff_summary: ${{ steps.api-url-check.outputs.summary }} + run_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Prepare dry-run baseline + run: | + python .github/scripts/create_api_url_dry_run_baseline.py \ + .tmp/api-url-dry-run-baseline.sqlite + + - name: Compute fixture digest + id: fixture + run: | + DIGEST=$(python - <<'PY' + import hashlib + from pathlib import Path + + payload = Path(".github/fixtures/api-url-compromise.json").read_bytes() + print(hashlib.sha256(payload).hexdigest()) + PY + ) + printf 'digest=%s\n' "$DIGEST" >> "$GITHUB_OUTPUT" + + - name: Check fixture API URL changes + id: api-url-check + env: + FIXTURE_DIGEST: ${{ steps.fixture.outputs.digest }} + run: | + python .github/scripts/check_upstream_api_urls.py \ + .github/fixtures/api-url-compromise.json \ + --baseline-db .tmp/api-url-dry-run-baseline.sqlite \ + --baseline-version dry-run-baseline \ + --digest "$FIXTURE_DIGEST" \ + --json-output .tmp/api-url-diff.json \ + --markdown-output .tmp/api-url-diff.md + + - name: Print upstream API URL check + run: | + python - <<'PY' + 
import os + from pathlib import Path + + with Path(os.environ["GITHUB_STEP_SUMMARY"]).open("a") as summary: + summary.write(Path(".tmp/api-url-diff.md").read_text()) + summary.write("\n") + PY + + api_url_change_approval: + name: Approve dry-run API URL changes + needs: plan + if: ${{ needs.plan.outputs.api_url_approval_required == 'true' }} + runs-on: ubuntu-latest + permissions: {} + # This uses the same protected environment as the real publish workflow, + # but this dry-run workflow never builds or publishes a package. + environment: + name: pypi-api-url-change + url: ${{ needs.plan.outputs.run_url }} + steps: + - name: Approval recorded + env: + CHANGE_COUNT: ${{ needs.plan.outputs.api_url_change_count }} + run: | + printf '%s\n' \ + "Approved dry-run API URL changes: $CHANGE_COUNT change(s)" + + dry_run_publish: + name: Dry-run publish continuation + needs: + - plan + - api_url_change_approval + if: ${{ always() && !cancelled() && (needs.plan.outputs.api_url_approval_required != 'true' || needs.api_url_change_approval.result == 'success') }} + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Confirm dry run completed + run: | + printf '%s\n' "Dry run complete. No build, upload, tag, or release was created." 
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b442147..4e8d7fc 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -4,12 +4,99 @@ on: workflow_dispatch: permissions: - contents: write # required for creating tags and releases - id-token: write # required for Trusted Publishing (OIDC) + contents: read jobs: + plan: + name: Plan publish + runs-on: ubuntu-latest + permissions: + contents: read + outputs: + api_url_approval_required: ${{ steps.api-url-check.outputs.approval_required }} + api_url_change_count: ${{ steps.api-url-check.outputs.change_count }} + api_url_diff_summary: ${{ steps.api-url-check.outputs.summary }} + changed: ${{ steps.release-plan.outputs.changed }} + reason: ${{ steps.release-plan.outputs.reason }} + run_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} + tag: ${{ steps.release-plan.outputs.tag }} + upstream_json_digest: ${{ steps.upstream-json.outputs.digest }} + version: ${{ steps.release-plan.outputs.version }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Fetch upstream JSON + id: upstream-json + run: | + DIGEST=$(python .github/scripts/fetch_upstream_json.py .tmp/models.dev-api.json) + echo "digest=$DIGEST" >> "$GITHUB_OUTPUT" + + - name: Check upstream API URL changes + id: api-url-check + env: + UPSTREAM_JSON_DIGEST: ${{ steps.upstream-json.outputs.digest }} + run: | + python .github/scripts/check_upstream_api_urls.py \ + .tmp/models.dev-api.json \ + --digest "$UPSTREAM_JSON_DIGEST" \ + --json-output .tmp/api-url-diff.json \ + --markdown-output .tmp/api-url-diff.md + + - name: Print upstream API URL check + run: | + python - <<'PY' + import os + from pathlib import Path + + with Path(os.environ["GITHUB_STEP_SUMMARY"]).open("a") as summary: + summary.write(Path(".tmp/api-url-diff.md").read_text()) + summary.write("\n") + PY + + - name: Plan release + id: 
release-plan + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + UPSTREAM_JSON_DIGEST: ${{ steps.upstream-json.outputs.digest }} + with: + script: | + await require("./.github/scripts/plan_release")({github, context, core}); + + - name: Print release plan + env: + REASON: ${{ steps.release-plan.outputs.reason }} + VERSION: ${{ steps.release-plan.outputs.version }} + run: printf '%s\n' "$REASON" + + api_url_change_approval: + name: Approve upstream API URL changes + needs: plan + if: ${{ needs.plan.outputs.changed == 'true' && needs.plan.outputs.api_url_approval_required == 'true' }} + runs-on: ubuntu-latest + permissions: {} + # The protected environment is the manual supply-chain gate. Its URL points + # reviewers to the plan job summary, where the API URL diff is rendered. + environment: + name: pypi-api-url-change + url: ${{ needs.plan.outputs.run_url }} + steps: + - name: Approval recorded + env: + VERSION: ${{ needs.plan.outputs.version }} + run: printf '%s\n' "Approved upstream API URL changes for $VERSION" + publish: + name: Publish + needs: + - plan + - api_url_change_approval + if: ${{ always() && !cancelled() && needs.plan.outputs.changed == 'true' && (needs.plan.outputs.api_url_approval_required != 'true' || needs.api_url_change_approval.result == 'success') }} runs-on: ubuntu-latest + permissions: + contents: write # required for creating tags and releases + id-token: write # required for Trusted Publishing (OIDC) environment: name: pypi url: https://pypi.org/p/modelsdotdev @@ -30,45 +117,35 @@ jobs: - name: Fetch upstream JSON id: upstream-json + env: + EXPECTED_DIGEST: ${{ needs.plan.outputs.upstream_json_digest }} run: | DIGEST=$(uv run python .github/scripts/fetch_upstream_json.py .tmp/models.dev-api.json) + if [ "$DIGEST" != "$EXPECTED_DIGEST" ]; then + printf '%s\n' "upstream JSON digest changed after approval planning" + printf '%s\n' "planned: $EXPECTED_DIGEST" + printf '%s\n' "current: $DIGEST" + exit 1 + fi 
echo "digest=$DIGEST" >> "$GITHUB_OUTPUT" - - name: Plan release - id: release-plan - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - UPSTREAM_JSON_DIGEST: ${{ steps.upstream-json.outputs.digest }} - with: - script: | - await require("./.github/scripts/plan_release")({github, context, core}); - - - name: Print release plan - env: - REASON: ${{ steps.release-plan.outputs.reason }} - run: printf '%s\n' "$REASON" - - name: Build - if: steps.release-plan.outputs.changed == 'true' env: MODELDOTDEV_BUILD_SOURCE: .tmp/models.dev-api.json - UV_DYNAMIC_VERSIONING_BYPASS: ${{ steps.release-plan.outputs.version }} + UV_DYNAMIC_VERSIONING_BYPASS: ${{ needs.plan.outputs.version }} run: uv build - name: Test installed wheel - if: steps.release-plan.outputs.changed == 'true' run: sh .github/scripts/test_installed_wheel.sh - name: Publish to PyPI - if: steps.release-plan.outputs.changed == 'true' run: uv publish --trusted-publishing always - name: Create GitHub release - if: steps.release-plan.outputs.changed == 'true' uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 env: - RELEASE_TAG: ${{ steps.release-plan.outputs.tag }} - RELEASE_VERSION: ${{ steps.release-plan.outputs.version }} + RELEASE_TAG: ${{ needs.plan.outputs.tag }} + RELEASE_VERSION: ${{ needs.plan.outputs.version }} RELEASE_COMMIT_SHA: ${{ github.sha }} UPSTREAM_JSON_DIGEST: ${{ steps.upstream-json.outputs.digest }} with: diff --git a/pyproject.toml b/pyproject.toml index 5a8b51e..8af89d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ strict = true warn_return_any = true warn_unused_configs = true mypy_path = ["src"] -packages = ["modelsdotdev", "tests"] +files = ["src/modelsdotdev", "tests", ".github/scripts"] explicit_package_bases = true [[tool.mypy.overrides]] diff --git a/tests/test_sourcecode.py b/tests/test_sourcecode.py index 69527d8..e1c8bd6 100644 --- a/tests/test_sourcecode.py +++ b/tests/test_sourcecode.py @@ -82,6 
+82,9 @@ def test_cqa_typecheck_ty(self) -> None: "-m", "ty", "check", + "src", + "tests", + ".github/scripts", ], check=True, capture_output=True, diff --git a/tests/test_upstream_api_url_check.py b/tests/test_upstream_api_url_check.py new file mode 100644 index 0000000..55598ae --- /dev/null +++ b/tests/test_upstream_api_url_check.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +import json +import sqlite3 +import subprocess +import sys +from typing import TYPE_CHECKING, cast + +from modelsdotdev._internal.dist import PROJECT_ROOT + +if TYPE_CHECKING: + from pathlib import Path + +CHECK_SCRIPT = ( + PROJECT_ROOT / ".github" / "scripts" / "check_upstream_api_urls.py" +) + + +def test_provider_url_change_requires_approval(tmp_path: Path) -> None: + baseline_db = _write_baseline_db( + tmp_path, + providers={"known": "https://old.example/v1"}, + ) + current_json = _write_current_json( + tmp_path, + { + "known": _provider( + api="https://new.example/v1", + ), + }, + ) + + payload, markdown = _run_check(tmp_path, baseline_db, current_json) + + assert payload["approval_required"] is True + assert payload["change_count"] == 1 + assert _changes(payload) == [ + { + "type": "Provider", + "provider": "known", + "model": None, + "previous_url": "https://old.example/v1", + "current_url": "https://new.example/v1", + }, + ] + assert "Manual approval is required" in markdown + assert "https://old.example/v1" in markdown + assert "https://new.example/v1" in markdown + + +def test_provider_url_add_and_remove_require_approval(tmp_path: Path) -> None: + baseline_db = _write_baseline_db( + tmp_path, + providers={ + "added": None, + "removed": "https://old.example/v1", + }, + ) + current_json = _write_current_json( + tmp_path, + { + "added": _provider(api="https://new.example/v1"), + "removed": _provider(), + }, + ) + + payload, _markdown = _run_check(tmp_path, baseline_db, current_json) + + assert payload["approval_required"] is True + assert _changes(payload) == [ + { + 
"type": "Provider", + "provider": "added", + "model": None, + "previous_url": None, + "current_url": "https://new.example/v1", + }, + { + "type": "Provider", + "provider": "removed", + "model": None, + "previous_url": "https://old.example/v1", + "current_url": None, + }, + ] + + +def test_model_override_url_changes_require_approval(tmp_path: Path) -> None: + baseline_db = _write_baseline_db( + tmp_path, + providers={"known": None}, + model_urls={ + ("known", "changed"): "https://old.example/v1", + ("known", "removed"): "https://removed.example/v1", + }, + ) + current_json = _write_current_json( + tmp_path, + { + "known": _provider( + models={ + "added": _model(api="https://added.example/v1"), + "changed": _model(api="https://new.example/v1"), + "removed": _model(), + }, + ), + }, + ) + + payload, markdown = _run_check(tmp_path, baseline_db, current_json) + + assert payload["approval_required"] is True + assert _changes(payload) == [ + { + "type": "Model override", + "provider": "known", + "model": "added", + "previous_url": None, + "current_url": "https://added.example/v1", + }, + { + "type": "Model override", + "provider": "known", + "model": "changed", + "previous_url": "https://old.example/v1", + "current_url": "https://new.example/v1", + }, + { + "type": "Model override", + "provider": "known", + "model": "removed", + "previous_url": "https://removed.example/v1", + "current_url": None, + }, + ] + assert "added" in markdown + assert "changed" in markdown + assert "removed" in markdown + + +def test_new_provider_urls_do_not_require_approval(tmp_path: Path) -> None: + baseline_db = _write_baseline_db( + tmp_path, + providers={"known": "https://known.example/v1"}, + ) + current_json = _write_current_json( + tmp_path, + { + "known": _provider(api="https://known.example/v1"), + "new": _provider( + api="https://new-provider.example/v1", + models={ + "new-model": _model( + api="https://new-model.example/v1", + ), + }, + ), + }, + ) + + payload, markdown = 
_run_check(tmp_path, baseline_db, current_json) + + assert payload["approval_required"] is False + assert payload["change_count"] == 0 + assert _changes(payload) == [] + assert "No protected upstream API URL changes" in markdown + + +def _write_baseline_db( + tmp_path: Path, + *, + providers: dict[str, str | None], + model_urls: dict[tuple[str, str], str] | None = None, +) -> Path: + path = tmp_path / "baseline.sqlite" + with sqlite3.connect(path) as connection: + connection.execute( + "CREATE TABLE providers (id TEXT PRIMARY KEY, api TEXT)" + ) + connection.execute( + """ + CREATE TABLE models ( + provider_id TEXT NOT NULL, + id TEXT NOT NULL, + provider_api TEXT + ) + """, + ) + connection.executemany( + "INSERT INTO providers (id, api) VALUES (?, ?)", + sorted(providers.items()), + ) + connection.executemany( + "INSERT INTO models (provider_id, id, provider_api) " + "VALUES (?, ?, ?)", + [ + (provider_id, model_id, api) + for (provider_id, model_id), api in sorted( + (model_urls or {}).items(), + ) + ], + ) + return path + + +def _write_current_json( + tmp_path: Path, + data: dict[str, object], +) -> Path: + path = tmp_path / "current.json" + path.write_text(json.dumps(data), encoding="utf-8") + return path + + +def _provider( + *, + api: str | None = None, + models: dict[str, object] | None = None, +) -> dict[str, object]: + provider: dict[str, object] = {"models": models or {}} + if api is not None: + provider["api"] = api + return provider + + +def _model(*, api: str | None = None) -> dict[str, object]: + model: dict[str, object] = {} + if api is not None: + model["provider"] = {"api": api} + return model + + +def _run_check( + tmp_path: Path, + baseline_db: Path, + current_json: Path, +) -> tuple[dict[str, object], str]: + json_output = tmp_path / "api-url-diff.json" + markdown_output = tmp_path / "api-url-diff.md" + subprocess.run( + [ + sys.executable, + str(CHECK_SCRIPT), + str(current_json), + "--baseline-db", + str(baseline_db), + "--baseline-version", 
+ "0.20260515.1", + "--digest", + "abc123", + "--json-output", + str(json_output), + "--markdown-output", + str(markdown_output), + ], + check=True, + capture_output=True, + text=True, + ) + return ( + cast("dict[str, object]", json.loads(json_output.read_text())), + markdown_output.read_text(encoding="utf-8"), + ) + + +def _changes(payload: dict[str, object]) -> list[dict[str, object]]: + return cast("list[dict[str, object]]", payload["changes"])