Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pr-connect-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
name: Deploy and test on Posit Connect
runs-on: ubuntu-latest
# Skip gracefully if the license secret is absent (forks, community PRs)
if: ${{ vars.CONNECT_LICENSE_AVAILABLE == 'true' || github.repository_owner == 'posit-dev' }}
if: ${{ (vars.CONNECT_LICENSE_AVAILABLE == 'true' || github.repository_owner == 'posit-dev') && secrets.CONNECT_LICENSE != '' }}

steps:
# ── Checkout & build ─────────────────────────────────────────────────
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr-evals.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:

# ── Semantic quality evals (needs ANTHROPIC_API_KEY) ──────────────────
- name: Run list_indicators_quality eval
if: ${{ env.ANTHROPIC_API_KEY != '' }}
if: ${{ secrets.ANTHROPIC_API_KEY != '' }}
working-directory: evals/inspect
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
Expand All @@ -62,7 +62,7 @@ jobs:

# ── Integration evals (needs GITHUB_TOKEN and ANTHROPIC_API_KEY) ──────
- name: Run adoption_agent_eval
if: ${{ env.ANTHROPIC_API_KEY != '' }}
if: ${{ secrets.ANTHROPIC_API_KEY != '' }}
working-directory: evals/inspect
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
Expand Down
4 changes: 4 additions & 0 deletions agentic-adoption-scan/mcpserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,10 @@ func makeAdoptionSummaryHandler(cfg MCPServerConfig) server.ToolHandlerFunc {
topRepos = topRepos[:20]
}

if totalRepos == 0 {
return mcp.NewToolResultError(fmt.Sprintf("no cached scan data for org %s — run scan_org first", org)), nil
}

reposWithAny := len(repoIndicators)

return marshalToolResult(adoptionSummary{
Expand Down
6 changes: 5 additions & 1 deletion evals/connect/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,14 @@ def mcp_url(deployed_content_url: str) -> str:
@pytest.fixture(scope="session")
def http_client(connect_api_key: str) -> httpx.Client:
"""An httpx client authenticated to Connect."""
return httpx.Client(
client = httpx.Client(
headers={"Authorization": f"Key {connect_api_key}"},
timeout=60,
)
try:
yield client
finally:
client.close()


def wait_for_deployment(url: str, timeout: int = 120) -> None:
Expand Down
4 changes: 4 additions & 0 deletions evals/connect/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
[build-system]
requires = ["uv_build>=0.6.0,<0.11.0"]
build-backend = "uv_build"

[project]
name = "connect-deploy-tests"
version = "0.1.0"
Expand Down
Empty file.
19 changes: 12 additions & 7 deletions evals/inspect/mcp_eval_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,11 @@ def _call_mcp_tool(tool_name: str, arguments: dict[str, Any]) -> dict[str, Any]:

with tempfile.TemporaryDirectory() as cache_dir:
result = subprocess.run(
[binary, "serve"],
[binary, "serve", "--cache-dir", cache_dir],
input=stdin_data,
capture_output=True,
text=True,
timeout=30,
env={**os.environ, "MCP_CACHE_DIR": cache_dir},
)

# Parse newline-delimited JSON-RPC responses, pick the one with id=2
Expand Down Expand Up @@ -315,6 +314,13 @@ def list_indicators_quality() -> Task:
Requires ANTHROPIC_API_KEY. Run with:
inspect eval mcp_eval_tasks.py --task list_indicators_quality -M anthropic/claude-sonnet-4-6
"""
if "ANTHROPIC_API_KEY" not in os.environ:
# Return a trivial no-op task so this eval can run safely without secrets.
return Task(
dataset=MemoryDataset([]),
solver=[],
scorer=accuracy(),
)
return Task(
dataset=MemoryDataset(
[
Expand Down Expand Up @@ -371,23 +377,22 @@ def adoption_agent_eval() -> Task:

The answer is scored by a judge model.

Skip gracefully if GITHUB_TOKEN is unavailable.
Skip gracefully if GITHUB_TOKEN or ANTHROPIC_API_KEY is unavailable.
"""
if not _has_github_token():
if not _has_github_token() or "ANTHROPIC_API_KEY" not in os.environ:
# Return a trivially-passing placeholder task so CI doesn't fail
return Task(
dataset=MemoryDataset(
[
Sample(
input="SKIP: GITHUB_TOKEN not set",
input="SKIP: GITHUB_TOKEN or ANTHROPIC_API_KEY not set",
target="SKIPPED",
)
]
),
solver=_skip_solver(),
scorer=_skip_scorer(),
)

# Use posit-dev/py-shiny as a well-known small org with known indicators
target_org = "posit-dev"
target_repo = "py-shiny"
Expand Down Expand Up @@ -433,6 +438,6 @@ async def solve(state: TaskState, generate: Generate) -> TaskState:
@scorer(metrics=[accuracy()])
def _skip_scorer():
async def score(state: TaskState, target: Target) -> Score:
return Score(value=1, explanation="Skipped — GITHUB_TOKEN not set")
return Score(value=1, explanation="Skipped — required secrets not available")

return score
4 changes: 4 additions & 0 deletions evals/inspect/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
[build-system]
requires = ["uv_build>=0.6.0,<0.11.0"]
build-backend = "uv_build"

[project]
name = "mcp-evals"
version = "0.1.0"
Expand Down
Empty file.
Loading