diff --git a/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py b/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py index 792c2da148..37fc0bb755 100644 --- a/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py +++ b/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py @@ -33,6 +33,7 @@ "vllm-buildkite-agent-events": "vllm.vllm_buildkite_agents", "vllm-buildkite-build-events": "vllm.vllm_buildkite_builds", "vllm-buildkite-job-events": "vllm.vllm_buildkite_jobs", + "torchci-oot-workflow-job": "default.oot_workflow_job", } diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/params.json b/torchci/clickhouse_queries/oot_backend_dashboard/params.json new file mode 100644 index 0000000000..360d321eb7 --- /dev/null +++ b/torchci/clickhouse_queries/oot_backend_dashboard/params.json @@ -0,0 +1,12 @@ +{ + "params": { + "repo": "String", + "days": "UInt64" + }, + "tests": [ + { + "repo": "/", + "days": "7" + } + ] +} diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/query.sql b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql new file mode 100644 index 0000000000..57131f88fa --- /dev/null +++ b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql @@ -0,0 +1,29 @@ +SELECT + pr_number, + pytorch_head_sha, + workflow_name, + job_name, + check_run_id, + run_id, + run_attempt, + status, + conclusion, + started_at, + completed_at, + duration_seconds, + total_tests, + passed_tests, + failed_tests, + skipped_tests, + workflow_run_url, + artifact_url, + queue_time, + execution_time +FROM + default.oot_workflow_job FINAL +WHERE + downstream_repo = {repo: String} + AND started_at > now() - INTERVAL {days: UInt64} DAY +ORDER BY + started_at DESC +LIMIT 500 diff --git a/torchci/clickhouse_queries/oot_pr_results/params.json b/torchci/clickhouse_queries/oot_pr_results/params.json new file mode 100644 index 0000000000..532f1d52f9 --- /dev/null +++ b/torchci/clickhouse_queries/oot_pr_results/params.json @@ -0,0 +1,10 @@ +{ + "params": { + "pr": "UInt64" + }, + "tests": [ + { + "pr": "179565" + } + ] +} diff --git a/torchci/clickhouse_queries/oot_pr_results/query.sql b/torchci/clickhouse_queries/oot_pr_results/query.sql new file mode 100644 index 0000000000..11710adceb --- /dev/null +++ b/torchci/clickhouse_queries/oot_pr_results/query.sql @@ -0,0 +1,21 @@ +SELECT + downstream_repo, + workflow_name, + job_name, + check_run_id, + run_id, + run_attempt, + status, + conclusion, + duration_seconds, + workflow_run_url, + artifact_url, + started_at, + queue_time, + execution_time +FROM + default.oot_workflow_job FINAL +WHERE + pr_number = {pr: UInt64} +ORDER BY + downstream_repo, started_at DESC diff --git a/torchci/clickhouse_queries/oot_summary/params.json b/torchci/clickhouse_queries/oot_summary/params.json new file mode 100644 index 0000000000..108eccb7e9 --- /dev/null +++ b/torchci/clickhouse_queries/oot_summary/params.json @@ -0,0 +1,10 @@ +{ + "params": { + "days": "UInt64" + }, + "tests": [ + { + "days": "7" + } + ] +} diff --git a/torchci/clickhouse_queries/oot_summary/query.sql b/torchci/clickhouse_queries/oot_summary/query.sql new file mode 100644 index 0000000000..3e92d385e5 --- /dev/null +++ b/torchci/clickhouse_queries/oot_summary/query.sql @@ -0,0 +1,18 @@ +SELECT + downstream_repo AS repo, + anyLast(downstream_repo_level) AS downstream_repo_level, + countIf(conclusion = 'success') AS successes, + countIf(conclusion = 'failure') AS failures, + count() AS total, + if(total > 0, successes / total, 0) AS pass_rate, + avg(duration_seconds) AS avg_duration_s, + max(started_at) AS last_run +FROM + default.oot_workflow_job FINAL +WHERE + started_at > now() - INTERVAL {days: UInt64} DAY + AND status = 'completed' +GROUP BY + repo +ORDER BY + pass_rate ASC diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx new file mode 100644 index 0000000000..0829394c7c --- /dev/null +++ b/torchci/components/oot/OotPrSection.tsx @@ -0,0 +1,145 @@ +import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; +import { + Accordion, + AccordionDetails, + AccordionSummary, + Chip, + Link, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, +} from "@mui/material"; +import { durationDisplay } from "components/common/TimeUtils"; +import { fetcher } from "lib/GeneralUtils"; +import { conclusionColor, conclusionLabel } from "lib/oot/ootUtils"; +import useSWR from "swr"; + +interface OotPrResult { + downstream_repo: string; + workflow_name: string; + job_name: string; + check_run_id: string; + run_id: string; + run_attempt: number; + status: string; + conclusion: string; + duration_seconds: number; + workflow_run_url: string; + artifact_url: string; + started_at: string; + queue_time: number | null; + execution_time: number | null; +} + +export default function OotPrSection({ prNumber }: { prNumber: number }) { + const url = `/api/clickhouse/oot_pr_results?parameters=${encodeURIComponent( + JSON.stringify({ pr: String(prNumber) }) + )}`; + const { data, error } = useSWR(url, fetcher, { + refreshInterval: 60_000, + }); + + if (error || !data || data.length === 0) return null; + + const successCount = data.filter( + (r) => r.status === "completed" && r.conclusion === "success" + ).length; + const totalCompleted = data.filter((r) => r.status === "completed").length; + const inProgress = data.filter((r) => r.status === "in_progress").length; + + const summaryText = [ + totalCompleted > 0 ? `${successCount}/${totalCompleted} passed` : null, + inProgress > 0 ? `${inProgress} running` : null, + ] + .filter(Boolean) + .join(", "); + + return ( + + }> + + + Out-of-Tree Backends + + + ({summaryText}) + + + + + + + + + + Backend + + + Job + + + Status + + + Duration + + + Links + + + + + {data.map((row, i) => ( + + {row.downstream_repo} + {row.job_name} + + + + + {row.duration_seconds + ? durationDisplay(Math.round(row.duration_seconds)) + : "–"} + + + + {row.workflow_run_url && ( + + Run + + )} + {row.artifact_url && ( + + Artifacts + + )} + + + + ))} + +
+
+
+
+ ); +} diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts new file mode 100644 index 0000000000..994cbf5f51 --- /dev/null +++ b/torchci/lib/oot/ootUtils.ts @@ -0,0 +1,237 @@ +import { getDynamoClient } from "lib/dynamo"; + +const OOT_TABLE = "torchci-oot-workflow-job"; +const MAX_PAYLOAD_BYTES = 2 * 1024 * 1024; // 2MB + +// ---- Types ---- + +export interface RelayTrusted { + verified_repo: string; + downstream_repo_level?: string; // "L1" | "L2" | "L3" | "L4" — relay-determined from allowlist + ci_metrics?: { + queue_time?: number | null; + execution_time?: number | null; + }; +} + +export interface RelayWorkflow { + schema_version?: string; + status: string; + conclusion?: string | null; + name: string; + url: string; + job_name?: string; + check_run_id?: string; + run_id?: string; + run_attempt?: number | string; + started_at?: string; + completed_at?: string; + test_results?: any; + artifact_url?: string; +} + +export interface RelayCallbackPayload { + event_type: string; + delivery_id: string; + payload: { + pull_request?: { number: number; head?: { sha: string } }; + repository?: { full_name: string }; + [key: string]: any; + }; + workflow: RelayWorkflow; +} + +export interface RelayUntrusted { + callback_payload: RelayCallbackPayload; +} + +export interface RelayPayload { + trusted: RelayTrusted; + untrusted: RelayUntrusted; +} + +export interface OotWorkflowJobRecord { + dynamoKey: string; + status: string; + downstream_repo: string; + upstream_repo: string; + pr_number: number; + pytorch_head_sha: string; + delivery_id: string; + workflow_run_url: string; + workflow_name: string; + job_name: string; + check_run_id: string; + run_id: string; + run_attempt: number; + conclusion?: string; + queue_time?: number | null; + execution_time?: number | null; + started_at?: string; + completed_at?: string; + total_tests?: number; + passed_tests?: number; + failed_tests?: number; + skipped_tests?: number; + failed_tests_json?: string; + downstream_repo_level?: string; + artifact_url?: string; + environment?: string; +} + +// ---- Validation ---- + +export function validatePayloadSize(bodyString: string): void { + if (Buffer.byteLength(bodyString, "utf-8") > MAX_PAYLOAD_BYTES) { + throw new ApiError(413, "Payload exceeds 2MB limit"); + } +} + +// ---- Extraction ---- + +export function extractDynamoRecord( + payload: RelayPayload +): OotWorkflowJobRecord { + const { trusted, untrusted } = payload; + const cb = untrusted.callback_payload; + const wf = cb.workflow; + const pr = cb.payload?.pull_request; + const upstreamRepo = cb.payload?.repository?.full_name ?? "pytorch/pytorch"; + + if (!wf.job_name) { + throw new ApiError(400, "Missing required field: workflow.job_name"); + } + if (wf.check_run_id == null) { + throw new ApiError(400, "Missing required field: workflow.check_run_id"); + } + const jobName = wf.job_name; + const checkRunId = String(wf.check_run_id); + const runAttempt = Number(wf.run_attempt ?? 1) || 1; + const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}/${jobName}/${checkRunId}`; + + const record: OotWorkflowJobRecord = { + dynamoKey, + status: wf.status, + downstream_repo: trusted.verified_repo, + upstream_repo: upstreamRepo, + pr_number: pr?.number ?? 0, + pytorch_head_sha: pr?.head?.sha ?? "", + delivery_id: cb.delivery_id, + workflow_run_url: wf.url ?? "", + workflow_name: wf.name, + job_name: jobName, + check_run_id: checkRunId, + run_id: wf.run_id ?? "", + run_attempt: runAttempt, + }; + + if (trusted.downstream_repo_level) { + record.downstream_repo_level = trusted.downstream_repo_level; + } + + // Only set timing metrics when the relay provides a non-null value. + // in_progress sets queue_time; completed sets execution_time. + // Using UpdateItem ensures the completed callback doesn't clobber + // queue_time with null. + if (trusted.ci_metrics?.queue_time != null) { + record.queue_time = trusted.ci_metrics.queue_time; + } + if (trusted.ci_metrics?.execution_time != null) { + record.execution_time = trusted.ci_metrics.execution_time; + } + + // Use downstream-reported timestamps, not HUD wall-clock time + if (wf.started_at) { + record.started_at = wf.started_at; + } + + if (wf.artifact_url) { + record.artifact_url = wf.artifact_url; + } + + if (wf.status === "completed") { + record.conclusion = wf.conclusion ?? undefined; + if (wf.completed_at) { + record.completed_at = wf.completed_at; + } + + if (wf.test_results) { + const tr = wf.test_results; + if (typeof tr.passed === "number") record.passed_tests = tr.passed; + if (typeof tr.failed === "number") record.failed_tests = tr.failed; + if (typeof tr.skipped === "number") record.skipped_tests = tr.skipped; + record.total_tests = + typeof tr.total === "number" + ? tr.total + : (tr.passed ?? 0) + (tr.failed ?? 0) + (tr.skipped ?? 0); + } + } + + return record; +} + +// ---- DynamoDB Write (UpdateItem) ---- + +export async function writeToDynamo( + record: OotWorkflowJobRecord +): Promise { + const client = getDynamoClient(); + + // Build SET expression dynamically — only set non-undefined fields. + // This prevents completed callbacks from clobbering in_progress-only + // fields (queue_time, started_at) with null. + const expressionParts: string[] = []; + const expressionValues: Record = {}; + const expressionNames: Record = {}; + + for (const [key, value] of Object.entries(record)) { + if (key === "dynamoKey" || value === undefined) continue; + const placeholder = `:v_${key}`; + const nameAlias = `#n_${key}`; + expressionParts.push(`${nameAlias} = ${placeholder}`); + expressionValues[placeholder] = value; + expressionNames[nameAlias] = key; + } + + await client.update({ + TableName: OOT_TABLE, + Key: { dynamoKey: record.dynamoKey }, + UpdateExpression: `SET ${expressionParts.join(", ")}`, + ExpressionAttributeValues: expressionValues, + ExpressionAttributeNames: expressionNames, + }); +} + +// ---- UI Helpers ---- + +export type ChipColor = "success" | "error" | "warning" | "info" | "default"; + +export function conclusionColor(status: string, conclusion: string): ChipColor { + if (status === "in_progress") return "info"; + switch (conclusion) { + case "success": + return "success"; + case "failure": + return "error"; + case "cancelled": + case "timed_out": + return "warning"; + default: + return "default"; + } +} + +export function conclusionLabel(status: string, conclusion: string): string { + if (status === "in_progress") return "running"; + return conclusion || status; +} + +// ---- Error Helper ---- + +export class ApiError extends Error { + statusCode: number; + constructor(statusCode: number, message: string) { + super(message); + this.statusCode = statusCode; + } +} diff --git a/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx b/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx index b1830bcd15..387bfc6a16 100644 --- a/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx +++ b/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx @@ -3,6 +3,7 @@ import { CommitInfo } from "components/commit/CommitInfo"; import DrCIButton from "components/common/DrCIButton"; import ErrorBoundary from "components/common/ErrorBoundary"; import { useSetTitle } from "components/layout/DynamicTitle"; +import OotPrSection from "components/oot/OotPrSection"; import { fetcher } from "lib/GeneralUtils"; import { PRData } from "lib/types"; import { useRouter } from "next/router"; @@ -122,6 +123,9 @@ function Page() { /> )} + + {prNumber && } + ); } diff --git a/torchci/pages/api/oot/results.ts b/torchci/pages/api/oot/results.ts new file mode 100644 index 0000000000..8bcfa6f62b --- /dev/null +++ b/torchci/pages/api/oot/results.ts @@ -0,0 +1,67 @@ +import { timingSafeEqual } from "crypto"; +import { + ApiError, + extractDynamoRecord, + validatePayloadSize, + writeToDynamo, +} from "lib/oot/ootUtils"; +import type { NextApiRequest, NextApiResponse } from "next"; + +export const config = { + api: { + bodyParser: { + sizeLimit: "2mb", + }, + }, +}; + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ error: "Method not allowed" }); + } + + try { + // 1. Auth: dedicated X-OOT-Relay-Token header (timing-safe comparison) + const expected = process.env.OOT_RELAY_TOKEN; + if (!expected) { + return res.status(500).json({ error: "Server misconfigured" }); + } + const raw = req.headers["x-oot-relay-token"]; + if (typeof raw !== "string") { + return res.status(401).json({ error: "Unauthorized" }); + } + const a = new Uint8Array(Buffer.from(raw)); + const b = new Uint8Array(Buffer.from(expected)); + if (a.length !== b.length || !timingSafeEqual(a, b)) { + return res.status(401).json({ error: "Unauthorized" }); + } + + // 2. Payload size cap (safety net — relay should also enforce this) + const rawBody = + typeof req.body === "string" ? req.body : JSON.stringify(req.body); + validatePayloadSize(rawBody); + + // 3. Extract and write to DynamoDB via UpdateItem + // Schema validation is done by the relay before forwarding. + const body = typeof req.body === "string" ? JSON.parse(req.body) : req.body; + const record = extractDynamoRecord(body); + await writeToDynamo(record); + + return res.status(200).json({ + ok: true, + status: record.status, + dynamoKey: record.dynamoKey, + }); + } catch (err: any) { + if (err instanceof ApiError) { + return res.status(err.statusCode).json({ error: err.message }); + } + console.error("OOT results handler error:", err); + return res + .status(500) + .json({ error: "Internal error writing to DynamoDB" }); + } +} diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx new file mode 100644 index 0000000000..48624f1e71 --- /dev/null +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -0,0 +1,288 @@ +import { + Box, + Chip, + FormControl, + InputLabel, + Link, + MenuItem, + Paper, + Select, + SelectChangeEvent, + Skeleton, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Tooltip, + Typography, +} from "@mui/material"; +import { durationDisplay } from "components/common/TimeUtils"; +import { fetcher } from "lib/GeneralUtils"; +import { conclusionColor, conclusionLabel } from "lib/oot/ootUtils"; +import Head from "next/head"; +import NextLink from "next/link"; +import { useRouter } from "next/router"; +import { useMemo, useState } from "react"; +import useSWR from "swr"; + +interface OotJobRow { + pr_number: number; + pytorch_head_sha: string; + workflow_name: string; + job_name: string; + check_run_id: string; + run_id: string; + run_attempt: number; + status: string; + conclusion: string; + started_at: string; + completed_at: string; + duration_seconds: number; + total_tests: number; + passed_tests: number; + failed_tests: number; + skipped_tests: number; + workflow_run_url: string; + artifact_url: string; + queue_time: number | null; + execution_time: number | null; +} + +function JobChip({ job }: { job: OotJobRow }) { + const color = conclusionColor(job.status, job.conclusion); + const label = conclusionLabel(job.status, job.conclusion); + const tooltipContent = [ + `Job: ${job.job_name}`, + job.run_attempt > 1 ? `Attempt: ${job.run_attempt}` : null, + `Duration: ${ + job.duration_seconds + ? durationDisplay(Math.round(job.duration_seconds)) + : "–" + }`, + job.total_tests + ? `Tests: ${job.passed_tests}/${job.total_tests} passed` + : null, + job.queue_time != null ? `Queue: ${job.queue_time.toFixed(1)}s` : null, + ] + .filter(Boolean) + .join("\n"); + + return ( + {tooltipContent}} + > + + + ); +} + +interface MatrixRow { + prNumber: number; + sha: string; + jobs: Map; +} + +function buildMatrix(data: OotJobRow[]): { + jobNames: string[]; + rows: MatrixRow[]; +} { + const jobNamesSet = new Set(); + const prMap = new Map(); + + for (const job of data) { + jobNamesSet.add(job.job_name); + let row = prMap.get(job.pr_number); + if (!row) { + row = { + prNumber: job.pr_number, + sha: job.pytorch_head_sha, + jobs: new Map(), + }; + prMap.set(job.pr_number, row); + } + // Keep the latest attempt per job_name (highest run_attempt wins) + const existing = row.jobs.get(job.job_name); + if (!existing || job.run_attempt > existing.run_attempt) { + row.jobs.set(job.job_name, job); + } + } + + const jobNames = Array.from(jobNamesSet).sort(); + const rows = Array.from(prMap.values()).sort( + (a, b) => b.prNumber - a.prNumber + ); + return { jobNames, rows }; +} + +function HealthSummary({ data }: { data: OotJobRow[] }) { + const completed = data.filter((j) => j.status === "completed"); + const total = completed.length; + const success = completed.filter((j) => j.conclusion === "success").length; + const rate = total > 0 ? success / total : 0; + + return ( + + = 0.95 ? "success" : rate >= 0.8 ? "warning" : "error"} + /> + + {success}/{total} jobs passed + + + ); +} + +function OotMatrix({ + repoFullName, + days, +}: { + repoFullName: string; + days: number; +}) { + const url = `/api/clickhouse/oot_backend_dashboard?parameters=${encodeURIComponent( + JSON.stringify({ repo: repoFullName, days: String(days) }) + )}`; + const { data, error } = useSWR(url, fetcher, { + refreshInterval: 60_000, + }); + + const matrix = useMemo(() => (data ? buildMatrix(data) : null), [data]); + + if (error) { + return ( + + Failed to load dashboard: {error.message} + + ); + } + if (!data || !matrix) { + return ; + } + if (data.length === 0) { + return ( + + No results for {repoFullName} in the last {days} days. + + ); + } + + return ( + <> + + + + + + + PR + + + SHA + + {matrix.jobNames.map((name) => ( + + {name} + + ))} + + + + {matrix.rows.map((row) => ( + + + + + #{row.prNumber} + + + + + + {row.sha.slice(0, 7)} + + + {matrix.jobNames.map((name) => { + const job = row.jobs.get(name); + return ( + + {job ? : "–"} + + ); + })} + + ))} + +
+
+ + ); +} + +export default function OotBackendPage() { + const router = useRouter(); + const { org, repo } = router.query; + const [days, setDays] = useState(7); + + if (!org || !repo) return null; + + const repoFullName = `${org}/${repo}`; + + return ( + <> + + {repoFullName} — OOT CI | PyTorch HUD + + + + + {repoFullName} + + + ← Back to OOT Summary + + + + + Time Range + + + + + + Rows = PyTorch PRs, columns = downstream CI jobs. Click a chip to open + the workflow run. + + + + + + ); +} diff --git a/torchci/pages/oot/index.tsx b/torchci/pages/oot/index.tsx new file mode 100644 index 0000000000..49b30350ae --- /dev/null +++ b/torchci/pages/oot/index.tsx @@ -0,0 +1,181 @@ +import { + Box, + Chip, + FormControl, + InputLabel, + Link, + MenuItem, + Paper, + Select, + SelectChangeEvent, + Skeleton, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, +} from "@mui/material"; +import { durationDisplay } from "components/common/TimeUtils"; +import { fetcher } from "lib/GeneralUtils"; +import Head from "next/head"; +import NextLink from "next/link"; +import { useState } from "react"; +import useSWR from "swr"; + +interface OotSummaryRow { + repo: string; + downstream_repo_level: string; + successes: number; + failures: number; + total: number; + pass_rate: number; + avg_duration_s: number; + last_run: string; +} + +function PassRateChip({ rate }: { rate: number }) { + const pct = (rate * 100).toFixed(1) + "%"; + if (rate >= 0.95) return ; + if (rate >= 0.8) return ; + return ; +} + +function OotSummaryTable({ days }: { days: number }) { + const url = `/api/clickhouse/oot_summary?parameters=${encodeURIComponent( + JSON.stringify({ days: String(days) }) + )}`; + const { data, error } = useSWR(url, fetcher, { + refreshInterval: 60_000, + }); + + if (error) { + return ( + + Failed to load OOT summary: {error.message} + + ); + } + if (!data) { + return ; + } + if (data.length === 0) { + return ( + + No OOT CI results in the last {days} days. + + ); + } + + return ( + + + + + + Backend Repository + + + Level + + + Pass Rate + + + Success + + + Failures + + + Total + + + Avg Duration + + + Last Run + + + + + {data.map((row) => { + const [org, repo] = row.repo.split("/"); + return ( + + + + {row.repo} + + + + + + + + + {row.successes} + {row.failures} + {row.total} + + {durationDisplay(Math.round(row.avg_duration_s))} + + + {new Date(row.last_run).toLocaleString()} + + + ); + })} + +
+
+ ); +} + +export default function OotSummaryPage() { + const [days, setDays] = useState(7); + + return ( + <> + + Out-of-Tree CI Summary | PyTorch HUD + + + + Out-of-Tree CI Summary + + Time Range + + + + + + Cross-repo CI health overview. Repos sorted by pass rate (worst + first). Click a row to see the per-backend dashboard. + + + + + + ); +} diff --git a/torchci/test/ootResults.test.ts b/torchci/test/ootResults.test.ts new file mode 100644 index 0000000000..968277aaf5 --- /dev/null +++ b/torchci/test/ootResults.test.ts @@ -0,0 +1,167 @@ +import { NextApiRequest, NextApiResponse } from "next"; +import * as ootUtils from "../lib/oot/ootUtils"; +import handler from "../pages/api/oot/results"; + +jest.mock("../lib/oot/ootUtils", () => { + const actual = jest.requireActual("../lib/oot/ootUtils"); + return { + ...actual, + writeToDynamo: jest.fn().mockResolvedValue(undefined), + }; +}); + +const VALID_TOKEN = "test-relay-token-abc123"; + +function mockReq(overrides: Partial = {}): NextApiRequest { + return { + method: "POST", + headers: { "x-oot-relay-token": VALID_TOKEN }, + body: { + trusted: { + verified_repo: "Ascend/pytorch", + downstream_repo_level: "L2", + ci_metrics: { queue_time: 5.0, execution_time: null }, + }, + untrusted: { + callback_payload: { + event_type: "workflow_job", + delivery_id: "del-001", + payload: { + pull_request: { number: 100, head: { sha: "abc123" } }, + repository: { full_name: "pytorch/pytorch" }, + }, + workflow: { + status: "in_progress", + name: "npu-ci", + url: "https://github.com/Ascend/pytorch/actions/runs/1", + job_name: "build", + check_run_id: "9001", + run_id: "555", + run_attempt: 1, + started_at: "2026-05-20T10:00:00Z", + }, + }, + }, + }, + ...overrides, + } as unknown as NextApiRequest; +} + +function mockRes(): NextApiResponse & { _status: number; _json: any } { + const res: any = { + _status: 0, + _json: null, + status(code: number) { + res._status = code; + return res; + }, + json(data: any) { + res._json = data; + return res; + }, + }; + return res; +} + +describe("POST /api/oot/results", () => { + const originalEnv = process.env; + + beforeEach(() => { + process.env = { ...originalEnv, OOT_RELAY_TOKEN: VALID_TOKEN }; + jest.clearAllMocks(); + }); + + afterAll(() => { + process.env = originalEnv; + }); + + test("rejects non-POST methods with 405", async () => { + const res = mockRes(); + await handler(mockReq({ method: "GET" }), res); + expect(res._status).toBe(405); + expect(res._json.error).toBe("Method not allowed"); + }); + + test("returns 500 when OOT_RELAY_TOKEN env is not set", async () => { + delete process.env.OOT_RELAY_TOKEN; + const res = mockRes(); + await handler(mockReq(), res); + expect(res._status).toBe(500); + expect(res._json.error).toBe("Server misconfigured"); + }); + + test("returns 401 when token header is missing", async () => { + const res = mockRes(); + await handler(mockReq({ headers: {} }), res); + expect(res._status).toBe(401); + expect(res._json.error).toBe("Unauthorized"); + }); + + test("returns 401 when token header is wrong", async () => { + const res = mockRes(); + await handler( + mockReq({ headers: { "x-oot-relay-token": "wrong-token" } }), + res + ); + expect(res._status).toBe(401); + expect(res._json.error).toBe("Unauthorized"); + }); + + test("returns 401 when token has different length", async () => { + const res = mockRes(); + await handler(mockReq({ headers: { "x-oot-relay-token": "short" } }), res); + expect(res._status).toBe(401); + expect(res._json.error).toBe("Unauthorized"); + }); + + test("returns 200 and writes to DynamoDB on valid request", async () => { + const res = mockRes(); + await handler(mockReq(), res); + expect(res._status).toBe(200); + expect(res._json.ok).toBe(true); + expect(res._json.status).toBe("in_progress"); + expect(res._json.dynamoKey).toContain("Ascend/pytorch"); + expect(ootUtils.writeToDynamo).toHaveBeenCalledTimes(1); + }); + + test("returns 400 when required fields are missing", async () => { + const body = { + trusted: { verified_repo: "test/repo" }, + untrusted: { + callback_payload: { + event_type: "workflow_job", + delivery_id: "del-002", + payload: {}, + workflow: { + status: "in_progress", + name: "ci", + url: "https://example.com", + // job_name intentionally missing + }, + }, + }, + }; + const res = mockRes(); + await handler(mockReq({ body }), res); + expect(res._status).toBe(400); + expect(res._json.error).toContain("job_name"); + }); + + test("returns 500 when DynamoDB write fails", async () => { + (ootUtils.writeToDynamo as jest.Mock).mockRejectedValueOnce( + new Error("DynamoDB connection failed") + ); + const res = mockRes(); + await handler(mockReq(), res); + expect(res._status).toBe(500); + expect(res._json.error).toContain("Internal error"); + }); + + test("handles string body by parsing JSON", async () => { + const body = JSON.stringify(mockReq().body); + const res = mockRes(); + await handler(mockReq({ body }), res); + expect(res._status).toBe(200); + expect(res._json.ok).toBe(true); + }); +}); diff --git a/torchci/test/ootUtils.test.ts b/torchci/test/ootUtils.test.ts new file mode 100644 index 0000000000..847235461e --- /dev/null +++ b/torchci/test/ootUtils.test.ts @@ -0,0 +1,318 @@ +import { + ApiError, + conclusionColor, + conclusionLabel, + extractDynamoRecord, + RelayPayload, + validatePayloadSize, +} from "../lib/oot/ootUtils"; + +function makePayload( + overrides: { + trusted?: any; + workflow?: any; + callback?: any; + } = {} +): RelayPayload { + return { + trusted: { + verified_repo: "Ascend/pytorch", + downstream_repo_level: "L2", + ci_metrics: { queue_time: 12.5, execution_time: null }, + ...overrides.trusted, + }, + untrusted: { + callback_payload: { + event_type: "workflow_job", + delivery_id: "delivery-abc-123", + payload: { + pull_request: { number: 12345, head: { sha: "abcdef1234567890" } }, + repository: { full_name: "pytorch/pytorch" }, + }, + workflow: { + schema_version: "1", + status: "in_progress", + name: "npu-ci", + url: "https://github.com/Ascend/pytorch/actions/runs/123", + job_name: "build-and-test", + check_run_id: "99001", + run_id: "123456", + run_attempt: 1, + started_at: "2026-05-20T10:00:00Z", + ...overrides.workflow, + }, + ...overrides.callback, + }, + }, + }; +} + +describe("extractDynamoRecord", () => { + test("extracts correct dynamoKey format", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.dynamoKey).toBe( + "Ascend/pytorch/delivery-abc-123/npu-ci/build-and-test/99001" + ); + }); + + test("maps all basic fields correctly", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.status).toBe("in_progress"); + expect(record.downstream_repo).toBe("Ascend/pytorch"); + expect(record.upstream_repo).toBe("pytorch/pytorch"); + expect(record.pr_number).toBe(12345); + expect(record.pytorch_head_sha).toBe("abcdef1234567890"); + expect(record.delivery_id).toBe("delivery-abc-123"); + expect(record.workflow_name).toBe("npu-ci"); + expect(record.job_name).toBe("build-and-test"); + expect(record.check_run_id).toBe("99001"); + expect(record.run_id).toBe("123456"); + expect(record.run_attempt).toBe(1); + }); + + test("sets downstream_repo_level from trusted payload", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.downstream_repo_level).toBe("L2"); + }); + + test("omits downstream_repo_level when not provided", () => { + const record = extractDynamoRecord( + makePayload({ trusted: { downstream_repo_level: undefined } }) + ); + expect(record.downstream_repo_level).toBeUndefined(); + }); + + test("sets queue_time from ci_metrics when non-null", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.queue_time).toBe(12.5); + }); + + test("omits execution_time when ci_metrics value is null", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.execution_time).toBeUndefined(); + }); + + test("sets execution_time when provided", () => { + const record = extractDynamoRecord( + makePayload({ + trusted: { ci_metrics: { queue_time: null, execution_time: 45.2 } }, + }) + ); + expect(record.execution_time).toBe(45.2); + expect(record.queue_time).toBeUndefined(); + }); + + test("sets started_at from workflow", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.started_at).toBe("2026-05-20T10:00:00Z"); + }); + + test("sets artifact_url when provided", () => { + const record = extractDynamoRecord( + makePayload({ + workflow: { artifact_url: "https://example.com/artifacts.zip" }, + }) + ); + expect(record.artifact_url).toBe("https://example.com/artifacts.zip"); + }); + + test("omits artifact_url when not provided", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.artifact_url).toBeUndefined(); + }); + + test("coerces string run_attempt to number", () => { + const record = extractDynamoRecord( + makePayload({ workflow: { run_attempt: "3" } }) + ); + expect(record.run_attempt).toBe(3); + }); + + test("defaults run_attempt to 1 when missing", () => { + const record = extractDynamoRecord( + makePayload({ workflow: { run_attempt: undefined } }) + ); + expect(record.run_attempt).toBe(1); + }); + + test("defaults upstream_repo to pytorch/pytorch when missing", () => { + const record = extractDynamoRecord( + makePayload({ callback: { payload: {} } }) + ); + expect(record.upstream_repo).toBe("pytorch/pytorch"); + }); + + test("defaults pr_number to 0 when no pull_request", () => { + const record = extractDynamoRecord( + makePayload({ + callback: { payload: { repository: { full_name: "pytorch/pytorch" } } }, + }) + ); + expect(record.pr_number).toBe(0); + expect(record.pytorch_head_sha).toBe(""); + }); + + // --- completed status --- + + test("sets conclusion and completed_at for completed status", () => { + const record = extractDynamoRecord( + makePayload({ + workflow: { + status: "completed", + conclusion: "success", + completed_at: "2026-05-20T10:30:00Z", + }, + }) + ); + expect(record.conclusion).toBe("success"); + expect(record.completed_at).toBe("2026-05-20T10:30:00Z"); + }); + + test("does not set conclusion for in_progress status", () => { + const record = extractDynamoRecord(makePayload()); + expect(record.conclusion).toBeUndefined(); + expect(record.completed_at).toBeUndefined(); + }); + + // --- test_results --- + + test("extracts test results with explicit total", () => { + const record = extractDynamoRecord( + makePayload({ + workflow: { + status: "completed", + conclusion: "failure", + test_results: { passed: 100, failed: 5, skipped: 10, total: 115 }, + }, + }) + ); + expect(record.passed_tests).toBe(100); + expect(record.failed_tests).toBe(5); + expect(record.skipped_tests).toBe(10); + expect(record.total_tests).toBe(115); + }); + + test("computes total_tests from passed+failed+skipped when total absent", () => { + const record = extractDynamoRecord( + makePayload({ + workflow: { + status: "completed", + conclusion: "failure", + test_results: { passed: 80, failed: 3, skipped: 7 }, + }, + }) + ); + expect(record.total_tests).toBe(90); + }); + + test("does not set test results for in_progress status", () => { + const record = extractDynamoRecord( + makePayload({ + workflow: { test_results: { passed: 10, failed: 0, skipped: 0 } }, + }) + ); + expect(record.passed_tests).toBeUndefined(); + expect(record.total_tests).toBeUndefined(); + }); + + // --- validation errors --- + + test("throws 400 when job_name is missing", () => { + expect(() => + extractDynamoRecord(makePayload({ workflow: { job_name: "" } })) + ).toThrow(ApiError); + try { + extractDynamoRecord(makePayload({ workflow: { job_name: undefined } })); + } catch (e: any) { + expect(e).toBeInstanceOf(ApiError); + expect(e.statusCode).toBe(400); + expect(e.message).toContain("job_name"); + } + }); + + test("throws 400 when check_run_id is missing", () => { + expect(() => + extractDynamoRecord( + makePayload({ workflow: { check_run_id: undefined } }) + ) + ).toThrow(ApiError); + try { + extractDynamoRecord(makePayload({ workflow: { check_run_id: null } })); + } catch (e: any) { + expect(e).toBeInstanceOf(ApiError); + expect(e.statusCode).toBe(400); + expect(e.message).toContain("check_run_id"); + } + }); +}); + +describe("validatePayloadSize", () => { + test("accepts payload under 2MB", () => { + expect(() => validatePayloadSize("short payload")).not.toThrow(); + }); + + test("throws 413 for payload exceeding 2MB", () => { + const largePayload = "x".repeat(2 * 1024 * 1024 + 1); + expect(() => validatePayloadSize(largePayload)).toThrow(ApiError); + try { + validatePayloadSize(largePayload); + } catch (e: any) { + expect(e.statusCode).toBe(413); + } + }); + + test("accepts payload exactly at 2MB boundary", () => { + const exactPayload = "x".repeat(2 * 1024 * 1024); + expect(() => validatePayloadSize(exactPayload)).not.toThrow(); + }); +}); + +describe("conclusionColor", () => { + test("returns info for in_progress", () => { + expect(conclusionColor("in_progress", "")).toBe("info"); + }); + + test("returns success for success", () => { + expect(conclusionColor("completed", "success")).toBe("success"); + }); + + test("returns error for failure", () => { + expect(conclusionColor("completed", "failure")).toBe("error"); + }); + + test("returns warning for cancelled", () => { + expect(conclusionColor("completed", "cancelled")).toBe("warning"); + }); + + test("returns warning for timed_out", () => { + expect(conclusionColor("completed", "timed_out")).toBe("warning"); + }); + + test("returns default for unknown conclusion", () => { + expect(conclusionColor("completed", "unknown")).toBe("default"); + }); +}); + +describe("conclusionLabel", () => { + test("returns running for in_progress", () => { + expect(conclusionLabel("in_progress", "")).toBe("running"); + }); + + test("returns conclusion when completed", () => { + expect(conclusionLabel("completed", "success")).toBe("success"); + expect(conclusionLabel("completed", "failure")).toBe("failure"); + }); + + test("falls back to status when conclusion is empty", () => { + expect(conclusionLabel("completed", "")).toBe("completed"); + }); +}); + +describe("ApiError", () => { + test("has correct statusCode and message", () => { + const err = new ApiError(404, "Not found"); + expect(err).toBeInstanceOf(Error); + expect(err.statusCode).toBe(404); + expect(err.message).toBe("Not found"); + }); +});