From c22a50bed8d30122a00c3a73eecf044364919cf8 Mon Sep 17 00:00:00 2001 From: Subin George Date: Fri, 24 Apr 2026 15:52:09 +0530 Subject: [PATCH 01/15] [OOT HUD] Full pipeline: API endpoint, ClickHouse schema, replicator mapping, and frontend pages Implements the HUD-side ingestion and display for Out-of-Tree CI results, as described in the OOT HUD RFC V3. The relay (PR #7967) forwards {trusted, untrusted} payloads to the new /api/oot/results endpoint, which validates, extracts fields, and writes to DynamoDB. DynamoDB Streams replicates to ClickHouse via the existing replicator Lambda. Three frontend views display the results: a global OOT summary, a per-backend matrix dashboard, and a collapsible section on PR pages. --- .../lambda_function.py | 1 + .../default.oot_workflow_job/schema.sql | 36 ++ .../oot_backend_dashboard/params.json | 12 + .../oot_backend_dashboard/query.sql | 25 ++ .../oot_pr_results/params.json | 10 + .../oot_pr_results/query.sql | 17 + .../oot_summary/params.json | 10 + .../clickhouse_queries/oot_summary/query.sql | 17 + torchci/components/oot/OotPrSection.tsx | 170 ++++++++++ torchci/lib/oot/ootUtils.ts | 217 +++++++++++++ .../[repoName]/pull/[prNumber].tsx | 6 + torchci/pages/api/oot/results.ts | 63 ++++ torchci/pages/oot/[org]/[repo].tsx | 307 ++++++++++++++++++ torchci/pages/oot/index.tsx | 170 ++++++++++ 14 files changed, 1061 insertions(+) create mode 100644 clickhouse_db_schema/default.oot_workflow_job/schema.sql create mode 100644 torchci/clickhouse_queries/oot_backend_dashboard/params.json create mode 100644 torchci/clickhouse_queries/oot_backend_dashboard/query.sql create mode 100644 torchci/clickhouse_queries/oot_pr_results/params.json create mode 100644 torchci/clickhouse_queries/oot_pr_results/query.sql create mode 100644 torchci/clickhouse_queries/oot_summary/params.json create mode 100644 torchci/clickhouse_queries/oot_summary/query.sql create mode 100644 torchci/components/oot/OotPrSection.tsx create mode 100644 torchci/lib/oot/ootUtils.ts create mode 100644 torchci/pages/api/oot/results.ts create mode 100644 torchci/pages/oot/[org]/[repo].tsx create mode 100644 torchci/pages/oot/index.tsx diff --git a/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py b/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py index 792c2da148..37fc0bb755 100644 --- a/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py +++ b/aws/lambda/clickhouse-replicator-dynamo/lambda_function.py @@ -33,6 +33,7 @@ "vllm-buildkite-agent-events": "vllm.vllm_buildkite_agents", "vllm-buildkite-build-events": "vllm.vllm_buildkite_builds", "vllm-buildkite-job-events": "vllm.vllm_buildkite_jobs", + "torchci-oot-workflow-job": "default.oot_workflow_job", } diff --git a/clickhouse_db_schema/default.oot_workflow_job/schema.sql b/clickhouse_db_schema/default.oot_workflow_job/schema.sql new file mode 100644 index 0000000000..08ae31a1f3 --- /dev/null +++ b/clickhouse_db_schema/default.oot_workflow_job/schema.sql @@ -0,0 +1,36 @@ +CREATE TABLE default.oot_workflow_job +( + `dynamoKey` String, + `status` String, + `downstream_repo` String COMMENT 'Downstream repo org/name, from trusted.verified_repo', + `upstream_repo` String COMMENT 'Upstream repo, typically pytorch/pytorch', + `pr_number` UInt64 COMMENT 'PyTorch PR number', + `pytorch_head_sha` String COMMENT 'PyTorch PR commit SHA', + `delivery_id` String COMMENT 'GitHub webhook delivery ID from L1 dispatch', + `workflow_run_url` String COMMENT 'Link to downstream GHA workflow run', + `workflow_name` String COMMENT 'Downstream workflow name', + `conclusion` String COMMENT 'success, failure, cancelled, timed_out (set on completed)', + `queue_time` Nullable(Float64) COMMENT 'Relay-measured dispatch-to-in_progress time in seconds', + `execution_time` Nullable(Float64) COMMENT 'Relay-measured in_progress-to-completed time in seconds', + `started_at` DateTime64(9) COMMENT 'ISO 8601 timestamp when record was created', + `completed_at` DateTime64(9) COMMENT 'ISO 8601 timestamp when job completed', + `total_tests` UInt64 DEFAULT 0, + `passed_tests` UInt64 DEFAULT 0, + `failed_tests` UInt64 DEFAULT 0, + `skipped_tests` UInt64 DEFAULT 0, + `failed_tests_json` String DEFAULT '' COMMENT 'JSON array of failed/errored test details', + `artifact_url` String DEFAULT '' COMMENT 'URL to downstream-hosted artifacts (logs, reports)', + `environment` String DEFAULT '' COMMENT 'JSON: {"cuda": "12.8", "device": "H100", ...}', + `downstream_repo_level` String DEFAULT '' COMMENT 'Relay level at dispatch time: L2, L3, L4', + `_inserted_at` DateTime MATERIALIZED now(), + `repository_full_name` String ALIAS downstream_repo COMMENT 'Alias for consistency with workflow_job queries', + `duration_seconds` Float64 ALIAS if(completed_at = toDateTime64(0, 9), 0, dateDiff(second, started_at, completed_at)), + INDEX status_index status TYPE bloom_filter GRANULARITY 1, + INDEX started_at_index started_at TYPE minmax GRANULARITY 1, + INDEX completed_at_index completed_at TYPE minmax GRANULARITY 1, + INDEX pr_number_index pr_number TYPE bloom_filter GRANULARITY 1, + INDEX downstream_repo_index downstream_repo TYPE bloom_filter GRANULARITY 1 +) +ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') +ORDER BY (downstream_repo, delivery_id, dynamoKey) +SETTINGS index_granularity = 8192 diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/params.json b/torchci/clickhouse_queries/oot_backend_dashboard/params.json new file mode 100644 index 0000000000..9354a8e528 --- /dev/null +++ b/torchci/clickhouse_queries/oot_backend_dashboard/params.json @@ -0,0 +1,12 @@ +{ + "params": { + "repo": "String", + "days": "UInt64" + }, + "tests": [ + { + "repo": "intel/torch-xpu-ops", + "days": "7" + } + ] +} diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/query.sql b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql new file mode 100644 index 0000000000..4612315354 --- /dev/null +++ b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql @@ -0,0 +1,25 @@ +SELECT + pr_number, + pytorch_head_sha, + workflow_name AS job_name, + status, + conclusion, + started_at, + completed_at, + duration_seconds, + total_tests, + passed_tests, + failed_tests, + skipped_tests, + workflow_run_url, + artifact_url, + queue_time, + execution_time +FROM + default.oot_workflow_job FINAL +WHERE + downstream_repo = {repo: String} + AND started_at > now() - INTERVAL {days: UInt64} DAY +ORDER BY + started_at DESC +LIMIT 500 diff --git a/torchci/clickhouse_queries/oot_pr_results/params.json b/torchci/clickhouse_queries/oot_pr_results/params.json new file mode 100644 index 0000000000..532f1d52f9 --- /dev/null +++ b/torchci/clickhouse_queries/oot_pr_results/params.json @@ -0,0 +1,10 @@ +{ + "params": { + "pr": "UInt64" + }, + "tests": [ + { + "pr": "179565" + } + ] +} diff --git a/torchci/clickhouse_queries/oot_pr_results/query.sql b/torchci/clickhouse_queries/oot_pr_results/query.sql new file mode 100644 index 0000000000..a1da9f009f --- /dev/null +++ b/torchci/clickhouse_queries/oot_pr_results/query.sql @@ -0,0 +1,17 @@ +SELECT + downstream_repo, + workflow_name AS job_name, + status, + conclusion, + duration_seconds, + workflow_run_url, + artifact_url, + started_at, + queue_time, + execution_time +FROM + default.oot_workflow_job FINAL +WHERE + pr_number = {pr: UInt64} +ORDER BY + downstream_repo, started_at DESC diff --git a/torchci/clickhouse_queries/oot_summary/params.json b/torchci/clickhouse_queries/oot_summary/params.json new file mode 100644 index 0000000000..108eccb7e9 --- /dev/null +++ b/torchci/clickhouse_queries/oot_summary/params.json @@ -0,0 +1,10 @@ +{ + "params": { + "days": "UInt64" + }, + "tests": [ + { + "days": "7" + } + ] +} diff --git a/torchci/clickhouse_queries/oot_summary/query.sql b/torchci/clickhouse_queries/oot_summary/query.sql new file mode 100644 index 0000000000..f9ee865f22 --- /dev/null +++ b/torchci/clickhouse_queries/oot_summary/query.sql @@ -0,0 +1,17 @@ +SELECT + downstream_repo AS repo, + countIf(conclusion = 'success') AS successes, + countIf(conclusion = 'failure') AS failures, + count() AS total, + if(total > 0, successes / total, 0) AS pass_rate, + avg(duration_seconds) AS avg_duration_s, + max(started_at) AS last_run +FROM + default.oot_workflow_job FINAL +WHERE + started_at > now() - INTERVAL {days: UInt64} DAY + AND status = 'completed' +GROUP BY + repo +ORDER BY + pass_rate ASC diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx new file mode 100644 index 0000000000..8091ae5c32 --- /dev/null +++ b/torchci/components/oot/OotPrSection.tsx @@ -0,0 +1,170 @@ +import { + Accordion, + AccordionDetails, + AccordionSummary, + Chip, + Link, + Skeleton, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, +} from "@mui/material"; +import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; +import { durationDisplay } from "components/common/TimeUtils"; +import { fetcher } from "lib/GeneralUtils"; +import useSWR from "swr"; + +interface OotPrResult { + downstream_repo: string; + job_name: string; + status: string; + conclusion: string; + duration_seconds: number; + workflow_run_url: string; + artifact_url: string; + started_at: string; + queue_time: number | null; + execution_time: number | null; +} + +function conclusionColor( + status: string, + conclusion: string +): "success" | "error" | "warning" | "info" | "default" { + if (status === "in_progress") return "info"; + switch (conclusion) { + case "success": + return "success"; + case "failure": + return "error"; + case "cancelled": + case "timed_out": + return "warning"; + default: + return "default"; + } +} + +function conclusionLabel(status: string, conclusion: string): string { + if (status === "in_progress") return "running"; + return conclusion || status; +} + +export default function OotPrSection({ prNumber }: { prNumber: number }) { + const url = `/api/clickhouse/oot_pr_results?parameters=${encodeURIComponent( + JSON.stringify({ pr: String(prNumber) }) + )}`; + const { data, error } = useSWR(url, fetcher, { + refreshInterval: 60_000, + }); + + if (error || !data || data.length === 0) return null; + + const successCount = data.filter( + (r) => r.status === "completed" && r.conclusion === "success" + ).length; + const totalCompleted = data.filter( + (r) => r.status === "completed" + ).length; + const inProgress = data.filter( + (r) => r.status === "in_progress" + ).length; + + const summaryText = [ + totalCompleted > 0 + ? `${successCount}/${totalCompleted} passed` + : null, + inProgress > 0 ? `${inProgress} running` : null, + ] + .filter(Boolean) + .join(", "); + + return ( + + }> + + + Out-of-Tree Backends + + + ({summaryText}) + + + + + + + + + + Backend + + + Job + + + Status + + + Duration + + + Links + + + + + {data.map((row, i) => ( + + {row.downstream_repo} + {row.job_name} + + + + + {row.duration_seconds + ? durationDisplay(Math.round(row.duration_seconds)) + : "–"} + + + + {row.workflow_run_url && ( + + Run + + )} + {row.artifact_url && ( + + Artifacts + + )} + + + + ))} + +
+
+
+
+ ); +} diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts new file mode 100644 index 0000000000..2861603300 --- /dev/null +++ b/torchci/lib/oot/ootUtils.ts @@ -0,0 +1,217 @@ +import { getDynamoClient } from "lib/dynamo"; + +const OOT_TABLE = "torchci-oot-workflow-job"; +const MAX_PAYLOAD_BYTES = 2 * 1024 * 1024; // 2MB +const DAILY_BUDGET_PER_REPO = 1000; + +// ---- Types ---- + +export interface RelayTrusted { + verified_repo: string; + ci_metrics?: { + queue_time?: number | null; + execution_time?: number | null; + }; +} + +export interface RelayWorkflow { + status: string; + conclusion?: string | null; + name: string; + url: string; + test_results?: any; +} + +export interface RelayCallbackPayload { + event_type: string; + delivery_id: string; + payload: { + pull_request?: { number: number; head?: { sha: string } }; + repository?: { full_name: string }; + [key: string]: any; + }; + workflow: RelayWorkflow; +} + +export interface RelayUntrusted { + callback_payload: RelayCallbackPayload; +} + +export interface RelayPayload { + trusted: RelayTrusted; + untrusted: RelayUntrusted; +} + +export interface OotWorkflowJobRecord { + dynamoKey: string; + status: string; + downstream_repo: string; + upstream_repo: string; + pr_number: number; + pytorch_head_sha: string; + delivery_id: string; + workflow_run_url: string; + workflow_name: string; + conclusion?: string; + queue_time?: number | null; + execution_time?: number | null; + started_at: string; + completed_at?: string; + total_tests?: number; + passed_tests?: number; + failed_tests?: number; + skipped_tests?: number; + failed_tests_json?: string; + artifact_url?: string; + environment?: string; +} + +// ---- Validation ---- + +export function validatePayloadSize(bodyString: string): void { + if (Buffer.byteLength(bodyString, "utf-8") > MAX_PAYLOAD_BYTES) { + throw new ApiError(400, "Payload exceeds 2MB limit"); + } +} + +export function validateRelayPayload(body: any): RelayPayload { + if (!body?.trusted?.verified_repo) { + throw new ApiError(400, "Missing trusted.verified_repo"); + } + const cb = body?.untrusted?.callback_payload; + if (!cb) { + throw new ApiError(400, "Missing untrusted.callback_payload"); + } + if (!cb.delivery_id) { + throw new ApiError(400, "Missing delivery_id"); + } + if (!cb.workflow?.status) { + throw new ApiError(400, "Missing workflow.status"); + } + if (!cb.workflow?.name) { + throw new ApiError(400, "Missing workflow.name"); + } + if ( + cb.workflow.status !== "in_progress" && + cb.workflow.status !== "completed" + ) { + throw new ApiError( + 400, + `Invalid workflow.status: ${cb.workflow.status}. Must be "in_progress" or "completed".` + ); + } + if ( + cb.workflow.status === "completed" && + !cb.workflow.conclusion + ) { + throw new ApiError( + 400, + "workflow.conclusion is required when status is completed" + ); + } + return body as RelayPayload; +} + +// ---- Extraction ---- + +export function extractDynamoRecord( + payload: RelayPayload +): OotWorkflowJobRecord { + const { trusted, untrusted } = payload; + const cb = untrusted.callback_payload; + const wf = cb.workflow; + const pr = cb.payload?.pull_request; + const upstreamRepo = + cb.payload?.repository?.full_name ?? "pytorch/pytorch"; + + const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}`; + const now = new Date().toISOString(); + + const record: OotWorkflowJobRecord = { + dynamoKey, + status: wf.status, + downstream_repo: trusted.verified_repo, + upstream_repo: upstreamRepo, + pr_number: pr?.number ?? 0, + pytorch_head_sha: pr?.head?.sha ?? "", + delivery_id: cb.delivery_id, + workflow_run_url: wf.url ?? "", + workflow_name: wf.name, + queue_time: trusted.ci_metrics?.queue_time, + execution_time: trusted.ci_metrics?.execution_time, + started_at: now, + }; + + if (wf.status === "completed") { + record.conclusion = wf.conclusion ?? undefined; + record.completed_at = now; + + if (wf.test_results) { + const tr = wf.test_results; + if (typeof tr.total === "number") record.total_tests = tr.total; + if (typeof tr.passed === "number") record.passed_tests = tr.passed; + if (typeof tr.failed === "number") record.failed_tests = tr.failed; + if (typeof tr.skipped === "number") record.skipped_tests = tr.skipped; + if (tr.failures) { + record.failed_tests_json = JSON.stringify(tr.failures); + } + } + + if (typeof cb.workflow.url === "string" && cb.workflow.url) { + record.artifact_url = cb.workflow.url; + } + } + + return record; +} + +// ---- Daily Budget ---- + +export async function checkDailyBudget(repo: string): Promise { + const client = getDynamoClient(); + const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD + const keyPrefix = `${repo}/`; + + // Use a scan with a filter to count today's records for this repo. + // In production, a GSI on (downstream_repo, started_at) would be more efficient. + const result = await client.query({ + TableName: OOT_TABLE, + KeyConditionExpression: + "begins_with(dynamoKey, :prefix)", + FilterExpression: "begins_with(started_at, :today)", + ExpressionAttributeValues: { + ":prefix": keyPrefix, + ":today": today, + }, + Select: "COUNT", + }); + + if ((result.Count ?? 0) >= DAILY_BUDGET_PER_REPO) { + throw new ApiError( + 429, + `Daily budget exceeded for ${repo} (${DAILY_BUDGET_PER_REPO} callbacks/day)` + ); + } +} + +// ---- DynamoDB Write ---- + +export async function writeToDynamo( + record: OotWorkflowJobRecord +): Promise { + const client = getDynamoClient(); + await client.put({ + TableName: OOT_TABLE, + Item: record, + }); +} + +// ---- Error Helper ---- + +export class ApiError extends Error { + statusCode: number; + constructor(statusCode: number, message: string) { + super(message); + this.statusCode = statusCode; + } +} diff --git a/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx b/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx index b1830bcd15..3d9cecd7fd 100644 --- a/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx +++ b/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx @@ -3,6 +3,7 @@ import { CommitInfo } from "components/commit/CommitInfo"; import DrCIButton from "components/common/DrCIButton"; import ErrorBoundary from "components/common/ErrorBoundary"; import { useSetTitle } from "components/layout/DynamicTitle"; +import OotPrSection from "components/oot/OotPrSection"; import { fetcher } from "lib/GeneralUtils"; import { PRData } from "lib/types"; import { useRouter } from "next/router"; @@ -122,6 +123,11 @@ function Page() { /> )} + + {prNumber && ( + + )} + ); } diff --git a/torchci/pages/api/oot/results.ts b/torchci/pages/api/oot/results.ts new file mode 100644 index 0000000000..51ac242164 --- /dev/null +++ b/torchci/pages/api/oot/results.ts @@ -0,0 +1,63 @@ +import type { NextApiRequest, NextApiResponse } from "next"; +import { checkAuthWithApiToken } from "lib/auth/auth"; +import { + ApiError, + validatePayloadSize, + validateRelayPayload, + extractDynamoRecord, + checkDailyBudget, + writeToDynamo, +} from "lib/oot/ootUtils"; + +export const config = { + api: { + bodyParser: { + sizeLimit: "2mb", + }, + }, +}; + +export default async function handler( + req: NextApiRequest, + res: NextApiResponse +) { + if (req.method !== "POST") { + return res.status(405).json({ error: "Method not allowed" }); + } + + try { + // 1. Auth: x-hud-internal-bot header or session + const auth = await checkAuthWithApiToken(req, res); + if (!auth.ok) { + return res.status(401).json({ error: "Unauthorized" }); + } + + // 2. Payload size check + const rawBody = + typeof req.body === "string" ? req.body : JSON.stringify(req.body); + validatePayloadSize(rawBody); + + // 3. Schema validation + const body = typeof req.body === "string" ? JSON.parse(req.body) : req.body; + const payload = validateRelayPayload(body); + + // 4. Daily budget check + await checkDailyBudget(payload.trusted.verified_repo); + + // 5. Extract and write to DynamoDB + const record = extractDynamoRecord(payload); + await writeToDynamo(record); + + return res.status(200).json({ + ok: true, + status: record.status, + dynamoKey: record.dynamoKey, + }); + } catch (err: any) { + if (err instanceof ApiError) { + return res.status(err.statusCode).json({ error: err.message }); + } + console.error("OOT results handler error:", err); + return res.status(502).json({ error: "Internal error writing to DynamoDB" }); + } +} diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx new file mode 100644 index 0000000000..1feab6354f --- /dev/null +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -0,0 +1,307 @@ +import { + Box, + Chip, + FormControl, + InputLabel, + Link, + MenuItem, + Paper, + Select, + SelectChangeEvent, + Skeleton, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Tooltip, + Typography, +} from "@mui/material"; +import { durationDisplay } from "components/common/TimeUtils"; +import { fetcher } from "lib/GeneralUtils"; +import Head from "next/head"; +import NextLink from "next/link"; +import { useRouter } from "next/router"; +import { useMemo, useState } from "react"; +import useSWR from "swr"; + +interface OotJobRow { + pr_number: number; + pytorch_head_sha: string; + job_name: string; + status: string; + conclusion: string; + started_at: string; + completed_at: string; + duration_seconds: number; + total_tests: number; + passed_tests: number; + failed_tests: number; + skipped_tests: number; + workflow_run_url: string; + artifact_url: string; + queue_time: number | null; + execution_time: number | null; +} + +function conclusionColor( + status: string, + conclusion: string +): "success" | "error" | "warning" | "info" | "default" { + if (status === "in_progress") return "info"; + switch (conclusion) { + case "success": + return "success"; + case "failure": + return "error"; + case "cancelled": + case "timed_out": + return "warning"; + default: + return "default"; + } +} + +function conclusionLabel(status: string, conclusion: string): string { + if (status === "in_progress") return "running"; + return conclusion || status; +} + +function JobChip({ + job, +}: { + job: OotJobRow; +}) { + const color = conclusionColor(job.status, job.conclusion); + const label = conclusionLabel(job.status, job.conclusion); + const tooltipContent = [ + `Job: ${job.job_name}`, + `Duration: ${job.duration_seconds ? durationDisplay(Math.round(job.duration_seconds)) : "–"}`, + job.total_tests ? `Tests: ${job.passed_tests}/${job.total_tests} passed` : null, + job.queue_time != null ? `Queue: ${job.queue_time.toFixed(1)}s` : null, + ] + .filter(Boolean) + .join("\n"); + + return ( + {tooltipContent}}> + + + ); +} + +interface MatrixRow { + prNumber: number; + sha: string; + jobs: Map; +} + +function buildMatrix(data: OotJobRow[]): { + jobNames: string[]; + rows: MatrixRow[]; +} { + const jobNamesSet = new Set(); + const prMap = new Map(); + + for (const job of data) { + jobNamesSet.add(job.job_name); + let row = prMap.get(job.pr_number); + if (!row) { + row = { + prNumber: job.pr_number, + sha: job.pytorch_head_sha, + jobs: new Map(), + }; + prMap.set(job.pr_number, row); + } + // Keep the latest result per job name + const existing = row.jobs.get(job.job_name); + if (!existing || job.started_at > existing.started_at) { + row.jobs.set(job.job_name, job); + } + } + + const jobNames = Array.from(jobNamesSet).sort(); + const rows = Array.from(prMap.values()).sort( + (a, b) => b.prNumber - a.prNumber + ); + return { jobNames, rows }; +} + +function HealthSummary({ data }: { data: OotJobRow[] }) { + const completed = data.filter((j) => j.status === "completed"); + const total = completed.length; + const success = completed.filter((j) => j.conclusion === "success").length; + const rate = total > 0 ? success / total : 0; + + return ( + + = 0.95 ? "success" : rate >= 0.8 ? "warning" : "error"} + /> + + {success}/{total} jobs passed + + + ); +} + +function OotMatrix({ + repoFullName, + days, +}: { + repoFullName: string; + days: number; +}) { + const url = `/api/clickhouse/oot_backend_dashboard?parameters=${encodeURIComponent( + JSON.stringify({ repo: repoFullName, days: String(days) }) + )}`; + const { data, error } = useSWR(url, fetcher, { + refreshInterval: 60_000, + }); + + const matrix = useMemo(() => (data ? buildMatrix(data) : null), [data]); + + if (error) { + return ( + + Failed to load dashboard: {error.message} + + ); + } + if (!data || !matrix) { + return ; + } + if (data.length === 0) { + return ( + + No results for {repoFullName} in the last {days} days. + + ); + } + + return ( + <> + + + + + + + PR + + + SHA + + {matrix.jobNames.map((name) => ( + + {name} + + ))} + + + + {matrix.rows.map((row) => ( + + + + + #{row.prNumber} + + + + + + {row.sha.slice(0, 7)} + + + {matrix.jobNames.map((name) => { + const job = row.jobs.get(name); + return ( + + {job ? : "–"} + + ); + })} + + ))} + +
+
+ + ); +} + +export default function OotBackendPage() { + const router = useRouter(); + const { org, repo } = router.query; + const [days, setDays] = useState(7); + + if (!org || !repo) return null; + + const repoFullName = `${org}/${repo}`; + + return ( + <> + + + {repoFullName} — OOT CI | PyTorch HUD + + + + + + {repoFullName} + + + ← Back to OOT Summary + + + + + Time Range + + + + + + Rows = PyTorch PRs, columns = downstream CI jobs. Click a chip to + open the workflow run. + + + + + + ); +} diff --git a/torchci/pages/oot/index.tsx b/torchci/pages/oot/index.tsx new file mode 100644 index 0000000000..6727e05111 --- /dev/null +++ b/torchci/pages/oot/index.tsx @@ -0,0 +1,170 @@ +import { + Box, + Chip, + FormControl, + InputLabel, + Link, + MenuItem, + Paper, + Select, + SelectChangeEvent, + Skeleton, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, +} from "@mui/material"; +import { durationDisplay } from "components/common/TimeUtils"; +import { fetcher } from "lib/GeneralUtils"; +import Head from "next/head"; +import NextLink from "next/link"; +import { useState } from "react"; +import useSWR from "swr"; + +interface OotSummaryRow { + repo: string; + successes: number; + failures: number; + total: number; + pass_rate: number; + avg_duration_s: number; + last_run: string; +} + +function PassRateChip({ rate }: { rate: number }) { + const pct = (rate * 100).toFixed(1) + "%"; + if (rate >= 0.95) return ; + if (rate >= 0.8) return ; + return ; +} + +function OotSummaryTable({ days }: { days: number }) { + const url = `/api/clickhouse/oot_summary?parameters=${encodeURIComponent( + JSON.stringify({ days: String(days) }) + )}`; + const { data, error } = useSWR(url, fetcher, { + refreshInterval: 60_000, + }); + + if (error) { + return ( + + Failed to load OOT summary: {error.message} + + ); + } + if (!data) { + return ; + } + if (data.length === 0) { + return ( + + No OOT CI results in the last {days} days. + + ); + } + + return ( + + + + + + Backend Repository + + + Pass Rate + + + Success + + + Failures + + + Total + + + Avg Duration + + + Last Run + + + + + {data.map((row) => { + const [org, repo] = row.repo.split("/"); + return ( + + + + {row.repo} + + + + + + {row.successes} + {row.failures} + {row.total} + + {durationDisplay(Math.round(row.avg_duration_s))} + + + {new Date(row.last_run).toLocaleString()} + + + ); + })} + +
+
+ ); +} + +export default function OotSummaryPage() { + const [days, setDays] = useState(7); + + return ( + <> + + Out-of-Tree CI Summary | PyTorch HUD + + + + Out-of-Tree CI Summary + + Time Range + + + + + + Cross-repo CI health overview. Repos sorted by pass rate (worst + first). Click a row to see the per-backend dashboard. + + + + + + ); +} From d952d48698f3fef852126c95ddb352f49e7fb21a Mon Sep 17 00:00:00 2001 From: Subin George Date: Mon, 4 May 2026 15:57:38 +0530 Subject: [PATCH 02/15] Sync reference implementation with RFC review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address @ZainRizvi's review on pytorch/rfcs#96: - Auth: X-Hud-Internal-Bot → dedicated X-OOT-Relay-Token header - Validation: removed schema validation from HUD (moved to relay) - Removed daily budget enforcement - DynamoDB: PutItem → UpdateItem to prevent null clobbering - DynamoKey: expanded to {repo}/{delivery_id}/{workflow_name}/{job_name}/{run_attempt} - Timestamps: use downstream-reported started_at/completed_at instead of now() - Timing metrics: only set queue_time/execution_time when non-null - ClickHouse schema: added job_name, run_attempt columns - Queries: select job_name, run_attempt as proper columns - Frontend: updated interfaces to include new fields --- .../default.oot_workflow_job/schema.sql | 8 +- .../oot_backend_dashboard/query.sql | 4 +- .../oot_pr_results/query.sql | 4 +- torchci/components/oot/OotPrSection.tsx | 2 + torchci/lib/oot/ootUtils.ts | 138 +++++++----------- torchci/pages/api/oot/results.ts | 22 +-- torchci/pages/oot/[org]/[repo].tsx | 3 + 7 files changed, 78 insertions(+), 103 deletions(-) diff --git a/clickhouse_db_schema/default.oot_workflow_job/schema.sql b/clickhouse_db_schema/default.oot_workflow_job/schema.sql index 08ae31a1f3..5426ba9bcb 100644 --- a/clickhouse_db_schema/default.oot_workflow_job/schema.sql +++ b/clickhouse_db_schema/default.oot_workflow_job/schema.sql @@ -8,12 +8,14 @@ CREATE TABLE default.oot_workflow_job `pytorch_head_sha` String COMMENT 'PyTorch PR commit SHA', `delivery_id` String COMMENT 'GitHub webhook delivery ID from L1 dispatch', `workflow_run_url` String COMMENT 'Link to downstream GHA workflow run', - `workflow_name` String COMMENT 'Downstream workflow name', + `workflow_name` String COMMENT 'Downstream workflow name (github.workflow)', + `job_name` String DEFAULT '' COMMENT 'Downstream job name (github.job)', + `run_attempt` UInt32 DEFAULT 1 COMMENT 'Workflow run attempt number (github.run_attempt)', `conclusion` String COMMENT 'success, failure, cancelled, timed_out (set on completed)', `queue_time` Nullable(Float64) COMMENT 'Relay-measured dispatch-to-in_progress time in seconds', `execution_time` Nullable(Float64) COMMENT 'Relay-measured in_progress-to-completed time in seconds', - `started_at` DateTime64(9) COMMENT 'ISO 8601 timestamp when record was created', - `completed_at` DateTime64(9) COMMENT 'ISO 8601 timestamp when job completed', + `started_at` DateTime64(9) COMMENT 'Downstream-reported timestamp when job started', + `completed_at` DateTime64(9) COMMENT 'Downstream-reported timestamp when job completed', `total_tests` UInt64 DEFAULT 0, `passed_tests` UInt64 DEFAULT 0, `failed_tests` UInt64 DEFAULT 0, diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/query.sql b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql index 4612315354..e89ac7fcc1 100644 --- a/torchci/clickhouse_queries/oot_backend_dashboard/query.sql +++ b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql @@ -1,7 +1,9 @@ SELECT pr_number, pytorch_head_sha, - workflow_name AS job_name, + workflow_name, + job_name, + run_attempt, status, conclusion, started_at, diff --git a/torchci/clickhouse_queries/oot_pr_results/query.sql b/torchci/clickhouse_queries/oot_pr_results/query.sql index a1da9f009f..6c856c3b3d 100644 --- a/torchci/clickhouse_queries/oot_pr_results/query.sql +++ b/torchci/clickhouse_queries/oot_pr_results/query.sql @@ -1,6 +1,8 @@ SELECT downstream_repo, - workflow_name AS job_name, + workflow_name, + job_name, + run_attempt, status, conclusion, duration_seconds, diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx index 8091ae5c32..e4df1076a0 100644 --- a/torchci/components/oot/OotPrSection.tsx +++ b/torchci/components/oot/OotPrSection.tsx @@ -21,7 +21,9 @@ import useSWR from "swr"; interface OotPrResult { downstream_repo: string; + workflow_name: string; job_name: string; + run_attempt: number; status: string; conclusion: string; duration_seconds: number; diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index 2861603300..2b4ff2b117 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -2,7 +2,6 @@ import { getDynamoClient } from "lib/dynamo"; const OOT_TABLE = "torchci-oot-workflow-job"; const MAX_PAYLOAD_BYTES = 2 * 1024 * 1024; // 2MB -const DAILY_BUDGET_PER_REPO = 1000; // ---- Types ---- @@ -19,6 +18,10 @@ export interface RelayWorkflow { conclusion?: string | null; name: string; url: string; + job_name?: string; + run_attempt?: number; + started_at?: string; + completed_at?: string; test_results?: any; } @@ -52,10 +55,12 @@ export interface OotWorkflowJobRecord { delivery_id: string; workflow_run_url: string; workflow_name: string; + job_name: string; + run_attempt: number; conclusion?: string; queue_time?: number | null; execution_time?: number | null; - started_at: string; + started_at?: string; completed_at?: string; total_tests?: number; passed_tests?: number; @@ -70,48 +75,10 @@ export interface OotWorkflowJobRecord { export function validatePayloadSize(bodyString: string): void { if (Buffer.byteLength(bodyString, "utf-8") > MAX_PAYLOAD_BYTES) { - throw new ApiError(400, "Payload exceeds 2MB limit"); + throw new ApiError(413, "Payload exceeds 2MB limit"); } } -export function validateRelayPayload(body: any): RelayPayload { - if (!body?.trusted?.verified_repo) { - throw new ApiError(400, "Missing trusted.verified_repo"); - } - const cb = body?.untrusted?.callback_payload; - if (!cb) { - throw new ApiError(400, "Missing untrusted.callback_payload"); - } - if (!cb.delivery_id) { - throw new ApiError(400, "Missing delivery_id"); - } - if (!cb.workflow?.status) { - throw new ApiError(400, "Missing workflow.status"); - } - if (!cb.workflow?.name) { - throw new ApiError(400, "Missing workflow.name"); - } - if ( - cb.workflow.status !== "in_progress" && - cb.workflow.status !== "completed" - ) { - throw new ApiError( - 400, - `Invalid workflow.status: ${cb.workflow.status}. Must be "in_progress" or "completed".` - ); - } - if ( - cb.workflow.status === "completed" && - !cb.workflow.conclusion - ) { - throw new ApiError( - 400, - "workflow.conclusion is required when status is completed" - ); - } - return body as RelayPayload; -} - // ---- Extraction ---- export function extractDynamoRecord( @@ -124,8 +91,9 @@ export function extractDynamoRecord( const upstreamRepo = cb.payload?.repository?.full_name ?? "pytorch/pytorch"; - const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}`; - const now = new Date().toISOString(); + const jobName = wf.job_name ?? "default"; + const runAttempt = wf.run_attempt ?? 1; + const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}/${jobName}/${runAttempt}`; const record: OotWorkflowJobRecord = { dynamoKey, @@ -137,14 +105,31 @@ export function extractDynamoRecord( delivery_id: cb.delivery_id, workflow_run_url: wf.url ?? "", workflow_name: wf.name, - queue_time: trusted.ci_metrics?.queue_time, - execution_time: trusted.ci_metrics?.execution_time, - started_at: now, + job_name: jobName, + run_attempt: runAttempt, }; + // Only set timing metrics when the relay provides a non-null value. + // in_progress sets queue_time; completed sets execution_time. + // Using UpdateItem ensures the completed callback doesn't clobber + // queue_time with null. + if (trusted.ci_metrics?.queue_time != null) { + record.queue_time = trusted.ci_metrics.queue_time; + } + if (trusted.ci_metrics?.execution_time != null) { + record.execution_time = trusted.ci_metrics.execution_time; + } + + // Use downstream-reported timestamps, not HUD wall-clock time + if (wf.started_at) { + record.started_at = wf.started_at; + } + if (wf.status === "completed") { record.conclusion = wf.conclusion ?? undefined; - record.completed_at = now; + if (wf.completed_at) { + record.completed_at = wf.completed_at; + } if (wf.test_results) { const tr = wf.test_results; @@ -156,53 +141,40 @@ export function extractDynamoRecord( record.failed_tests_json = JSON.stringify(tr.failures); } } - - if (typeof cb.workflow.url === "string" && cb.workflow.url) { - record.artifact_url = cb.workflow.url; - } } return record; } -// ---- Daily Budget ---- - -export async function checkDailyBudget(repo: string): Promise { - const client = getDynamoClient(); - const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD - const keyPrefix = `${repo}/`; - - // Use a scan with a filter to count today's records for this repo. - // In production, a GSI on (downstream_repo, started_at) would be more efficient. - const result = await client.query({ - TableName: OOT_TABLE, - KeyConditionExpression: - "begins_with(dynamoKey, :prefix)", - FilterExpression: "begins_with(started_at, :today)", - ExpressionAttributeValues: { - ":prefix": keyPrefix, - ":today": today, - }, - Select: "COUNT", - }); - - if ((result.Count ?? 0) >= DAILY_BUDGET_PER_REPO) { - throw new ApiError( - 429, - `Daily budget exceeded for ${repo} (${DAILY_BUDGET_PER_REPO} callbacks/day)` - ); - } -} - -// ---- DynamoDB Write ---- +// ---- DynamoDB Write (UpdateItem) ---- export async function writeToDynamo( record: OotWorkflowJobRecord ): Promise { const client = getDynamoClient(); - await client.put({ + + // Build SET expression dynamically — only set non-undefined fields. + // This prevents completed callbacks from clobbering in_progress-only + // fields (queue_time, started_at) with null. + const expressionParts: string[] = []; + const expressionValues: Record = {}; + const expressionNames: Record = {}; + + for (const [key, value] of Object.entries(record)) { + if (key === "dynamoKey" || value === undefined) continue; + const placeholder = `:v_${key}`; + const nameAlias = `#n_${key}`; + expressionParts.push(`${nameAlias} = ${placeholder}`); + expressionValues[placeholder] = value; + expressionNames[nameAlias] = key; + } + + await client.update({ TableName: OOT_TABLE, - Item: record, + Key: { dynamoKey: record.dynamoKey }, + UpdateExpression: `SET ${expressionParts.join(", ")}`, + ExpressionAttributeValues: expressionValues, + ExpressionAttributeNames: expressionNames, }); } diff --git a/torchci/pages/api/oot/results.ts b/torchci/pages/api/oot/results.ts index 51ac242164..2e469d95ca 100644 --- a/torchci/pages/api/oot/results.ts +++ b/torchci/pages/api/oot/results.ts @@ -1,11 +1,8 @@ import type { NextApiRequest, NextApiResponse } from "next"; -import { checkAuthWithApiToken } from "lib/auth/auth"; import { ApiError, validatePayloadSize, - validateRelayPayload, extractDynamoRecord, - checkDailyBudget, writeToDynamo, } from "lib/oot/ootUtils"; @@ -26,26 +23,21 @@ export default async function handler( } try { - // 1. Auth: x-hud-internal-bot header or session - const auth = await checkAuthWithApiToken(req, res); - if (!auth.ok) { + // 1. Auth: dedicated X-OOT-Relay-Token header + const relayToken = req.headers["x-oot-relay-token"]; + if (!relayToken || relayToken !== process.env.OOT_RELAY_TOKEN) { return res.status(401).json({ error: "Unauthorized" }); } - // 2. Payload size check + // 2. Payload size cap (safety net — relay should also enforce this) const rawBody = typeof req.body === "string" ? req.body : JSON.stringify(req.body); validatePayloadSize(rawBody); - // 3. Schema validation + // 3. Extract and write to DynamoDB via UpdateItem + // Schema validation is done by the relay before forwarding. const body = typeof req.body === "string" ? JSON.parse(req.body) : req.body; - const payload = validateRelayPayload(body); - - // 4. Daily budget check - await checkDailyBudget(payload.trusted.verified_repo); - - // 5. Extract and write to DynamoDB - const record = extractDynamoRecord(payload); + const record = extractDynamoRecord(body); await writeToDynamo(record); return res.status(200).json({ diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx index 1feab6354f..888499bcfa 100644 --- a/torchci/pages/oot/[org]/[repo].tsx +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -30,7 +30,9 @@ import useSWR from "swr"; interface OotJobRow { pr_number: number; pytorch_head_sha: string; + workflow_name: string; job_name: string; + run_attempt: number; status: string; conclusion: string; started_at: string; @@ -78,6 +80,7 @@ function JobChip({ const label = conclusionLabel(job.status, job.conclusion); const tooltipContent = [ `Job: ${job.job_name}`, + job.run_attempt > 1 ? `Attempt: ${job.run_attempt}` : null, `Duration: ${job.duration_seconds ? durationDisplay(Math.round(job.duration_seconds)) : "–"}`, job.total_tests ? `Tests: ${job.passed_tests}/${job.total_tests} passed` : null, job.queue_time != null ? `Queue: ${job.queue_time.toFixed(1)}s` : null, From 5a81a1c284912602eb0a33de646efe95a3f10a05 Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 12 May 2026 15:36:31 +0530 Subject: [PATCH 03/15] Source downstream_repo_level from trusted payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L2 relay now includes downstream_repo_level (L1–L4) in the trusted dict, determined from the allowlist rather than self-reported by downstream. Updated RelayTrusted, OotWorkflowJobRecord, and extractDynamoRecord to read it from trusted. --- torchci/lib/oot/ootUtils.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index 2b4ff2b117..93d9bca4f0 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -7,6 +7,7 @@ const MAX_PAYLOAD_BYTES = 2 * 1024 * 1024; // 2MB export interface RelayTrusted { verified_repo: string; + downstream_repo_level?: string; // "L1" | "L2" | "L3" | "L4" — relay-determined from allowlist ci_metrics?: { queue_time?: number | null; execution_time?: number | null; @@ -67,6 +68,7 @@ export interface OotWorkflowJobRecord { failed_tests?: number; skipped_tests?: number; failed_tests_json?: string; + downstream_repo_level?: string; artifact_url?: string; environment?: string; } @@ -109,6 +111,10 @@ export function extractDynamoRecord( run_attempt: runAttempt, }; + if (trusted.downstream_repo_level) { + record.downstream_repo_level = trusted.downstream_repo_level; + } + // Only set timing metrics when the relay provides a non-null value. // in_progress sets queue_time; completed sets execution_time. // Using UpdateItem ensures the completed callback doesn't clobber From d382149da83567a3261308cb2b2d7b428541364c Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 12 May 2026 15:50:58 +0530 Subject: [PATCH 04/15] Add check_run_id, run_id, schema_version and fix test-results key Align with updated L2 workflow dict: add schema_version, check_run_id, and run_id to RelayWorkflow, OotWorkflowJobRecord, ClickHouse schema, queries, and frontend interfaces. Switch dynamoKey from run_attempt to check_run_id for per-execution uniqueness. Fix test-results key to use the hyphenated form matching the L2 action. Update buildMatrix to prefer highest run_attempt for deduplication. --- .../default.oot_workflow_job/schema.sql | 2 ++ .../oot_backend_dashboard/query.sql | 2 ++ .../clickhouse_queries/oot_pr_results/query.sql | 2 ++ torchci/components/oot/OotPrSection.tsx | 2 ++ torchci/lib/oot/ootUtils.ts | 17 +++++++++++++---- torchci/pages/oot/[org]/[repo].tsx | 8 ++++++-- 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/clickhouse_db_schema/default.oot_workflow_job/schema.sql b/clickhouse_db_schema/default.oot_workflow_job/schema.sql index 5426ba9bcb..4fb9a5346b 100644 --- a/clickhouse_db_schema/default.oot_workflow_job/schema.sql +++ b/clickhouse_db_schema/default.oot_workflow_job/schema.sql @@ -10,6 +10,8 @@ CREATE TABLE default.oot_workflow_job `workflow_run_url` String COMMENT 'Link to downstream GHA workflow run', `workflow_name` String COMMENT 'Downstream workflow name (github.workflow)', `job_name` String DEFAULT '' COMMENT 'Downstream job name (github.job)', + `check_run_id` String DEFAULT '' COMMENT 'GitHub-assigned unique ID per job execution (job.check_run_id)', + `run_id` String DEFAULT '' COMMENT 'GitHub workflow run ID (github.run_id), same across retries', `run_attempt` UInt32 DEFAULT 1 COMMENT 'Workflow run attempt number (github.run_attempt)', `conclusion` String COMMENT 'success, failure, cancelled, timed_out (set on completed)', `queue_time` Nullable(Float64) COMMENT 'Relay-measured dispatch-to-in_progress time in seconds', diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/query.sql b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql index e89ac7fcc1..57131f88fa 100644 --- a/torchci/clickhouse_queries/oot_backend_dashboard/query.sql +++ b/torchci/clickhouse_queries/oot_backend_dashboard/query.sql @@ -3,6 +3,8 @@ SELECT pytorch_head_sha, workflow_name, job_name, + check_run_id, + run_id, run_attempt, status, conclusion, diff --git a/torchci/clickhouse_queries/oot_pr_results/query.sql b/torchci/clickhouse_queries/oot_pr_results/query.sql index 6c856c3b3d..11710adceb 100644 --- a/torchci/clickhouse_queries/oot_pr_results/query.sql +++ b/torchci/clickhouse_queries/oot_pr_results/query.sql @@ -2,6 +2,8 @@ SELECT downstream_repo, workflow_name, job_name, + check_run_id, + run_id, run_attempt, status, conclusion, diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx index e4df1076a0..db1672ad28 100644 --- a/torchci/components/oot/OotPrSection.tsx +++ b/torchci/components/oot/OotPrSection.tsx @@ -23,6 +23,8 @@ interface OotPrResult { downstream_repo: string; workflow_name: string; job_name: string; + check_run_id: string; + run_id: string; run_attempt: number; status: string; conclusion: string; diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index 93d9bca4f0..ff8bde0250 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -15,15 +15,18 @@ export interface RelayTrusted { } export interface RelayWorkflow { + schema_version?: string; status: string; conclusion?: string | null; name: string; url: string; job_name?: string; + check_run_id?: string; + run_id?: string; run_attempt?: number; started_at?: string; completed_at?: string; - test_results?: any; + "test-results"?: any; } export interface RelayCallbackPayload { @@ -57,6 +60,8 @@ export interface OotWorkflowJobRecord { workflow_run_url: string; workflow_name: string; job_name: string; + check_run_id: string; + run_id: string; run_attempt: number; conclusion?: string; queue_time?: number | null; @@ -94,8 +99,9 @@ export function extractDynamoRecord( cb.payload?.repository?.full_name ?? "pytorch/pytorch"; const jobName = wf.job_name ?? "default"; + const checkRunId = wf.check_run_id ?? "unknown"; const runAttempt = wf.run_attempt ?? 1; - const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}/${jobName}/${runAttempt}`; + const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}/${jobName}/${checkRunId}`; const record: OotWorkflowJobRecord = { dynamoKey, @@ -108,6 +114,8 @@ export function extractDynamoRecord( workflow_run_url: wf.url ?? "", workflow_name: wf.name, job_name: jobName, + check_run_id: checkRunId, + run_id: wf.run_id ?? "", run_attempt: runAttempt, }; @@ -137,8 +145,9 @@ export function extractDynamoRecord( record.completed_at = wf.completed_at; } - if (wf.test_results) { - const tr = wf.test_results; + const testResults = wf["test-results"]; + if (testResults) { + const tr = testResults; if (typeof tr.total === "number") record.total_tests = tr.total; if (typeof tr.passed === "number") record.passed_tests = tr.passed; if (typeof tr.failed === "number") record.failed_tests = tr.failed; diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx index 888499bcfa..89cf53a75c 100644 --- a/torchci/pages/oot/[org]/[repo].tsx +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -32,6 +32,8 @@ interface OotJobRow { pytorch_head_sha: string; workflow_name: string; job_name: string; + check_run_id: string; + run_id: string; run_attempt: number; status: string; conclusion: string; @@ -119,6 +121,8 @@ function buildMatrix(data: OotJobRow[]): { const prMap = new Map(); for (const job of data) { + // Group by job_name:run_id so reruns don't create duplicate columns + const groupKey = job.run_id ? `${job.job_name}:${job.run_id}` : job.job_name; jobNamesSet.add(job.job_name); let row = prMap.get(job.pr_number); if (!row) { @@ -129,9 +133,9 @@ function buildMatrix(data: OotJobRow[]): { }; prMap.set(job.pr_number, row); } - // Keep the latest result per job name + // Keep the latest attempt per job_name (highest run_attempt wins) const existing = row.jobs.get(job.job_name); - if (!existing || job.started_at > existing.started_at) { + if (!existing || job.run_attempt > existing.run_attempt) { row.jobs.set(job.job_name, job); } } From bd5268ace5e9d29416fe5fcb8b57db246266e78d Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 12 May 2026 16:33:42 +0530 Subject: [PATCH 05/15] Replace hardware names with generic placeholders in schema comment --- clickhouse_db_schema/default.oot_workflow_job/schema.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clickhouse_db_schema/default.oot_workflow_job/schema.sql b/clickhouse_db_schema/default.oot_workflow_job/schema.sql index 4fb9a5346b..43bdad83a4 100644 --- a/clickhouse_db_schema/default.oot_workflow_job/schema.sql +++ b/clickhouse_db_schema/default.oot_workflow_job/schema.sql @@ -24,7 +24,7 @@ CREATE TABLE default.oot_workflow_job `skipped_tests` UInt64 DEFAULT 0, `failed_tests_json` String DEFAULT '' COMMENT 'JSON array of failed/errored test details', `artifact_url` String DEFAULT '' COMMENT 'URL to downstream-hosted artifacts (logs, reports)', - `environment` String DEFAULT '' COMMENT 'JSON: {"cuda": "12.8", "device": "H100", ...}', + `environment` String DEFAULT '' COMMENT 'JSON: {"sdk": "", "device": "", ...}', `downstream_repo_level` String DEFAULT '' COMMENT 'Relay level at dispatch time: L2, L3, L4', `_inserted_at` DateTime MATERIALIZED now(), `repository_full_name` String ALIAS downstream_repo COMMENT 'Alias for consistency with workflow_job queries', From 5c278e26711af56b1d2bc649d94ffba5b505b6c6 Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 12 May 2026 16:36:48 +0530 Subject: [PATCH 06/15] Replace vendor name with generic placeholder in test params --- torchci/clickhouse_queries/oot_backend_dashboard/params.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchci/clickhouse_queries/oot_backend_dashboard/params.json b/torchci/clickhouse_queries/oot_backend_dashboard/params.json index 9354a8e528..360d321eb7 100644 --- a/torchci/clickhouse_queries/oot_backend_dashboard/params.json +++ b/torchci/clickhouse_queries/oot_backend_dashboard/params.json @@ -5,7 +5,7 @@ }, "tests": [ { - "repo": "intel/torch-xpu-ops", + "repo": "/", "days": "7" } ] From 7a45ca7354d937ae3fd93a8dfb21358019df59bc Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 12 May 2026 17:25:05 +0530 Subject: [PATCH 07/15] Fix test_results key name and align with L2 summary-only approach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L2 action uses test_results (underscore) in the workflow dict, not test-results (hyphen). Removed failures/failed_tests_json handling — the L2 action sends summary counts only; detailed results go via artifact_url. --- torchci/lib/oot/ootUtils.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index ff8bde0250..ae16cbfa30 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -26,7 +26,7 @@ export interface RelayWorkflow { run_attempt?: number; started_at?: string; completed_at?: string; - "test-results"?: any; + test_results?: any; } export interface RelayCallbackPayload { @@ -145,16 +145,12 @@ export function extractDynamoRecord( record.completed_at = wf.completed_at; } - const testResults = wf["test-results"]; - if (testResults) { - const tr = testResults; + if (wf.test_results) { + const tr = wf.test_results; if (typeof tr.total === "number") record.total_tests = tr.total; if (typeof tr.passed === "number") record.passed_tests = tr.passed; if (typeof tr.failed === "number") record.failed_tests = tr.failed; if (typeof tr.skipped === "number") record.skipped_tests = tr.skipped; - if (tr.failures) { - record.failed_tests_json = JSON.stringify(tr.failures); - } } } From 34e23e202267b28747205e5f7db9f855696ee16d Mon Sep 17 00:00:00 2001 From: Subin George Date: Wed, 13 May 2026 12:13:44 +0530 Subject: [PATCH 08/15] Add downstream_repo_level to OOT Summary page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface the relay-determined repo level (L1–L4) in the OOT Summary table so users can see each backend's integration tier at a glance. --- torchci/clickhouse_queries/oot_summary/query.sql | 1 + torchci/pages/oot/index.tsx | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/torchci/clickhouse_queries/oot_summary/query.sql b/torchci/clickhouse_queries/oot_summary/query.sql index f9ee865f22..3e92d385e5 100644 --- a/torchci/clickhouse_queries/oot_summary/query.sql +++ b/torchci/clickhouse_queries/oot_summary/query.sql @@ -1,5 +1,6 @@ SELECT downstream_repo AS repo, + anyLast(downstream_repo_level) AS downstream_repo_level, countIf(conclusion = 'success') AS successes, countIf(conclusion = 'failure') AS failures, count() AS total, diff --git a/torchci/pages/oot/index.tsx b/torchci/pages/oot/index.tsx index 6727e05111..8bb668927a 100644 --- a/torchci/pages/oot/index.tsx +++ b/torchci/pages/oot/index.tsx @@ -27,6 +27,7 @@ import useSWR from "swr"; interface OotSummaryRow { repo: string; + downstream_repo_level: string; successes: number; failures: number; total: number; @@ -76,6 +77,9 @@ function OotSummaryTable({ days }: { days: number }) { Backend Repository + + Level + Pass Rate @@ -106,6 +110,13 @@ function OotSummaryTable({ days }: { days: number }) { {row.repo} + + + From e9f6292b15191027578dbc6c6a1b447318f5821a Mon Sep 17 00:00:00 2001 From: Subin George Date: Fri, 15 May 2026 09:37:17 +0530 Subject: [PATCH 09/15] Address review feedback: fix HTTP status code and remove dead code - Changed 502 to 500 for DynamoDB write errors in results.ts (502 Bad Gateway implies a proxy; this API is the originating server) - Removed unused groupKey variable in buildMatrix() Addresses review feedback from @KarhouTam. --- torchci/pages/api/oot/results.ts | 2 +- torchci/pages/oot/[org]/[repo].tsx | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/torchci/pages/api/oot/results.ts b/torchci/pages/api/oot/results.ts index 2e469d95ca..297535b8b6 100644 --- a/torchci/pages/api/oot/results.ts +++ b/torchci/pages/api/oot/results.ts @@ -50,6 +50,6 @@ export default async function handler( return res.status(err.statusCode).json({ error: err.message }); } console.error("OOT results handler error:", err); - return res.status(502).json({ error: "Internal error writing to DynamoDB" }); + return res.status(500).json({ error: "Internal error writing to DynamoDB" }); } } diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx index 89cf53a75c..ce958e7e56 100644 --- a/torchci/pages/oot/[org]/[repo].tsx +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -121,8 +121,6 @@ function buildMatrix(data: OotJobRow[]): { const prMap = new Map(); for (const job of data) { - // Group by job_name:run_id so reruns don't create duplicate columns - const groupKey = job.run_id ? `${job.job_name}:${job.run_id}` : job.job_name; jobNamesSet.add(job.job_name); let row = prMap.get(job.pr_number); if (!row) { From 8fe2f2bd3e8ff0cb9f9e29f7aa3af27089402d54 Mon Sep 17 00:00:00 2001 From: Subin George Date: Fri, 15 May 2026 09:46:30 +0530 Subject: [PATCH 10/15] Extract shared conclusionColor/conclusionLabel to ootUtils Both [repo].tsx and OotPrSection.tsx had identical copies of these functions. Moved to lib/oot/ootUtils.ts and imported from there. Addresses review feedback from @KarhouTam. --- torchci/components/oot/OotPrSection.tsx | 24 +--------------------- torchci/lib/oot/ootUtils.ts | 27 +++++++++++++++++++++++++ torchci/pages/oot/[org]/[repo].tsx | 24 +--------------------- 3 files changed, 29 insertions(+), 46 deletions(-) diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx index db1672ad28..8e510e3d10 100644 --- a/torchci/components/oot/OotPrSection.tsx +++ b/torchci/components/oot/OotPrSection.tsx @@ -17,6 +17,7 @@ import { import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; import { durationDisplay } from "components/common/TimeUtils"; import { fetcher } from "lib/GeneralUtils"; +import { conclusionColor, conclusionLabel } from "lib/oot/ootUtils"; import useSWR from "swr"; interface OotPrResult { @@ -36,29 +37,6 @@ interface OotPrResult { execution_time: number | null; } -function conclusionColor( - status: string, - conclusion: string -): "success" | "error" | "warning" | "info" | "default" { - if (status === "in_progress") return "info"; - switch (conclusion) { - case "success": - return "success"; - case "failure": - return "error"; - case "cancelled": - case "timed_out": - return "warning"; - default: - return "default"; - } -} - -function conclusionLabel(status: string, conclusion: string): string { - if (status === "in_progress") return "running"; - return conclusion || status; -} - export default function OotPrSection({ prNumber }: { prNumber: number }) { const url = `/api/clickhouse/oot_pr_results?parameters=${encodeURIComponent( JSON.stringify({ pr: String(prNumber) }) diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index ae16cbfa30..f4233ab565 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -189,6 +189,33 @@ export async function writeToDynamo( }); } +// ---- UI Helpers ---- + +export type ChipColor = "success" | "error" | "warning" | "info" | "default"; + +export function conclusionColor( + status: string, + conclusion: string +): ChipColor { + if (status === "in_progress") return "info"; + switch (conclusion) { + case "success": + return "success"; + case "failure": + return "error"; + case "cancelled": + case "timed_out": + return "warning"; + default: + return "default"; + } +} + +export function conclusionLabel(status: string, conclusion: string): string { + if (status === "in_progress") return "running"; + return conclusion || status; +} + // ---- Error Helper ---- export class ApiError extends Error { diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx index ce958e7e56..986e34b0ca 100644 --- a/torchci/pages/oot/[org]/[repo].tsx +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -21,6 +21,7 @@ import { } from "@mui/material"; import { durationDisplay } from "components/common/TimeUtils"; import { fetcher } from "lib/GeneralUtils"; +import { conclusionColor, conclusionLabel } from "lib/oot/ootUtils"; import Head from "next/head"; import NextLink from "next/link"; import { useRouter } from "next/router"; @@ -50,29 +51,6 @@ interface OotJobRow { execution_time: number | null; } -function conclusionColor( - status: string, - conclusion: string -): "success" | "error" | "warning" | "info" | "default" { - if (status === "in_progress") return "info"; - switch (conclusion) { - case "success": - return "success"; - case "failure": - return "error"; - case "cancelled": - case "timed_out": - return "warning"; - default: - return "default"; - } -} - -function conclusionLabel(status: string, conclusion: string): string { - if (status === "in_progress") return "running"; - return conclusion || status; -} - function JobChip({ job, }: { From 5a7f77eeb55a2a380bb74a1e424d74068c889192 Mon Sep 17 00:00:00 2001 From: Subin George Date: Fri, 15 May 2026 09:59:57 +0530 Subject: [PATCH 11/15] Remove unused Skeleton import from OotPrSection --- torchci/components/oot/OotPrSection.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx index 8e510e3d10..b50598eef6 100644 --- a/torchci/components/oot/OotPrSection.tsx +++ b/torchci/components/oot/OotPrSection.tsx @@ -4,7 +4,6 @@ import { AccordionSummary, Chip, Link, - Skeleton, Stack, Table, TableBody, From 99f79864ee4513706735528fa3252c54aab09694 Mon Sep 17 00:00:00 2001 From: Subin George Date: Fri, 15 May 2026 10:22:17 +0530 Subject: [PATCH 12/15] Fix run_attempt type coercion and extract artifact_url The L2 relay action sends run_attempt as a string (from env var). Coerce to number with Number() to ensure consistent DynamoDB/ClickHouse types. Also add artifact_url to RelayWorkflow and extract it in extractDynamoRecord so downstream-provided artifact links flow through to DynamoDB and ClickHouse. --- torchci/lib/oot/ootUtils.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index f4233ab565..4b5fa54353 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -23,10 +23,11 @@ export interface RelayWorkflow { job_name?: string; check_run_id?: string; run_id?: string; - run_attempt?: number; + run_attempt?: number | string; started_at?: string; completed_at?: string; test_results?: any; + artifact_url?: string; } export interface RelayCallbackPayload { @@ -100,7 +101,7 @@ export function extractDynamoRecord( const jobName = wf.job_name ?? "default"; const checkRunId = wf.check_run_id ?? "unknown"; - const runAttempt = wf.run_attempt ?? 1; + const runAttempt = Number(wf.run_attempt ?? 1) || 1; const dynamoKey = `${trusted.verified_repo}/${cb.delivery_id}/${wf.name}/${jobName}/${checkRunId}`; const record: OotWorkflowJobRecord = { @@ -139,6 +140,10 @@ export function extractDynamoRecord( record.started_at = wf.started_at; } + if (wf.artifact_url) { + record.artifact_url = wf.artifact_url; + } + if (wf.status === "completed") { record.conclusion = wf.conclusion ?? undefined; if (wf.completed_at) { From 3ac25560b58ddb24c0777d78032d8208875252bb Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 19 May 2026 11:09:26 +0530 Subject: [PATCH 13/15] Run prettier to fix formatting --- torchci/components/oot/OotPrSection.tsx | 14 +++------- torchci/lib/oot/ootUtils.ts | 8 ++---- torchci/pages/api/oot/results.ts | 8 +++--- torchci/pages/oot/[org]/[repo].tsx | 34 ++++++++++++------------- torchci/pages/oot/index.tsx | 12 ++++----- 5 files changed, 33 insertions(+), 43 deletions(-) diff --git a/torchci/components/oot/OotPrSection.tsx b/torchci/components/oot/OotPrSection.tsx index b50598eef6..0829394c7c 100644 --- a/torchci/components/oot/OotPrSection.tsx +++ b/torchci/components/oot/OotPrSection.tsx @@ -1,3 +1,4 @@ +import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; import { Accordion, AccordionDetails, @@ -13,7 +14,6 @@ import { TableRow, Typography, } from "@mui/material"; -import ExpandMoreIcon from "@mui/icons-material/ExpandMore"; import { durationDisplay } from "components/common/TimeUtils"; import { fetcher } from "lib/GeneralUtils"; import { conclusionColor, conclusionLabel } from "lib/oot/ootUtils"; @@ -49,17 +49,11 @@ export default function OotPrSection({ prNumber }: { prNumber: number }) { const successCount = data.filter( (r) => r.status === "completed" && r.conclusion === "success" ).length; - const totalCompleted = data.filter( - (r) => r.status === "completed" - ).length; - const inProgress = data.filter( - (r) => r.status === "in_progress" - ).length; + const totalCompleted = data.filter((r) => r.status === "completed").length; + const inProgress = data.filter((r) => r.status === "in_progress").length; const summaryText = [ - totalCompleted > 0 - ? `${successCount}/${totalCompleted} passed` - : null, + totalCompleted > 0 ? `${successCount}/${totalCompleted} passed` : null, inProgress > 0 ? `${inProgress} running` : null, ] .filter(Boolean) diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index 4b5fa54353..04c85c1437 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -96,8 +96,7 @@ export function extractDynamoRecord( const cb = untrusted.callback_payload; const wf = cb.workflow; const pr = cb.payload?.pull_request; - const upstreamRepo = - cb.payload?.repository?.full_name ?? "pytorch/pytorch"; + const upstreamRepo = cb.payload?.repository?.full_name ?? "pytorch/pytorch"; const jobName = wf.job_name ?? "default"; const checkRunId = wf.check_run_id ?? "unknown"; @@ -198,10 +197,7 @@ export async function writeToDynamo( export type ChipColor = "success" | "error" | "warning" | "info" | "default"; -export function conclusionColor( - status: string, - conclusion: string -): ChipColor { +export function conclusionColor(status: string, conclusion: string): ChipColor { if (status === "in_progress") return "info"; switch (conclusion) { case "success": diff --git a/torchci/pages/api/oot/results.ts b/torchci/pages/api/oot/results.ts index 297535b8b6..51731eb89f 100644 --- a/torchci/pages/api/oot/results.ts +++ b/torchci/pages/api/oot/results.ts @@ -1,10 +1,10 @@ -import type { NextApiRequest, NextApiResponse } from "next"; import { ApiError, - validatePayloadSize, extractDynamoRecord, + validatePayloadSize, writeToDynamo, } from "lib/oot/ootUtils"; +import type { NextApiRequest, NextApiResponse } from "next"; export const config = { api: { @@ -50,6 +50,8 @@ export default async function handler( return res.status(err.statusCode).json({ error: err.message }); } console.error("OOT results handler error:", err); - return res.status(500).json({ error: "Internal error writing to DynamoDB" }); + return res + .status(500) + .json({ error: "Internal error writing to DynamoDB" }); } } diff --git a/torchci/pages/oot/[org]/[repo].tsx b/torchci/pages/oot/[org]/[repo].tsx index 986e34b0ca..48624f1e71 100644 --- a/torchci/pages/oot/[org]/[repo].tsx +++ b/torchci/pages/oot/[org]/[repo].tsx @@ -51,25 +51,29 @@ interface OotJobRow { execution_time: number | null; } -function JobChip({ - job, -}: { - job: OotJobRow; -}) { +function JobChip({ job }: { job: OotJobRow }) { const color = conclusionColor(job.status, job.conclusion); const label = conclusionLabel(job.status, job.conclusion); const tooltipContent = [ `Job: ${job.job_name}`, job.run_attempt > 1 ? `Attempt: ${job.run_attempt}` : null, - `Duration: ${job.duration_seconds ? durationDisplay(Math.round(job.duration_seconds)) : "–"}`, - job.total_tests ? `Tests: ${job.passed_tests}/${job.total_tests} passed` : null, + `Duration: ${ + job.duration_seconds + ? durationDisplay(Math.round(job.duration_seconds)) + : "–" + }`, + job.total_tests + ? `Tests: ${job.passed_tests}/${job.total_tests} passed` + : null, job.queue_time != null ? `Queue: ${job.queue_time.toFixed(1)}s` : null, ] .filter(Boolean) .join("\n"); return ( - {tooltipContent}}> + {tooltipContent}} + > - - {repoFullName} — OOT CI | PyTorch HUD - + {repoFullName} — OOT CI | PyTorch HUD - + {repoFullName} @@ -279,8 +277,8 @@ export default function OotBackendPage() { - Rows = PyTorch PRs, columns = downstream CI jobs. Click a chip to - open the workflow run. + Rows = PyTorch PRs, columns = downstream CI jobs. Click a chip to open + the workflow run. diff --git a/torchci/pages/oot/index.tsx b/torchci/pages/oot/index.tsx index 8bb668927a..49b30350ae 100644 --- a/torchci/pages/oot/index.tsx +++ b/torchci/pages/oot/index.tsx @@ -106,7 +106,11 @@ function OotSummaryTable({ days }: { days: number }) { return ( - + {row.repo} @@ -147,11 +151,7 @@ export default function OotSummaryPage() { Out-of-Tree CI Summary | PyTorch HUD - + Out-of-Tree CI Summary Time Range From 1d4abc592e405f117ca0828dfbf57cc4fbb18720 Mon Sep 17 00:00:00 2001 From: Subin George Date: Tue, 19 May 2026 11:42:26 +0530 Subject: [PATCH 14/15] Compute total_tests from passed+failed+skipped when total is absent The L2 relay action sends test_results with {passed, failed, skipped} but no total field. Compute total_tests as the sum when tr.total is not provided, preserving backward compatibility if total is present. --- torchci/lib/oot/ootUtils.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/torchci/lib/oot/ootUtils.ts b/torchci/lib/oot/ootUtils.ts index 04c85c1437..905628a296 100644 --- a/torchci/lib/oot/ootUtils.ts +++ b/torchci/lib/oot/ootUtils.ts @@ -151,10 +151,13 @@ export function extractDynamoRecord( if (wf.test_results) { const tr = wf.test_results; - if (typeof tr.total === "number") record.total_tests = tr.total; if (typeof tr.passed === "number") record.passed_tests = tr.passed; if (typeof tr.failed === "number") record.failed_tests = tr.failed; if (typeof tr.skipped === "number") record.skipped_tests = tr.skipped; + record.total_tests = + typeof tr.total === "number" + ? tr.total + : (tr.passed ?? 0) + (tr.failed ?? 0) + (tr.skipped ?? 0); } } From eb25fb02575b43580daafce22f8d045e7a88a5b0 Mon Sep 17 00:00:00 2001 From: Subin George Date: Wed, 20 May 2026 12:44:27 +0530 Subject: [PATCH 15/15] Fix prettier formatting in PR page OotPrSection --- torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx b/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx index 3d9cecd7fd..387bfc6a16 100644 --- a/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx +++ b/torchci/pages/[repoOwner]/[repoName]/pull/[prNumber].tsx @@ -124,9 +124,7 @@ function Page() { )} - {prNumber && ( - - )} + {prNumber && } );