Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions aws/lambda/clickhouse-replicator-dynamo/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"vllm-buildkite-agent-events": "vllm.vllm_buildkite_agents",
"vllm-buildkite-build-events": "vllm.vllm_buildkite_builds",
"vllm-buildkite-job-events": "vllm.vllm_buildkite_jobs",
"torchci-oot-workflow-job": "default.oot_workflow_job",
}


Expand Down
40 changes: 40 additions & 0 deletions clickhouse_db_schema/default.oot_workflow_job/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Results of out-of-tree (downstream) CI workflow jobs run against PyTorch PRs.
-- One logical row per (downstream_repo, delivery_id, dynamoKey); see the engine
-- note at the bottom for how repeated inserts of the same key are collapsed.
-- NOTE(review): presumably fed from the `torchci-oot-workflow-job` DynamoDB table
-- by the clickhouse-replicator-dynamo lambda — confirm against its table mapping.
CREATE TABLE default.oot_workflow_job
(
-- NOTE(review): presumably the key of the source DynamoDB item — confirm with the writer.
`dynamoKey` String,
-- Job lifecycle state. Readers of this table compare it against 'completed' and
-- 'in_progress'; the full set of values is defined by the writer, not by this schema.
`status` String,
`downstream_repo` String COMMENT 'Downstream repo org/name, from trusted.verified_repo',
`upstream_repo` String COMMENT 'Upstream repo, typically pytorch/pytorch',
`pr_number` UInt64 COMMENT 'PyTorch PR number',
`pytorch_head_sha` String COMMENT 'PyTorch PR commit SHA',
`delivery_id` String COMMENT 'GitHub webhook delivery ID from L1 dispatch',
`workflow_run_url` String COMMENT 'Link to downstream GHA workflow run',
`workflow_name` String COMMENT 'Downstream workflow name (github.workflow)',
`job_name` String DEFAULT '' COMMENT 'Downstream job name (github.job)',
`check_run_id` String DEFAULT '' COMMENT 'GitHub-assigned unique ID per job execution (job.check_run_id)',
`run_id` String DEFAULT '' COMMENT 'GitHub workflow run ID (github.run_id), same across retries',
`run_attempt` UInt32 DEFAULT 1 COMMENT 'Workflow run attempt number (github.run_attempt)',
`conclusion` String COMMENT 'success, failure, cancelled, timed_out (set on completed)',
-- queue_time / execution_time are Nullable: NULL means "not measured", which is
-- distinct from a measured duration of 0 seconds.
`queue_time` Nullable(Float64) COMMENT 'Relay-measured dispatch-to-in_progress time in seconds',
`execution_time` Nullable(Float64) COMMENT 'Relay-measured in_progress-to-completed time in seconds',
-- Non-nullable timestamps: a row inserted without a value gets the type default
-- (the epoch), which is what the duration_seconds alias below tests for.
`started_at` DateTime64(9) COMMENT 'Downstream-reported timestamp when job started',
`completed_at` DateTime64(9) COMMENT 'Downstream-reported timestamp when job completed',
-- Test counters default to 0 when the writer does not supply them.
-- NOTE(review): presumably total = passed + failed + skipped — not enforced here.
`total_tests` UInt64 DEFAULT 0,
`passed_tests` UInt64 DEFAULT 0,
`failed_tests` UInt64 DEFAULT 0,
`skipped_tests` UInt64 DEFAULT 0,
`failed_tests_json` String DEFAULT '' COMMENT 'JSON array of failed/errored test details',
`artifact_url` String DEFAULT '' COMMENT 'URL to downstream-hosted artifacts (logs, reports)',
`environment` String DEFAULT '' COMMENT 'JSON: {"sdk": "<version>", "device": "<hardware>", ...}',
`downstream_repo_level` String DEFAULT '' COMMENT 'Relay level at dispatch time: L2, L3, L4',
-- MATERIALIZED: computed by ClickHouse from now() when the row is inserted;
-- writers do not supply it and it is not returned by SELECT *.
`_inserted_at` DateTime MATERIALIZED now(),
-- ALIAS columns are not stored; the expression is evaluated at query time.
`repository_full_name` String ALIAS downstream_repo COMMENT 'Alias for consistency with workflow_job queries',
-- Seconds between started_at and completed_at, or 0 when completed_at still holds
-- the epoch value (i.e. was never set).
`duration_seconds` Float64 ALIAS if(completed_at = toDateTime64(0, 9), 0, dateDiff(second, started_at, completed_at)),
-- Data-skipping indexes for the filters used by the dashboard queries
-- (status, time window, PR number, downstream repo).
INDEX status_index status TYPE bloom_filter GRANULARITY 1,
INDEX started_at_index started_at TYPE minmax GRANULARITY 1,
INDEX completed_at_index completed_at TYPE minmax GRANULARITY 1,
INDEX pr_number_index pr_number TYPE bloom_filter GRANULARITY 1,
INDEX downstream_repo_index downstream_repo TYPE bloom_filter GRANULARITY 1
)
-- Replacing engine with no version column: rows sharing the same ORDER BY key are
-- collapsed into one during background merges, so readers that need the
-- deduplicated state must query with FINAL.
-- NOTE(review): with no version column the surviving row is expected to be the
-- most recently inserted one — confirm this matches the replicator's update flow.
ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
ORDER BY (downstream_repo, delivery_id, dynamoKey)
SETTINGS index_granularity = 8192
12 changes: 12 additions & 0 deletions torchci/clickhouse_queries/oot_backend_dashboard/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"params": {
"repo": "String",
"days": "UInt64"
},
"tests": [
{
"repo": "<company>/<repo>",
"days": "7"
}
]
}
29 changes: 29 additions & 0 deletions torchci/clickhouse_queries/oot_backend_dashboard/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- Most recent out-of-tree workflow jobs for a single downstream repo.
--
-- Parameters:
--   repo  downstream repository (org/name), matched exactly against downstream_repo
--   days  look-back window in days, applied to started_at
--
-- FINAL is required because the table uses a Replacing engine: without it, rows
-- that have been re-inserted for the same key but not yet merged would show up
-- more than once.
SELECT
    pr_number,
    pytorch_head_sha,
    workflow_name,
    job_name,
    check_run_id,
    run_id,
    run_attempt,
    status,
    conclusion,
    started_at,
    completed_at,
    duration_seconds,
    total_tests,
    passed_tests,
    failed_tests,
    skipped_tests,
    workflow_run_url,
    artifact_url,
    queue_time,
    execution_time
FROM
    default.oot_workflow_job FINAL
WHERE
    downstream_repo = {repo: String}
    AND started_at > now() - INTERVAL {days: UInt64} DAY
ORDER BY
    started_at DESC,
    -- Tie-breakers: started_at is not unique (jobs of one workflow run often start
    -- together), and with LIMIT the choice of which tied rows survive would
    -- otherwise be arbitrary and could change between refreshes.
    run_id DESC,
    run_attempt DESC,
    check_run_id DESC,
    job_name ASC
LIMIT 500
10 changes: 10 additions & 0 deletions torchci/clickhouse_queries/oot_pr_results/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"params": {
"pr": "UInt64"
},
"tests": [
{
"pr": "179565"
}
]
}
21 changes: 21 additions & 0 deletions torchci/clickhouse_queries/oot_pr_results/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- All out-of-tree workflow jobs reported for one PyTorch PR, grouped by
-- downstream repo with the newest jobs first inside each repo.
--
-- Parameters:
--   pr  PyTorch PR number, matched against pr_number
--
-- FINAL collapses re-inserted rows for the same key (Replacing engine) so each
-- job is returned once.
SELECT
    downstream_repo,
    workflow_name,
    job_name,
    check_run_id,
    run_id,
    run_attempt,
    status,
    conclusion,
    duration_seconds,
    workflow_run_url,
    artifact_url,
    started_at,
    queue_time,
    execution_time
FROM
    default.oot_workflow_job FINAL
WHERE
    pr_number = {pr: UInt64}
ORDER BY
    downstream_repo ASC,
    started_at DESC,
    -- Tie-breakers keep the row order stable across the UI's periodic re-fetches;
    -- rows with equal (downstream_repo, started_at) would otherwise come back in
    -- arbitrary order.
    workflow_name ASC,
    job_name ASC,
    run_id DESC,
    run_attempt DESC,
    check_run_id DESC
10 changes: 10 additions & 0 deletions torchci/clickhouse_queries/oot_summary/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"params": {
"days": "UInt64"
},
"tests": [
{
"days": "7"
}
]
}
18 changes: 18 additions & 0 deletions torchci/clickhouse_queries/oot_summary/query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Per-downstream-repo health summary over completed out-of-tree jobs.
--
-- Parameters:
--   days  look-back window in days, applied to started_at
--
-- Output (one row per downstream repo, worst pass rate first):
--   repo                   downstream repository (org/name)
--   downstream_repo_level  relay level recorded on the most recently started job
--   successes / failures   jobs with conclusion 'success' / 'failure'
--   total                  all completed jobs in the window (any conclusion)
--   pass_rate              successes / total
--   avg_duration_s         mean of duration_seconds
--   last_run               latest started_at in the window
SELECT
    downstream_repo AS repo,
    -- argMax instead of anyLast: anyLast returns whichever row happens to be read
    -- last, so the reported level could flip between runs when a repo changed
    -- level inside the window. argMax pins it to the most recently started job.
    argMax(downstream_repo_level, started_at) AS downstream_repo_level,
    countIf(conclusion = 'success') AS successes,
    countIf(conclusion = 'failure') AS failures,
    count() AS total,
    if(total > 0, successes / total, 0) AS pass_rate,
    avg(duration_seconds) AS avg_duration_s,
    max(started_at) AS last_run
FROM
    default.oot_workflow_job FINAL
WHERE
    started_at > now() - INTERVAL {days: UInt64} DAY
    AND status = 'completed'
GROUP BY
    repo
ORDER BY
    pass_rate ASC,
    -- Tie-breaker so repos with equal pass rates keep a stable order.
    repo ASC
145 changes: 145 additions & 0 deletions torchci/components/oot/OotPrSection.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import ExpandMoreIcon from "@mui/icons-material/ExpandMore";
import {
Accordion,
AccordionDetails,
AccordionSummary,
Chip,
Link,
Stack,
Table,
TableBody,
TableCell,
TableContainer,
TableHead,
TableRow,
Typography,
} from "@mui/material";
import { durationDisplay } from "components/common/TimeUtils";
import { fetcher } from "lib/GeneralUtils";
import { conclusionColor, conclusionLabel } from "lib/oot/ootUtils";
import useSWR from "swr";

/**
 * One row returned by the `oot_pr_results` ClickHouse query: a single
 * downstream (out-of-tree) workflow job reported for a PyTorch PR.
 */
interface OotPrResult {
  // --- Which job this is ---
  /** Downstream repo in org/name form. */
  downstream_repo: string;
  /** Downstream workflow name. */
  workflow_name: string;
  /** Downstream job name; may be empty. */
  job_name: string;
  /** GitHub workflow run ID; stays the same across retries. */
  run_id: string;
  /** Workflow run attempt number. */
  run_attempt: number;
  /** GitHub-assigned ID for this job execution; may be empty. */
  check_run_id: string;

  // --- Outcome ---
  /** Lifecycle state; this component distinguishes "completed" and "in_progress". */
  status: string;
  /** Final result once completed, e.g. "success" or "failure". */
  conclusion: string;

  // --- Timing ---
  /** Downstream-reported start timestamp. */
  started_at: string;
  /** Seconds between start and completion; 0 while the job has no completion time. */
  duration_seconds: number;
  /** Relay-measured dispatch-to-in_progress time in seconds, or null if not measured. */
  queue_time: number | null;
  /** Relay-measured in_progress-to-completed time in seconds, or null if not measured. */
  execution_time: number | null;

  // --- Links ---
  /** Link to the downstream GitHub Actions workflow run. */
  workflow_run_url: string;
  /** URL to downstream-hosted artifacts; may be empty. */
  artifact_url: string;
}

/**
 * Returns `url` unchanged when it parses as an absolute http(s) URL, otherwise
 * null.
 *
 * The run/artifact URLs rendered below are reported by downstream repositories,
 * so they are treated as untrusted: anything that is not plain http(s) (for
 * example a `javascript:` URL) must never reach an `href`.
 */
function safeHttpUrl(url: string): string | null {
  if (!url) return null;
  try {
    const { protocol } = new URL(url);
    return protocol === "http:" || protocol === "https:" ? url : null;
  } catch {
    // Not an absolute URL at all.
    return null;
  }
}

/**
 * Collapsible "Out-of-Tree Backends" section for a PR page.
 *
 * Fetches the `oot_pr_results` query for `prNumber` (re-polled every 60s) and
 * renders one table row per downstream job with its status chip, duration and
 * links. Renders nothing while loading, on fetch error, or when the PR has no
 * out-of-tree results, so the section simply does not appear in those cases.
 *
 * @param prNumber PyTorch PR number whose out-of-tree results are shown.
 */
export default function OotPrSection({ prNumber }: { prNumber: number }) {
  const url = `/api/clickhouse/oot_pr_results?parameters=${encodeURIComponent(
    JSON.stringify({ pr: String(prNumber) })
  )}`;
  const { data, error } = useSWR<OotPrResult[]>(url, fetcher, {
    refreshInterval: 60_000,
  });

  // Best-effort section: hide it entirely rather than surfacing an error state.
  if (error || !data || data.length === 0) return null;

  const successCount = data.filter(
    (r) => r.status === "completed" && r.conclusion === "success"
  ).length;
  const totalCompleted = data.filter((r) => r.status === "completed").length;
  const inProgress = data.filter((r) => r.status === "in_progress").length;

  // Empty when every row is in some other state (neither completed nor in
  // progress); the header then omits the parenthetical instead of showing "()".
  const summaryText = [
    totalCompleted > 0 ? `${successCount}/${totalCompleted} passed` : null,
    inProgress > 0 ? `${inProgress} running` : null,
  ]
    .filter(Boolean)
    .join(", ");

  return (
    <Accordion defaultExpanded={false} sx={{ mt: 2 }}>
      <AccordionSummary expandIcon={<ExpandMoreIcon />}>
        <Stack direction="row" spacing={1} alignItems="center">
          <Typography variant="subtitle1">
            <strong>Out-of-Tree Backends</strong>
          </Typography>
          {summaryText && (
            <Typography variant="body2" color="text.secondary">
              ({summaryText})
            </Typography>
          )}
        </Stack>
      </AccordionSummary>
      <AccordionDetails>
        <TableContainer>
          <Table size="small">
            <TableHead>
              <TableRow>
                <TableCell>
                  <strong>Backend</strong>
                </TableCell>
                <TableCell>
                  <strong>Job</strong>
                </TableCell>
                <TableCell align="center">
                  <strong>Status</strong>
                </TableCell>
                <TableCell align="right">
                  <strong>Duration</strong>
                </TableCell>
                <TableCell>
                  <strong>Links</strong>
                </TableCell>
              </TableRow>
            </TableHead>
            <TableBody>
              {data.map((row, i) => {
                // Only link out to well-formed http(s) URLs (see safeHttpUrl).
                const runUrl = safeHttpUrl(row.workflow_run_url);
                const artifactUrl = safeHttpUrl(row.artifact_url);
                return (
                  // Rows hold no local state, so the positional key is sufficient.
                  <TableRow key={i} hover>
                    <TableCell>{row.downstream_repo}</TableCell>
                    <TableCell>{row.job_name}</TableCell>
                    <TableCell align="center">
                      <Chip
                        label={conclusionLabel(row.status, row.conclusion)}
                        color={conclusionColor(row.status, row.conclusion)}
                        size="small"
                      />
                    </TableCell>
                    <TableCell align="right">
                      {/* duration_seconds is 0 until the job has completed */}
                      {row.duration_seconds
                        ? durationDisplay(Math.round(row.duration_seconds))
                        : "–"}
                    </TableCell>
                    <TableCell>
                      <Stack direction="row" spacing={1}>
                        {runUrl && (
                          <Link
                            href={runUrl}
                            target="_blank"
                            rel="noopener noreferrer"
                            variant="body2"
                          >
                            Run
                          </Link>
                        )}
                        {artifactUrl && (
                          <Link
                            href={artifactUrl}
                            target="_blank"
                            rel="noopener noreferrer"
                            variant="body2"
                          >
                            Artifacts
                          </Link>
                        )}
                      </Stack>
                    </TableCell>
                  </TableRow>
                );
              })}
            </TableBody>
          </Table>
        </TableContainer>
      </AccordionDetails>
    </Accordion>
  );
}
Loading