Skip to content

Commit 34b69e1

Browse files
committed
fix(webapp,run-engine,scripts): harden v2 cross-table tests and tighten the recovery query
- updateMetadata cross-table test: wrap the body in try/finally so stopFlushing always runs and the flush loop cannot bleed into later tests on a failure path.
- cancelling cross-table cancel-cascade test: poll for the child CANCELED status with a deadline instead of a fixed 1s sleep, to de-flake it under slow CI.
- recover-stuck-runs: constrain each UNION branch by id = ANY(runIds) so the recovery query scans only candidate rows instead of unioning both full tables before the join.
1 parent 3418d9d commit 34b69e1

3 files changed

Lines changed: 109 additions & 98 deletions

File tree

apps/webapp/test/updateMetadata.test.ts

Lines changed: 99 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,98 +1306,105 @@ describe("UpdateMetadataService.call", () => {
13061306
logLevel: "debug",
13071307
});
13081308

1309-
const organization = await prisma.organization.create({
1310-
data: { title: "test", slug: "test" },
1311-
});
1312-
const project = await prisma.project.create({
1313-
data: { name: "test", slug: "test", organizationId: organization.id, externalRef: "test" },
1314-
});
1315-
const runtimeEnvironment = await prisma.runtimeEnvironment.create({
1316-
data: {
1317-
slug: "test",
1318-
type: "DEVELOPMENT",
1319-
projectId: project.id,
1320-
organizationId: organization.id,
1321-
apiKey: "test",
1322-
pkApiKey: "test",
1323-
shortcode: "test",
1324-
},
1325-
});
1326-
1327-
// Legacy parent (cuid id) lives in TaskRun. This is the mixed-window
1328-
// hierarchy: an org flips runTableV2 on while a pre-flip parent is live,
1329-
// and its post-flip child mints a ksuid into task_run_v2.
1330-
const parentId = RunId.generate();
1331-
expect(isKsuidId(parentId.id)).toBe(false);
1332-
const parentTaskRun = await prisma.taskRun.create({
1333-
data: {
1334-
id: parentId.id,
1335-
friendlyId: parentId.friendlyId,
1336-
taskIdentifier: "my-task",
1337-
payload: "{}",
1338-
payloadType: "application/json",
1339-
traceId: "t",
1340-
spanId: "s",
1341-
queue: "test",
1342-
runtimeEnvironmentId: runtimeEnvironment.id,
1343-
projectId: project.id,
1344-
organizationId: organization.id,
1345-
environmentType: "DEVELOPMENT",
1346-
engine: "V2",
1347-
},
1348-
});
1349-
1350-
// v2 child (ksuid id) lives in task_run_v2 and points at the legacy
1351-
// parent by the scalar parentTaskRunId (no cross-table FK).
1352-
const childId = RunId.generateKsuid();
1353-
expect(isKsuidId(childId.id)).toBe(true);
1354-
await prisma.taskRunV2.create({
1355-
data: {
1356-
id: childId.id,
1357-
friendlyId: childId.friendlyId,
1358-
taskIdentifier: "my-child-task",
1359-
payload: "{}",
1360-
payloadType: "application/json",
1361-
traceId: "t",
1362-
spanId: "s",
1363-
queue: "test",
1364-
runtimeEnvironmentId: runtimeEnvironment.id,
1365-
projectId: project.id,
1366-
organizationId: organization.id,
1367-
environmentType: "DEVELOPMENT",
1368-
engine: "V2",
1369-
parentTaskRunId: parentTaskRun.id,
1370-
},
1371-
});
1372-
1373-
// The child applies metadata.parent operations. Pre-fix, the table-bound
1374-
// parentTaskRun relation resolved null (parent is in the OTHER table), so
1375-
// the ops fell back to the child's own id — corrupting the child and
1376-
// never touching the parent.
1377-
await service.call(childId.id, {
1378-
parentOperations: [
1379-
{ type: "set", key: "foo", value: "bar" },
1380-
{ type: "append", key: "bar", value: "baz" },
1381-
],
1382-
});
1383-
1384-
// Wait for the buffered operations to flush.
1385-
await setTimeout(1000);
1386-
1387-
// The PARENT (in TaskRun) must have received the operations.
1388-
const updatedParent = await prisma.taskRun.findFirst({ where: { id: parentTaskRun.id } });
1389-
expect(
1390-
await parsePacket({
1391-
data: updatedParent?.metadata ?? undefined,
1392-
dataType: updatedParent?.metadataType ?? "application/json",
1393-
})
1394-
).toEqual({ foo: "bar", bar: ["baz"] });
1395-
1396-
// The CHILD (in task_run_v2) must NOT have been polluted with parent ops.
1397-
const updatedChild = await prisma.taskRunV2.findFirst({ where: { id: childId.id } });
1398-
expect(updatedChild?.metadata ?? null).toBeNull();
1399-
1400-
service.stopFlushing();
1309+
try {
1310+
const organization = await prisma.organization.create({
1311+
data: { title: "test", slug: "test" },
1312+
});
1313+
const project = await prisma.project.create({
1314+
data: {
1315+
name: "test",
1316+
slug: "test",
1317+
organizationId: organization.id,
1318+
externalRef: "test",
1319+
},
1320+
});
1321+
const runtimeEnvironment = await prisma.runtimeEnvironment.create({
1322+
data: {
1323+
slug: "test",
1324+
type: "DEVELOPMENT",
1325+
projectId: project.id,
1326+
organizationId: organization.id,
1327+
apiKey: "test",
1328+
pkApiKey: "test",
1329+
shortcode: "test",
1330+
},
1331+
});
1332+
1333+
// Legacy parent (cuid id) lives in TaskRun. This is the mixed-window
1334+
// hierarchy: an org flips runTableV2 on while a pre-flip parent is live,
1335+
// and its post-flip child mints a ksuid into task_run_v2.
1336+
const parentId = RunId.generate();
1337+
expect(isKsuidId(parentId.id)).toBe(false);
1338+
const parentTaskRun = await prisma.taskRun.create({
1339+
data: {
1340+
id: parentId.id,
1341+
friendlyId: parentId.friendlyId,
1342+
taskIdentifier: "my-task",
1343+
payload: "{}",
1344+
payloadType: "application/json",
1345+
traceId: "t",
1346+
spanId: "s",
1347+
queue: "test",
1348+
runtimeEnvironmentId: runtimeEnvironment.id,
1349+
projectId: project.id,
1350+
organizationId: organization.id,
1351+
environmentType: "DEVELOPMENT",
1352+
engine: "V2",
1353+
},
1354+
});
1355+
1356+
// v2 child (ksuid id) lives in task_run_v2 and points at the legacy
1357+
// parent by the scalar parentTaskRunId (no cross-table FK).
1358+
const childId = RunId.generateKsuid();
1359+
expect(isKsuidId(childId.id)).toBe(true);
1360+
await prisma.taskRunV2.create({
1361+
data: {
1362+
id: childId.id,
1363+
friendlyId: childId.friendlyId,
1364+
taskIdentifier: "my-child-task",
1365+
payload: "{}",
1366+
payloadType: "application/json",
1367+
traceId: "t",
1368+
spanId: "s",
1369+
queue: "test",
1370+
runtimeEnvironmentId: runtimeEnvironment.id,
1371+
projectId: project.id,
1372+
organizationId: organization.id,
1373+
environmentType: "DEVELOPMENT",
1374+
engine: "V2",
1375+
parentTaskRunId: parentTaskRun.id,
1376+
},
1377+
});
1378+
1379+
// The child applies metadata.parent operations. Pre-fix, the table-bound
1380+
// parentTaskRun relation resolved null (parent is in the OTHER table), so
1381+
// the ops fell back to the child's own id — corrupting the child and
1382+
// never touching the parent.
1383+
await service.call(childId.id, {
1384+
parentOperations: [
1385+
{ type: "set", key: "foo", value: "bar" },
1386+
{ type: "append", key: "bar", value: "baz" },
1387+
],
1388+
});
1389+
1390+
// Wait for the buffered operations to flush.
1391+
await setTimeout(1000);
1392+
1393+
// The PARENT (in TaskRun) must have received the operations.
1394+
const updatedParent = await prisma.taskRun.findFirst({ where: { id: parentTaskRun.id } });
1395+
expect(
1396+
await parsePacket({
1397+
data: updatedParent?.metadata ?? undefined,
1398+
dataType: updatedParent?.metadataType ?? "application/json",
1399+
})
1400+
).toEqual({ foo: "bar", bar: ["baz"] });
1401+
1402+
// The CHILD (in task_run_v2) must NOT have been polluted with parent ops.
1403+
const updatedChild = await prisma.taskRunV2.findFirst({ where: { id: childId.id } });
1404+
expect(updatedChild?.metadata ?? null).toBeNull();
1405+
} finally {
1406+
service.stopFlushing();
1407+
}
14011408
}
14021409
);
14031410
});

internal-packages/run-engine/src/engine/tests/cancelling.test.ts

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,14 @@ describe("RunEngine cancelling", () => {
330330
reason: "Cancelled by the user",
331331
});
332332

333-
// The child cancellation is enqueued as a job; give the worker a moment.
334-
await setTimeout(1000);
335-
336-
const childData = await engine.getRunExecutionData({ runId: childRun.id });
333+
// The child cancellation is enqueued as a job; wait for the worker to process it
334+
// (poll instead of a fixed sleep so the test isn't flaky under slow CI).
335+
let childData = await engine.getRunExecutionData({ runId: childRun.id });
336+
const deadline = Date.now() + 5_000;
337+
while (childData?.run.status !== "CANCELED" && Date.now() < deadline) {
338+
await setTimeout(50);
339+
childData = await engine.getRunExecutionData({ runId: childRun.id });
340+
}
337341
expect(childData?.run.status).toBe("CANCELED");
338342
} finally {
339343
await engine.quit();

scripts/recover-stuck-runs.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,9 @@ async function main() {
220220
r."concurrencyKey"
221221
FROM "TaskRunExecutionSnapshot" s
222222
INNER JOIN (
223-
SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM "TaskRun"
223+
SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM "TaskRun" WHERE id = ANY(${runIds})
224224
UNION
225-
SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM task_run_v2
225+
SELECT id, "organizationId", "projectId", "runtimeEnvironmentId", "taskIdentifier", "queue", "concurrencyKey" FROM task_run_v2 WHERE id = ANY(${runIds})
226226
) r ON r.id = s."runId"
227227
WHERE s."runId" = ANY(${runIds})
228228
AND s."isValid" = true

0 commit comments

Comments
 (0)