Skip to content

Commit 7b1159e

Browse files
authored
MarQS reserve concurrency system & queue priority for resuming/retrying (#1715)
* run engine v1: orgs are no longer considered for concurrency
* Add reserve concurrency concept to allow waiting to resume parent tasks to release concurrency at the env level for child tasks to use (or else there is a deadlock). WIP recursive tasks
* child tasks inherit the queue timestamp from their parent tasks to prioritize completing child tasks based on when their parent started
* handle reserve concurrency with recursive deadlocks
* Finish docs update for concurrency
* Some fixes from badge conflict resolution
* WIP priority queues
* Implement MarQS priority queues
* Fix the migrations
1 parent c9f7ea8 commit 7b1159e

File tree

49 files changed

+4257
-1473
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+4257
-1473
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
import { useIsImpersonating } from "~/hooks/useOrganizations";
2+
import { useHasAdminAccess } from "~/hooks/useUser";
3+
import { Button } from "../primitives/Buttons";
4+
import { Dialog, DialogContent, DialogHeader, DialogTrigger } from "../primitives/Dialog";
5+
import { Cog6ToothIcon } from "@heroicons/react/20/solid";
6+
import { type loader } from "~/routes/resources.taskruns.$runParam.debug";
7+
import { UseDataFunctionReturn, useTypedFetcher } from "remix-typedjson";
8+
import { useEffect } from "react";
9+
import { Spinner } from "../primitives/Spinner";
10+
import * as Property from "~/components/primitives/PropertyTable";
11+
import { ClipboardField } from "../primitives/ClipboardField";
12+
import { MarQSShortKeyProducer } from "~/v3/marqs/marqsKeyProducer";
13+
14+
/**
 * Renders a "Debug run" button that opens the run debugging dialog.
 *
 * Renders nothing unless `useHasAdminAccess()` or `useIsImpersonating()`
 * returns a truthy value.
 *
 * @param friendlyId - Identifier of the run; forwarded to `DebugRunDialog`
 *   and used to build the dialog's React key.
 */
export function AdminDebugRun({ friendlyId }: { friendlyId: string }) {
  // Both hooks run on every render, before any early exit.
  const isAdmin = useHasAdminAccess();
  const impersonating = useIsImpersonating();
  const canDebug = isAdmin || impersonating;

  if (canDebug) {
    const dialogKey = `debug-${friendlyId}`;

    return (
      <Dialog key={dialogKey}>
        <DialogTrigger asChild>
          <Button variant="tertiary/small" LeadingIcon={Cog6ToothIcon}>
            Debug run
          </Button>
        </DialogTrigger>
        <DebugRunDialog friendlyId={friendlyId} />
      </Dialog>
    );
  }

  return null;
}
33+
34+
/**
 * Dialog body for the run debugger: wraps `DebugRunContent` in a
 * `DialogContent` with vertical overflow scrolling and fixed sizing classes.
 *
 * @param friendlyId - Identifier of the run; forwarded to `DebugRunContent`.
 */
export function DebugRunDialog({ friendlyId }: { friendlyId: string }) {
  const contentClassName = "overflow-y-auto sm:h-[80vh] sm:max-h-[80vh] sm:max-w-[50vw]";

  return (
    <DialogContent key="debug" className={contentClassName}>
      <DebugRunContent friendlyId={friendlyId} />
    </DialogContent>
  );
}
44+
45+
/**
 * Loads the debug payload for a run and renders it under a "Debugging run" header.
 *
 * The payload is requested from `/resources/taskruns/${friendlyId}/debug`
 * through a typed fetcher whenever `friendlyId` changes.
 *
 * @param friendlyId - Identifier of the run, interpolated into the resource URL.
 */
function DebugRunContent({ friendlyId }: { friendlyId: string }) {
  const fetcher = useTypedFetcher<typeof loader>();

  useEffect(() => {
    fetcher.load(`/resources/taskruns/${friendlyId}/debug`);
  }, [friendlyId]);

  // Spinner while the fetcher is loading, the data table once data exists,
  // otherwise the failure message.
  // NOTE(review): before the first load starts the fetcher is presumably idle with
  // no data, so the failure message may flash briefly — confirm.
  const renderBody = () => {
    if (fetcher.state === "loading") {
      return (
        <div className="grid place-items-center p-6">
          <Spinner />
        </div>
      );
    }

    if (fetcher.data) {
      return <DebugRunData {...fetcher.data} />;
    }

    return <>Failed to get run debug data</>;
  };

  return (
    <>
      <DialogHeader>Debugging run</DialogHeader>
      {renderBody()}
    </>
  );
}
68+
69+
/**
 * Property table of MarQS keys, copyable commands (GET / ZRANGE / SMEMBERS /
 * ZRANGEBYSCORE) and concurrency figures for a single run.
 *
 * Keys come from `MarQSShortKeyProducer`, and each one is then prefixed with
 * `marqs:` before being displayed.
 *
 * @param run - The run; `id`, `queue`, `concurrencyKey` and `runtimeEnvironment` are read.
 * @param queueConcurrencyLimit - Shown as-is, or "Not set" when nullish.
 * @param queueCurrentConcurrency - Shown as-is, or "0" when nullish.
 * @param envConcurrencyLimit - Shown as-is, or "Not set" when nullish.
 * @param envCurrentConcurrency - Shown as-is, or "0" when nullish.
 * @param queueReserveConcurrency - Shown as-is, or "0" when nullish.
 * @param envReserveConcurrency - Shown as-is, or "0" when nullish.
 */
function DebugRunData({
  run,
  queueConcurrencyLimit,
  queueCurrentConcurrency,
  envConcurrencyLimit,
  envCurrentConcurrency,
  queueReserveConcurrency,
  envReserveConcurrency,
}: UseDataFunctionReturn<typeof loader>) {
  const prefix = "marqs:";
  const keys = new MarQSShortKeyProducer(prefix);

  const withPrefix = (key: string) => `${prefix}${key}`;

  const env = run.runtimeEnvironment;
  const concurrencyKey = run.concurrencyKey ?? undefined;

  // Compute each prefixed key once; several rows reuse the same key.
  const messageKey = withPrefix(keys.messageKey(run.id));
  const queueKey = withPrefix(keys.queueKey(env, run.queue, concurrencyKey));
  const queueCurrentConcurrencyKey = withPrefix(
    keys.queueCurrentConcurrencyKey(env, run.queue, concurrencyKey)
  );
  const queueReserveConcurrencyKey = withPrefix(
    keys.queueReserveConcurrencyKeyFromQueue(keys.queueKey(env, run.queue, concurrencyKey))
  );
  const queueConcurrencyLimitKey = withPrefix(keys.queueConcurrencyLimitKey(env, run.queue));
  const envCurrentConcurrencyKey = withPrefix(keys.envCurrentConcurrencyKey(env));
  const envReserveConcurrencyKey = withPrefix(keys.envReserveConcurrencyKey(env.id));
  const envConcurrencyLimitKey = withPrefix(keys.envConcurrencyLimitKey(env));
  const sharedQueueKey = withPrefix(keys.envSharedQueueKey(env));

  // One labelled row whose value is rendered in a copy-to-clipboard field.
  const copyRow = (label: string, value: string) => (
    <Property.Item>
      <Property.Label>{label}</Property.Label>
      <Property.Value className="flex items-center gap-2">
        <ClipboardField value={value} variant="tertiary/small" iconButton />
      </Property.Value>
    </Property.Item>
  );

  return (
    <Property.Table>
      {copyRow("ID", run.id)}
      {copyRow("Message key", messageKey)}
      {copyRow("GET message", `GET ${messageKey}`)}
      {copyRow("Queue key", queueKey)}
      {copyRow("Get queue set", `ZRANGE ${queueKey} 0 -1`)}
      {copyRow("Queue current concurrency key", queueCurrentConcurrencyKey)}

      {copyRow("Get queue current concurrency", `SMEMBERS ${queueCurrentConcurrencyKey}`)}
      <Property.Item>
        <Property.Label>Queue current concurrency</Property.Label>
        <Property.Value className="flex items-center gap-2">
          <span>{queueCurrentConcurrency ?? "0"}</span>
        </Property.Value>
      </Property.Item>
      {copyRow("Queue reserve concurrency key", queueReserveConcurrencyKey)}

      {copyRow("Get queue reserve concurrency", `SMEMBERS ${queueReserveConcurrencyKey}`)}
      <Property.Item>
        <Property.Label>Queue reserve concurrency</Property.Label>
        <Property.Value className="flex items-center gap-2">
          <span>{queueReserveConcurrency ?? "0"}</span>
        </Property.Value>
      </Property.Item>
      {copyRow("Queue concurrency limit key", queueConcurrencyLimitKey)}
      {copyRow("GET queue concurrency limit", `GET ${queueConcurrencyLimitKey}`)}
      <Property.Item>
        <Property.Label>Queue concurrency limit</Property.Label>
        <Property.Value className="flex items-center gap-2">
          <span>{queueConcurrencyLimit ?? "Not set"}</span>
        </Property.Value>
      </Property.Item>
      {copyRow("Env current concurrency key", envCurrentConcurrencyKey)}
      {copyRow("Get env current concurrency", `SMEMBERS ${envCurrentConcurrencyKey}`)}
      <Property.Item>
        <Property.Label>Env current concurrency</Property.Label>
        <Property.Value className="flex items-center gap-2">
          <span>{envCurrentConcurrency ?? "0"}</span>
        </Property.Value>
      </Property.Item>
      {copyRow("Env reserve concurrency key", envReserveConcurrencyKey)}
      {copyRow("Get env reserve concurrency", `SMEMBERS ${envReserveConcurrencyKey}`)}
      <Property.Item>
        <Property.Label>Env reserve concurrency</Property.Label>
        <Property.Value className="flex items-center gap-2">
          <span>{envReserveConcurrency ?? "0"}</span>
        </Property.Value>
      </Property.Item>
      {copyRow("Env concurrency limit key", envConcurrencyLimitKey)}
      {copyRow("GET env concurrency limit", `GET ${envConcurrencyLimitKey}`)}
      <Property.Item>
        <Property.Label>Env concurrency limit</Property.Label>
        <Property.Value className="flex items-center gap-2">
          <span>{envConcurrencyLimit ?? "Not set"}</span>
        </Property.Value>
      </Property.Item>
      {/* Bare key, like every other "... key" row; the row below reads it with
          ZRANGEBYSCORE, so a GET command does not belong here. */}
      {copyRow("Shared queue key", sharedQueueKey)}
      {copyRow(
        "Get shared queue set",
        `ZRANGEBYSCORE ${sharedQueueKey} -inf ${Date.now()} WITHSCORES`
      )}
    </Property.Table>
  );
}

apps/webapp/app/components/runs/v3/BatchStatus.tsx

+9-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1-
import { CheckCircleIcon } from "@heroicons/react/20/solid";
1+
import { CheckCircleIcon, XCircleIcon } from "@heroicons/react/20/solid";
22
import { BatchTaskRunStatus } from "@trigger.dev/database";
33
import assertNever from "assert-never";
44
import { Spinner } from "~/components/primitives/Spinner";
55
import { cn } from "~/utils/cn";
66

7-
export const allBatchStatuses = ["PENDING", "COMPLETED"] as const satisfies Readonly<
7+
export const allBatchStatuses = ["PENDING", "COMPLETED", "ABORTED"] as const satisfies Readonly<
88
Array<BatchTaskRunStatus>
99
>;
1010

1111
const descriptions: Record<BatchTaskRunStatus, string> = {
1212
PENDING: "The batch has child runs that have not yet completed.",
1313
COMPLETED: "All the batch child runs have finished.",
14+
ABORTED: "The batch was aborted because some child tasks could not be triggered.",
1415
};
1516

1617
export function descriptionForBatchStatus(status: BatchTaskRunStatus): string {
@@ -50,6 +51,8 @@ export function BatchStatusIcon({
5051
return <Spinner className={cn(batchStatusColor(status), className)} />;
5152
case "COMPLETED":
5253
return <CheckCircleIcon className={cn(batchStatusColor(status), className)} />;
54+
case "ABORTED":
55+
return <XCircleIcon className={cn(batchStatusColor(status), className)} />;
5356
default: {
5457
assertNever(status);
5558
}
@@ -62,6 +65,8 @@ export function batchStatusColor(status: BatchTaskRunStatus): string {
6265
return "text-pending";
6366
case "COMPLETED":
6467
return "text-success";
68+
case "ABORTED":
69+
return "text-error";
6570
default: {
6671
assertNever(status);
6772
}
@@ -74,6 +79,8 @@ export function batchStatusTitle(status: BatchTaskRunStatus): string {
7479
return "In progress";
7580
case "COMPLETED":
7681
return "Completed";
82+
case "ABORTED":
83+
return "Aborted";
7784
default: {
7885
assertNever(status);
7986
}

apps/webapp/app/env.server.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ const EnvironmentSchema = z.object({
357357
MARQS_AVAILABLE_CAPACITY_BIAS: z.coerce.number().default(0.3),
358358
MARQS_QUEUE_AGE_RANDOMIZATION_BIAS: z.coerce.number().default(0.25),
359359
MARQS_REUSE_SNAPSHOT_COUNT: z.coerce.number().int().default(0),
360-
MARQS_MAXIMUM_ORG_COUNT: z.coerce.number().int().optional(),
360+
MARQS_MAXIMUM_ENV_COUNT: z.coerce.number().int().optional(),
361361

362362
PROD_TASK_HEARTBEAT_INTERVAL_MS: z.coerce.number().int().optional(),
363363

0 commit comments

Comments
 (0)