Skip to content

Commit bf4c6e9

Browse files
authored
feat(webapp): trace deployment outcomes and compute template creation (#4015)
Deploy success/failure wasn't easily observable: compute template creation was only logged, and the terminal deployment statuses (deployed / failed / timed out) weren't traced — so deploy health couldn't be seen without querying the database.

This adds two spans:

- `compute.template.create` around template creation at finalize, tagging the resolved mode and per-preset outcome. `resolveMode` now returns its decision (mode + reason) so the span can record why a mode was chosen.
- `deployment.outcome` via a small shared helper (`recordDeploymentOutcome`) emitted at every terminal-status write — finalize (deployed), fail / index-failed / background-worker (failed), and timeout (timed out) — so deploy success/failure is queryable by status and reason.

The helper is best-effort (org/project/env enrichment where cheaply available) and never throws, so telemetry can't break a deploy.
1 parent bb92935 commit bf4c6e9

7 files changed

Lines changed: 230 additions & 74 deletions

apps/webapp/app/v3/services/computeTemplateCreation.server.ts

Lines changed: 115 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,26 @@ import { isOrgMigrated } from "~/runEngine/concerns/computeMigration.server";
1313
import { backingForQueue, workerRegionRegistry } from "~/v3/workerRegions.server";
1414
import { globalFlagsRegistry } from "~/v3/globalFlagsRegistry.server";
1515
import { getEntitlement } from "~/services/platform.v3.server";
16+
import { startActiveSpan, attributesFromAuthenticatedEnv } from "~/v3/tracer.server";
1617

1718
type TemplateCreationMode = "required" | "shadow" | "skip";
1819

20+
// Why the mode was chosen — slices the compute.template.create span by path.
21+
type TemplateModeReason =
22+
| "no-client"
23+
| "no-project"
24+
| "microvm-native"
25+
| "migrated"
26+
| "compute-access"
27+
| "rollout"
28+
| "none";
29+
30+
type ResolvedTemplateMode = {
31+
mode: TemplateCreationMode;
32+
migrated: boolean;
33+
reason: TemplateModeReason;
34+
};
35+
1936
type ResolvedPreset = {
2037
name: MachinePresetName;
2138
cpu: number;
@@ -60,89 +77,116 @@ export class ComputeTemplateCreationService {
6077
prisma: PrismaClientOrTransaction;
6178
writer?: WritableStreamDefaultWriter;
6279
}): Promise<void> {
63-
const mode = await this.resolveMode(options.projectId, options.prisma);
80+
return startActiveSpan("compute.template.create", async (span) => {
81+
const { mode, migrated, reason } = await this.resolveMode(
82+
options.projectId,
83+
options.prisma
84+
);
6485

65-
if (mode === "skip") {
66-
return;
67-
}
86+
span.setAttributes({
87+
...attributesFromAuthenticatedEnv(options.authenticatedEnv),
88+
"compute.template.mode": mode,
89+
"compute.template.migrated": migrated,
90+
"compute.template.reason": reason,
91+
"compute.template.deployment_id": options.deploymentFriendlyId,
92+
"compute.template.presets_total": this.presets.length,
93+
"compute.template.presets_required": this.requiredPresets.size,
94+
});
6895

69-
if (mode === "shadow") {
70-
this.createTemplate(options.imageReference, { background: true })
71-
.then((outcome) => {
72-
if (outcome.error) {
73-
logger.error("Shadow template creation failed", {
96+
if (mode === "skip") {
97+
span.setAttribute("compute.template.result", "skipped");
98+
return;
99+
}
100+
101+
if (mode === "shadow") {
102+
// Shadow is fire-and-forget (background build), so the span only records
103+
// that it was dispatched — the build outcome lands server-side later.
104+
span.setAttribute("compute.template.result", "shadow_dispatched");
105+
this.createTemplate(options.imageReference, { background: true })
106+
.then((outcome) => {
107+
if (outcome.error) {
108+
logger.error("Shadow template creation failed", {
109+
id: options.deploymentFriendlyId,
110+
imageReference: options.imageReference,
111+
error: outcome.error,
112+
});
113+
}
114+
})
115+
.catch((error) => {
116+
logger.error("Shadow template creation threw unexpectedly", {
74117
id: options.deploymentFriendlyId,
75118
imageReference: options.imageReference,
76-
error: outcome.error,
119+
error: error instanceof Error ? error.message : String(error),
77120
});
78-
}
79-
})
80-
.catch((error) => {
81-
logger.error("Shadow template creation threw unexpectedly", {
82-
id: options.deploymentFriendlyId,
83-
imageReference: options.imageReference,
84-
error: error instanceof Error ? error.message : String(error),
85121
});
86-
});
87-
return;
88-
}
89-
90-
// Required mode
91-
if (options.writer) {
92-
try {
93-
await options.writer.write(
94-
`event: log\ndata: ${JSON.stringify({ message: "Building compute template..." })}\n\n`
95-
);
96-
} catch {
97-
// Stream may be closed if client disconnected - continue with template creation
122+
return;
98123
}
99-
}
100-
101-
logger.info("Creating compute template (required mode)", {
102-
id: options.deploymentFriendlyId,
103-
imageReference: options.imageReference,
104-
presets: this.presets.map((p) => p.name),
105-
requiredPresets: [...this.requiredPresets],
106-
});
107124

108-
const outcome = await this.createTemplate(options.imageReference);
109-
const failureMessage = this.failureMessageForRequiredMode(
110-
outcome,
111-
options.deploymentFriendlyId,
112-
options.imageReference
113-
);
125+
// Required mode
126+
if (options.writer) {
127+
try {
128+
await options.writer.write(
129+
`event: log\ndata: ${JSON.stringify({ message: "Building compute template..." })}\n\n`
130+
);
131+
} catch {
132+
// Stream may be closed if client disconnected - continue with template creation
133+
}
134+
}
114135

115-
if (failureMessage) {
116-
logger.error("Compute template creation failed", {
136+
logger.info("Creating compute template (required mode)", {
117137
id: options.deploymentFriendlyId,
118138
imageReference: options.imageReference,
119-
error: failureMessage,
139+
presets: this.presets.map((p) => p.name),
140+
requiredPresets: [...this.requiredPresets],
120141
});
121142

122-
const failService = new FailDeploymentService();
123-
await failService.call(options.authenticatedEnv, options.deploymentFriendlyId, {
124-
error: {
125-
name: "TemplateCreationFailed",
126-
message: `Failed to create compute template: ${failureMessage}`,
127-
},
128-
});
143+
const outcome = await this.createTemplate(options.imageReference);
144+
span.setAttribute("compute.template.presets_built", outcome.results.length);
129145

130-
throw new ServiceValidationError(`Compute template creation failed: ${failureMessage}`);
131-
}
146+
const failureMessage = this.failureMessageForRequiredMode(
147+
outcome,
148+
options.deploymentFriendlyId,
149+
options.imageReference
150+
);
151+
152+
if (failureMessage) {
153+
span.setAttributes({
154+
"compute.template.result": "failed",
155+
"compute.template.failure": failureMessage,
156+
});
132157

133-
logger.info("Compute template created", {
134-
id: options.deploymentFriendlyId,
135-
imageReference: options.imageReference,
136-
results: outcome.results.length,
158+
logger.error("Compute template creation failed", {
159+
id: options.deploymentFriendlyId,
160+
imageReference: options.imageReference,
161+
error: failureMessage,
162+
});
163+
164+
const failService = new FailDeploymentService();
165+
await failService.call(options.authenticatedEnv, options.deploymentFriendlyId, {
166+
error: {
167+
name: "TemplateCreationFailed",
168+
message: `Failed to create compute template: ${failureMessage}`,
169+
},
170+
});
171+
172+
throw new ServiceValidationError(`Compute template creation failed: ${failureMessage}`);
173+
}
174+
175+
span.setAttribute("compute.template.result", "created");
176+
logger.info("Compute template created", {
177+
id: options.deploymentFriendlyId,
178+
imageReference: options.imageReference,
179+
results: outcome.results.length,
180+
});
137181
});
138182
}
139183

140184
async resolveMode(
141185
projectId: string,
142186
prisma: PrismaClientOrTransaction
143-
): Promise<TemplateCreationMode> {
187+
): Promise<ResolvedTemplateMode> {
144188
if (!this.client) {
145-
return "skip";
189+
return { mode: "skip", migrated: false, reason: "no-client" };
146190
}
147191

148192
const project = await prisma.project.findFirst({
@@ -158,11 +202,11 @@ export class ComputeTemplateCreationService {
158202
});
159203

160204
if (!project) {
161-
return "skip";
205+
return { mode: "skip", migrated: false, reason: "no-project" };
162206
}
163207

164208
if (project.defaultWorkerGroup?.workloadType === "MICROVM") {
165-
return "required";
209+
return { mode: "required", migrated: false, reason: "microvm-native" };
166210
}
167211

168212
// Migrated orgs route runs to the compute backing even though their stored
@@ -194,22 +238,26 @@ export class ComputeTemplateCreationService {
194238
}
195239
if (migrated) {
196240
// required => template built at deploy (deploy fails on error); off => shadow.
197-
return decision.flags?.computeMigrationRequireTemplate ? "required" : "shadow";
241+
return {
242+
mode: decision.flags?.computeMigrationRequireTemplate ? "required" : "shadow",
243+
migrated: true,
244+
reason: "migrated",
245+
};
198246
}
199247
}
200248

201249
const hasComputeAccess = await resolveComputeAccess(prisma, project.organization.featureFlags);
202250

203251
if (hasComputeAccess) {
204-
return "shadow";
252+
return { mode: "shadow", migrated: false, reason: "compute-access" };
205253
}
206254

207255
const rolloutPct = Number(env.COMPUTE_TEMPLATE_SHADOW_ROLLOUT_PCT ?? "0");
208256
if (rolloutPct > 0 && Math.random() * 100 < rolloutPct) {
209-
return "shadow";
257+
return { mode: "shadow", migrated: false, reason: "rollout" };
210258
}
211259

212-
return "skip";
260+
return { mode: "skip", migrated: false, reason: "none" };
213261
}
214262

215263
async createTemplate(

apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
} from "./createBackgroundWorker.server";
1616
import { findOrCreateBackgroundWorker } from "./createDeploymentBackgroundWorkerV4/findOrCreateBackgroundWorker.server";
1717
import { TimeoutDeploymentService } from "./timeoutDeployment.server";
18+
import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server";
1819
import { env } from "~/env.server";
1920

2021
export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
@@ -111,7 +112,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
111112
if (findOrCreateError instanceof ServiceValidationError) {
112113
// `#failBackgroundWorkerDeployment` already throws its argument; the
113114
// outer `throw` covers the non-SVE branch.
114-
await this.#failBackgroundWorkerDeployment(deployment, findOrCreateError);
115+
await this.#failBackgroundWorkerDeployment(deployment, findOrCreateError, environment);
115116
}
116117
throw findOrCreateError;
117118
}
@@ -144,7 +145,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
144145

145146
const serviceError = new ServiceValidationError("Error creating background worker files");
146147

147-
await this.#failBackgroundWorkerDeployment(deployment, serviceError);
148+
await this.#failBackgroundWorkerDeployment(deployment, serviceError, environment);
148149

149150
throw serviceError;
150151
}
@@ -167,7 +168,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
167168
error: resourcesError.message,
168169
});
169170

170-
await this.#failBackgroundWorkerDeployment(deployment, resourcesError);
171+
await this.#failBackgroundWorkerDeployment(deployment, resourcesError, environment);
171172
throw resourcesError;
172173
}
173174

@@ -179,7 +180,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
179180
"Error creating background worker resources"
180181
);
181182

182-
await this.#failBackgroundWorkerDeployment(deployment, serviceError);
183+
await this.#failBackgroundWorkerDeployment(deployment, serviceError, environment);
183184

184185
throw serviceError;
185186
}
@@ -206,7 +207,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
206207
error: schedulesError.message,
207208
});
208209

209-
await this.#failBackgroundWorkerDeployment(deployment, schedulesError);
210+
await this.#failBackgroundWorkerDeployment(deployment, schedulesError, environment);
210211
throw schedulesError;
211212
}
212213

@@ -220,7 +221,7 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
220221

221222
const serviceError = new ServiceValidationError("Error syncing declarative schedules");
222223

223-
await this.#failBackgroundWorkerDeployment(deployment, serviceError);
224+
await this.#failBackgroundWorkerDeployment(deployment, serviceError, environment);
224225

225226
throw serviceError;
226227
}
@@ -264,7 +265,11 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
264265
});
265266
}
266267

267-
async #failBackgroundWorkerDeployment(deployment: WorkerDeployment, error: Error) {
268+
async #failBackgroundWorkerDeployment(
269+
deployment: WorkerDeployment,
270+
error: Error,
271+
environment: AuthenticatedEnvironment
272+
) {
268273
// Guarded BUILDING → FAILED transition, symmetric with the BUILDING → DEPLOYING
269274
// transition in `call()`. With idempotent retries, two attempts can run side-by-side;
270275
// without the predicate, one attempt's failure could downgrade the deployment after
@@ -297,6 +302,16 @@ export class CreateDeploymentBackgroundWorkerServiceV4 extends BaseService {
297302
// sibling attempt may have just enqueued it as part of a successful
298303
// BUILDING → DEPLOYING transition.
299304
await TimeoutDeploymentService.dequeue(deployment.id, this._prisma);
305+
306+
recordDeploymentOutcome({
307+
status: "FAILED",
308+
deploymentFriendlyId: deployment.friendlyId,
309+
organizationId: environment.organizationId,
310+
projectId: environment.projectId,
311+
environmentId: environment.id,
312+
environmentType: environment.type,
313+
reason: error.message,
314+
});
300315
}
301316

302317
throw error;

apps/webapp/app/v3/services/deploymentIndexFailed.server.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { BaseService } from "./baseService.server";
33
import { logger } from "~/services/logger.server";
44
import { type WorkerDeploymentStatus } from "@trigger.dev/database";
55
import { DeploymentService } from "./deployment.server";
6+
import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server";
67

78
const FINAL_DEPLOYMENT_STATUSES: WorkerDeploymentStatus[] = [
89
"CANCELED",
@@ -74,6 +75,16 @@ export class DeploymentIndexFailed extends BaseService {
7475
},
7576
});
7677

78+
recordDeploymentOutcome({
79+
status: "FAILED",
80+
deploymentFriendlyId: deployment.friendlyId,
81+
organizationId: deployment.environment.project.organizationId,
82+
projectId: deployment.environment.projectId,
83+
environmentId: deployment.environmentId,
84+
environmentType: deployment.environment.type,
85+
reason: error.message,
86+
});
87+
7788
const deploymentService = new DeploymentService();
7889
await deploymentService
7990
.appendToEventLog(deployment.environment.project, failedDeployment, [

apps/webapp/app/v3/services/failDeployment.server.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { type WorkerDeploymentStatus } from "@trigger.dev/database";
55
import { type FailDeploymentRequestBody } from "@trigger.dev/core/v3/schemas";
66
import { type AuthenticatedEnvironment } from "~/services/apiAuth.server";
77
import { DeploymentService } from "./deployment.server";
8+
import { recordDeploymentOutcome } from "./recordDeploymentOutcome.server";
89

910
export const FINAL_DEPLOYMENT_STATUSES: WorkerDeploymentStatus[] = [
1011
"CANCELED",
@@ -51,6 +52,16 @@ export class FailDeploymentService extends BaseService {
5152
},
5253
});
5354

55+
recordDeploymentOutcome({
56+
status: "FAILED",
57+
deploymentFriendlyId: friendlyId,
58+
organizationId: authenticatedEnv.organizationId,
59+
projectId: authenticatedEnv.projectId,
60+
environmentId: authenticatedEnv.id,
61+
environmentType: authenticatedEnv.type,
62+
reason: params.error.message,
63+
});
64+
5465
const deploymentService = new DeploymentService();
5566
await deploymentService
5667
.appendToEventLog(authenticatedEnv.project, failedDeployment, [

0 commit comments

Comments
 (0)