From 18ee97c55bfe70af21428df61cf33c30058d3041 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 11:19:51 +0000 Subject: [PATCH 1/8] [world-vercel] Switch event endpoints to v4 wire format (#2055) Signed-off-by: Peter Wielander --- .changeset/v4-events-client.md | 5 + packages/world-vercel/src/events-v4.test.ts | 210 +++++ packages/world-vercel/src/events-v4.ts | 524 ++++++++++++ packages/world-vercel/src/events.test.ts | 346 ++++++++ packages/world-vercel/src/events.ts | 849 +++++++++++--------- packages/world-vercel/src/frames.test.ts | 217 +++++ packages/world-vercel/src/frames.ts | 130 +++ packages/world-vercel/src/refs.test.ts | 333 -------- packages/world-vercel/src/refs.ts | 352 -------- 9 files changed, 1912 insertions(+), 1054 deletions(-) create mode 100644 .changeset/v4-events-client.md create mode 100644 packages/world-vercel/src/events-v4.test.ts create mode 100644 packages/world-vercel/src/events-v4.ts create mode 100644 packages/world-vercel/src/events.test.ts create mode 100644 packages/world-vercel/src/frames.test.ts create mode 100644 packages/world-vercel/src/frames.ts delete mode 100644 packages/world-vercel/src/refs.test.ts delete mode 100644 packages/world-vercel/src/refs.ts diff --git a/.changeset/v4-events-client.md b/.changeset/v4-events-client.md new file mode 100644 index 0000000000..99cfda1950 --- /dev/null +++ b/.changeset/v4-events-client.md @@ -0,0 +1,5 @@ +--- +"@workflow/world-vercel": minor +--- + +New internal API format: separately encode event metadata from user payloads. Eliminates the need for calling separate endpoints for ref resolution, which improves performance especially on longer runs. 
diff --git a/packages/world-vercel/src/events-v4.test.ts b/packages/world-vercel/src/events-v4.test.ts new file mode 100644 index 0000000000..ccdc803cb4 --- /dev/null +++ b/packages/world-vercel/src/events-v4.test.ts @@ -0,0 +1,210 @@ +import { + EntityConflictError, + RunExpiredError, + ThrottleError, + TooEarlyError, + WorkflowWorldError, +} from '@workflow/errors'; +import { encode } from 'cbor-x'; +import { MockAgent } from 'undici'; +import { describe, expect, it } from 'vitest'; +import { + createWorkflowRunEventV4, + getWorkflowRunEventsV4, + throwForErrorResponse, +} from './events-v4.js'; +import { encodeFrame, V4_FRAME_CONTENT_TYPE } from './frames.js'; + +/** + * The v4 client must preserve the typed-error contract of the v3 + * `makeRequest` path — the workflow runtime branches on these types + * (`RunExpiredError.is`, `TooEarlyError.is`, the 404 → HookNotFoundError + * translation in events.ts) for core retry/terminal-state control flow. + */ +describe('throwForErrorResponse', () => { + const call = ( + status: number, + body = '{"message":"boom"}', + headers: Record = {} + ) => throwForErrorResponse(status, headers, body, 'createEvent', 'http://x'); + + it('maps 409 to EntityConflictError', () => { + expect(() => call(409)).toThrowError(EntityConflictError); + }); + + it('maps 410 to RunExpiredError (terminal run — runtime must not retry)', () => { + expect(() => call(410)).toThrowError(RunExpiredError); + }); + + it('maps 425 to TooEarlyError with retryAfter from the header', () => { + try { + call(425, '{"message":"too early"}', { 'retry-after': '7' }); + expect.unreachable(); + } catch (err) { + expect(TooEarlyError.is(err)).toBe(true); + expect((err as TooEarlyError).retryAfter).toBe(7); + } + }); + + it('maps 429 to ThrottleError with retryAfter from the header', () => { + try { + call(429, '{"message":"slow down"}', { 'retry-after': '30' }); + expect.unreachable(); + } catch (err) { + expect(ThrottleError.is(err)).toBe(true); + expect((err as 
ThrottleError).retryAfter).toBe(30); + } + }); + + it('maps 404 to WorkflowWorldError with status (hook → HookNotFoundError translation keys off this)', () => { + try { + call(404, '{"message":"hook not found","code":"not_found"}'); + expect.unreachable(); + } catch (err) { + expect(WorkflowWorldError.is(err)).toBe(true); + expect((err as WorkflowWorldError).status).toBe(404); + expect((err as WorkflowWorldError).code).toBe('not_found'); + expect((err as WorkflowWorldError).message).toBe('hook not found'); + } + }); + + it('maps 5xx to WorkflowWorldError with status (runtime treats as retryable)', () => { + try { + call(503); + expect.unreachable(); + } catch (err) { + expect(WorkflowWorldError.is(err)).toBe(true); + expect((err as WorkflowWorldError).status).toBe(503); + } + }); + + it('keeps a useful message when the body is not JSON', () => { + expect(() => call(500, 'plain text oops')).toThrowError( + /createEvent failed: HTTP 500 plain text oops/ + ); + }); +}); + +/** + * Full HTTP round-trip through getWorkflowRunEventsV4 — exercises the + * undici response-body → decodeFrames path that previously crashed in + * Next.js webpack bundles (node:stream Readable.toWeb), and verifies + * `config.dispatcher` is honored (it was silently ignored before). 
+ */ +describe('getWorkflowRunEventsV4 over HTTP', () => { + it('parses a frame stream fetched via a custom dispatcher', async () => { + const origin = 'https://vercel-workflow.com'; + const agent = new MockAgent(); + agent.disableNetConnect(); + + const body = new TextEncoder().encode('payload-bytes'); + const frames = Buffer.concat([ + encodeFrame( + { + eventId: 'evnt_1', + runId: 'wrun_1', + eventType: 'run_created', + createdAt: '2026-06-10T00:00:00.000Z', + eventData: {}, + }, + body + ), + encodeFrame({ _end: 1, next: 'cursor-2' }, new Uint8Array(0)), + ]); + + agent + .get(origin) + .intercept({ path: '/api/v4/runs/wrun_1/events', method: 'GET' }) + .reply(200, frames, { + headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, + }); + + const result = await getWorkflowRunEventsV4( + 'wrun_1', + {}, + { token: 'test-token', dispatcher: agent } + ); + + expect(result.events).toHaveLength(1); + expect(result.events[0].event.eventId).toBe('evnt_1'); + expect(new Uint8Array(result.events[0].body)).toEqual(body); + expect(result.next).toBe('cursor-2'); + agent.assertNoPendingInterceptors(); + }); + + it('throws when the stream ends without the end sentinel (truncated response)', async () => { + const origin = 'https://vercel-workflow.com'; + const agent = new MockAgent(); + agent.disableNetConnect(); + + // A complete event frame but NO `{_end: 1}` sentinel — what a response + // truncated on a frame boundary looks like. Returning this as a + // successful page would silently drop events with hasMore=false. 
+ const frames = encodeFrame( + { + eventId: 'evnt_1', + runId: 'wrun_1', + eventType: 'run_created', + createdAt: '2026-06-10T00:00:00.000Z', + eventData: {}, + }, + new Uint8Array(0) + ); + + agent + .get(origin) + .intercept({ path: '/api/v4/runs/wrun_1/events', method: 'GET' }) + .reply(200, frames, { + headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, + }); + + await expect( + getWorkflowRunEventsV4( + 'wrun_1', + {}, + { token: 'test-token', dispatcher: agent } + ) + ).rejects.toThrow(/end-of-stream sentinel/); + }); +}); + +describe('createWorkflowRunEventV4 over HTTP', () => { + it('POSTs to the /events/:eventType alias and decodes the response', async () => { + const origin = 'https://vercel-workflow.com'; + const agent = new MockAgent(); + agent.disableNetConnect(); + + agent + .get(origin) + .intercept({ + // The event type rides in the URL purely as an observability hint + // (access logs / traces); the frame meta stays authoritative. + path: '/api/v4/runs/wrun_1/events/step_completed', + method: 'POST', + }) + .reply(200, encode({ step: { stepId: 'step_1', status: 'completed' } }), { + headers: { + 'x-wf-event-id': 'evnt_1', + 'x-wf-run-id': 'wrun_1', + 'x-wf-created-at': '2026-06-10T00:00:00.000Z', + }, + }); + + const result = await createWorkflowRunEventV4( + { + runId: 'wrun_1', + eventType: 'step_completed', + specVersion: 2, + correlationId: 'step_1', + payload: new TextEncoder().encode('"result"'), + }, + { token: 'test-token', dispatcher: agent } + ); + + expect(result.eventId).toBe('evnt_1'); + expect(result.runId).toBe('wrun_1'); + expect(result.createdAt).toBe('2026-06-10T00:00:00.000Z'); + expect(result.body.step).toMatchObject({ stepId: 'step_1' }); + agent.assertNoPendingInterceptors(); + }); +}); diff --git a/packages/world-vercel/src/events-v4.ts b/packages/world-vercel/src/events-v4.ts new file mode 100644 index 0000000000..912474431f --- /dev/null +++ b/packages/world-vercel/src/events-v4.ts @@ -0,0 +1,524 @@ +/** + * v4 event 
endpoints — fully framed wire protocol. + * + * Both directions use the same length-prefixed binary frame layout: + * + * frame := [u32_be meta_len][cbor_meta][u32_be body_len][body_bytes] + * + * - **POST**: request body is one frame. `cbor_meta` carries structured + * event metadata (eventType, specVersion, deploymentId, workflowName, + * …, executionContext); `body_bytes` is the opaque user payload that + * the server stores without ever decoding it. + * - **GET single event**: response body is one frame. + * - **LIST events**: response body is a stream of frames terminated by a + * sentinel frame (meta = `{_end: 1, next?: cursor}`). + * + * POST responses carry special HTTP headers (eventId / runId / createdAt) + * for client convenience, to allow metadata access without decoding the body. + * + * Higher-level callers (the world-vercel adapter) CBOR-encode their JS + * values into the `payload` parameter and CBOR-decode returned `body` + * bytes — this module stays at the wire-bytes layer. + */ + +import { + EntityConflictError, + RunExpiredError, + ThrottleError, + TooEarlyError, + WorkflowWorldError, +} from '@workflow/errors'; +import { decode } from 'cbor-x'; +import { type Dispatcher, request } from 'undici'; +import { decodeFrames, encodeFrame, V4_FRAME_CONTENT_TYPE } from './frames.js'; +import { getDispatcher } from './http-client.js'; +import { type APIConfig, getHttpConfig } from './utils.js'; + +/** + * POST surfaces these so callers can read the created eventId without + * decoding the CBOR response body. + */ +export const V4_RESPONSE_HEADERS = { + eventId: 'x-wf-event-id', + runId: 'x-wf-run-id', + createdAt: 'x-wf-created-at', +} as const; + +export interface CreateEventV4Input { + // runId is required even for run_created, because the payload is keyed under the runId + runId: string; + eventType: string; + /** Opaque payload bytes. Pass undefined for events that don't carry + * user data (e.g. step_started). 
*/ + payload?: Uint8Array; + specVersion: number; + correlationId?: string; + vercelId?: string; + remoteRefBehavior?: 'resolve' | 'lazy'; + deploymentId?: string; + workflowName?: string; + stepName?: string; + attempt?: number; + /** cbor-x encodes Date as CBOR tag 1 (epoch) and the server decodes it + * back to a Date — the round-trip is symmetric, so wait_created / + * step_retrying / etc. see a Date in eventData.resumeAt on the read + * side. */ + resumeAt?: Date; + /** step_retrying's custom backoff timestamp (RetryableError.retryAfter). + * The queue enforces the actual delay, but the backend persists this on + * the step entity for premature-delivery pacing and observability. */ + retryAfter?: Date; + hookToken?: string; + hookIsWebhook?: boolean; + hookIsSystem?: boolean; + errorCode?: string; + /** Arbitrary structured map; rides as a native CBOR object in the + * frame meta. Bounded by the server at 2 KB encoded. */ + executionContext?: Record; + /** Initial run attributes (run_created, and run_started on the + * resilient-start path). Validated server-side against the attribute + * key/value/count caps. */ + attributes?: Record; + /** attr_set's attribute change list ({key, value|null} entries). */ + changes?: Array>; + /** attr_set's writer provenance ({type:'workflow'} or + * {type:'step', stepId, attempt}). */ + writer?: Record; + /** Opt-in for framework-level callers to write `$`-prefixed reserved + * attribute keys (attr_set / run_created / run_started). */ + allowReservedAttributes?: boolean; +} + +export interface CreateEventV4Result { + eventId: string; + runId: string; + createdAt: string; + /** + * Materialized-entity bag — CBOR-decoded from the response body. The + * server hands back the same shape v2/v3 use for EventResult so the + * adapter layer can drop these fields into its return value unchanged. + * Keys are unset when the event type doesn't materialize that entity + * kind. 
+ */ + body: { + event?: unknown; + run?: unknown; + step?: unknown; + hook?: unknown; + wait?: unknown; + events?: unknown[]; + cursor?: string | null; + hasMore?: boolean; + }; +} + +/** Build the CBOR meta map for a v4 POST frame. Drops undefined entries + * so the wire shape matches what the server expects to see. */ +function buildPostFrameMeta( + input: CreateEventV4Input +): Record { + const meta: Record = { + eventType: input.eventType, + specVersion: input.specVersion, + }; + if (input.correlationId !== undefined) + meta.correlationId = input.correlationId; + if (input.vercelId !== undefined) meta.vercelId = input.vercelId; + if (input.remoteRefBehavior !== undefined) { + meta.remoteRefBehavior = input.remoteRefBehavior; + } + if (input.deploymentId !== undefined) meta.deploymentId = input.deploymentId; + if (input.workflowName !== undefined) meta.workflowName = input.workflowName; + if (input.stepName !== undefined) meta.stepName = input.stepName; + if (input.attempt !== undefined) meta.attempt = input.attempt; + if (input.resumeAt !== undefined) meta.resumeAt = input.resumeAt; + if (input.retryAfter !== undefined) meta.retryAfter = input.retryAfter; + if (input.hookToken !== undefined) meta.hookToken = input.hookToken; + if (input.hookIsWebhook !== undefined) + meta.hookIsWebhook = input.hookIsWebhook; + if (input.hookIsSystem !== undefined) meta.hookIsSystem = input.hookIsSystem; + if (input.errorCode !== undefined) meta.errorCode = input.errorCode; + if (input.executionContext !== undefined) { + meta.executionContext = input.executionContext; + } + if (input.attributes !== undefined) meta.attributes = input.attributes; + if (input.changes !== undefined) meta.changes = input.changes; + if (input.writer !== undefined) meta.writer = input.writer; + if (input.allowReservedAttributes !== undefined) { + meta.allowReservedAttributes = input.allowReservedAttributes; + } + return meta; +} + +/** + * Map a non-2xx response to the same typed-error contract the v3 
client's + * `makeRequest` used. The runtime branches on these types for core control + * flow, so v4 must preserve every mapping: + * + * - 409 → EntityConflictError (start() dedupe, terminal-state transitions) + * - 410 → RunExpiredError (runtime exits without retrying) + * - 425 → TooEarlyError + retryAfter (step retry pacing — see #1806 for + * what happens when a 425 degrades into an untyped error) + * - 429 → ThrottleError + retryAfter + * - anything else → WorkflowWorldError with `status` (the hook 404 → + * HookNotFoundError translation in events.ts keys off status === 404) + * + * Exported for unit tests. + */ +export function throwForErrorResponse( + statusCode: number, + responseHeaders: Record, + errorBody: string, + opName: string, + url: string +): never { + let message = `v4 ${opName} failed: HTTP ${statusCode}`; + let code: string | undefined; + try { + const json = JSON.parse(errorBody) as { message?: string; code?: string }; + if (typeof json.message === 'string') message = json.message; + if (typeof json.code === 'string') code = json.code; + } catch { + // body wasn't JSON — keep the default message, append raw text below + if (errorBody) message += ` ${errorBody}`; + } + + // Retry-After response header (seconds). Used by 425 and 429. 
+ let retryAfter: number | undefined; + const retryAfterHeader = readHeader(responseHeaders, 'retry-after'); + if (retryAfterHeader) { + const parsed = parseInt(retryAfterHeader, 10); + if (!Number.isNaN(parsed)) retryAfter = parsed; + } + + if (statusCode === 409) throw new EntityConflictError(message); + if (statusCode === 410) throw new RunExpiredError(message); + if (statusCode === 425) throw new TooEarlyError(message, { retryAfter }); + if (statusCode === 429) throw new ThrottleError(message, { retryAfter }); + throw new WorkflowWorldError(message, { + status: statusCode, + code, + url, + retryAfter, + }); +} + +/** + * POST /api/v4/runs/:runId/events/:eventType + * + * Sends the full request as a single v4 frame and returns the event ids + * + materialized-entity bag from the CBOR response body. Throws on + * non-2xx. + * + * The trailing `:eventType` path segment is an alias of the canonical + * `/events` route: it exists purely so the event type is visible in + * access logs / traces / route metrics without decoding the frame body. + * The frame meta's `eventType` remains authoritative — the backend + * cross-checks the two and logs (but does not reject) a mismatch. + */ +export async function createWorkflowRunEventV4( + input: CreateEventV4Input, + config?: APIConfig +): Promise { + // getHttpConfig sets the Authorization header (explicit config.token or + // per-request OIDC fallback) — same contract as the v3 makeRequest path. + const { baseUrl, headers: baseHeaders } = await getHttpConfig(config); + const headers = new Headers(baseHeaders); + headers.set('Content-Type', 'application/octet-stream'); + + const frame = encodeFrame( + buildPostFrameMeta(input), + input.payload ?? 
new Uint8Array(0) + ); + + const url = `${baseUrl}/v4/runs/${encodeURIComponent(input.runId)}/events/${encodeURIComponent(input.eventType)}`; + const response = await request(url, { + method: 'POST', + headers: Object.fromEntries(headers.entries()), + body: frame, + // getDispatcher() is typed `unknown` (undici's Dispatcher type is + // version-specific across @types/node majors); cast to the undici + // Dispatcher this module's own `request` expects. + dispatcher: getDispatcher(config) as Dispatcher, + }); + if (response.statusCode < 200 || response.statusCode >= 300) { + const errorBody = await response.body.text(); + throwForErrorResponse( + response.statusCode, + response.headers, + errorBody, + 'createEvent', + url + ); + } + + const eventId = response.headers[V4_RESPONSE_HEADERS.eventId]; + const runId = response.headers[V4_RESPONSE_HEADERS.runId]; + const createdAt = response.headers[V4_RESPONSE_HEADERS.createdAt]; + if ( + typeof eventId !== 'string' || + typeof runId !== 'string' || + typeof createdAt !== 'string' + ) { + throw new Error('v4 createEvent: response missing required x-wf-* headers'); + } + + // Decode the materialized-entity bag from the CBOR response body. + const bodyBytes = new Uint8Array(await response.body.arrayBuffer()); + const body = + bodyBytes.byteLength > 0 + ? (decode(bodyBytes) as CreateEventV4Result['body']) + : {}; + + return { eventId, runId, createdAt, body }; +} + +/** + * Event entity CBOR-decoded from the meta of a v4 frame, as returned by + * GET /api/v4/runs/:runId/events/:eventId and the LIST endpoints. The + * payload bytes (input/output/result/error/payload/metadata depending on + * eventType) are NOT inlined here: they arrive separately as the frame + * body, and callers splice them into `eventData` when needed. 
+ */ +export interface DecodedV4Event { + eventId: string; + runId: string; + eventType: string; + correlationId?: string; + createdAt: Date | string; + specVersion?: number; + eventData?: Record; +} + +function readHeader( + responseHeaders: Record, + name: string +): string | undefined { + const value = responseHeaders[name]; + if (typeof value === 'string') return value; + if (Array.isArray(value) && value.length > 0) return value[0]; + return undefined; +} + +/** + * GET /api/v4/runs/:runId/events/:eventId + * + * Returns one v4 frame: the full event entity (CBOR-decoded from the + * frame meta) plus the resolved payload bytes (frame body, possibly + * empty). The wire format is identical to a single LIST frame so the + * server can stream the payload back without buffering — callers + * are responsible for splicing `body` into `event.eventData[payloadField]` + * when they need the resolved value. The world-vercel adapter does this + * in events.ts. + */ +export async function getEventV4( + runId: string, + eventId: string, + config?: APIConfig +): Promise<{ event: DecodedV4Event; body: Uint8Array }> { + const { baseUrl, headers } = await getHttpConfig(config); + + const url = `${baseUrl}/v4/runs/${encodeURIComponent(runId)}/events/${encodeURIComponent(eventId)}`; + const response = await request(url, { + method: 'GET', + headers: Object.fromEntries(headers.entries()), + // getDispatcher() is typed `unknown` (undici's Dispatcher type is + // version-specific across @types/node majors); cast to the undici + // Dispatcher this module's own `request` expects. 
+ dispatcher: getDispatcher(config) as Dispatcher, + }); + if (response.statusCode < 200 || response.statusCode >= 300) { + const errorBody = await response.body.text(); + throwForErrorResponse( + response.statusCode, + response.headers, + errorBody, + 'getEvent', + url + ); + } + const contentType = readHeader(response.headers, 'content-type'); + if (!contentType?.startsWith(V4_FRAME_CONTENT_TYPE)) { + throw new Error( + `v4 getEvent: expected ${V4_FRAME_CONTENT_TYPE}, got ${contentType ?? '(none)'}` + ); + } + + // undici's response body is an AsyncIterable of byte chunks — feed it + // to decodeFrames directly. Do NOT convert via node:stream + // Readable.toWeb: dynamic `import('node:stream')` resolves to an empty + // module namespace in Next.js webpack server bundles and crashes. + const chunks = response.body as unknown as AsyncIterable; + + // GET emits a single frame (no sentinel); decodeFrames returns at EOF + // after yielding it. + for await (const frame of decodeFrames(chunks)) { + return { event: frame.meta as unknown as DecodedV4Event, body: frame.body }; + } + throw new Error(`v4 getEvent: empty frame stream for ${eventId}`); +} + +export interface ListEventsV4Params { + cursor?: string; + limit?: number; + sortOrder?: 'asc' | 'desc'; +} + +/** + * A single event extracted from a v4 LIST frame. Mirrors `DecodedV4Event` + * but also carries the raw payload bytes — for payload-bearing events the + * server emits the resolved bytes in the frame body (so it never has to + * decode them) and the SDK is expected to splice them back into the + * appropriate `eventData` field. + */ +export interface ListedEventV4 { + event: DecodedV4Event; + /** Resolved payload bytes. Empty for events without a payload. */ + body: Uint8Array; +} + +export interface ListEventsV4Result { + events: ListedEventV4[]; + /** Pagination cursor — present when more pages remain. */ + next?: string; +} + +/** + * Drive a v4 frame-stream list response into an in-memory page. 
Used by + * both the by-runId and by-correlationId list endpoints — the wire + * shape is identical, only the URL differs. + * + * `headers` come from the caller's single getHttpConfig resolution (the + * same call that produced the baseUrl in `url`) so each LIST resolves + * auth exactly once. + */ +async function consumeListFrameStream( + url: string, + headers: Headers, + config: APIConfig | undefined, + opName: string +): Promise { + const response = await request(url, { + method: 'GET', + headers: Object.fromEntries(headers.entries()), + // getDispatcher() is typed `unknown` (undici's Dispatcher type is + // version-specific across @types/node majors); cast to the undici + // Dispatcher this module's own `request` expects. + dispatcher: getDispatcher(config) as Dispatcher, + }); + if (response.statusCode < 200 || response.statusCode >= 300) { + const errorBody = await response.body.text(); + throwForErrorResponse( + response.statusCode, + response.headers, + errorBody, + opName, + url + ); + } + const contentType = readHeader(response.headers, 'content-type'); + if (!contentType?.startsWith(V4_FRAME_CONTENT_TYPE)) { + throw new Error( + `v4 ${opName}: expected ${V4_FRAME_CONTENT_TYPE}, got ${contentType ?? '(none)'}` + ); + } + + // undici's response body is an AsyncIterable of byte chunks — feed it + // to decodeFrames directly. Do NOT convert via node:stream + // Readable.toWeb: dynamic `import('node:stream')` resolves to an empty + // module namespace in Next.js webpack server bundles and crashes. 
+ const chunks = response.body as unknown as AsyncIterable; + + const events: ListedEventV4[] = []; + let next: string | undefined; + let sawEndSentinel = false; + for await (const frame of decodeFrames(chunks)) { + if (frame.meta._end === 1) { + if (typeof frame.meta.next === 'string') next = frame.meta.next; + sawEndSentinel = true; + break; + } + events.push({ + event: frame.meta as unknown as DecodedV4Event, + body: frame.body, + }); + } + + // A LIST response always ends with the `{_end: 1}` sentinel frame. EOF + // without it means the response was truncated — and if the cut landed + // between two complete frames, decodeFrames alone can't tell. Returning + // the partial page here would surface as `hasMore: false` and silently + // drop events (replay correctness!), so fail loudly instead; the read + // is idempotent and safe for the caller to retry. + if (!sawEndSentinel) { + throw new Error( + `v4 ${opName}: frame stream ended without the end-of-stream sentinel ` + + `(${events.length} events read) — truncated response?` + ); + } + + return { events, ...(next ? { next } : {}) }; +} + +function paginationToQuery(params: ListEventsV4Params): string { + const sp = new URLSearchParams(); + if (params.cursor) sp.set('cursor', params.cursor); + if (params.limit !== undefined) sp.set('limit', String(params.limit)); + if (params.sortOrder) sp.set('sortOrder', params.sortOrder); + const qs = sp.toString(); + return qs ? `?${qs}` : ''; +} + +/** + * GET /api/v4/runs/:runId/events + * + * Parses the binary-frame stream into a list of events plus the + * pagination cursor (from the sentinel frame). Each frame's CBOR meta + * IS the full event entity, with the payload field still in `eventData` + * as a `RefDescriptor` (lazy); the resolved payload bytes ride in the + * frame body. The adapter layer splices them back into eventData. + * + * Eagerly drains the stream into memory to match the existing + * `getWorkflowRunEvents` page-at-a-time contract. 
A streaming variant + * that yields events one at a time without buffering the page would be + * a small refactor (decodeFrames is already async-iterable). + */ +export async function getWorkflowRunEventsV4( + runId: string, + params: ListEventsV4Params = {}, + config?: APIConfig +): Promise { + const { baseUrl, headers } = await getHttpConfig(config); + const url = + `${baseUrl}/v4/runs/${encodeURIComponent(runId)}/events` + + paginationToQuery(params); + return consumeListFrameStream(url, headers, config, 'listEvents'); +} + +/** + * GET /api/v4/events?correlationId=... + * + * Same frame stream as getWorkflowRunEventsV4 but selected by + * correlationId (GSI) instead of runId. Used by the storage adapter's + * `events.listByCorrelationId` path — the v3 client used + * `/v2/events?correlationId=...` for the equivalent query. + */ +export async function getEventsByCorrelationIdV4( + correlationId: string, + params: ListEventsV4Params = {}, + config?: APIConfig +): Promise { + const { baseUrl, headers } = await getHttpConfig(config); + const sp = new URLSearchParams(); + sp.set('correlationId', correlationId); + if (params.cursor) sp.set('cursor', params.cursor); + if (params.limit !== undefined) sp.set('limit', String(params.limit)); + if (params.sortOrder) sp.set('sortOrder', params.sortOrder); + const url = `${baseUrl}/v4/events?${sp.toString()}`; + return consumeListFrameStream( + url, + headers, + config, + 'listEventsByCorrelationId' + ); +} diff --git a/packages/world-vercel/src/events.test.ts b/packages/world-vercel/src/events.test.ts new file mode 100644 index 0000000000..22713e1eec --- /dev/null +++ b/packages/world-vercel/src/events.test.ts @@ -0,0 +1,346 @@ +import type { AnyEventRequest } from '@workflow/world'; +import { encode } from 'cbor-x'; +import { MockAgent } from 'undici'; +import { describe, expect, it } from 'vitest'; +import { createWorkflowRunEvent, splitEventDataForV4 } from './events.js'; + +const ORIGIN = 'https://vercel-workflow.com'; 
+ +function mockAgent() { + const agent = new MockAgent(); + agent.disableNetConnect(); + return agent; +} + +/** + * Legacy (spec-version-1) runs predate event sourcing: the runtime still + * posts hook_received (resumeHook) and wait_completed (wakeUpRun) for them + * with `v1Compat: true`, expecting the legacy `/v1/runs/:id/events` + * endpoint — NOT the v4 protocol. This locks in the fallback so the v4 + * migration can't silently break webhooks/waits on pre-event-sourcing runs. + */ +describe('createWorkflowRunEvent with v1Compat', () => { + it.each([ + { + eventType: 'hook_received' as const, + data: { + eventType: 'hook_received', + correlationId: 'hook_1', + specVersion: 1, + eventData: { payload: { hello: 'world' } }, + }, + responseEventData: { payload: { hello: 'world' } }, + }, + { + eventType: 'wait_completed' as const, + data: { + eventType: 'wait_completed', + correlationId: 'wait_1', + specVersion: 1, + eventData: { resumeAt: '2026-06-10T00:00:00.000Z' }, + }, + responseEventData: { resumeAt: '2026-06-10T00:00:00.000Z' }, + }, + ])('posts $eventType to the legacy v1 events endpoint', async ({ + eventType, + data, + responseEventData, + }) => { + const agent = mockAgent(); + agent + .get(ORIGIN) + .intercept({ path: '/api/v1/runs/wrun_legacy/events', method: 'POST' }) + .reply( + 200, + { + eventId: 'evnt_legacy', + runId: 'wrun_legacy', + eventType, + correlationId: data.correlationId, + createdAt: '2026-06-10T00:00:00.000Z', + specVersion: 1, + eventData: responseEventData, + }, + { headers: { 'content-type': 'application/json' } } + ); + + const result = await createWorkflowRunEvent( + 'wrun_legacy', + data as AnyEventRequest, + { v1Compat: true }, + { token: 'test-token', dispatcher: agent } + ); + + expect(result.event?.eventId).toBe('evnt_legacy'); + expect(result.event?.eventType).toBe(eventType); + agent.assertNoPendingInterceptors(); + }); + + it('rejects v1Compat without a runId for non-lifecycle events', async () => { + await expect( + 
createWorkflowRunEvent( + null, + { + eventType: 'hook_received', + correlationId: 'hook_1', + specVersion: 1, + eventData: { payload: {} }, + } as AnyEventRequest, + { v1Compat: true }, + { token: 'test-token' } + ) + ).rejects.toThrow(/requires a runId/); + }); +}); + +/** + * The split's meta allowlist IS the eventData wire contract on v4. The + * type-level `assertEventDataWireContractExhaustive` guard in events.ts + * fails the build if a schema field is routed to neither the payload body + * nor the frame meta, so a *missing* field can't silently regress. These + * runtime tests are the complement: they prove the fields that ARE routed + * actually reach the frame meta with the right values and renames. + */ +describe('splitEventDataForV4 attribute fields', () => { + it('carries attr_set changes/writer/allowReservedAttributes in the frame meta', () => { + const { payload, meta } = splitEventDataForV4({ + eventType: 'attr_set', + correlationId: 'attr_1', + specVersion: 4, + eventData: { + changes: [ + { key: 'phase', value: 'done' }, + { key: 'stale', value: null }, + ], + writer: { type: 'step', stepId: 'step_1', attempt: 2 }, + allowReservedAttributes: true, + }, + } as AnyEventRequest); + + expect(payload).toBeUndefined(); + expect(meta.changes).toEqual([ + { key: 'phase', value: 'done' }, + { key: 'stale', value: null }, + ]); + expect(meta.writer).toEqual({ type: 'step', stepId: 'step_1', attempt: 2 }); + expect(meta.allowReservedAttributes).toBe(true); + }); + + it('carries initial run attributes on run_created', () => { + const { payload, meta } = splitEventDataForV4({ + eventType: 'run_created', + specVersion: 4, + eventData: { + deploymentId: 'dpl_1', + workflowName: 'wf', + input: new TextEncoder().encode('[]'), + attributes: { sourceAtStart: 'api' }, + }, + } as AnyEventRequest); + + expect(payload).toBeInstanceOf(Uint8Array); + expect(meta.attributes).toEqual({ sourceAtStart: 'api' }); + expect(meta.deploymentId).toBe('dpl_1'); + 
expect(meta.workflowName).toBe('wf'); + }); + + it('carries attributes on resilient-start run_started', () => { + const { meta } = splitEventDataForV4({ + eventType: 'run_started', + specVersion: 4, + eventData: { + input: new TextEncoder().encode('[]'), + deploymentId: 'dpl_1', + workflowName: 'wf', + attributes: { sourceAtStart: 'api' }, + }, + } as AnyEventRequest); + + expect(meta.attributes).toEqual({ sourceAtStart: 'api' }); + }); +}); + +describe('createWorkflowRunEvent response coercion', () => { + it('coerces ISO-string dates in the returned event and preloaded events', async () => { + // Persisted events store nested eventData dates as ISO strings + // (the backend's entity layer converts Date → toISOString on write with + // no inverse getter). The run_started TTFB preload reads events back + // from a query, so the POST response's `event`/`events` need the same + // EventSchema coercion as the GET/LIST path — the runtime calls + // .getTime() on wait_created.resumeAt during replay. 
+ const agent = mockAgent(); + agent + .get(ORIGIN) + .intercept({ + path: '/api/v4/runs/wrun_1/events/run_started', + method: 'POST', + }) + .reply( + 200, + encode({ + run: { + runId: 'wrun_1', + status: 'running', + startedAt: new Date('2026-06-10T00:00:01.000Z'), + }, + event: { + eventId: 'evnt_2', + runId: 'wrun_1', + eventType: 'run_started', + createdAt: '2026-06-10T00:00:01.000Z', + eventData: {}, + }, + events: [ + { + eventId: 'evnt_3', + runId: 'wrun_1', + eventType: 'wait_created', + correlationId: 'wait_1', + createdAt: '2026-06-10T00:00:02.000Z', + specVersion: 2, + eventData: { resumeAt: '2026-06-10T01:00:00.000Z' }, + }, + ], + cursor: 'cursor-1', + hasMore: false, + }), + { + headers: { + 'x-wf-event-id': 'evnt_2', + 'x-wf-run-id': 'wrun_1', + 'x-wf-created-at': '2026-06-10T00:00:01.000Z', + }, + } + ); + + const result = await createWorkflowRunEvent( + 'wrun_1', + { eventType: 'run_started', specVersion: 2 } as AnyEventRequest, + undefined, + { token: 'test-token', dispatcher: agent } + ); + + expect(result.event?.createdAt).toBeInstanceOf(Date); + const preloaded = result.events?.[0] as { + createdAt: Date; + eventData: { resumeAt: Date }; + }; + expect(preloaded.createdAt).toBeInstanceOf(Date); + expect(preloaded.eventData.resumeAt).toBeInstanceOf(Date); + expect(preloaded.eventData.resumeAt.getTime()).toBe( + new Date('2026-06-10T01:00:00.000Z').getTime() + ); + agent.assertNoPendingInterceptors(); + }); + + it('threads the wait entity through to the EventResult', async () => { + const agent = mockAgent(); + agent + .get(ORIGIN) + .intercept({ + path: '/api/v4/runs/wrun_1/events/wait_created', + method: 'POST', + }) + .reply( + 200, + encode({ + event: { + eventId: 'evnt_4', + runId: 'wrun_1', + eventType: 'wait_created', + correlationId: 'wait_1', + createdAt: '2026-06-10T00:00:00.000Z', + eventData: { resumeAt: '2026-06-10T01:00:00.000Z' }, + }, + wait: { + waitId: 'wait_1', + runId: 'wrun_1', + status: 'pending', + }, + }), + { + headers: { 
+ 'x-wf-event-id': 'evnt_4', + 'x-wf-run-id': 'wrun_1', + 'x-wf-created-at': '2026-06-10T00:00:00.000Z', + }, + } + ); + + const result = await createWorkflowRunEvent( + 'wrun_1', + { + eventType: 'wait_created', + correlationId: 'wait_1', + specVersion: 2, + eventData: { resumeAt: new Date('2026-06-10T01:00:00.000Z') }, + } as AnyEventRequest, + undefined, + { token: 'test-token', dispatcher: agent } + ); + + expect(result.wait).toMatchObject({ waitId: 'wait_1' }); + expect( + (result.event as { eventData?: { resumeAt?: unknown } })?.eventData + ?.resumeAt + ).toBeInstanceOf(Date); + agent.assertNoPendingInterceptors(); + }); +}); + +describe('createWorkflowRunEvent resolveData', () => { + it("strips payload fields from the returned event when resolveData is 'none'", async () => { + const agent = mockAgent(); + agent + .get(ORIGIN) + .intercept({ + path: '/api/v4/runs/wrun_1/events/step_completed', + method: 'POST', + }) + .reply( + 200, + encode({ + event: { + eventId: 'evnt_1', + runId: 'wrun_1', + eventType: 'step_completed', + correlationId: 'step_1', + createdAt: '2026-06-10T00:00:00.000Z', + eventData: { + result: new TextEncoder().encode('"payload-bytes"'), + stepName: 'my-step', + }, + }, + }), + { + headers: { + 'x-wf-event-id': 'evnt_1', + 'x-wf-run-id': 'wrun_1', + 'x-wf-created-at': '2026-06-10T00:00:00.000Z', + }, + } + ); + + const result = await createWorkflowRunEvent( + 'wrun_1', + { + eventType: 'step_completed', + correlationId: 'step_1', + specVersion: 2, + eventData: { + result: new TextEncoder().encode('"payload-bytes"'), + }, + } as AnyEventRequest, + { resolveData: 'none' }, + { token: 'test-token', dispatcher: agent } + ); + + // The Storage contract: a caller asking for resolveData 'none' must + // not get payload bytes back — only entity metadata. 
+ const eventData = (result.event as { eventData?: Record }) + ?.eventData; + expect(eventData?.result).toBeUndefined(); + expect(eventData?.stepName).toBe('my-step'); + agent.assertNoPendingInterceptors(); + }); +}); diff --git a/packages/world-vercel/src/events.ts b/packages/world-vercel/src/events.ts index 4477278846..1abfca16ab 100644 --- a/packages/world-vercel/src/events.ts +++ b/packages/world-vercel/src/events.ts @@ -1,3 +1,36 @@ +/** + * world-vercel event functions — v4 wire format throughout. + * + * This module replaces the previous v2/v3 implementation. The v4 wire + * format uses a single length-prefixed binary frame layout in both + * directions: + * + * frame := [u32_be meta_len][cbor_meta][u32_be body_len][body_bytes] + * + * `cbor_meta` is the structured event metadata; `body_bytes` is the + * opaque user payload, never CBOR-decoded by the server. See the + * world-vercel backend's v4 handlers for the matching server-side + * encoding and ../events-v4.ts for the wire-level client. + * + * Key shape changes vs. v2/v3: + * + * - POST request body is one v4 frame (meta + payload). The response + * surfaces eventId/runId/createdAt as `x-wf-*` headers and carries + * the materialized EventResult (event/run/step/hook/wait/events/ + * cursor/hasMore) as a CBOR body — `remoteRefBehavior` in the frame + * meta still controls server-side ref resolution. + * - GET single event returns one v4 frame: the event entity in the + * frame meta, the user payload bytes in the frame body. + * - LIST events returns a stream of v4 frames terminated by a sentinel + * frame whose meta carries `{_end: 1, next?: cursor}`. The old + * per-event `/refs` round-trip is eliminated. + * + * Public function signatures are unchanged: storage.ts continues to + * wire these as `Storage['events']` and the workflow runtime sees the + * same EventResult / Event / PaginatedResponse shapes it did on + * the v3 path. 
+ */ + import { HookNotFoundError, WorkflowWorldError } from '@workflow/errors'; import { type AnyEventRequest, @@ -7,99 +40,56 @@ import { EventSchema, EventTypeSchema, type GetEventParams, - HookSchema, type ListEventsByCorrelationIdParams, type ListEventsParams, type PaginatedResponse, - PaginatedResponseSchema, stripEventDataRefs, validateUlidTimestamp, type WorkflowRun, - WorkflowRunSchema, } from '@workflow/world'; -import z from 'zod'; -import { - isRefDescriptor, - type RefDescriptor, - type RefWithRunId, - resolveRefDescriptors, -} from './refs.js'; import { - cancelWorkflowRunV1, - createWorkflowRunV1, - WorkflowRunWireBaseSchema, -} from './runs.js'; -import { deserializeStep, StepWireSchema } from './steps.js'; -import { trace } from './telemetry.js'; -import type { APIConfig } from './utils.js'; + createWorkflowRunEventV4, + type DecodedV4Event, + getEventsByCorrelationIdV4, + getEventV4, + getWorkflowRunEventsV4, +} from './events-v4.js'; +import { cancelWorkflowRunV1, createWorkflowRunV1 } from './runs.js'; +import { deserializeStep } from './steps.js'; import { + type APIConfig, DEFAULT_RESOLVE_DATA_OPTION, deserializeError, makeRequest, } from './utils.js'; -// Wraps stripEventDataRefs to also strip the legacy eventDataRef field, -// since the server always returns lazy refs and callers with -// resolveData='none' should not see them. -function stripEventAndLegacyRefs( - event: any, - resolveData: 'none' | 'all' -): Event { - if (resolveData !== 'none') return event; - const { eventDataRef: _eventDataRef, ...withoutLegacyRef } = event; - return stripEventDataRefs(withoutLegacyRef, resolveData); -} - -// Schema for EventResult wire format returned by events.create. -// Uses wire format schemas for step to handle field name mapping. -// Two variants are used depending on `remoteRefBehavior`: -// - 'resolve': the server returns fully resolved data, so we validate the run -// with the strict WorkflowRunSchema discriminated union (e.g. 
status:'failed' -// requires error to be present). -// - 'lazy': the server may omit resolved fields (error may be a string or -// undefined), so we use the looser WorkflowRunWireBaseSchema and normalize -// the error via deserializeError() afterward. -const EventResultResolveWireSchema = z.object({ - event: EventSchema.optional(), - run: WorkflowRunSchema.optional(), - step: StepWireSchema.optional(), - hook: HookSchema.optional(), - events: z.array(EventSchema).optional(), - cursor: z.string().nullable().optional(), - hasMore: z.boolean().optional(), -}); - -const EventResultLazyWireSchema = z.object({ - event: EventSchema.optional(), - run: WorkflowRunWireBaseSchema.optional(), - step: StepWireSchema.optional(), - hook: HookSchema.optional(), - events: z.array(EventSchema).optional(), - cursor: z.string().nullable().optional(), - hasMore: z.boolean().optional(), -}); - -// Schema for events returned with `remoteRefBehavior=lazy`. -// Includes both `eventDataRef` (legacy, specVersion=1) and `eventData` -// (v2, specVersion=2 — may contain nested RefDescriptor values). -// specVersion defaults to 1 (legacy) when parsing responses from storage. -const EventWithRefsSchema = z.object({ - eventId: z.string(), - runId: z.string(), - eventType: EventTypeSchema, - correlationId: z.string().optional(), - eventDataRef: z.any().optional(), - eventData: z.any().optional(), - createdAt: z.coerce.date(), - specVersion: z.number().default(1), -}); - /** - * Maps event types to the field name within `eventData` that may contain - * a ref descriptor. Mirrors the server-side `resolveEventDataRefs()` mapping. + * Per-event-type map of the field within `eventData` that holds the user + * payload. The backend uses the same convention on the v4 read side. + * + * The v4 wire encoding picks this field out of `eventData`, CBOR-encodes + * its value, and ships it as the frame body. Everything else in + * `eventData` rides in the frame's CBOR meta block. 
+ * + * This map's values, together with `MetaSourceField` below, ARE the wire + * contract for `eventData` on v4: every field a @workflow/world event + * schema can put in `eventData` must be routed either to the frame body + * (a payload field here) or the frame meta (a `MetaSourceField`). Unlike + * v3 (which serialized the whole object), a field that is neither does not + * cross the wire. `assertEventDataWireContractExhaustive` turns that into a + * compile error — the silent drop that bit `step_retrying.retryAfter` is + * now a build break that names the unrouted field. (The backend's meta + * parser still has to accept any new meta field independently, so a new + * field is a two-sided change.) */ -const eventDataRefFieldMap: Record = { +const PAYLOAD_FIELD_BY_EVENT_TYPE = { run_created: 'input', + // run_started normally has no payload, but on the resilient-start path + // the runtime piggybacks `runInput.input` here so the server can + // synthesize the missing run_created. Without this entry the v4 split + // would silently drop those bytes and the backend's "run_started arrived + // before run_created" fallback would have nothing to backfill from. + run_started: 'input', run_completed: 'output', run_failed: 'error', step_created: 'input', @@ -108,152 +98,350 @@ const eventDataRefFieldMap: Record = { step_retrying: 'error', hook_created: 'metadata', hook_received: 'payload', -}; - -// Events where the client uses the response entity data need 'resolve' (default). -// Events where the client discards the response can use 'lazy' to skip expensive -// S3 ref resolution on the server, saving ~200-460ms per event. -const eventsNeedingResolve = new Set([ - 'run_created', // client reads result.run.runId - 'run_started', // client reads result.run (checks startedAt, status) - 'step_started', // client reads result.step (checks attempt, state) -]); +} as const satisfies Record; + +/** + * The payload field names — the values of the map above. 
These are the + * fields that become the opaque frame body rather than frame meta. + */ +type PayloadField = + (typeof PAYLOAD_FIELD_BY_EVENT_TYPE)[keyof typeof PAYLOAD_FIELD_BY_EVENT_TYPE]; /** - * Collect all ref descriptors from a list of lazy-loaded events. - * Returns a flat array of { eventIndex, refType, fieldName?, descriptor } - * entries that can be resolved in bulk. + * Look up the payload field for an event type, or undefined for the event + * types that carry no user payload (run_cancelled, attr_set, step_started, + * wait_*, hook_disposed). The map is `as const` so it can drive + * `PayloadField`; the cast keeps the lookup callable with any event-type + * string. */ -interface PendingRef { - eventIndex: number; - /** - * 'entity' = top-level eventDataRef (legacy specVersion=1 events) - * 'nested' = nested ref descriptor within eventData (v2 events) - */ - refType: 'entity' | 'nested'; - /** The field name within eventData containing the ref (only for 'nested') */ - fieldName?: string; - descriptor: RefDescriptor; +function payloadFieldFor(eventType: string): PayloadField | undefined { + return ( + PAYLOAD_FIELD_BY_EVENT_TYPE as Record + )[eventType]; } -function collectPendingRefs(events: any[]): PendingRef[] { - const pending: PendingRef[] = []; +/** + * Union of every field a user-creatable event can carry in `eventData`, + * derived from the @workflow/world `CreateEventSchema` discriminated union + * (via `AnyEventRequest`). Adding a field to any event schema there widens + * this union automatically, which is what drives the exhaustiveness guard + * below. Event types with no `eventData` (run_cancelled) and with optional + * `eventData` (run_started, step_started, …) both contribute correctly. + */ +type EventDataField = E extends { eventData?: infer D } + ? keyof NonNullable & string + : never; + +// Events whose POST response the workflow runtime reads immediately +// (so the materialized entity must come back fully resolved). 
+const eventsNeedingResolve = new Set([ + 'run_created', // runtime reads result.run.runId + 'run_started', // runtime reads result.run (checks startedAt, status) + 'step_started', // runtime reads result.step (checks attempt, state) +]); - for (let i = 0; i < events.length; i++) { - const event = events[i]; +// Hook events that 404 when the hook is already disposed or never existed — +// translate to a typed HookNotFoundError so the runtime can branch on it. +const hookEventsRequiringExistence = new Set([ + 'hook_disposed', + 'hook_received', +]); - // Legacy events (specVersion=1): eventDataRef is a RefDescriptor - if (event.eventDataRef && isRefDescriptor(event.eventDataRef)) { - pending.push({ - eventIndex: i, - refType: 'entity', - descriptor: event.eventDataRef, - }); - } +// ============================================================================= +// Helpers +// ============================================================================= + +interface SplitEventData { + /** Encoded payload bytes (undefined when the event has no user payload). */ + payload?: Uint8Array; + /** Metadata fields that ride in the v4 POST frame's CBOR meta block. */ + meta: { + deploymentId?: string; + workflowName?: string; + stepName?: string; + attempt?: number; + resumeAt?: Date; + retryAfter?: Date; + hookToken?: string; + hookIsWebhook?: boolean; + hookIsSystem?: boolean; + errorCode?: string; + /** Structured executionContext, included verbatim in frame meta. */ + executionContext?: Record; + /** Initial run attributes (run_created / resilient-start run_started). */ + attributes?: Record; + /** attr_set change list, included verbatim in frame meta. */ + changes?: Array>; + /** attr_set writer provenance, included verbatim in frame meta. */ + writer?: Record; + /** Reserved-attribute-key opt-in (attr_set / run_created / run_started). 
*/ + allowReservedAttributes?: boolean; + }; +} + +/** + * Source field names in `eventData` that `splitEventDataForV4` lifts into + * the frame meta (some are renamed on the wire, e.g. `token` → `hookToken`). + * This is the metadata half of the v4 `eventData` allowlist; the payload + * half is `PayloadField`. The exhaustiveness guard below keeps this in sync + * with the @workflow/world schema in both directions; the per-field + * extraction in `splitEventDataForV4` is bespoke, so it must read each field + * listed here. + */ +type MetaSourceField = + | 'deploymentId' + | 'workflowName' + | 'stepName' + | 'attempt' + | 'resumeAt' + | 'retryAfter' + | 'token' + | 'isWebhook' + | 'isSystem' + | 'errorCode' + | 'executionContext' + | 'attributes' + | 'changes' + | 'writer' + | 'allowReservedAttributes'; + +/** + * Compile-time guard that the v4 `eventData` wire allowlist is exhaustive + * against the @workflow/world event schemas. + * + * - `Unhandled`: schema fields routed to neither the payload body + * (`PayloadField`) nor the frame meta (`MetaSourceField`). + * - `Stale`: allowlisted meta fields that no longer exist on any schema. + * + * Both must be `never`. Add a field to a @workflow/world event schema + * without routing it here and the `assertEventDataWireContractExhaustive` + * call fails to compile with `Type '["theField", never]' does not satisfy + * the constraint '[never, never]'` — the historical "silently dropped" + * footgun, now a build break that names the field. + */ +type Unhandled = Exclude; +type Stale = Exclude; +function assertEventDataWireContractExhaustive< + _Check extends [never, never], +>(): void { + // Type-level assertion only; the empty body is never relied on. 
+} +assertEventDataWireContractExhaustive<[Unhandled, Stale]>(); - // V2 events: eventData may contain a nested RefDescriptor - if (event.eventData && typeof event.eventData === 'object') { - const fieldName = eventDataRefFieldMap[event.eventType as string]; - if (fieldName) { - const fieldValue = event.eventData[fieldName]; - if (isRefDescriptor(fieldValue)) { - pending.push({ - eventIndex: i, - refType: 'nested', - fieldName, - descriptor: fieldValue, - }); - } +/** + * Split an AnyEventRequest's `eventData` into (a) the payload bytes that + * become the v4 frame body and (b) the metadata fields that become the + * CBOR-encoded meta block of the same frame. + * + * Exported for unit tests (the meta allowlist is the eventData wire + * contract — see the warning on PAYLOAD_FIELD_BY_EVENT_TYPE). + */ +export function splitEventDataForV4(data: AnyEventRequest): SplitEventData { + // Some event types in the AnyEventRequest discriminated union (e.g. + // run_cancelled) have no eventData. Cast through unknown so this + // helper can read it defensively without TS narrowing per branch. + const eventData = (( + data as unknown as { eventData?: Record } + ).eventData ?? {}) as Record; + const payloadField = payloadFieldFor(data.eventType); + const meta: SplitEventData['meta'] = {}; + + if (typeof eventData.deploymentId === 'string') { + meta.deploymentId = eventData.deploymentId; + } + if (typeof eventData.workflowName === 'string') { + meta.workflowName = eventData.workflowName; + } + if (typeof eventData.stepName === 'string') { + meta.stepName = eventData.stepName; + } + if (typeof eventData.attempt === 'number') { + meta.attempt = eventData.attempt; + } + // wait_created passes resumeAt as a Date. cbor-x encodes Date natively + // (tag 1) and round-trips back to a Date on the server, so the runtime + // sees a real Date instance when it reads the event back. ISO strings + // are accepted as a fallback for non-runtime callers. 
+ if (eventData.resumeAt instanceof Date) { + meta.resumeAt = eventData.resumeAt; + } else if (typeof eventData.resumeAt === 'string') { + const parsed = new Date(eventData.resumeAt); + if (!Number.isNaN(parsed.getTime())) meta.resumeAt = parsed; + } + // step_retrying carries the RetryableError backoff timestamp. The queue + // enforces the actual retry delay, but the server persists this on the + // step entity (premature-delivery pacing + observability) — dropping it + // here would silently disable both. + if (eventData.retryAfter instanceof Date) { + meta.retryAfter = eventData.retryAfter; + } else if (typeof eventData.retryAfter === 'string') { + const parsed = new Date(eventData.retryAfter); + if (!Number.isNaN(parsed.getTime())) meta.retryAfter = parsed; + } + // Runtime emits hook_created / hook_received / hook_disposed with the + // hook token in `eventData.token` (matches the world contract in + // packages/world/src/events.ts). The v4 wire encoding still calls it + // `hookToken` in the frame meta, so do the rename here. + if (typeof eventData.token === 'string') { + meta.hookToken = eventData.token; + } + if (typeof eventData.isWebhook === 'boolean') { + meta.hookIsWebhook = eventData.isWebhook; + } + if (typeof eventData.isSystem === 'boolean') { + meta.hookIsSystem = eventData.isSystem; + } + if (typeof eventData.errorCode === 'string') { + meta.errorCode = eventData.errorCode; + } + if ( + eventData.executionContext !== undefined && + eventData.executionContext !== null && + typeof eventData.executionContext === 'object' + ) { + meta.executionContext = eventData.executionContext as Record< + string, + unknown + >; + } + // Native run attributes (spec v4): initial attributes ride on + // run_created (and run_started for resilient start); attr_set carries + // the change list + writer provenance. 
All of these are structured + // metadata, not user payloads — they ride in the frame meta and the + // server validates them against the attribute caps before + // materializing run.attributes. + if ( + eventData.attributes !== undefined && + eventData.attributes !== null && + typeof eventData.attributes === 'object' + ) { + meta.attributes = eventData.attributes as Record; + } + if (Array.isArray(eventData.changes)) { + meta.changes = eventData.changes as Array>; + } + if ( + eventData.writer !== undefined && + eventData.writer !== null && + typeof eventData.writer === 'object' + ) { + meta.writer = eventData.writer as Record; + } + if (typeof eventData.allowReservedAttributes === 'boolean') { + meta.allowReservedAttributes = eventData.allowReservedAttributes; + } + + let payload: Uint8Array | undefined; + if (payloadField && payloadField in eventData) { + const value = eventData[payloadField]; + if (value !== undefined) { + // Payload fields (input / output / result / error / payload / + // metadata) reach this layer already serialized as Uint8Array — the + // runtime calls dehydrateRunError / dehydrateStepReturnValue / etc. + // before invoking events.create. Pass the bytes through unchanged + // so runs.get and the events stream return the same raw form that + // hydrateRunError / hydrateStepIO expect. CBOR-encoding here would + // double-wrap on write and (since runs.get bypasses the v4 frame + // decode) leave the consumer with cbor(Uint8Array) rather than the + // devalue blob it was looking for. + if (!(value instanceof Uint8Array)) { + // Surface non-Uint8Array values loudly — current SDK callers go + // through the dehydrate helpers, so anything else is either a + // legacy caller or a bug. + throw new TypeError( + `world-vercel v4: eventData.${payloadField} for ${data.eventType} ` + + `must be a Uint8Array (the runtime's dehydrated wire form); ` + + `got ${typeof value === 'object' ? (value === null ? 'null' : ((value as object).constructor?.name ?? 
typeof value)) : typeof value}.` + ); } + payload = value; } } - return pending; + return { payload, meta }; } /** - * Hydrate lazy-loaded events by resolving all ref descriptors client-side. - * For entity-level refs (eventDataRef), the resolved value becomes eventData. - * For nested refs (eventData[field]), the resolved value replaces the descriptor. + * Run an assembled event through EventSchema so per-event-type + * z.coerce.date() (wait_created.resumeAt, wait_completed.resumeAt, + * step_retrying.retryAfter) converts the ISO strings the backing store + * returns back into Date instances — the workflow runtime calls .getTime() on + * these and would otherwise crash. safeParse: pass the event through + * unchanged if it doesn't match a known shape (legacy / mid-rollout). * - * Events are shallow-cloned before mutation to avoid corrupting any upstream - * caches (SWR, React cache, etc.) that might hold references to the originals. + * Used by every path that hands events to the runtime: GET/LIST frames + * (via buildEventFromV4) and the POST response's `event` / preloaded + * `events` bag — all of these can carry events read back from the + * backing store, where nested eventData dates are stored as ISO strings. */ -async function hydrateEventRefs( - events: any[], - config?: APIConfig, - refResolveConcurrency?: number -): Promise { - const pending = collectPendingRefs(events); - if (pending.length === 0) return events; - - return trace('world.refs.hydrate', async (span) => { - span?.setAttribute('workflow.refs.hydrated_count', pending.length); - - // Deduplicate descriptors by _ref key to avoid redundant resolutions. - // Multiple events may reference the same ref (e.g., shared input). 
- const uniqueRefs = new Map(); - for (const p of pending) { - if (!uniqueRefs.has(p.descriptor._ref)) { - const eventRunId = events[p.eventIndex].runId as string; - uniqueRefs.set(p.descriptor._ref, { - descriptor: p.descriptor, - runId: eventRunId, - }); - } - } - const deduped = Array.from(uniqueRefs.values()); +function coerceEventDates(raw: Record): Event { + const parsed = EventSchema.safeParse(raw); + if (parsed.success) return parsed.data as unknown as Event; + if (EventTypeSchema.safeParse(raw.eventType).success) { + // The raw-event fallback is for unknown/future event types. A parse + // failure on a *known* type means a schema/coercion regression that + // would otherwise only surface later as a crash deep in the runtime + // (e.g. .getTime() on a resumeAt that stayed a string) — leave a + // breadcrumb at the actual failure point. + console.debug( + `[workflow:world-vercel] v4 event ${raw.eventId} failed ` + + `EventSchema parse for known eventType '${raw.eventType}'; ` + + `passing through unparsed: ${parsed.error.message}` + ); + } + return raw as unknown as Event; +} - // Resolve unique descriptors in parallel with bounded concurrency - const dedupedResults = await resolveRefDescriptors( - deduped, - config, - refResolveConcurrency - ).catch((err) => { - const msg = err instanceof Error ? err.message : String(err); - throw new Error( - `Failed to hydrate ${pending.length} ref(s) across ${events.length} event(s): ${msg}` - ); - }); +/** + * Turn a v4 event (frame meta + frame body) into the Event shape the + * workflow runtime expects. + * + * Both GET single-event and LIST use the same frame format: meta is the + * full event entity with the payload field as a RefDescriptor, body is + * the resolved payload bytes (possibly empty). This helper splices the + * body bytes into `eventData[fieldName]` unchanged — the runtime's + * hydrate helpers (hydrateStepIO, hydrateRunError, …) consume the raw + * devalue-with-format-prefix Uint8Array directly. 
No CBOR decode here, + * symmetric with the pass-through write in `splitEventDataForV4`. + */ +function buildEventFromV4( + decoded: DecodedV4Event, + payloadBody: Uint8Array, + resolveData: 'none' | 'all' +): Event { + const eventData = (decoded.eventData ?? {}) as Record; - // Build a map from ref key → resolved value for fast lookup - const resolvedMap = new Map(); - const dedupedKeys = Array.from(uniqueRefs.keys()); - for (let i = 0; i < dedupedKeys.length; i++) { - resolvedMap.set(dedupedKeys[i], dedupedResults[i]); - } + if (payloadBody.byteLength > 0) { + const payloadField = payloadFieldFor(decoded.eventType); + if (payloadField) eventData[payloadField] = payloadBody; + } - // Shallow-clone events that need modification, then apply resolved values - const result = [...events]; - for (let i = 0; i < pending.length; i++) { - const { eventIndex, refType, fieldName, descriptor } = pending[i]; - const resolved = resolvedMap.get(descriptor._ref); + const raw = { + eventId: decoded.eventId, + runId: decoded.runId, + eventType: decoded.eventType, + createdAt: + decoded.createdAt instanceof Date + ? decoded.createdAt + : new Date(decoded.createdAt), + ...(decoded.correlationId ? { correlationId: decoded.correlationId } : {}), + eventData, + ...(decoded.specVersion !== undefined + ? 
{ specVersion: decoded.specVersion } + : {}), + }; - // Shallow-clone the event (and eventData if nested) before mutating - if (result[eventIndex] === events[eventIndex]) { - result[eventIndex] = { ...events[eventIndex] }; - } - const event = result[eventIndex]; - - if (refType === 'entity') { - // Legacy: eventDataRef → eventData, remove the ref field - event.eventData = resolved; - delete event.eventDataRef; - } else if (refType === 'nested' && fieldName) { - // Shallow-clone eventData before mutating if not yet cloned - if (event.eventData === events[eventIndex].eventData) { - event.eventData = { ...event.eventData }; - } - // V2: replace the nested ref descriptor with resolved value - event.eventData[fieldName] = resolved; - } - } + const event = coerceEventDates(raw); - return result; - }); + // For resolveData='none', strip eventData entirely. Reuse the world- + // side helper so behavior stays in sync with other backends. + return resolveData === 'none' ? stripEventDataRefs(event, 'none') : event; } -// Functions +// ============================================================================= +// Public API +// ============================================================================= + export async function getEvent( runId: string, eventId: string, @@ -261,123 +449,38 @@ export async function getEvent( config?: APIConfig ): Promise { const resolveData = params?.resolveData ?? DEFAULT_RESOLVE_DATA_OPTION; - const remoteRefBehavior = resolveData === 'none' ? 'lazy' : 'resolve'; - - const searchParams = new URLSearchParams(); - searchParams.set('remoteRefBehavior', remoteRefBehavior); - - const queryString = searchParams.toString(); - const endpoint = `/v3/runs/${encodeURIComponent(runId)}/events/${encodeURIComponent(eventId)}${queryString ? `?${queryString}` : ''}`; - - const event = await makeRequest({ - endpoint, - options: { method: 'GET' }, - config, - schema: (resolveData === 'none' ? 
EventWithRefsSchema : EventSchema) as any, - }); - - return stripEventAndLegacyRefs(event as any, resolveData); + const { event, body } = await getEventV4(runId, eventId, config); + // Same shape as a LIST frame — splice the body bytes into + // eventData[payloadField] in buildEventFromV4. + return buildEventFromV4(event, body, resolveData); } export async function getWorkflowRunEvents( params: ListEventsParams | ListEventsByCorrelationIdParams, config?: APIConfig ): Promise> { - const searchParams = new URLSearchParams(); - const { pagination, resolveData = DEFAULT_RESOLVE_DATA_OPTION } = params; - let runId: string | undefined; - let correlationId: string | undefined; - if ('runId' in params) { - runId = params.runId; - } else { - correlationId = params.correlationId; - } - - if (!runId && !correlationId) { - throw new Error('Either runId or correlationId must be provided'); - } + const wirePagination = { + cursor: pagination?.cursor ?? undefined, + limit: pagination?.limit, + sortOrder: pagination?.sortOrder, + }; - if (pagination?.limit) searchParams.set('limit', pagination.limit.toString()); - if (pagination?.cursor) searchParams.set('cursor', pagination.cursor); - if (pagination?.sortOrder) - searchParams.set('sortOrder', pagination.sortOrder); - if (correlationId) searchParams.set('correlationId', correlationId); - - // Always send 'lazy' to the server to avoid memory pressure from resolving - // all refs in memory. When resolveData is 'all', we hydrate refs client-side - // via individual ref resolution requests. - searchParams.set('remoteRefBehavior', 'lazy'); - - const queryString = searchParams.toString(); - const query = queryString ? `?${queryString}` : ''; - const endpoint = correlationId - ? 
`/v2/events${query}` - : `/v3/runs/${encodeURIComponent(runId!)}/events${query}`; - - let refResolveConcurrency: number | undefined; - const response = (await makeRequest({ - endpoint, - options: { method: 'GET' }, - config, - schema: PaginatedResponseSchema(EventWithRefsSchema), - onResponse: (res) => { - const header = res.headers.get('x-ref-resolve-concurrency'); - if (header) { - const parsed = parseInt(header, 10); - if (!Number.isNaN(parsed) && parsed > 0) { - refResolveConcurrency = parsed; - } - } - }, - })) as PaginatedResponse; + const result = await ('correlationId' in params + ? getEventsByCorrelationIdV4(params.correlationId, wirePagination, config) + : getWorkflowRunEventsV4(params.runId, wirePagination, config)); - if (resolveData === 'all') { - // Hydrate refs client-side: resolve all ref descriptors in parallel - const hydratedEvents = await hydrateEventRefs( - response.data, - config, - refResolveConcurrency - ); + const events = result.events.map((listed) => + buildEventFromV4(listed.event, listed.body, resolveData) + ); - // Re-parse hydrated events through EventSchema to apply type coercions - // (e.g., z.coerce.date() for resumeAt) that EventWithRefsSchema skips. - // Use safeParse to gracefully handle any events that don't match a known - // type — pass them through as-is rather than failing the entire request. - let coercionFailures = 0; - const validatedEvents = hydratedEvents.map((event: any) => { - const result = EventSchema.safeParse(event); - if (!result.success) coercionFailures++; - return result.success ? 
result.data : event; - }); - if (coercionFailures > 0) { - console.warn( - `[world-vercel] EventSchema coercion failed for ${coercionFailures}/${hydratedEvents.length} events` - ); - } - - return { - ...response, - data: validatedEvents, - }; - } - - // resolveData === 'none': strip eventData and eventDataRef return { - ...response, - data: response.data.map((event: any) => - stripEventAndLegacyRefs(event, resolveData) - ), - }; + data: events, + cursor: result.next ?? null, + hasMore: Boolean(result.next), + } as PaginatedResponse; } -// Event types that require the hook to already exist — a 404 on these -// means the hook was already disposed or never created. -const hookEventsRequiringExistence = new Set([ - 'hook_disposed', - 'hook_received', -]); - export async function createWorkflowRunEvent( id: string | null, data: AnyEventRequest, @@ -387,10 +490,7 @@ export async function createWorkflowRunEvent( try { return await createWorkflowRunEventInner(id, data, params, config); } catch (err) { - // Translate 404 to HookNotFoundError for hook-related events. - // makeRequest() throws a generic WorkflowWorldError for all 404s; - // on the hook_disposed / hook_received path a 404 means the hook - // was already disposed or never created. + // 404 on hook_disposed / hook_received → already-disposed hook. if ( hookEventsRequiringExistence.has(data.eventType) && WorkflowWorldError.is(err) && @@ -409,99 +509,110 @@ async function createWorkflowRunEventInner( params?: CreateEventParams, config?: APIConfig ): Promise { - const resolveData = params?.resolveData ?? DEFAULT_RESOLVE_DATA_OPTION; - - const v1Compat = params?.v1Compat ?? false; - if (v1Compat) { + // v1Compat: caller wants the legacy entity-mutation endpoints (used + // for legacy spec-version runs that predate event sourcing). Keep all + // of this on v1 routes — the v4 protocol does not cover legacy runs. 
+ if (params?.v1Compat) { if (data.eventType === 'run_cancelled' && id) { const run = await cancelWorkflowRunV1(id, params, config); return { run: run as WorkflowRun }; - } else if (data.eventType === 'run_created') { + } + if (data.eventType === 'run_created') { const run = await createWorkflowRunV1(data.eventData, config); return { run }; } + if (id === null) { + throw new WorkflowWorldError( + `world-vercel: v1Compat=true requires a runId for ${data.eventType}`, + { status: 400 } + ); + } + // Catch-all for the remaining event types the runtime still emits + // against legacy runs (hook_received via resumeHook, wait_completed + // via wakeUpRun): POST to the legacy v1 events endpoint, same as the + // pre-v4 client did. const wireResult = await makeRequest({ - endpoint: `/v1/runs/${encodeURIComponent(id!)}/events`, + endpoint: `/v1/runs/${encodeURIComponent(id)}/events`, options: { method: 'POST' }, data, config, schema: EventSchema, }); - return { event: wireResult }; } - // Validate client-provided runId timestamp is within acceptable threshold - if (data.eventType === 'run_created' && id) { + if (id === null) { + throw new WorkflowWorldError( + 'world-vercel v4: createWorkflowRunEvent requires a client-generated ' + + 'runId for run_created (the runId is part of the payload storage ' + + 'ref key). Generate a wrun_ ULID before calling.', + { status: 400 } + ); + } + + // Defensive check for client-generated run_created IDs that ride too + // far ahead of wall-clock time — same threshold the v3 path enforced. + if (data.eventType === 'run_created') { const validationError = validateUlidTimestamp(id, 'wrun_'); if (validationError) { throw new WorkflowWorldError(validationError, { status: 400 }); } } - // For run_created events, runId may be client-provided or null - const runIdPath = id === null ? 'null' : encodeURIComponent(id); - const remoteRefBehavior = eventsNeedingResolve.has(data.eventType) ? 
'resolve' : 'lazy'; - // Use the strict schema when the server resolves all refs (preserves the - // WorkflowRunSchema discriminated union), and the loose wire schema when - // the server returns lazy refs (error may be a string or undefined). - if (remoteRefBehavior === 'resolve') { - const wireResult = await makeRequest({ - endpoint: `/v3/runs/${runIdPath}/events`, - options: { method: 'POST' }, - data: { - ...data, - remoteRefBehavior, - ...(params?.requestId ? { vercelId: params.requestId } : {}), - }, - config, - schema: EventResultResolveWireSchema, - }); + const { payload, meta } = splitEventDataForV4(data); - return { - event: wireResult.event - ? stripEventAndLegacyRefs(wireResult.event, resolveData) - : undefined, - run: wireResult.run, - step: wireResult.step ? deserializeStep(wireResult.step) : undefined, - hook: wireResult.hook, - events: wireResult.events, - cursor: wireResult.cursor, - hasMore: wireResult.hasMore, - }; - } - - const wireResult = await makeRequest({ - endpoint: `/v3/runs/${runIdPath}/events`, - options: { method: 'POST' }, - data: { - ...data, - remoteRefBehavior, + const result = await createWorkflowRunEventV4( + { + runId: id, + eventType: data.eventType, + specVersion: data.specVersion ?? 2, + ...(data.correlationId ? { correlationId: data.correlationId } : {}), ...(params?.requestId ? { vercelId: params.requestId } : {}), + remoteRefBehavior, + payload, + ...meta, }, - config, - schema: EventResultLazyWireSchema, - }); - - // Transform wire format to interface format. - // The run entity from the wire may have error as a string (legacy) or - // undefined (lazy ref mode), so deserializeError normalizes it into the - // StructuredError shape expected by WorkflowRun consumers. + config + ); + + // The server already CBOR-decoded into result.body — just thread the + // fields through. 
Step has a wire-format adapter; runs use the + // pass-through deserializeError helper (run/step dates arrive as real + // Dates — the server's entity getters convert before CBOR-encoding). + // The returned `event` and preloaded `events` go through + // coerceEventDates: they can be read back from the backing store + // server-side (e.g. the run_started TTFB preload queries the event + // log), where nested eventData dates are ISO strings — same coercion + // the GET/LIST path applies, and the v3 path applied via its zod wire + // schemas. + // The returned event honors the caller's resolveData: 'none' strips + // payload fields, matching the v3 path's stripEventAndLegacyRefs + // behavior and the Storage contract. + const resolveData = params?.resolveData ?? DEFAULT_RESOLVE_DATA_OPTION; + const body = result.body; return { - event: wireResult.event - ? stripEventAndLegacyRefs(wireResult.event, resolveData) + event: body.event + ? stripEventDataRefs( + coerceEventDates(body.event as Record), + resolveData + ) + : undefined, + run: body.run + ? deserializeError(body.run as Record) + : undefined, + step: body.step + ? deserializeStep(body.step as Parameters[0]) : undefined, - run: wireResult.run - ? deserializeError(wireResult.run) + hook: body.hook as EventResult['hook'], + wait: body.wait as EventResult['wait'], + events: body.events + ? (body.events as Record[]).map(coerceEventDates) : undefined, - step: wireResult.step ? deserializeStep(wireResult.step) : undefined, - hook: wireResult.hook, - events: wireResult.events, - cursor: wireResult.cursor, - hasMore: wireResult.hasMore, + cursor: body.cursor ?? 
undefined, + hasMore: body.hasMore, }; } diff --git a/packages/world-vercel/src/frames.test.ts b/packages/world-vercel/src/frames.test.ts new file mode 100644 index 0000000000..966bab97ab --- /dev/null +++ b/packages/world-vercel/src/frames.test.ts @@ -0,0 +1,217 @@ +import { decode, encode } from 'cbor-x'; +import { describe, expect, it } from 'vitest'; +import { + type DecodedFrame, + decodeFrames, + encodeFrame, + V4_FRAME_CONTENT_TYPE, +} from './frames.js'; + +/** Server's wire encoder (matches the world-vercel backend's v4 end-frame + * helper). Re-implemented here so the client tests don't depend on + * importing from another package. */ +function encodeEndFrame(next?: string): Uint8Array { + const meta: Record = { _end: 1 }; + if (next) meta.next = next; + return encodeFrame(meta, new Uint8Array(0)); +} + +/** Build a ReadableStream that yields `payload` in fixed-size chunks. Used to + * stress chunk-boundary handling in the decoder. */ +function streamOf(payload: Uint8Array, chunkSize: number) { + let offset = 0; + return new ReadableStream({ + pull(controller) { + if (offset >= payload.byteLength) { + controller.close(); + return; + } + const end = Math.min(offset + chunkSize, payload.byteLength); + controller.enqueue(payload.subarray(offset, end)); + offset = end; + }, + }); +} + +async function drainFrames( + source: ReadableStream +): Promise { + const out: DecodedFrame[] = []; + for await (const f of decodeFrames(source)) out.push(f); + return out; +} + +describe('encodeFrame', () => { + it('produces the canonical wire layout', () => { + const meta = { eventId: 'evnt_abc', n: 42 }; + const body = new Uint8Array([1, 2, 3, 4, 5]); + const frame = encodeFrame(meta, body); + const view = new DataView(frame.buffer); + const metaLen = view.getUint32(0, false); + expect(decode(frame.subarray(4, 4 + metaLen))).toEqual(meta); + const bodyLen = view.getUint32(4 + metaLen, false); + expect(bodyLen).toBe(body.byteLength); + expect(frame.subarray(4 + metaLen + 
4)).toEqual(body); + }); +}); + +describe('decodeFrames', () => { + it('round-trips a single frame', async () => { + const meta = { eventType: 'run_created', eventId: 'evnt_1' }; + const body = new TextEncoder().encode('{"hello":"world"}'); + const stream = streamOf( + new Uint8Array([...encodeFrame(meta, body), ...encodeEndFrame()]), + 4096 + ); + const frames = await drainFrames(stream); + expect(frames).toHaveLength(2); + expect(frames[0].meta).toEqual(meta); + expect(frames[0].body).toEqual(body); + expect(frames[1].meta).toEqual({ _end: 1 }); + }); + + it('round-trips multiple frames with cursor', async () => { + const body1 = new TextEncoder().encode('one'); + const body2 = new Uint8Array(64).fill(0xab); + const parts = [ + encodeFrame({ eventId: 'a' }, body1), + encodeFrame({ eventId: 'b' }, body2), + encodeEndFrame('cursor-xyz'), + ]; + let total = 0; + for (const p of parts) total += p.byteLength; + const flat = new Uint8Array(total); + let off = 0; + for (const p of parts) { + flat.set(p, off); + off += p.byteLength; + } + const frames = await drainFrames(streamOf(flat, 256)); + expect(frames).toHaveLength(3); + expect(frames[0].meta).toEqual({ eventId: 'a' }); + expect(frames[0].body).toEqual(body1); + expect(frames[1].meta).toEqual({ eventId: 'b' }); + expect(frames[1].body).toEqual(body2); + expect(frames[2].meta).toEqual({ _end: 1, next: 'cursor-xyz' }); + expect(frames[2].body.byteLength).toBe(0); + }); + + it('handles delivery in 1-byte chunks (worst-case chunk boundary)', async () => { + const body = new Uint8Array(1024); + for (let i = 0; i < body.length; i++) body[i] = (i * 13 + 5) & 0xff; + const flat = new Uint8Array([ + ...encodeFrame({ eventType: 'big', n: 99 }, body), + ...encodeEndFrame(), + ]); + const frames = await drainFrames(streamOf(flat, 1)); + expect(frames).toHaveLength(2); + expect(frames[0].meta).toEqual({ eventType: 'big', n: 99 }); + expect(frames[0].body).toEqual(body); + expect(frames[1].meta).toEqual({ _end: 1 }); + }); + + 
it('handles a 64 KB body split across many small chunks', async () => { + const body = new Uint8Array(64 * 1024); + for (let i = 0; i < body.length; i++) body[i] = (i * 7) & 0xff; + const flat = new Uint8Array([ + ...encodeFrame({ eventId: 'big' }, body), + ...encodeEndFrame(), + ]); + const frames = await drainFrames(streamOf(flat, 37)); + expect(frames[0].body.byteLength).toBe(body.byteLength); + expect(frames[0].body[0]).toBe(body[0]); + expect(frames[0].body[body.length - 1]).toBe(body[body.length - 1]); + }); + + it('handles frames whose body contains bytes that look like length prefixes', async () => { + // 0xff bytes that could trip up a parser that scans for u32 patterns + // rather than honoring the explicit length prefixes. + const body = new Uint8Array(32).fill(0xff); + const flat = new Uint8Array([ + ...encodeFrame({ eventId: 'tricky' }, body), + ...encodeEndFrame(), + ]); + const frames = await drainFrames(streamOf(flat, 7)); + expect(frames[0].body).toEqual(body); + }); + + it('handles back-to-back frames in a single chunk', async () => { + const flat = new Uint8Array([ + ...encodeFrame({ id: 1 }, new Uint8Array([10, 20, 30])), + ...encodeFrame({ id: 2 }, new Uint8Array([40, 50, 60])), + ...encodeFrame({ id: 3 }, new Uint8Array(0)), + ...encodeEndFrame(), + ]); + const frames = await drainFrames(streamOf(flat, flat.byteLength)); + expect(frames).toHaveLength(4); + expect(frames[2].body.byteLength).toBe(0); + expect(frames[3].meta._end).toBe(1); + }); + + it('throws when the stream ends mid-frame', async () => { + const partial = encodeFrame({ x: 1 }, new Uint8Array(100)).slice(0, 20); + const stream = streamOf(partial, 1024); + await expect(drainFrames(stream)).rejects.toThrow(/truncated/); + }); + + it('preserves CBOR types in meta (numbers, booleans, arrays)', async () => { + const meta = { + eventId: 'mix', + attempt: 4, + isWebhook: true, + tags: ['a', 'b'], + n: 12345, + }; + const flat = new Uint8Array([ + ...encodeFrame(meta, new 
Uint8Array(0)), + ...encodeEndFrame(), + ]); + const frames = await drainFrames(streamOf(flat, 32)); + expect(frames[0].meta).toEqual(meta); + expect(typeof frames[0].meta.attempt).toBe('number'); + expect(typeof frames[0].meta.isWebhook).toBe('boolean'); + expect(Array.isArray(frames[0].meta.tags)).toBe(true); + }); +}); + +describe('decodeFrames from an AsyncIterable source', () => { + // Regression guard: production feeds undici's response body (an + // AsyncIterable of Buffer chunks) into decodeFrames directly. The + // previous node:stream Readable.toWeb conversion crashed in Next.js + // webpack server bundles (`(await import('node:stream')).Readable` is + // undefined there), so the decoder must not require a Web stream. + async function* chunked(payload: Uint8Array, chunkSize: number) { + for (let offset = 0; offset < payload.byteLength; offset += chunkSize) { + // Yield Buffer (not Uint8Array) chunks, like undici does. + yield Buffer.from( + payload.subarray( + offset, + Math.min(offset + chunkSize, payload.byteLength) + ) + ); + } + } + + it('round-trips frames from an async generator of Buffer chunks', async () => { + const body = new Uint8Array([9, 8, 7]); + const flat = new Uint8Array([ + ...encodeFrame({ eventId: 'evnt_1', eventType: 'run_created' }, body), + ...encodeEndFrame('cursor-1'), + ]); + const frames: DecodedFrame[] = []; + for await (const f of decodeFrames(chunked(flat, 3))) frames.push(f); + expect(frames).toHaveLength(2); + expect(frames[0].meta).toEqual({ + eventId: 'evnt_1', + eventType: 'run_created', + }); + expect(frames[0].body).toEqual(body); + expect(frames[1].meta).toEqual({ _end: 1, next: 'cursor-1' }); + }); +}); + +describe('V4_FRAME_CONTENT_TYPE', () => { + it('matches the server-side content type', () => { + expect(V4_FRAME_CONTENT_TYPE).toBe('application/vnd.workflow.v4-frames'); + }); +}); diff --git a/packages/world-vercel/src/frames.ts b/packages/world-vercel/src/frames.ts new file mode 100644 index 
0000000000..b8c041e4df --- /dev/null +++ b/packages/world-vercel/src/frames.ts @@ -0,0 +1,130 @@ +/** + * Length-prefixed binary frame codec for the v4 list-events response. + * + * Mirrors the server-side encoder in the world-vercel backend. Wire format: + * + * list-response := frame* end-frame + * frame := u32_be(meta_len) || cbor_meta || u32_be(body_len) || body_bytes + * end-frame := u32_be(meta_len) || cbor_meta {_end: 1, next?: string} || u32_be(0) + */ + +import { decode, encode } from 'cbor-x'; + +export const V4_FRAME_CONTENT_TYPE = 'application/vnd.workflow.v4-frames'; + +export interface DecodedFrame { + meta: Record; + body: Uint8Array; +} + +/** Test/utility: encode a complete frame. Production server uses prefix + * + streaming body. */ +export function encodeFrame( + meta: Record, + body: Uint8Array +): Uint8Array { + const metaBytes = new Uint8Array(encode(meta)); + const out = new Uint8Array(4 + metaBytes.byteLength + 4 + body.byteLength); + const view = new DataView(out.buffer); + view.setUint32(0, metaBytes.byteLength, false); + out.set(metaBytes, 4); + view.setUint32(4 + metaBytes.byteLength, body.byteLength, false); + out.set(body, 4 + metaBytes.byteLength + 4); + return out; +} + +/** + * Async-iterable parser for a frame stream. Yields one `DecodedFrame` + * per frame in source order, terminating at the sentinel frame whose + * meta contains `_end: 1`. The sentinel frame itself IS yielded — the + * caller inspects `meta._end` to detect end-of-stream and reads + * `meta.next` for the pagination cursor. + * + * Accepts any async iterable of byte chunks (undici response bodies, + * Node Readables) as well as a Web ReadableStream. Notably it must NOT + * require a `node:stream` conversion: `Readable.toWeb` via dynamic + * `import('node:stream')` resolves to an empty namespace in Next.js + * webpack server bundles and crashes at runtime. 
+ * + * Survives arbitrary chunk boundaries from the source stream, including + * splits that fall in the middle of a u32 length prefix or in the + * middle of the CBOR meta block. + */ +export async function* decodeFrames( + source: AsyncIterable | ReadableStream +): AsyncGenerator { + const chunks = + Symbol.asyncIterator in source + ? (source as AsyncIterable)[Symbol.asyncIterator]() + : readerToIterator((source as ReadableStream).getReader()); + // Accumulating buffer of bytes we've read but not yet consumed. + let buffer = new Uint8Array(0); + + const refill = async (needed: number): Promise => { + while (buffer.byteLength < needed) { + const { done, value } = await chunks.next(); + if (done) return false; + if (!value || value.byteLength === 0) continue; + const next = new Uint8Array(buffer.byteLength + value.byteLength); + next.set(buffer, 0); + next.set(value, buffer.byteLength); + buffer = next; + } + return true; + }; + + const take = (n: number): Uint8Array => { + const out = buffer.subarray(0, n); + buffer = buffer.subarray(n); + return out; + }; + + while (true) { + if (!(await refill(4))) return; + const metaLen = new DataView(buffer.buffer, buffer.byteOffset, 4).getUint32( + 0, + false + ); + take(4); + + if (!(await refill(metaLen))) { + throw new Error('decodeFrames: truncated meta block'); + } + const meta = decode(take(metaLen)) as Record; + + if (!(await refill(4))) { + throw new Error('decodeFrames: truncated body length'); + } + const bodyLen = new DataView(buffer.buffer, buffer.byteOffset, 4).getUint32( + 0, + false + ); + take(4); + + if (bodyLen > 0) { + if (!(await refill(bodyLen))) { + throw new Error('decodeFrames: truncated body bytes'); + } + // Slice (not subarray) so the yielded body owns its bytes — + // subsequent reads into the buffer won't overwrite it. 
+ yield { meta, body: buffer.slice(0, bodyLen) }; + take(bodyLen); + } else { + yield { meta, body: new Uint8Array(0) }; + } + + if (meta._end === 1) return; + } +} + +/** Adapt a Web ReadableStream reader to the async-iterator protocol for + * runtimes where ReadableStream itself is not async-iterable. */ +async function* readerToIterator( + reader: ReadableStreamDefaultReader +): AsyncGenerator { + while (true) { + const { done, value } = await reader.read(); + if (done) return; + if (value) yield value; + } +} diff --git a/packages/world-vercel/src/refs.test.ts b/packages/world-vercel/src/refs.test.ts deleted file mode 100644 index 04537fae14..0000000000 --- a/packages/world-vercel/src/refs.test.ts +++ /dev/null @@ -1,333 +0,0 @@ -import { encode } from 'cbor-x'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; - -const { mockFetch } = vi.hoisted(() => ({ - mockFetch: vi.fn(), -})); -vi.stubGlobal('fetch', mockFetch); -vi.mock('./http-client.js', () => ({ - getDispatcher: vi.fn().mockReturnValue({}), -})); - -// Mock the auth flow used by getHttpConfig so we don't hit OIDC endpoints. 
-vi.mock('./utils.js', async () => { - const actual = - await vi.importActual('./utils.js'); - return { - ...actual, - getHttpConfig: vi.fn().mockResolvedValue({ - baseUrl: 'https://workflow-server.test', - headers: new Headers(), - }), - }; -}); - -import type { RefDescriptor } from './refs.js'; -import { resolveRefDescriptor } from './refs.js'; - -const TEST_RUN_ID = 'wrun_01TEST00000000000000000000'; -const TEST_REF = `s3rf:team_o:prj_p:production:${TEST_RUN_ID}:wf:01TEST`; - -function s3RemoteRef(ref: string = TEST_REF): RefDescriptor { - return { _type: 'RemoteRef', _ref: ref }; -} - -describe('resolveRefDescriptor', () => { - beforeEach(() => { - mockFetch.mockReset(); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - it('decodes CBOR payloads on the happy path', async () => { - const payload = { value: 'ok', padding: 'x'.repeat(100) }; - const encoded = encode(payload); - mockFetch.mockResolvedValueOnce( - new Response(encoded, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': String(encoded.byteLength), - }, - }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toEqual(payload); - }); - - it('returns Uint8Array for application/octet-stream payloads', async () => { - const payload = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8]); - mockFetch.mockResolvedValueOnce( - new Response(payload, { - status: 200, - headers: { - 'Content-Type': 'application/octet-stream', - 'Content-Length': String(payload.byteLength), - }, - }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toBeInstanceOf(Uint8Array); - expect(Array.from(result as Uint8Array)).toEqual(Array.from(payload)); - }); - - it('throws when the server returns a zero-byte 200 (CBOR Content-Type)', async () => { - // This is the production failure mode we're guarding against. 
A 200 - // with an empty body would otherwise be passed downstream as a - // zero-length Uint8Array / decoded as undefined, then corrupt the - // workflow's event-log replay. - mockFetch.mockResolvedValueOnce( - new Response(new Uint8Array(0), { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': '0', - }, - }) - ); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/zero-byte body/); - }); - - it('throws when the server returns a zero-byte 200 (octet-stream Content-Type)', async () => { - mockFetch.mockResolvedValueOnce( - new Response(new Uint8Array(0), { - status: 200, - headers: { - 'Content-Type': 'application/octet-stream', - 'Content-Length': '0', - }, - }) - ); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/zero-byte body/); - }); - - it('throws when actual body length disagrees with Content-Length', async () => { - // Simulate a truncated response: server declared 1024 bytes but only - // 16 actually came through (e.g. an upstream proxy aborted the - // stream mid-flight). Without this check we'd CBOR-decode the - // truncated bytes and either fail with a confusing CBOR error or, - // worse, decode to a structurally valid but semantically wrong - // value. - const truncated = new Uint8Array(16); - mockFetch.mockResolvedValueOnce( - new Response(truncated, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': '1024', - }, - }) - ); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/length mismatch/); - }); - - it('skips the length check for compressed (Content-Encoding) responses', async () => { - // fetch/undici transparently decompresses gzip/br bodies but leaves - // Content-Length describing the *encoded* (compressed) size. 
The - // decompressed byteLength therefore legitimately differs from the - // header, and without skipping the check a valid compressed ref would - // be wrongly rejected as a phantom truncation. - const payload = { value: 'ok', padding: 'x'.repeat(100) }; - const encoded = encode(payload); - mockFetch.mockResolvedValueOnce( - new Response(encoded, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - // Declared (compressed) length is much smaller than the - // decompressed body we actually receive. - 'Content-Length': '20', - 'Content-Encoding': 'gzip', - }, - }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toEqual(payload); - }); - - it('still enforces the length check for identity Content-Encoding', async () => { - // `Content-Encoding: identity` means no transform was applied, so the - // declared length is directly comparable and a mismatch is still a - // real truncation. - const truncated = new Uint8Array(16); - mockFetch.mockResolvedValueOnce( - new Response(truncated, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': '1024', - 'Content-Encoding': 'identity', - }, - }) - ); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/length mismatch/); - }); - - it('throws when a binary body is shorter than the format-prefix length (with Content-Length)', async () => { - // The SDK guarantees a 4-byte format prefix on every stored binary - // ref payload. A 1-3 byte octet-stream body — even one that "agrees" - // with the declared Content-Length — would still deterministically - // fail downstream replay with "Data too short to contain format - // prefix". We catch it at the transport boundary. 
- const tooShort = new Uint8Array([0x01, 0x02, 0x03]); - mockFetch.mockResolvedValueOnce( - new Response(tooShort, { - status: 200, - headers: { - 'Content-Type': 'application/octet-stream', - 'Content-Length': '3', - }, - }) - ); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/truncated 3-byte binary body/); - }); - - it('throws when a binary body is shorter than the format-prefix length (no Content-Length)', async () => { - // Same as above but for chunked transfer where Content-Length is - // absent. This is the case the Content-Length validator can't see, - // so the minimum-length defense is what protects us. Without it, - // a 1–3 byte truncated binary response in chunked mode would still - // flow downstream and trigger the same "Data too short" failure that - // poisons the in-memory event log. - const tooShort = new Uint8Array([0xfa]); - mockFetch.mockResolvedValueOnce( - new Response(tooShort, { - status: 200, - headers: new Headers({ 'Content-Type': 'application/octet-stream' }), - }) - ); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/truncated 1-byte binary body/); - }); - - it('decodes a 1-byte CBOR primitive (the 4-byte minimum is binary-only)', async () => { - // The server stores non-binary values as raw CBOR and CBOR - // primitives (true/false/null/small ints) encode to a single byte. - // The 4-byte format-prefix minimum must NOT reject these — it only - // applies to application/octet-stream binary payloads. 
- const encoded = encode(true); - expect(encoded.byteLength).toBe(1); - mockFetch.mockResolvedValueOnce( - new Response(encoded, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': '1', - }, - }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toBe(true); - }); - - it('ignores a malformed Content-Length header instead of misreporting truncation', async () => { - // Some upstream paths could in theory emit a non-numeric or - // otherwise malformed Content-Length (e.g. proxy bugs). parseInt - // would happily turn "not-a-number" into NaN (surfacing a phantom - // "truncated" error) or "12junk" into 12 (a false mismatch). We only - // accept a plain run of digits and treat anything else as absent. - const payload = { ok: true }; - const encoded = encode(payload); - mockFetch.mockResolvedValueOnce( - new Response(encoded, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': 'not-a-number', - }, - }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toEqual(payload); - }); - - it('ignores a numeric-prefixed Content-Length instead of fabricating a mismatch', async () => { - // parseInt("12junk") === 12, which could fabricate a phantom - // length-mismatch error against a perfectly valid body. The strict - // all-digits check treats this as absent. 
- const payload = { ok: true }; - const encoded = encode(payload); - mockFetch.mockResolvedValueOnce( - new Response(encoded, { - status: 200, - headers: { - 'Content-Type': 'application/cbor', - 'Content-Length': `${encoded.byteLength}junk`, - }, - }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toEqual(payload); - }); - - it('still decodes when Content-Length header is absent (transfer-encoding: chunked)', async () => { - // Some upstream paths drop the Content-Length header (chunked - // transfer encoding). In that case we have nothing to validate - // against, so only the minimum-length check applies. - const payload = { ok: true }; - const encoded = encode(payload); - const headers = new Headers({ 'Content-Type': 'application/cbor' }); - mockFetch.mockResolvedValueOnce( - new Response(encoded, { status: 200, headers }) - ); - - const result = await resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID); - - expect(result).toEqual(payload); - }); - - it('throws WorkflowWorldError when the server returns a non-2xx status', async () => { - mockFetch.mockResolvedValueOnce(new Response('not found', { status: 404 })); - - await expect( - resolveRefDescriptor(s3RemoteRef(), TEST_RUN_ID) - ).rejects.toThrow(/HTTP 404/); - }); - - it('decodes inline dbrf refs without making a network request', async () => { - const payload = { inline: true }; - const encoded = encode(payload); - const ref: RefDescriptor = { - _type: 'RemoteRef', - _ref: 'dbrf:team_o:prj_p:production:wrun_x:wf:01INLINE', - _data: Buffer.from(encoded).toString('base64'), - _ct: 'application/cbor', - }; - - const result = await resolveRefDescriptor(ref, TEST_RUN_ID); - - expect(result).toEqual(payload); - expect(mockFetch).not.toHaveBeenCalled(); - }); -}); diff --git a/packages/world-vercel/src/refs.ts b/packages/world-vercel/src/refs.ts deleted file mode 100644 index 0c7cba58fb..0000000000 --- a/packages/world-vercel/src/refs.ts +++ /dev/null @@ -1,352 
+0,0 @@ -import type { Span } from '@opentelemetry/api'; -import { WorkflowWorldError } from '@workflow/errors'; -import { decode } from 'cbor-x'; -import { getDispatcher } from './http-client.js'; -import { - ErrorType, - getSpanKind, - HttpRequestMethod, - HttpResponseStatusCode, - PeerService, - trace, - UrlFull, -} from './telemetry.js'; -import { type APIConfig, getHttpConfig } from './utils.js'; - -/** - * A ref descriptor as returned by workflow-server when `remoteRefBehavior=lazy`. - * Matches the server-side `RefDescriptor` type in `lib/data/remote-ref.ts`. - */ -export interface RefDescriptor { - _type: 'RemoteRef'; - _ref: string; - /** Base64-encoded inline payload. Present only for dbrf: (inline) refs. */ - _data?: string; - /** Content type of the inline payload. Present only for dbrf: refs. */ - _ct?: string; -} - -/** - * Checks if a value is a RefDescriptor object. - */ -export function isRefDescriptor(value: unknown): value is RefDescriptor { - return ( - typeof value === 'object' && - value !== null && - '_type' in value && - '_ref' in value && - typeof (value as { _ref: unknown })._ref === 'string' && - (value as { _type: string })._type === 'RemoteRef' - ); -} - -/** - * Maximum number of concurrent ref resolution requests. - * Limits peak concurrency to avoid overwhelming the server. - */ -const REF_RESOLVE_CONCURRENCY = 10; - -/** - * Minimum number of bytes a stored *binary* (octet-stream) ref payload can - * ever be. - * - * The SDK writes every binary ref payload with a 4-byte format prefix (see - * `FORMAT_PREFIX_LENGTH` / `encodeWithFormatPrefix` in - * `@workflow/core/src/serialization/format.ts`). A 1–3 byte - * `application/octet-stream` body is therefore never a valid stored binary - * ref — it indicates a server-side corruption or a transport-layer - * truncation (proxy drop, edge-cache miss returning a partial 200, abort - * during streaming, etc.). 
- * - * Without a transport-layer guard, those bytes would flow into the - * workflow's deterministic event-log replay and fail downstream with - * `Data too short to contain format prefix: expected at least 4 bytes, got N`. - * By that point the in-memory event snapshot is already poisoned: the same - * failure replays deterministically forever, downstream `resumeHook()` - * calls surface as `Hook not found`, and the run only unsticks when - * stale-run cleanup terminates the sandbox. - * - * This minimum does NOT apply to `application/cbor` refs: the server still - * stores non-binary values as raw CBOR (`S3RemoteRef`/Redis refs encode - * non-`Uint8Array` values and mark them `application/cbor`), and valid CBOR - * primitives like `true`, `0`, or `null` encode to a single byte. - */ -const MIN_BINARY_REF_PAYLOAD_BYTES = 4; - -/** - * Defense-in-depth validation for ref resolve response bodies. - * - * Rejects: - * 1. Zero-byte bodies (never valid for any content type — an empty body - * cannot CBOR-decode and is never a valid stored binary payload). - * 2. `application/octet-stream` bodies shorter than the 4-byte format - * prefix the SDK always writes ({@link MIN_BINARY_REF_PAYLOAD_BYTES}). - * CBOR bodies are exempt because valid CBOR primitives are 1 byte. - * 3. Bodies whose actual length disagrees with a well-formed - * `Content-Length` header (catches truncation when the server - * declared a length we can compare against). - * - * A `Content-Length` that is not a plain run of digits (e.g. `"abc"`, - * `"12junk"`, `"12, 12"`, negative) is treated as absent — surfacing it as - * a "truncated" error would mask the actual cause. The checks above still - * defend against truncation in that case. - * - * The length comparison is also skipped when the response carries a - * non-`identity` `Content-Encoding` (e.g. `gzip`, `br`). 
`fetch`/`undici` - * transparently decompresses the body but leaves `Content-Length` - * describing the *encoded* size, so the decompressed `byteLength` would not - * match the header and a perfectly valid compressed ref would otherwise be - * rejected as a phantom truncation. - * - * Throws {@link WorkflowWorldError} so the runtime retry layer can treat - * this as a transport-level error instead of poisoning replay. - */ -function assertValidRefBody( - buffer: ArrayBuffer, - ctx: { - ref: string; - url: string; - status: number; - contentType: string; - contentLengthHeader: string | null; - contentEncodingHeader: string | null; - span: Span | undefined; - } -): void { - const { - ref, - url, - status, - contentType, - contentLengthHeader, - contentEncodingHeader, - span, - } = ctx; - const actualLength = buffer.byteLength; - const isBinary = contentType.includes('application/octet-stream'); - - const throwInvalid = (code: string, message: string): never => { - const error = new WorkflowWorldError(message, { url, status, code }); - span?.setAttributes({ ...ErrorType(code) }); - span?.recordException?.(error); - throw error; - }; - - if (actualLength === 0) { - throwInvalid( - 'empty-ref-body', - `Ref resolve returned a zero-byte body for ${ref} (Content-Type=${contentType || ''}). Refusing to corrupt the event log with an empty payload.` - ); - } - - // The 4-byte format-prefix minimum only applies to raw binary payloads. - // CBOR refs can legitimately be 1–3 bytes (e.g. the primitives true/0/null). - if (isBinary && actualLength < MIN_BINARY_REF_PAYLOAD_BYTES) { - throwInvalid( - 'truncated-ref-body', - `Ref resolve returned a truncated ${actualLength}-byte binary body for ${ref} (Content-Type=${contentType}); minimum valid payload is ${MIN_BINARY_REF_PAYLOAD_BYTES} bytes (4-byte format prefix).` - ); - } - - if (contentLengthHeader == null) return; - - // Skip the comparison for compressed responses. 
fetch/undici transparently - // decompresses the body but leaves Content-Length describing the encoded - // (compressed) size, so the decompressed byteLength legitimately differs - // from the header. An absent or `identity` encoding means no transform was - // applied, so the lengths are directly comparable. - if (contentEncodingHeader != null) { - const encoding = contentEncodingHeader.trim().toLowerCase(); - if (encoding !== '' && encoding !== 'identity') return; - } - - // Only a plain run of digits is a well-formed Content-Length. parseInt - // would happily accept numeric-prefixed garbage ("12junk" -> 12, - // "12, 12" -> 12), which could fabricate a phantom mismatch or silently - // accept an invalid header. Anything else is treated as absent. - if (!/^\d+$/.test(contentLengthHeader)) return; - - const declaredLength = Number.parseInt(contentLengthHeader, 10); - if (declaredLength === actualLength) return; - - throwInvalid( - 'ref-body-length-mismatch', - `Ref resolve body length mismatch for ${ref}: Content-Length=${contentLengthHeader}, actual=${actualLength} bytes. The response body was truncated in transit; refusing to use it.` - ); -} - -/** - * Resolve a single ref descriptor. - * - * For inline refs (dbrf: prefix), the data is decoded locally from the - * descriptor's `_data` field — no network request is needed. - * - * For S3 refs (s3rf:) and Redis refs (kvrf:), a request is made to the - * `GET /v2/runs/:runId/refs` endpoint on workflow-server which returns - * raw CBOR or binary bytes. 
- * - * @param descriptor - The ref descriptor to resolve - * @param runId - The runId that owns this ref (used in the URL path) - * @param config - API configuration - */ -export async function resolveRefDescriptor( - descriptor: RefDescriptor, - runId: string, - config?: APIConfig -): Promise { - const ref = descriptor._ref; - - // Inline refs (dbrf:) carry their data in the descriptor — decode locally - if (ref.startsWith('dbrf:')) { - if (!descriptor._data) { - throw new Error(`Inline ref descriptor missing _data field: ${ref}`); - } - const contentType = descriptor._ct ?? 'application/cbor'; - const binaryData = Buffer.from(descriptor._data, 'base64'); - if (contentType === 'application/octet-stream') { - // Buffer is a Uint8Array subclass — return directly to avoid a copy. - return binaryData; - } - // CBOR-encoded data — decode it. Buffer is accepted by cbor-x directly. - return decode(binaryData); - } - - // Remote refs (s3rf:, kvrf:) — fetch raw bytes from the server. - // The server returns the raw stored bytes directly (not wrapped in a - // JSON/CBOR envelope). The Content-Type may be 'application/cbor' (for - // CBOR-encoded data) or 'application/octet-stream' (for raw binary like - // Uint8Array). We handle both content types directly rather than going - // through makeRequest, which only handles JSON/CBOR API responses. - const { baseUrl, headers } = await getHttpConfig(config); - const endpoint = `/v2/runs/${encodeURIComponent(runId)}/refs?ref=${encodeURIComponent(ref)}`; - const url = `${baseUrl}${endpoint}`; - - // Set headers that makeRequest normally adds: Accept for content - // negotiation and X-Request-Time to bypass RSC request memoization. 
- headers.set('Accept', 'application/cbor, application/octet-stream'); - headers.set('X-Request-Time', Date.now().toString()); - - return trace( - 'http GET', - { kind: await getSpanKind('CLIENT') }, - async (span) => { - span?.setAttributes({ - ...HttpRequestMethod('GET'), - ...UrlFull(url), - ...PeerService('workflow-server'), - }); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any -- undici v7 dispatcher types don't match @types/node's RequestInit - const response = await fetch(url, { - method: 'GET', - headers, - dispatcher: getDispatcher(config), - } as any); - - span?.setAttributes({ - ...HttpResponseStatusCode(response.status), - }); - - if (!response.ok) { - const error = new WorkflowWorldError( - `Failed to resolve ref: HTTP ${response.status}`, - { url, status: response.status } - ); - span?.setAttributes({ - ...ErrorType(`HTTP ${response.status}`), - }); - span?.recordException?.(error); - throw error; - } - - const contentType = response.headers.get('content-type') || ''; - const contentLengthHeader = response.headers.get('content-length'); - const contentEncodingHeader = response.headers.get('content-encoding'); - const buffer = await response.arrayBuffer(); - - assertValidRefBody(buffer, { - ref, - url, - status: response.status, - contentType, - contentLengthHeader, - contentEncodingHeader, - span, - }); - - if (contentType.includes('application/octet-stream')) { - // Raw binary data (e.g., Uint8Array stored by the workflow) - return new Uint8Array(buffer); - } - - // CBOR-encoded data (the common case for structured values) - return decode(new Uint8Array(buffer)); - } - ); -} - -/** - * A ref descriptor paired with the runId that owns it, for resolution. - */ -export interface RefWithRunId { - descriptor: RefDescriptor; - runId: string; -} - -/** - * Resolve multiple ref descriptors in parallel with bounded concurrency. 
- * - * If any ref in a batch fails, the batch rejects and remaining batches - * are aborted to avoid cascading failures. - * - * @param refs - Array of ref descriptors with their owning runIds - * @param config - API configuration - * @param concurrency - Max concurrent ref resolution requests. Falls back to REF_RESOLVE_CONCURRENCY. - * @returns Array of resolved values in the same order as input - */ -export async function resolveRefDescriptors( - refs: RefWithRunId[], - config?: APIConfig, - concurrency?: number -): Promise { - if (refs.length === 0) return []; - - const limit = concurrency ?? REF_RESOLVE_CONCURRENCY; - - return trace('world.refs.resolve', async (span) => { - const inlineCount = refs.filter((r) => - r.descriptor._ref.startsWith('dbrf:') - ).length; - const remoteCount = refs.length - inlineCount; - - span?.setAttributes({ - 'workflow.refs.total_count': refs.length, - 'workflow.refs.inline_count': inlineCount, - 'workflow.refs.remote_count': remoteCount, - 'workflow.refs.concurrency_limit': limit, - }); - - // Simple case: if under concurrency limit, resolve all at once - if (refs.length <= limit) { - return Promise.all( - refs.map((r) => resolveRefDescriptor(r.descriptor, r.runId, config)) - ); - } - - // Batch with bounded concurrency. If any ref in a batch fails, - // the batch rejects and remaining batches are aborted to avoid - // cascading failures. 
- const results: unknown[] = new Array(refs.length); - for (let i = 0; i < refs.length; i += limit) { - const batch = refs.slice(i, i + limit); - const batchResults = await Promise.all( - batch.map((r) => resolveRefDescriptor(r.descriptor, r.runId, config)) - ); - for (let j = 0; j < batchResults.length; j++) { - results[i + j] = batchResults[j]; - } - } - - return results; - }); -} From 33cbef81103b94f75ad5f275e46817910b504130 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Mon, 15 Jun 2026 12:13:45 +0200 Subject: [PATCH 2/8] [world-vercel] Carry stable-line structured errors on the v4 wire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v4 wire format assumes the runtime hands `events.create` a dehydrated `Uint8Array` for every payload field. On this line run/step errors are NOT dehydrated — the runtime emits them as a plain string (step_failed / step_retrying) or a `{ message, stack }` object (run_failed). The split's Uint8Array guard therefore threw on every failure event. Route a non-Uint8Array `error` field (plus the sibling `stack`) into the frame meta instead, so the backend rebuilds the same StructuredError the pre-v4 wire produced. On read, the backend returns that StructuredError as the ref's CBOR bytes in the frame body, so decode the structured-error event types back into `eventData.error` — the core step-event reducer reads `.message` / `.stack` off it directly and has no hydrate step for errors on this line. Also drops the native-run-attributes and webhook/system-hook meta fields from the wire allowlist: those eventData fields do not exist on this line's @workflow/world schema, and the schema-derived exhaustiveness guard flags them as stale. Requires the matching backend change to parse `error` / `stack` from the v4 frame meta. 
Co-Authored-By: Claude Fable 5 --- packages/world-vercel/src/events-v4.ts | 33 ++--- packages/world-vercel/src/events.test.ts | 173 +++++++++++++++++------ packages/world-vercel/src/events.ts | 163 ++++++++++++--------- 3 files changed, 234 insertions(+), 135 deletions(-) diff --git a/packages/world-vercel/src/events-v4.ts b/packages/world-vercel/src/events-v4.ts index 912474431f..65ff0424c0 100644 --- a/packages/world-vercel/src/events-v4.ts +++ b/packages/world-vercel/src/events-v4.ts @@ -69,24 +69,18 @@ export interface CreateEventV4Input { * the step entity for premature-delivery pacing and observability. */ retryAfter?: Date; hookToken?: string; - hookIsWebhook?: boolean; - hookIsSystem?: boolean; errorCode?: string; + /** Structured run/step error carried inline in the frame meta (a plain + * string for step_failed / step_retrying, a `{ message, stack }` object + * for run_failed) when the runtime sends it un-dehydrated. The backend + * rebuilds the same StructuredError the pre-v4 wire produced. Bounded by + * the server's structured-error cap. */ + error?: unknown; + /** Companion stack string for step_failed / step_retrying. */ + stack?: string; /** Arbitrary structured map; rides as a native CBOR object in the * frame meta. Bounded by the server at 2 KB encoded. */ executionContext?: Record; - /** Initial run attributes (run_created, and run_started on the - * resilient-start path). Validated server-side against the attribute - * key/value/count caps. */ - attributes?: Record; - /** attr_set's attribute change list ({key, value|null} entries). */ - changes?: Array>; - /** attr_set's writer provenance ({type:'workflow'} or - * {type:'step', stepId, attempt}). */ - writer?: Record; - /** Opt-in for framework-level callers to write `$`-prefixed reserved - * attribute keys (attr_set / run_created / run_started). 
*/ - allowReservedAttributes?: boolean; } export interface CreateEventV4Result { @@ -134,19 +128,12 @@ function buildPostFrameMeta( if (input.resumeAt !== undefined) meta.resumeAt = input.resumeAt; if (input.retryAfter !== undefined) meta.retryAfter = input.retryAfter; if (input.hookToken !== undefined) meta.hookToken = input.hookToken; - if (input.hookIsWebhook !== undefined) - meta.hookIsWebhook = input.hookIsWebhook; - if (input.hookIsSystem !== undefined) meta.hookIsSystem = input.hookIsSystem; if (input.errorCode !== undefined) meta.errorCode = input.errorCode; + if (input.error !== undefined) meta.error = input.error; + if (input.stack !== undefined) meta.stack = input.stack; if (input.executionContext !== undefined) { meta.executionContext = input.executionContext; } - if (input.attributes !== undefined) meta.attributes = input.attributes; - if (input.changes !== undefined) meta.changes = input.changes; - if (input.writer !== undefined) meta.writer = input.writer; - if (input.allowReservedAttributes !== undefined) { - meta.allowReservedAttributes = input.allowReservedAttributes; - } return meta; } diff --git a/packages/world-vercel/src/events.test.ts b/packages/world-vercel/src/events.test.ts index 22713e1eec..66dfc753e2 100644 --- a/packages/world-vercel/src/events.test.ts +++ b/packages/world-vercel/src/events.test.ts @@ -2,7 +2,12 @@ import type { AnyEventRequest } from '@workflow/world'; import { encode } from 'cbor-x'; import { MockAgent } from 'undici'; import { describe, expect, it } from 'vitest'; -import { createWorkflowRunEvent, splitEventDataForV4 } from './events.js'; +import { + createWorkflowRunEvent, + getWorkflowRunEvents, + splitEventDataForV4, +} from './events.js'; +import { encodeFrame, V4_FRAME_CONTENT_TYPE } from './frames.js'; const ORIGIN = 'https://vercel-workflow.com'; @@ -100,63 +105,88 @@ describe('createWorkflowRunEvent with v1Compat', () => { * nor the frame meta, so a *missing* field can't silently regress. 
These * runtime tests are the complement: they prove the fields that ARE routed * actually reach the frame meta with the right values and renames. + * + * This line's runtime does not serialize run/step errors through the + * dehydration pipeline — it emits them as a plain string (step_failed / + * step_retrying) or a `{ message, stack }` object (run_failed). The split + * must carry these in the frame meta (not as an opaque body), or the v4 + * write path would throw on the non-Uint8Array error and every failure + * event would die on the wire. */ -describe('splitEventDataForV4 attribute fields', () => { - it('carries attr_set changes/writer/allowReservedAttributes in the frame meta', () => { +describe('splitEventDataForV4 structured errors', () => { + it('routes a step_failed string error + stack into the frame meta (no body)', () => { + const stack = 'Error: boom\n at fn (/app/step.js:10:5)'; const { payload, meta } = splitEventDataForV4({ - eventType: 'attr_set', - correlationId: 'attr_1', - specVersion: 4, - eventData: { - changes: [ - { key: 'phase', value: 'done' }, - { key: 'stale', value: null }, - ], - writer: { type: 'step', stepId: 'step_1', attempt: 2 }, - allowReservedAttributes: true, - }, + eventType: 'step_failed', + correlationId: 'step_1', + specVersion: 2, + eventData: { stepName: 'a-step', error: 'boom', stack }, } as AnyEventRequest); expect(payload).toBeUndefined(); - expect(meta.changes).toEqual([ - { key: 'phase', value: 'done' }, - { key: 'stale', value: null }, - ]); - expect(meta.writer).toEqual({ type: 'step', stepId: 'step_1', attempt: 2 }); - expect(meta.allowReservedAttributes).toBe(true); + expect(meta.error).toBe('boom'); + expect(meta.stack).toBe(stack); + expect(meta.stepName).toBe('a-step'); }); - it('carries initial run attributes on run_created', () => { + it('routes a step_retrying string error + stack + retryAfter into the meta', () => { + const stack = 'Error: flake\n at fn (/app/step.js:11:5)'; + const retryAfter = new 
Date('2026-06-10T12:00:00.000Z'); const { payload, meta } = splitEventDataForV4({ - eventType: 'run_created', - specVersion: 4, - eventData: { - deploymentId: 'dpl_1', - workflowName: 'wf', - input: new TextEncoder().encode('[]'), - attributes: { sourceAtStart: 'api' }, - }, + eventType: 'step_retrying', + correlationId: 'step_1', + specVersion: 2, + eventData: { stepName: 'a-step', error: 'flake', stack, retryAfter }, } as AnyEventRequest); - expect(payload).toBeInstanceOf(Uint8Array); - expect(meta.attributes).toEqual({ sourceAtStart: 'api' }); - expect(meta.deploymentId).toBe('dpl_1'); - expect(meta.workflowName).toBe('wf'); + expect(payload).toBeUndefined(); + expect(meta.error).toBe('flake'); + expect(meta.stack).toBe(stack); + expect(meta.retryAfter).toEqual(retryAfter); }); - it('carries attributes on resilient-start run_started', () => { - const { meta } = splitEventDataForV4({ - eventType: 'run_started', - specVersion: 4, - eventData: { - input: new TextEncoder().encode('[]'), - deploymentId: 'dpl_1', - workflowName: 'wf', - attributes: { sourceAtStart: 'api' }, - }, + it('routes a run_failed { message, stack } error object into the meta', () => { + const error = { message: 'kaboom', stack: 'Error: kaboom\n at main' }; + const { payload, meta } = splitEventDataForV4({ + eventType: 'run_failed', + specVersion: 2, + eventData: { error, errorCode: 'RUNTIME_ERROR' }, + } as AnyEventRequest); + + expect(payload).toBeUndefined(); + expect(meta.error).toEqual(error); + expect(meta.errorCode).toBe('RUNTIME_ERROR'); + // run_failed carries its stack inside the error object, not as a sibling. + expect(meta.stack).toBeUndefined(); + }); + + it('keeps an already-dehydrated (Uint8Array) error on the body path', () => { + // A runtime that DOES dehydrate errors hands the split a Uint8Array; + // it must stream as the opaque frame body, untouched, with no meta.error. 
+ const bytes = new TextEncoder().encode('dehydrated-error-blob'); + const { payload, meta } = splitEventDataForV4({ + eventType: 'step_failed', + correlationId: 'step_1', + specVersion: 2, + eventData: { stepName: 'a-step', error: bytes }, } as AnyEventRequest); - expect(meta.attributes).toEqual({ sourceAtStart: 'api' }); + expect(payload).toBe(bytes); + expect(meta.error).toBeUndefined(); + }); + + it('still throws on a non-Uint8Array, non-error payload field', () => { + // input/output/result/etc. are always the runtime's dehydrated bytes — + // a plain value there is a real contract violation, not a structured + // error, so the loud guard must stay. + expect(() => + splitEventDataForV4({ + eventType: 'step_completed', + correlationId: 'step_1', + specVersion: 2, + eventData: { stepName: 'a-step', result: { not: 'bytes' } }, + } as unknown as AnyEventRequest) + ).toThrow(/must be a Uint8Array/); }); }); @@ -344,3 +374,58 @@ describe('createWorkflowRunEvent resolveData', () => { agent.assertNoPendingInterceptors(); }); }); + +/** + * Read-side complement to the structured-error write tests. The backend + * materializes run/step errors into a StructuredError and stores it as a + * CBOR-encoded ref; on the v4 read that ref's bytes arrive in the frame + * body. `getWorkflowRunEvents` must decode them back to the { message, + * stack } object the core step-event reducer reads directly — it has no + * hydrate step for errors on this line, so raw bytes would surface as + * "Unknown error" with no stack during replay. 
+ */ +describe('getWorkflowRunEvents structured-error decode', () => { + it('decodes a step_failed CBOR error body back into eventData.error', async () => { + const agent = mockAgent(); + const structuredError = { + message: 'boom', + stack: 'Error: boom\n at fn (/app/step.js:10:5)', + }; + const frames = Buffer.concat([ + encodeFrame( + { + eventId: 'evnt_1', + runId: 'wrun_1', + eventType: 'step_failed', + correlationId: 'step_1', + createdAt: '2026-06-10T00:00:00.000Z', + // `stack` is stripped from inline eventData server-side (it lives + // inside the structuredError ref), so it is absent from the meta. + eventData: { stepName: 'a-step' }, + }, + new Uint8Array(encode(structuredError)) + ), + encodeFrame({ _end: 1 }, new Uint8Array(0)), + ]); + + agent + .get(ORIGIN) + .intercept({ path: '/api/v4/runs/wrun_1/events', method: 'GET' }) + .reply(200, frames, { + headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, + }); + + const result = await getWorkflowRunEvents( + { runId: 'wrun_1', resolveData: 'all' }, + { token: 'test-token', dispatcher: agent } + ); + + expect(result.data).toHaveLength(1); + const eventData = ( + result.data[0] as { eventData?: Record } + ).eventData; + // Decoded back to the object form — not the raw CBOR Uint8Array. 
+ expect(eventData?.error).toEqual(structuredError); + agent.assertNoPendingInterceptors(); + }); +}); diff --git a/packages/world-vercel/src/events.ts b/packages/world-vercel/src/events.ts index 1abfca16ab..8168137f9b 100644 --- a/packages/world-vercel/src/events.ts +++ b/packages/world-vercel/src/events.ts @@ -47,6 +47,7 @@ import { validateUlidTimestamp, type WorkflowRun, } from '@workflow/world'; +import { decode } from 'cbor-x'; import { createWorkflowRunEventV4, type DecodedV4Event, @@ -147,6 +148,20 @@ const hookEventsRequiringExistence = new Set([ 'hook_received', ]); +// Event types whose payload field carries a backend-materialized +// StructuredError ({ message, stack }) rather than the runtime's opaque +// dehydrated bytes. This runtime sends run/step errors un-dehydrated (a +// string or a { message, stack } object), so the backend stores them as a +// CBOR-encoded structuredError ref; on read its bytes arrive in the frame +// body and must be CBOR-decoded back to the object the core step-event +// reducer reads `.message` / `.stack` off of — there is no hydrate step +// for errors on this line. Mirrors the meta routing in splitEventDataForV4. +const structuredErrorEventTypes = new Set([ + 'run_failed', + 'step_failed', + 'step_retrying', +]); + // ============================================================================= // Helpers // ============================================================================= @@ -163,19 +178,27 @@ interface SplitEventData { resumeAt?: Date; retryAfter?: Date; hookToken?: string; - hookIsWebhook?: boolean; - hookIsSystem?: boolean; errorCode?: string; + /** + * Structured run/step error carried inline in the frame meta. 
This + * runtime sends run/step errors as a plain string (step_failed / + * step_retrying) or a `{ message, stack }` object (run_failed) rather + * than a dehydrated SerializedData blob, so there is nothing to stream + * as an opaque body — the backend rebuilds the same StructuredError it + * did on the pre-v4 wire from this meta. (A runtime that dehydrates + * errors into a Uint8Array instead routes them through the frame body + * via `payload`.) + */ + error?: unknown; + /** + * Companion stack string for step_failed / step_retrying, whose `error` + * is a bare message. The backend folds it into the step's + * structuredError. run_failed carries its stack inside the `error` + * object instead. + */ + stack?: string; /** Structured executionContext, included verbatim in frame meta. */ executionContext?: Record; - /** Initial run attributes (run_created / resilient-start run_started). */ - attributes?: Record; - /** attr_set change list, included verbatim in frame meta. */ - changes?: Array>; - /** attr_set writer provenance, included verbatim in frame meta. */ - writer?: Record; - /** Reserved-attribute-key opt-in (attr_set / run_created / run_started). 
*/ - allowReservedAttributes?: boolean; }; } @@ -196,14 +219,10 @@ type MetaSourceField = | 'resumeAt' | 'retryAfter' | 'token' - | 'isWebhook' - | 'isSystem' | 'errorCode' - | 'executionContext' - | 'attributes' - | 'changes' - | 'writer' - | 'allowReservedAttributes'; + // step_failed / step_retrying error stack (sibling of the message string) + | 'stack' + | 'executionContext'; /** * Compile-time guard that the v4 `eventData` wire allowlist is exhaustive @@ -285,15 +304,16 @@ export function splitEventDataForV4(data: AnyEventRequest): SplitEventData { if (typeof eventData.token === 'string') { meta.hookToken = eventData.token; } - if (typeof eventData.isWebhook === 'boolean') { - meta.hookIsWebhook = eventData.isWebhook; - } - if (typeof eventData.isSystem === 'boolean') { - meta.hookIsSystem = eventData.isSystem; - } if (typeof eventData.errorCode === 'string') { meta.errorCode = eventData.errorCode; } + // step_failed / step_retrying carry the error stack as a sibling of the + // (string) error message. It rides in the meta and the backend folds it + // into the step's structuredError. run_failed keeps its stack inside the + // error object, so there is no top-level `stack` to lift there. + if (typeof eventData.stack === 'string') { + meta.stack = eventData.stack; + } if ( eventData.executionContext !== undefined && eventData.executionContext !== null && @@ -304,57 +324,47 @@ export function splitEventDataForV4(data: AnyEventRequest): SplitEventData { unknown >; } - // Native run attributes (spec v4): initial attributes ride on - // run_created (and run_started for resilient start); attr_set carries - // the change list + writer provenance. All of these are structured - // metadata, not user payloads — they ride in the frame meta and the - // server validates them against the attribute caps before - // materializing run.attributes. 
- if ( - eventData.attributes !== undefined && - eventData.attributes !== null && - typeof eventData.attributes === 'object' - ) { - meta.attributes = eventData.attributes as Record; - } - if (Array.isArray(eventData.changes)) { - meta.changes = eventData.changes as Array>; - } - if ( - eventData.writer !== undefined && - eventData.writer !== null && - typeof eventData.writer === 'object' - ) { - meta.writer = eventData.writer as Record; - } - if (typeof eventData.allowReservedAttributes === 'boolean') { - meta.allowReservedAttributes = eventData.allowReservedAttributes; - } + // Note: native run attributes (the attr_set event and the `attributes` / + // `changes` / `writer` / `allowReservedAttributes` eventData fields) are + // a newer feature that does not exist on this line's @workflow/world + // event schema, so there is nothing to lift here. The exhaustiveness + // guard above keeps this honest — if the schema gains those fields, it + // will fail to compile until they are routed. let payload: Uint8Array | undefined; if (payloadField && payloadField in eventData) { const value = eventData[payloadField]; if (value !== undefined) { - // Payload fields (input / output / result / error / payload / - // metadata) reach this layer already serialized as Uint8Array — the - // runtime calls dehydrateRunError / dehydrateStepReturnValue / etc. - // before invoking events.create. Pass the bytes through unchanged - // so runs.get and the events stream return the same raw form that - // hydrateRunError / hydrateStepIO expect. CBOR-encoding here would - // double-wrap on write and (since runs.get bypasses the v4 frame - // decode) leave the consumer with cbor(Uint8Array) rather than the - // devalue blob it was looking for. - if (!(value instanceof Uint8Array)) { - // Surface non-Uint8Array values loudly — current SDK callers go - // through the dehydrate helpers, so anything else is either a - // legacy caller or a bug. 
+ if (value instanceof Uint8Array) { + // Payload fields (input / output / result / error / payload / + // metadata) normally reach this layer already serialized as + // Uint8Array — the runtime calls the dehydrate helpers before + // invoking events.create. Pass the bytes through unchanged so + // runs.get and the events stream return the same raw form that + // hydrateStepIO etc. expect. CBOR-encoding here would double-wrap + // on write and (since runs.get bypasses the v4 frame decode) leave + // the consumer with cbor(Uint8Array) rather than the devalue blob + // it was looking for. + payload = value; + } else if (payloadField === 'error') { + // This runtime does not dehydrate run/step errors: it sends + // `error` as a plain string (step_failed / step_retrying) or a + // `{ message, stack }` object (run_failed). Carry the structured + // value in the frame meta so the backend rebuilds the same + // StructuredError the pre-v4 wire produced, instead of wrapping it + // as an opaque body the backend would store verbatim. The matching + // read path (buildEventFromV4) decodes it back. + meta.error = value; + } else { + // Any other payload field arriving as a non-Uint8Array is a real + // contract violation (those fields are always dehydrated bytes) — + // surface it loudly rather than silently mis-encoding. throw new TypeError( `world-vercel v4: eventData.${payloadField} for ${data.eventType} ` + `must be a Uint8Array (the runtime's dehydrated wire form); ` + `got ${typeof value === 'object' ? (value === null ? 'null' : ((value as object).constructor?.name ?? typeof value)) : typeof value}.` ); } - payload = value; } } @@ -399,10 +409,13 @@ function coerceEventDates(raw: Record): Event { * Both GET single-event and LIST use the same frame format: meta is the * full event entity with the payload field as a RefDescriptor, body is * the resolved payload bytes (possibly empty). 
This helper splices the - * body bytes into `eventData[fieldName]` unchanged — the runtime's - * hydrate helpers (hydrateStepIO, hydrateRunError, …) consume the raw - * devalue-with-format-prefix Uint8Array directly. No CBOR decode here, - * symmetric with the pass-through write in `splitEventDataForV4`. + * body bytes into `eventData[fieldName]` — for most event types unchanged, + * so the runtime's hydrate helpers (hydrateStepIO, …) consume the raw + * devalue-with-format-prefix Uint8Array directly, symmetric with the + * pass-through write in `splitEventDataForV4`. The exception is the + * structured-error event types (see `structuredErrorEventTypes`), whose + * body is a CBOR-encoded StructuredError that must be decoded back to the + * { message, stack } object the consumer reads directly. */ function buildEventFromV4( decoded: DecodedV4Event, @@ -413,7 +426,21 @@ function buildEventFromV4( if (payloadBody.byteLength > 0) { const payloadField = payloadFieldFor(decoded.eventType); - if (payloadField) eventData[payloadField] = payloadBody; + if (payloadField) { + if (structuredErrorEventTypes.has(decoded.eventType)) { + // CBOR-decode the materialized StructuredError back to its object + // form. Fall back to the raw bytes if it somehow isn't CBOR, so an + // unexpected encoding degrades to the pre-existing pass-through + // rather than throwing mid-replay. 
+ try { + eventData[payloadField] = decode(payloadBody); + } catch { + eventData[payloadField] = payloadBody; + } + } else { + eventData[payloadField] = payloadBody; + } + } } const raw = { From e622a6b724573dcacdb179fc980e0bf2af51fb24 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Mon, 15 Jun 2026 13:04:05 +0200 Subject: [PATCH 3/8] [world-vercel] Keep hook_created isWebhook on the v4 wire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime emits `isWebhook` on hook_created (the suspension handler) and the backend reads it to mark webhook hooks, which must not be resumable via the public webhook endpoint. The schema-derived wire-allowlist guard had flagged `isWebhook` as stale because this line's @workflow/world HookCreatedEventSchema did not declare it — so it was dropped, breaking webhook hooks on the v4 wire. Declare `isWebhook` on HookCreatedEventSchema (matching the field the runtime already sends and the backend already consumes) and route it through the v4 frame meta. Unlike isWebhook, isSystem / native run attributes / attr_set are genuinely absent from this line's runtime and schema, so they stay off the wire. Co-Authored-By: Claude Fable 5 --- .changeset/v4-events-client.md | 1 + packages/world-vercel/src/events-v4.ts | 4 ++++ packages/world-vercel/src/events.test.ts | 21 +++++++++++++++++++++ packages/world-vercel/src/events.ts | 10 ++++++++++ packages/world/src/events.ts | 5 ++++- 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/.changeset/v4-events-client.md b/.changeset/v4-events-client.md index 99cfda1950..85db839ab3 100644 --- a/.changeset/v4-events-client.md +++ b/.changeset/v4-events-client.md @@ -1,5 +1,6 @@ --- "@workflow/world-vercel": minor +"@workflow/world": patch --- New internal API format: separately encode event metadata from user payloads. Eliminates the need for calling separate endpoints for ref resolution, which improves performance especially on longer runs. 
diff --git a/packages/world-vercel/src/events-v4.ts b/packages/world-vercel/src/events-v4.ts index 65ff0424c0..6a74509313 100644 --- a/packages/world-vercel/src/events-v4.ts +++ b/packages/world-vercel/src/events-v4.ts @@ -69,6 +69,7 @@ export interface CreateEventV4Input { * the step entity for premature-delivery pacing and observability. */ retryAfter?: Date; hookToken?: string; + hookIsWebhook?: boolean; errorCode?: string; /** Structured run/step error carried inline in the frame meta (a plain * string for step_failed / step_retrying, a `{ message, stack }` object @@ -128,6 +129,9 @@ function buildPostFrameMeta( if (input.resumeAt !== undefined) meta.resumeAt = input.resumeAt; if (input.retryAfter !== undefined) meta.retryAfter = input.retryAfter; if (input.hookToken !== undefined) meta.hookToken = input.hookToken; + if (input.hookIsWebhook !== undefined) { + meta.hookIsWebhook = input.hookIsWebhook; + } if (input.errorCode !== undefined) meta.errorCode = input.errorCode; if (input.error !== undefined) meta.error = input.error; if (input.stack !== undefined) meta.stack = input.stack; diff --git a/packages/world-vercel/src/events.test.ts b/packages/world-vercel/src/events.test.ts index 66dfc753e2..d535c2cb12 100644 --- a/packages/world-vercel/src/events.test.ts +++ b/packages/world-vercel/src/events.test.ts @@ -190,6 +190,27 @@ describe('splitEventDataForV4 structured errors', () => { }); }); +describe('splitEventDataForV4 hook fields', () => { + it('routes hook_created token + isWebhook into the frame meta', () => { + // The runtime marks webhook hooks via eventData.isWebhook; the backend + // reads it to reject public-webhook-endpoint resumption. Dropping it + // from the wire silently breaks that — guard the routing here. 
+ const { meta } = splitEventDataForV4({ + eventType: 'hook_created', + correlationId: 'hook_1', + specVersion: 2, + eventData: { + token: 'tok_1', + metadata: new TextEncoder().encode('{}'), + isWebhook: true, + }, + } as AnyEventRequest); + + expect(meta.hookToken).toBe('tok_1'); + expect(meta.hookIsWebhook).toBe(true); + }); +}); + describe('createWorkflowRunEvent response coercion', () => { it('coerces ISO-string dates in the returned event and preloaded events', async () => { // Persisted events store nested eventData dates as ISO strings diff --git a/packages/world-vercel/src/events.ts b/packages/world-vercel/src/events.ts index 8168137f9b..6469a24165 100644 --- a/packages/world-vercel/src/events.ts +++ b/packages/world-vercel/src/events.ts @@ -178,6 +178,7 @@ interface SplitEventData { resumeAt?: Date; retryAfter?: Date; hookToken?: string; + hookIsWebhook?: boolean; errorCode?: string; /** * Structured run/step error carried inline in the frame meta. This @@ -219,6 +220,8 @@ type MetaSourceField = | 'resumeAt' | 'retryAfter' | 'token' + // hook_created webhook flag (renamed to hookIsWebhook on the wire) + | 'isWebhook' | 'errorCode' // step_failed / step_retrying error stack (sibling of the message string) | 'stack' @@ -304,6 +307,13 @@ export function splitEventDataForV4(data: AnyEventRequest): SplitEventData { if (typeof eventData.token === 'string') { meta.hookToken = eventData.token; } + // hook_created carries the webhook flag so the backend can mark webhook + // hooks (which are not resumable via the public webhook endpoint). The + // runtime emits it even though this line's @workflow/world schema only + // recently started declaring it — see MetaSourceField. 
+ if (typeof eventData.isWebhook === 'boolean') { + meta.hookIsWebhook = eventData.isWebhook; + } if (typeof eventData.errorCode === 'string') { meta.errorCode = eventData.errorCode; } diff --git a/packages/world/src/events.ts b/packages/world/src/events.ts index a979fad494..61b7b2e665 100644 --- a/packages/world/src/events.ts +++ b/packages/world/src/events.ts @@ -170,7 +170,10 @@ const HookCreatedEventSchema = BaseEventSchema.extend({ eventData: z.object({ token: z.string(), metadata: SerializedDataSchema.optional(), - // Preserved on read so crash-recovery paths can rebuild the hook + // Marks a hook created for a webhook (not resumable via the public + // webhook endpoint). The runtime emits this on hook_created and the + // backend consumes it; declaring it here keeps it on the wire. + // Also preserved on read so crash-recovery paths can rebuild the hook // entity from the persisted `hook_created` event's payload (see // `repairHookEntityFromPersistedEvent` in `@workflow/world-local`). isWebhook: z.boolean().optional(), From 2904cad91cd5c26206905059f6ea76cee8f1e5d4 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Sun, 14 Jun 2026 13:24:29 +0200 Subject: [PATCH 4/8] [world-vercel] Send remoteRefBehavior=lazy on v4 metadata-only listings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit getWorkflowRunEvents applied resolveData only client-side: it always downloaded the resolved payload bytes for every event, then discarded them when resolveData was 'none'. The v4 list endpoints accept remoteRefBehavior (resolve|lazy, default resolve), so map resolveData 'none' → lazy and send it on the runId and correlationId list queries — the backend then emits empty-body frames and skips the per-event blob read. Safe against a backend that predates the flag: it ignores the param and streams full bodies, and buildEventFromV4 still strips them for resolveData 'none', so this is a pure bandwidth optimization either way. 
Co-Authored-By: Claude Fable 5 --- .changeset/v4-lazy-list-refs.md | 5 ++ packages/world-vercel/src/events-v4.ts | 29 +++++++-- packages/world-vercel/src/events.test.ts | 79 ++++++++++++++++++++++++ packages/world-vercel/src/events.ts | 10 +++ 4 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 .changeset/v4-lazy-list-refs.md diff --git a/.changeset/v4-lazy-list-refs.md b/.changeset/v4-lazy-list-refs.md new file mode 100644 index 0000000000..39a75578c4 --- /dev/null +++ b/.changeset/v4-lazy-list-refs.md @@ -0,0 +1,5 @@ +--- +"@workflow/world-vercel": patch +--- + +Skip transferring event payload bytes for `resolveData: 'none'` listings on the v4 wire. diff --git a/packages/world-vercel/src/events-v4.ts b/packages/world-vercel/src/events-v4.ts index 6a74509313..d5202ea99a 100644 --- a/packages/world-vercel/src/events-v4.ts +++ b/packages/world-vercel/src/events-v4.ts @@ -353,6 +353,15 @@ export interface ListEventsV4Params { cursor?: string; limit?: number; sortOrder?: 'asc' | 'desc'; + /** + * Whether the backend resolves payload bytes into each frame body. + * `resolve` (default) streams the bytes; `lazy` emits empty-body frames + * (the ref descriptor stays in the frame meta) — for metadata-only + * listings that would otherwise download every payload just to discard + * it. A backend that predates this flag ignores it and streams full + * bodies, so callers must still tolerate bodies being present. + */ + remoteRefBehavior?: 'resolve' | 'lazy'; } /** @@ -451,11 +460,23 @@ async function consumeListFrameStream( return { events, ...(next ? { next } : {}) }; } -function paginationToQuery(params: ListEventsV4Params): string { - const sp = new URLSearchParams(); +/** + * Append the shared list params (pagination + ref behavior) to `sp`. + * Shared by the runId and correlationId list query builders so both send + * `remoteRefBehavior` identically. 
+ */ +function appendListParams(sp: URLSearchParams, params: ListEventsV4Params) { if (params.cursor) sp.set('cursor', params.cursor); if (params.limit !== undefined) sp.set('limit', String(params.limit)); if (params.sortOrder) sp.set('sortOrder', params.sortOrder); + if (params.remoteRefBehavior) { + sp.set('remoteRefBehavior', params.remoteRefBehavior); + } +} + +function paginationToQuery(params: ListEventsV4Params): string { + const sp = new URLSearchParams(); + appendListParams(sp, params); const qs = sp.toString(); return qs ? `?${qs}` : ''; } @@ -502,9 +523,7 @@ export async function getEventsByCorrelationIdV4( const { baseUrl, headers } = await getHttpConfig(config); const sp = new URLSearchParams(); sp.set('correlationId', correlationId); - if (params.cursor) sp.set('cursor', params.cursor); - if (params.limit !== undefined) sp.set('limit', String(params.limit)); - if (params.sortOrder) sp.set('sortOrder', params.sortOrder); + appendListParams(sp, params); const url = `${baseUrl}/v4/events?${sp.toString()}`; return consumeListFrameStream( url, diff --git a/packages/world-vercel/src/events.test.ts b/packages/world-vercel/src/events.test.ts index d535c2cb12..07e0df4a92 100644 --- a/packages/world-vercel/src/events.test.ts +++ b/packages/world-vercel/src/events.test.ts @@ -450,3 +450,82 @@ describe('getWorkflowRunEvents structured-error decode', () => { agent.assertNoPendingInterceptors(); }); }); + +describe('getWorkflowRunEvents remoteRefBehavior mapping', () => { + // A v4 LIST response: one run_created frame (with payload body) + sentinel. 
+ function listResponse(body: Uint8Array): Buffer { + return Buffer.concat([ + encodeFrame( + { + eventId: 'evnt_1', + runId: 'wrun_1', + eventType: 'run_created', + createdAt: '2026-06-10T00:00:00.000Z', + eventData: { + input: { _type: 'RemoteRef', _ref: 's3rf:wrun_1/input' }, + workflowName: 'wf', + }, + }, + body + ), + encodeFrame({ _end: 1 }, new Uint8Array(0)), + ]); + } + + it("sends remoteRefBehavior=lazy for resolveData 'none' and strips any returned body", async () => { + const agent = mockAgent(); + // The interceptor only matches when the request carries + // ?remoteRefBehavior=lazy — so a missing/wrong param fails the request. + // The reply still includes payload bytes, simulating a backend that + // predates the flag: the adapter must strip them regardless. + agent + .get(ORIGIN) + .intercept({ + path: '/api/v4/runs/wrun_1/events', + method: 'GET', + query: { remoteRefBehavior: 'lazy' }, + }) + .reply(200, listResponse(new TextEncoder().encode('"payload"')), { + headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, + }); + + const result = await getWorkflowRunEvents( + { runId: 'wrun_1', resolveData: 'none' }, + { token: 'test-token', dispatcher: agent } + ); + + const eventData = ( + result.data[0] as { eventData?: Record } + ).eventData; + expect(eventData?.input).toBeUndefined(); + expect(eventData?.workflowName).toBe('wf'); + agent.assertNoPendingInterceptors(); + }); + + it('sends remoteRefBehavior=resolve by default and splices the body bytes', async () => { + const agent = mockAgent(); + const body = new TextEncoder().encode('"payload"'); + agent + .get(ORIGIN) + .intercept({ + path: '/api/v4/runs/wrun_1/events', + method: 'GET', + query: { remoteRefBehavior: 'resolve' }, + }) + .reply(200, listResponse(body), { + headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, + }); + + // No resolveData → defaults to 'all' → resolve. 
+ const result = await getWorkflowRunEvents( + { runId: 'wrun_1' }, + { token: 'test-token', dispatcher: agent } + ); + + const eventData = ( + result.data[0] as { eventData?: Record } + ).eventData; + expect(eventData?.input).toEqual(body); + agent.assertNoPendingInterceptors(); + }); +}); diff --git a/packages/world-vercel/src/events.ts b/packages/world-vercel/src/events.ts index 6469a24165..c01b34782f 100644 --- a/packages/world-vercel/src/events.ts +++ b/packages/world-vercel/src/events.ts @@ -497,10 +497,20 @@ export async function getWorkflowRunEvents( config?: APIConfig ): Promise> { const { pagination, resolveData = DEFAULT_RESOLVE_DATA_OPTION } = params; + // `resolveData: 'none'` means the caller only wants metadata — it discards + // payloads in buildEventFromV4 below. Tell the backend not to stream them + // in the first place (lazy → empty frame bodies). On `'all'` we resolve + // (the default). A backend that predates this flag ignores it and streams + // full bodies regardless; buildEventFromV4 still strips them when + // resolveData is 'none', so this is purely a bandwidth optimization and is + // safe against an older backend. const wirePagination = { cursor: pagination?.cursor ?? undefined, limit: pagination?.limit, sortOrder: pagination?.sortOrder, + remoteRefBehavior: (resolveData === 'none' ? 
'lazy' : 'resolve') as + | 'lazy' + | 'resolve', }; const result = await ('correlationId' in params From 37831796b5f52cfcd658b339d1d9572a5497c54c Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Wed, 17 Jun 2026 13:44:12 -0700 Subject: [PATCH 5/8] Apply suggestion from @VaguelySerious Signed-off-by: Peter Wielander --- .changeset/v4-lazy-list-refs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/v4-lazy-list-refs.md b/.changeset/v4-lazy-list-refs.md index 39a75578c4..de79c841ab 100644 --- a/.changeset/v4-lazy-list-refs.md +++ b/.changeset/v4-lazy-list-refs.md @@ -2,4 +2,4 @@ "@workflow/world-vercel": patch --- -Skip transferring event payload bytes for `resolveData: 'none'` listings on the v4 wire. +Skip transferring event payload bytes when listing events with `resolveData: 'none'` using the v4 API. From 6b696e90c2e986abe751da6f612e3b928b61b0cf Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Fri, 19 Jun 2026 14:17:26 -0700 Subject: [PATCH 6/8] [world-vercel] Update structured-error decode test for remoteRefBehavior The structured-error decode test (stable-only, from this backport) lists events with resolveData 'all', which the lazy-refs change maps to remoteRefBehavior=resolve on the wire. Match the new query param in the interceptor so the request resolves against it. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/world-vercel/src/events.test.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/world-vercel/src/events.test.ts b/packages/world-vercel/src/events.test.ts index 07e0df4a92..b499b8e607 100644 --- a/packages/world-vercel/src/events.test.ts +++ b/packages/world-vercel/src/events.test.ts @@ -431,7 +431,12 @@ describe('getWorkflowRunEvents structured-error decode', () => { agent .get(ORIGIN) - .intercept({ path: '/api/v4/runs/wrun_1/events', method: 'GET' }) + .intercept({ + path: '/api/v4/runs/wrun_1/events', + method: 'GET', + // resolveData 'all' maps to remoteRefBehavior=resolve on the wire. + query: { remoteRefBehavior: 'resolve' }, + }) .reply(200, frames, { headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, }); From a276de0c9ac4b02937fee4e3b676cecaa9195198 Mon Sep 17 00:00:00 2001 From: Shohei Maeda <11495867+smaeda-ks@users.noreply.github.com> Date: Sun, 21 Jun 2026 04:02:58 +0900 Subject: [PATCH 7/8] fix(world-vercel): cancel v4 event frame stream on early exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit decodeFrames never cancelled response.body when a consumer stopped reading before EOF — getEventV4 returns after the first frame and consumeListFrameStream breaks at the sentinel — so the undici connection stayed checked out of the pool (8 per origin) instead of being released, causing stalls/timeouts on the event-read path. Cancel the source in a try/finally (and cancel the reader in readerToIterator) via a shared closeQuietly helper. Add regression tests for both decode branches and a getEventV4 HTTP round-trip through undici MockAgent. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cancel-v4-frame-stream.md | 5 ++ packages/world-vercel/src/events-v4.test.ts | 47 ++++++++++++ packages/world-vercel/src/frames.test.ts | 70 +++++++++++++++++- packages/world-vercel/src/frames.ts | 80 +++++++++++++-------- 4 files changed, 171 insertions(+), 31 deletions(-) create mode 100644 .changeset/cancel-v4-frame-stream.md diff --git a/.changeset/cancel-v4-frame-stream.md b/.changeset/cancel-v4-frame-stream.md new file mode 100644 index 0000000000..9383ae3cc4 --- /dev/null +++ b/.changeset/cancel-v4-frame-stream.md @@ -0,0 +1,5 @@ +--- +'@workflow/world-vercel': patch +--- + +Cancel the v4 event frame stream when a reader stops early, so the response body's undici connection returns to the pool instead of leaking. diff --git a/packages/world-vercel/src/events-v4.test.ts b/packages/world-vercel/src/events-v4.test.ts index ccdc803cb4..5509303e62 100644 --- a/packages/world-vercel/src/events-v4.test.ts +++ b/packages/world-vercel/src/events-v4.test.ts @@ -10,6 +10,7 @@ import { MockAgent } from 'undici'; import { describe, expect, it } from 'vitest'; import { createWorkflowRunEventV4, + getEventV4, getWorkflowRunEventsV4, throwForErrorResponse, } from './events-v4.js'; @@ -168,6 +169,52 @@ describe('getWorkflowRunEventsV4 over HTTP', () => { }); }); +/** + * getEventV4 returns after the first frame. The early return must cancel the + * response body (releasing its undici socket) without corrupting the returned + * value or hanging — the trailing frame below is never read. 
+ */ +describe('getEventV4 over HTTP', () => { + it('returns the first frame and stops reading the rest', async () => { + const origin = 'https://vercel-workflow.com'; + const agent = new MockAgent(); + agent.disableNetConnect(); + + const body = new TextEncoder().encode('event-payload'); + const frames = Buffer.concat([ + encodeFrame( + { + eventId: 'evnt_1', + runId: 'wrun_1', + eventType: 'run_created', + createdAt: '2026-06-10T00:00:00.000Z', + eventData: {}, + }, + body + ), + // Trailing bytes the reader must never need. + encodeFrame({ eventId: 'evnt_unused' }, new Uint8Array(8)), + ]); + + agent + .get(origin) + .intercept({ path: '/api/v4/runs/wrun_1/events/evnt_1', method: 'GET' }) + .reply(200, frames, { + headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, + }); + + const { event, body: returnedBody } = await getEventV4('wrun_1', 'evnt_1', { + token: 'test-token', + dispatcher: agent, + }); + + expect(event.eventId).toBe('evnt_1'); + expect(event.eventType).toBe('run_created'); + expect(new Uint8Array(returnedBody)).toEqual(body); + agent.assertNoPendingInterceptors(); + }); +}); + describe('createWorkflowRunEventV4 over HTTP', () => { it('POSTs to the /events/:eventType alias and decodes the response', async () => { const origin = 'https://vercel-workflow.com'; diff --git a/packages/world-vercel/src/frames.test.ts b/packages/world-vercel/src/frames.test.ts index 966bab97ab..72d312272e 100644 --- a/packages/world-vercel/src/frames.test.ts +++ b/packages/world-vercel/src/frames.test.ts @@ -1,4 +1,4 @@ -import { decode, encode } from 'cbor-x'; +import { decode } from 'cbor-x'; import { describe, expect, it } from 'vitest'; import { type DecodedFrame, @@ -41,6 +41,29 @@ async function drainFrames( return out; } +/** A stream that stays open after delivering its payload (never signals EOF), + * like a kept-alive HTTP socket, and records whether cancel() ran — the + * signal undici uses to release the connection. 
highWaterMark: 0 suppresses + * the pull-ahead that would otherwise auto-close a toy stream. */ +function spyStream(payload: Uint8Array) { + let sent = false; + let cancelled = false; + const stream = new ReadableStream( + { + pull(controller) { + if (sent) return; + controller.enqueue(payload); + sent = true; + }, + cancel() { + cancelled = true; + }, + }, + { highWaterMark: 0 } + ); + return { stream, wasCancelled: () => cancelled }; +} + describe('encodeFrame', () => { it('produces the canonical wire layout', () => { const meta = { eventId: 'evnt_abc', n: 42 }; @@ -210,6 +233,51 @@ describe('decodeFrames from an AsyncIterable source', () => { }); }); +describe('decodeFrames releases the stream on early exit', () => { + // Regression: a consumer that stops before EOF (getEventV4 returns after + // the first frame; consumeListFrameStream breaks at the sentinel) must + // cancel the body, or its undici socket stays pinned out of the pool. + function twoFramesThenEnd(): Uint8Array { + return new Uint8Array([ + ...encodeFrame({ eventId: 'a' }, new Uint8Array([1, 2, 3])), + ...encodeFrame({ eventId: 'b' }, new Uint8Array([4, 5, 6])), + ...encodeEndFrame(), + ]); + } + + it('cancels the underlying stream when the consumer breaks early', async () => { + const { stream, wasCancelled } = spyStream(twoFramesThenEnd()); + for await (const f of decodeFrames(stream)) { + expect(f.meta).toEqual({ eventId: 'a' }); + break; // mirrors getEventV4 returning after the first frame + } + expect(wasCancelled()).toBe(true); + }); + + it('cancels via the reader path when the source is not async-iterable', async () => { + const { stream, wasCancelled } = spyStream(twoFramesThenEnd()); + // A bare { getReader } object forces the readerToIterator branch (a real + // ReadableStream is already async-iterable in Node). 
+ const source = { + getReader: () => stream.getReader(), + } as unknown as ReadableStream; + for await (const f of decodeFrames(source)) { + expect(f.meta).toEqual({ eventId: 'a' }); + break; + } + expect(wasCancelled()).toBe(true); + }); + + it('still decodes every frame when fully consumed', async () => { + const frames = await drainFrames(spyStream(twoFramesThenEnd()).stream); + expect(frames.map((f) => f.meta)).toEqual([ + { eventId: 'a' }, + { eventId: 'b' }, + { _end: 1 }, + ]); + }); +}); + describe('V4_FRAME_CONTENT_TYPE', () => { it('matches the server-side content type', () => { expect(V4_FRAME_CONTENT_TYPE).toBe('application/vnd.workflow.v4-frames'); diff --git a/packages/world-vercel/src/frames.ts b/packages/world-vercel/src/frames.ts index b8c041e4df..420c089c15 100644 --- a/packages/world-vercel/src/frames.ts +++ b/packages/world-vercel/src/frames.ts @@ -79,41 +79,56 @@ export async function* decodeFrames( return out; }; - while (true) { - if (!(await refill(4))) return; - const metaLen = new DataView(buffer.buffer, buffer.byteOffset, 4).getUint32( - 0, - false - ); - take(4); + try { + while (true) { + if (!(await refill(4))) return; + const metaLen = new DataView( + buffer.buffer, + buffer.byteOffset, + 4 + ).getUint32(0, false); + take(4); - if (!(await refill(metaLen))) { - throw new Error('decodeFrames: truncated meta block'); - } - const meta = decode(take(metaLen)) as Record; + if (!(await refill(metaLen))) { + throw new Error('decodeFrames: truncated meta block'); + } + const meta = decode(take(metaLen)) as Record; - if (!(await refill(4))) { - throw new Error('decodeFrames: truncated body length'); - } - const bodyLen = new DataView(buffer.buffer, buffer.byteOffset, 4).getUint32( - 0, - false - ); - take(4); + if (!(await refill(4))) { + throw new Error('decodeFrames: truncated body length'); + } + const bodyLen = new DataView( + buffer.buffer, + buffer.byteOffset, + 4 + ).getUint32(0, false); + take(4); - if (bodyLen > 0) { - if (!(await 
refill(bodyLen))) { + if (bodyLen > 0 && !(await refill(bodyLen))) { throw new Error('decodeFrames: truncated body bytes'); } - // Slice (not subarray) so the yielded body owns its bytes — - // subsequent reads into the buffer won't overwrite it. + // Slice (not subarray) so the yielded body owns its bytes — later + // reads into the buffer won't overwrite it; bodyLen 0 yields empty. yield { meta, body: buffer.slice(0, bodyLen) }; take(bodyLen); - } else { - yield { meta, body: new Uint8Array(0) }; + + if (meta._end === 1) return; } + } finally { + // Release the source when the consumer stops before EOF (early + // break/return): an unconsumed body pins its undici socket out of the + // connection pool. No-op once the stream is already drained. + await closeQuietly(() => chunks.return?.()); + } +} - if (meta._end === 1) return; +/** Best-effort source cleanup, safe to run from a `finally`: swallows errors + * so cleanup can't mask the original outcome. */ +async function closeQuietly(close: () => unknown): Promise { + try { + await close(); + } catch { + // best-effort } } @@ -122,9 +137,14 @@ export async function* decodeFrames( async function* readerToIterator( reader: ReadableStreamDefaultReader ): AsyncGenerator { - while (true) { - const { done, value } = await reader.read(); - if (done) return; - if (value) yield value; + try { + while (true) { + const { done, value } = await reader.read(); + if (done) return; + if (value) yield value; + } + } finally { + // Cancel on early exit so the socket is released, not just unlocked. + await closeQuietly(() => reader.cancel()); } } From 10dd0d46fe3124ef5a4aa5b41ab01bfdd0839441 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Mon, 22 Jun 2026 14:31:25 -0700 Subject: [PATCH 8/8] Revert "fix(world-vercel): cancel v4 event frame stream on early exit" This reverts commit a276de0c9ac4b02937fee4e3b676cecaa9195198. 
--- .changeset/cancel-v4-frame-stream.md | 5 -- packages/world-vercel/src/events-v4.test.ts | 47 ------------ packages/world-vercel/src/frames.test.ts | 70 +----------------- packages/world-vercel/src/frames.ts | 80 ++++++++------------- 4 files changed, 31 insertions(+), 171 deletions(-) delete mode 100644 .changeset/cancel-v4-frame-stream.md diff --git a/.changeset/cancel-v4-frame-stream.md b/.changeset/cancel-v4-frame-stream.md deleted file mode 100644 index 9383ae3cc4..0000000000 --- a/.changeset/cancel-v4-frame-stream.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -'@workflow/world-vercel': patch ---- - -Cancel the v4 event frame stream when a reader stops early, so the response body's undici connection returns to the pool instead of leaking. diff --git a/packages/world-vercel/src/events-v4.test.ts b/packages/world-vercel/src/events-v4.test.ts index 5509303e62..ccdc803cb4 100644 --- a/packages/world-vercel/src/events-v4.test.ts +++ b/packages/world-vercel/src/events-v4.test.ts @@ -10,7 +10,6 @@ import { MockAgent } from 'undici'; import { describe, expect, it } from 'vitest'; import { createWorkflowRunEventV4, - getEventV4, getWorkflowRunEventsV4, throwForErrorResponse, } from './events-v4.js'; @@ -169,52 +168,6 @@ describe('getWorkflowRunEventsV4 over HTTP', () => { }); }); -/** - * getEventV4 returns after the first frame. The early return must cancel the - * response body (releasing its undici socket) without corrupting the returned - * value or hanging — the trailing frame below is never read. 
- */ -describe('getEventV4 over HTTP', () => { - it('returns the first frame and stops reading the rest', async () => { - const origin = 'https://vercel-workflow.com'; - const agent = new MockAgent(); - agent.disableNetConnect(); - - const body = new TextEncoder().encode('event-payload'); - const frames = Buffer.concat([ - encodeFrame( - { - eventId: 'evnt_1', - runId: 'wrun_1', - eventType: 'run_created', - createdAt: '2026-06-10T00:00:00.000Z', - eventData: {}, - }, - body - ), - // Trailing bytes the reader must never need. - encodeFrame({ eventId: 'evnt_unused' }, new Uint8Array(8)), - ]); - - agent - .get(origin) - .intercept({ path: '/api/v4/runs/wrun_1/events/evnt_1', method: 'GET' }) - .reply(200, frames, { - headers: { 'content-type': V4_FRAME_CONTENT_TYPE }, - }); - - const { event, body: returnedBody } = await getEventV4('wrun_1', 'evnt_1', { - token: 'test-token', - dispatcher: agent, - }); - - expect(event.eventId).toBe('evnt_1'); - expect(event.eventType).toBe('run_created'); - expect(new Uint8Array(returnedBody)).toEqual(body); - agent.assertNoPendingInterceptors(); - }); -}); - describe('createWorkflowRunEventV4 over HTTP', () => { it('POSTs to the /events/:eventType alias and decodes the response', async () => { const origin = 'https://vercel-workflow.com'; diff --git a/packages/world-vercel/src/frames.test.ts b/packages/world-vercel/src/frames.test.ts index 72d312272e..966bab97ab 100644 --- a/packages/world-vercel/src/frames.test.ts +++ b/packages/world-vercel/src/frames.test.ts @@ -1,4 +1,4 @@ -import { decode } from 'cbor-x'; +import { decode, encode } from 'cbor-x'; import { describe, expect, it } from 'vitest'; import { type DecodedFrame, @@ -41,29 +41,6 @@ async function drainFrames( return out; } -/** A stream that stays open after delivering its payload (never signals EOF), - * like a kept-alive HTTP socket, and records whether cancel() ran — the - * signal undici uses to release the connection. 
highWaterMark: 0 suppresses - * the pull-ahead that would otherwise auto-close a toy stream. */ -function spyStream(payload: Uint8Array) { - let sent = false; - let cancelled = false; - const stream = new ReadableStream( - { - pull(controller) { - if (sent) return; - controller.enqueue(payload); - sent = true; - }, - cancel() { - cancelled = true; - }, - }, - { highWaterMark: 0 } - ); - return { stream, wasCancelled: () => cancelled }; -} - describe('encodeFrame', () => { it('produces the canonical wire layout', () => { const meta = { eventId: 'evnt_abc', n: 42 }; @@ -233,51 +210,6 @@ describe('decodeFrames from an AsyncIterable source', () => { }); }); -describe('decodeFrames releases the stream on early exit', () => { - // Regression: a consumer that stops before EOF (getEventV4 returns after - // the first frame; consumeListFrameStream breaks at the sentinel) must - // cancel the body, or its undici socket stays pinned out of the pool. - function twoFramesThenEnd(): Uint8Array { - return new Uint8Array([ - ...encodeFrame({ eventId: 'a' }, new Uint8Array([1, 2, 3])), - ...encodeFrame({ eventId: 'b' }, new Uint8Array([4, 5, 6])), - ...encodeEndFrame(), - ]); - } - - it('cancels the underlying stream when the consumer breaks early', async () => { - const { stream, wasCancelled } = spyStream(twoFramesThenEnd()); - for await (const f of decodeFrames(stream)) { - expect(f.meta).toEqual({ eventId: 'a' }); - break; // mirrors getEventV4 returning after the first frame - } - expect(wasCancelled()).toBe(true); - }); - - it('cancels via the reader path when the source is not async-iterable', async () => { - const { stream, wasCancelled } = spyStream(twoFramesThenEnd()); - // A bare { getReader } object forces the readerToIterator branch (a real - // ReadableStream is already async-iterable in Node). 
- const source = { - getReader: () => stream.getReader(), - } as unknown as ReadableStream; - for await (const f of decodeFrames(source)) { - expect(f.meta).toEqual({ eventId: 'a' }); - break; - } - expect(wasCancelled()).toBe(true); - }); - - it('still decodes every frame when fully consumed', async () => { - const frames = await drainFrames(spyStream(twoFramesThenEnd()).stream); - expect(frames.map((f) => f.meta)).toEqual([ - { eventId: 'a' }, - { eventId: 'b' }, - { _end: 1 }, - ]); - }); -}); - describe('V4_FRAME_CONTENT_TYPE', () => { it('matches the server-side content type', () => { expect(V4_FRAME_CONTENT_TYPE).toBe('application/vnd.workflow.v4-frames'); diff --git a/packages/world-vercel/src/frames.ts b/packages/world-vercel/src/frames.ts index 420c089c15..b8c041e4df 100644 --- a/packages/world-vercel/src/frames.ts +++ b/packages/world-vercel/src/frames.ts @@ -79,56 +79,41 @@ export async function* decodeFrames( return out; }; - try { - while (true) { - if (!(await refill(4))) return; - const metaLen = new DataView( - buffer.buffer, - buffer.byteOffset, - 4 - ).getUint32(0, false); - take(4); + while (true) { + if (!(await refill(4))) return; + const metaLen = new DataView(buffer.buffer, buffer.byteOffset, 4).getUint32( + 0, + false + ); + take(4); - if (!(await refill(metaLen))) { - throw new Error('decodeFrames: truncated meta block'); - } - const meta = decode(take(metaLen)) as Record; + if (!(await refill(metaLen))) { + throw new Error('decodeFrames: truncated meta block'); + } + const meta = decode(take(metaLen)) as Record; - if (!(await refill(4))) { - throw new Error('decodeFrames: truncated body length'); - } - const bodyLen = new DataView( - buffer.buffer, - buffer.byteOffset, - 4 - ).getUint32(0, false); - take(4); + if (!(await refill(4))) { + throw new Error('decodeFrames: truncated body length'); + } + const bodyLen = new DataView(buffer.buffer, buffer.byteOffset, 4).getUint32( + 0, + false + ); + take(4); - if (bodyLen > 0 && !(await 
refill(bodyLen))) { + if (bodyLen > 0) { + if (!(await refill(bodyLen))) { throw new Error('decodeFrames: truncated body bytes'); } - // Slice (not subarray) so the yielded body owns its bytes — later - // reads into the buffer won't overwrite it; bodyLen 0 yields empty. + // Slice (not subarray) so the yielded body owns its bytes — + // subsequent reads into the buffer won't overwrite it. yield { meta, body: buffer.slice(0, bodyLen) }; take(bodyLen); - - if (meta._end === 1) return; + } else { + yield { meta, body: new Uint8Array(0) }; } - } finally { - // Release the source when the consumer stops before EOF (early - // break/return): an unconsumed body pins its undici socket out of the - // connection pool. No-op once the stream is already drained. - await closeQuietly(() => chunks.return?.()); - } -} -/** Best-effort source cleanup, safe to run from a `finally`: swallows errors - * so cleanup can't mask the original outcome. */ -async function closeQuietly(close: () => unknown): Promise { - try { - await close(); - } catch { - // best-effort + if (meta._end === 1) return; } } @@ -137,14 +122,9 @@ async function closeQuietly(close: () => unknown): Promise { async function* readerToIterator( reader: ReadableStreamDefaultReader ): AsyncGenerator { - try { - while (true) { - const { done, value } = await reader.read(); - if (done) return; - if (value) yield value; - } - } finally { - // Cancel on early exit so the socket is released, not just unlocked. - await closeQuietly(() => reader.cancel()); + while (true) { + const { done, value } = await reader.read(); + if (done) return; + if (value) yield value; } }