From 5ddb2e87e130a7b00165cbf74b085aadd534fa54 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Tue, 8 Apr 2025 14:51:52 -0700 Subject: [PATCH 01/17] remove redundant static text children --- lib/a11y/utils.ts | 48 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 6c426ee92..be0887309 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -54,7 +54,7 @@ async function cleanStructuralNodes( cleanStructuralNodes(child, page, logger), ); const resolvedChildren = await Promise.all(cleanedChildrenPromises); - const cleanedChildren = resolvedChildren.filter( + let cleanedChildren = resolvedChildren.filter( (child): child is AccessibilityNode => child !== null, ); @@ -136,6 +136,17 @@ async function cleanStructuralNodes( } } + // rm redundant StaticText children + cleanedChildren = removeRedundantStaticTextChildren(node, cleanedChildren); + + if (cleanedChildren.length === 0) { + if (node.role === "generic" || node.role === "none") { + return null; + } else { + return { ...node, children: [] }; + } + } + // 6) Return the updated node. // If it has children, update them; otherwise keep it as-is. return cleanedChildren.length > 0 @@ -448,6 +459,41 @@ export async function findScrollableElementIds( return scrollableBackendIds; } + +/** + * Removes any StaticText children whose combined text equals the parent's name. + * This is most often used to avoid duplicating a link's accessible name in separate child nodes. + * + * @param parent The parent accessibility node whose `.name` we check. + * @param children The parent's current children list, typically after cleaning. + * @returns A filtered list of children with redundant StaticText nodes removed. + */ +function removeRedundantStaticTextChildren( + parent: AccessibilityNode, + children: AccessibilityNode[], +): AccessibilityNode[] { + if (!parent.name) { + return children; + } + + const parentName = parent.name.replace(/\s+/g, " ").trim(); + + // Gather all StaticText children and combine their text + const staticTextChildren = children.filter( + (child) => child.role === "StaticText" && child.name, + ); + const combinedChildText = staticTextChildren + .map((child) => child.name!.replace(/\s+/g, " ").trim()) + .join(""); + + // If the combined text exactly matches the parent's name, remove those child nodes + if (combinedChildText === parentName) { + return children.filter((child) => child.role !== "StaticText"); + } + + return children; +} + export async function performPlaywrightMethod( stagehandPage: Page, logger: (logLine: LogLine) => void, From d31329ed8aef9caa79f6b6cbe9faecb446629157 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Tue, 8 Apr 2025 15:02:00 -0700 Subject: [PATCH 02/17] prettier --- lib/a11y/utils.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index be0887309..15443cee8 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -459,7 +459,6 @@ export async function findScrollableElementIds( return scrollableBackendIds; } - /** * Removes any StaticText children whose combined text equals the parent's name. * This is most often used to avoid duplicating a link's accessible name in separate child nodes. From 346acd8233e845b26c1fd457c28f5a05634ad358 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Tue, 8 Apr 2025 15:03:07 -0700 Subject: [PATCH 03/17] changeset --- .changeset/cute-kings-stare.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/cute-kings-stare.md diff --git a/.changeset/cute-kings-stare.md b/.changeset/cute-kings-stare.md new file mode 100644 index 000000000..340af534d --- /dev/null +++ b/.changeset/cute-kings-stare.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +collapse redundant text nodes into parent elements From 716f6a47a10244689a9b8b574cb628fe477aae94 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Tue, 8 Apr 2025 15:15:44 -0700 Subject: [PATCH 04/17] add link mapping to TreeResult --- lib/a11y/utils.ts | 20 +++++++++++++++++++- types/context.ts | 15 +++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index be0887309..f19ba765a 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -165,6 +165,9 @@ export async function buildHierarchicalTree( page?: StagehandPage, logger?: (logLine: LogLine) => void, ): Promise { + // Map to store nodeId -> URL for only those nodes that do have a URL. + const idToUrl: Record = {}; + // Map to store processed nodes for quick lookup const nodeMap = new Map(); const iframe_list: AccessibilityNode[] = []; @@ -178,6 +181,11 @@ export async function buildHierarchicalTree( return; } + const url = extractUrlFromAXNode(node); + if (url) { + idToUrl[node.nodeId] = url; + } + const hasChildren = node.childIds && node.childIds.length > 0; const hasValidName = node.name && node.name.trim() !== ""; const isInteractive = @@ -250,6 +258,7 @@ export async function buildHierarchicalTree( tree: finalTree, simplified: simplifiedFormat, iframes: iframe_list, + idToUrl: idToUrl, }; } @@ -294,6 +303,7 @@ export async function getAccessibilityTree( backendDOMNodeId: node.backendDOMNodeId, parentId: node.parentId, childIds: node.childIds, + properties: node.properties, }; }), page, @@ -459,7 +469,6 @@ export async function findScrollableElementIds( return scrollableBackendIds; } - /** * Removes any StaticText children whose combined text equals the parent's name. * This is most often used to avoid duplicating a link's accessible name in separate child nodes. @@ -494,6 +503,15 @@ function removeRedundantStaticTextChildren( return children; } +function extractUrlFromAXNode(axNode: AccessibilityNode): string | undefined { + if (!axNode.properties) return undefined; + const urlProp = axNode.properties.find((prop) => prop.name === "url"); + if (urlProp && urlProp.value && typeof urlProp.value.value === "string") { + return urlProp.value.value.trim(); + } + return undefined; +} + export async function performPlaywrightMethod( stagehandPage: Page, logger: (logLine: LogLine) => void, diff --git a/types/context.ts b/types/context.ts index d89068c5c..2c8976554 100644 --- a/types/context.ts +++ b/types/context.ts @@ -10,6 +10,13 @@ export interface AXNode { backendDOMNodeId?: number; parentId?: string; childIds?: string[]; + properties?: { + name: string; + value: { + type: string; + value?: string; + }; + }[]; } export type AccessibilityNode = { @@ -22,12 +29,20 @@ export type AccessibilityNode = { parentId?: string; nodeId?: string; backendDOMNodeId?: number; + properties?: { + name: string; + value: { + type: string; + value?: string; + }; + }[]; }; export interface TreeResult { tree: AccessibilityNode[]; simplified: string; iframes?: AccessibilityNode[]; + idToUrl: Record; } export interface EnhancedContext From 16d6d25a9ad1029d9e32c81afd0873f202ab9a49 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Tue, 8 Apr 2025 15:18:49 -0700 Subject: [PATCH 05/17] changeset --- .changeset/petite-worms-punch.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/petite-worms-punch.md diff --git a/.changeset/petite-worms-punch.md b/.changeset/petite-worms-punch.md new file mode 100644 index 000000000..be113e8f7 --- /dev/null +++ b/.changeset/petite-worms-punch.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +add mapping of node id -> url From 08668777b5ab152d202dbf4febb1e44aad305a25 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Wed, 9 Apr 2025 15:40:35 -0700 Subject: [PATCH 06/17] get url field from metadata inference call --- lib/inference.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/inference.ts b/lib/inference.ts index 82565ceaf..c5c6695f6 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -84,6 +84,11 @@ export async function extract({ .describe( "true if the goal is now accomplished. Use this conservatively, only when sure that the goal has been completed.", ), + url_field: z + .string() + .describe( + "the name of the field within the user defined schema that holds IDs that map to URLs or links that they are trying to extract.", + ), }); type ExtractionResponse = z.infer; @@ -282,6 +287,7 @@ export async function extract({ data: { completed: metadataResponseCompleted, progress: metadataResponseProgress, + url_field: metadataUrlField, }, usage: metadataResponseUsage, } = metadataResponse as LLMParsedResponse; @@ -296,6 +302,7 @@ export async function extract({ modelResponse: "metadata", completed: metadataResponseCompleted, progress: metadataResponseProgress, + urlField: metadataUrlField, }, ); metadataResponseFile = fileName; @@ -332,6 +339,7 @@ export async function extract({ metadata: { completed: metadataResponseCompleted, progress: metadataResponseProgress, + urlField: metadataUrlField, }, prompt_tokens: totalPromptTokens, completion_tokens: totalCompletionTokens, From e33f23c385fecbc7d06e13fb32f8b2725fd4aaf8 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Wed, 9 Apr 2025 15:41:13 -0700 Subject: [PATCH 07/17] map id->url --- lib/handlers/extractHandler.ts | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index 0015fca7e..bd44b912f 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -420,6 +420,7 @@ export class StagehandExtractHandler { level: 1, }); const outputString = tree.simplified; + const idToUrlMapping = tree.idToUrl; const extractionResponse = await extract({ instruction, @@ -436,7 +437,7 @@ export class StagehandExtractHandler { }); const { - metadata: { completed }, + metadata: { completed, urlField }, prompt_tokens: promptTokens, completion_tokens: completionTokens, inference_time_ms: inferenceTimeMs, @@ -486,9 +487,47 @@ export class StagehandExtractHandler { }, }); } + + if (urlField) { + this.replaceIdsInField(output, urlField, idToUrlMapping); + } return output; } + /** + * Recursively looks for all properties named `fieldKey` + * in `obj` (including nested objects/arrays). + * Replaces bracketed IDs in those fields if they are strings. + */ + private replaceIdsInField( + obj: unknown, + fieldKey: string, + idToUrlMapping: Record, + ): void { + if (Array.isArray(obj)) { + // For arrays, recurse on each element + for (const item of obj) { + this.replaceIdsInField(item, fieldKey, idToUrlMapping); + } + return; + } + + // If it's a non-null object, iterate over its keys + if (obj && typeof obj === "object") { + for (const [key, value] of Object.entries(obj)) { + if (key === fieldKey && typeof value === "string") { + // Use a regex that matches both bracketed and non bracketed IDs + // eg "[374]" and "374". + (obj as Record)[key] = value.replace( + /\[?(\d+)\]?/g, + (match, p1) => idToUrlMapping[p1] ?? match, + ); + } else { + this.replaceIdsInField(value, fieldKey, idToUrlMapping); + } + } + } + } /** * Get the width, height, and offsets of either the entire page or a specific element. * (Matches your existing getTargetDimensions logic, just adapted to accept a string | undefined.) From 4be76c3aea5aa188837384f44a0973c670e437a7 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Wed, 9 Apr 2025 15:41:46 -0700 Subject: [PATCH 08/17] update prompt --- lib/prompt.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/prompt.ts b/lib/prompt.ts index 61d42f93e..f9c721bb3 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -51,7 +51,8 @@ ONLY print the content using the print_extracted_data tool provided. ? `Once you are given the text-rendered webpage, you must thoroughly and meticulously analyze it. Be very careful to ensure that you do not miss any important information.` - : ""; + : "If a user is attempting to extract links or URLs, you MUST respond with ONLY the IDs of the link elements. \n" + + "Do not attempt to extract links directly from the text unless absolutely necessary. "; const userInstructions = buildUserInstructionsString( userProvidedInstructions, @@ -120,6 +121,8 @@ Refined content:`, const metadataSystemPrompt = `You are an AI assistant tasked with evaluating the progress and completion status of an extraction task. Analyze the extraction response and determine if the task is completed or if more information is needed. +If a user's instruction indicates that they are attempting to extract links, you must indicate which field that they wish to have the links populated into. + Strictly abide by the following criteria: 1. Once the instruction has been satisfied by the current extraction response, ALWAYS set completion status to true and stop processing, regardless of remaining chunks. 2. Only set completion status to false if BOTH of these conditions are true: From fc82c012f7960741e2e2063a047cce58a461e2d9 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Wed, 9 Apr 2025 15:42:07 -0700 Subject: [PATCH 09/17] useTextExtract should be default false --- lib/prompt.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/prompt.ts b/lib/prompt.ts index f9c721bb3..289c0254a 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -18,7 +18,7 @@ ${userProvidedInstructions}`; // extract export function buildExtractSystemPrompt( isUsingPrintExtractedDataTool: boolean = false, - useTextExtract: boolean = true, + useTextExtract: boolean = false, userProvidedInstructions?: string, ): ChatMessage { const baseContent = `You are extracting content on behalf of a user. From 81a5233e66694e04ad69e76dad596535faf9c082 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Wed, 9 Apr 2025 15:43:32 -0700 Subject: [PATCH 10/17] changeset --- .changeset/fifty-cats-sell.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/fifty-cats-sell.md diff --git a/.changeset/fifty-cats-sell.md b/.changeset/fifty-cats-sell.md new file mode 100644 index 000000000..dfc981460 --- /dev/null +++ b/.changeset/fifty-cats-sell.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +extract links From 8c998286faa00beb880b399393f045e152e1e99b Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 11 Apr 2025 10:33:03 -0700 Subject: [PATCH 11/17] better naming --- lib/handlers/extractHandler.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index bd44b912f..3c01ad522 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -489,7 +489,7 @@ export class StagehandExtractHandler { } if (urlField) { - this.replaceIdsInField(output, urlField, idToUrlMapping); + this.replaceIdsWithUrls(output, urlField, idToUrlMapping); } return output; } @@ -499,7 +499,7 @@ export class StagehandExtractHandler { * in `obj` (including nested objects/arrays). * Replaces bracketed IDs in those fields if they are strings. */ - private replaceIdsInField( + private replaceIdsWithUrls( obj: unknown, fieldKey: string, idToUrlMapping: Record, @@ -507,7 +507,7 @@ export class StagehandExtractHandler { if (Array.isArray(obj)) { // For arrays, recurse on each element for (const item of obj) { - this.replaceIdsInField(item, fieldKey, idToUrlMapping); + this.replaceIdsWithUrls(item, fieldKey, idToUrlMapping); } return; } @@ -523,7 +523,7 @@ export class StagehandExtractHandler { (match, p1) => idToUrlMapping[p1] ?? match, ); } else { - this.replaceIdsInField(value, fieldKey, idToUrlMapping); + this.replaceIdsWithUrls(value, fieldKey, idToUrlMapping); } } } From d794ad33e1a30ab332943feae174f50e6727a773 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Apr 2025 12:16:15 -0700 Subject: [PATCH 12/17] rm changeset --- .changeset/petite-worms-punch.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .changeset/petite-worms-punch.md diff --git a/.changeset/petite-worms-punch.md b/.changeset/petite-worms-punch.md deleted file mode 100644 index be113e8f7..000000000 --- a/.changeset/petite-worms-punch.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@browserbasehq/stagehand": patch ---- - -add mapping of node id -> url From 49b992f98fc1072b042b09debe8aaa9667febbe2 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Apr 2025 12:48:15 -0700 Subject: [PATCH 13/17] schema patch approach --- lib/handlers/extractHandler.ts | 293 ++++++++++++++++++++++++++++----- types/stagehand.ts | 18 ++ 2 files changed, 271 insertions(+), 40 deletions(-) diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index 3c01ad522..9bab1f214 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -1,5 +1,6 @@ -import { z } from "zod"; +import { z, ZodTypeAny } from "zod"; import { LogLine } from "../../types/log"; +import { ZodPathSegments } from "../../types/stagehand"; import { TextAnnotation } from "../../types/textannotation"; import { extract } from "../inference"; import { LLMClient } from "../llm/LLMClient"; @@ -422,11 +423,16 @@ export class StagehandExtractHandler { const outputString = tree.simplified; const idToUrlMapping = tree.idToUrl; + // Transform user defined schema to replace string().url() with .number() + const [transformedSchema, urlFieldPaths] = + transformUrlStringsToNumericIds(schema); + + // call extract inference with transformed schema const extractionResponse = await extract({ instruction, previouslyExtractedContent: content, domElements: outputString, - schema, + schema: transformedSchema, chunksSeen: 1, chunksTotal: 1, llmClient, @@ -437,7 +443,7 @@ export class StagehandExtractHandler { }); const { - metadata: { completed, urlField }, + metadata: { completed }, prompt_tokens: promptTokens, completion_tokens: completionTokens, inference_time_ms: inferenceTimeMs, @@ -488,45 +494,12 @@ export class StagehandExtractHandler { }); } - if (urlField) { - this.replaceIdsWithUrls(output, urlField, idToUrlMapping); - } - return output; - } - - /** - * Recursively looks for all properties named `fieldKey` - * in `obj` (including nested objects/arrays). - * Replaces bracketed IDs in those fields if they are strings. - */ - private replaceIdsWithUrls( - obj: unknown, - fieldKey: string, - idToUrlMapping: Record, - ): void { - if (Array.isArray(obj)) { - // For arrays, recurse on each element - for (const item of obj) { - this.replaceIdsWithUrls(item, fieldKey, idToUrlMapping); - } - return; + // revert to original schema and populate with URLs + for (const { segments } of urlFieldPaths) { + injectUrls(output, segments, idToUrlMapping); } - // If it's a non-null object, iterate over its keys - if (obj && typeof obj === "object") { - for (const [key, value] of Object.entries(obj)) { - if (key === fieldKey && typeof value === "string") { - // Use a regex that matches both bracketed and non bracketed IDs - // eg "[374]" and "374". - (obj as Record)[key] = value.replace( - /\[?(\d+)\]?/g, - (match, p1) => idToUrlMapping[p1] ?? match, - ); - } else { - this.replaceIdsWithUrls(value, fieldKey, idToUrlMapping); - } - } - } + return output as z.infer; } /** * Get the width, height, and offsets of either the entire page or a specific element. @@ -681,3 +654,243 @@ export class StagehandExtractHandler { return deduplicated; } } + +/** + * Scans the provided Zod schema for any `z.string().url()` fields and + * replaces them with `z.number()`. + * + * @param schema - The Zod object schema to transform. + * @returns A tuple containing: + * 1. The transformed schema (or the original schema if no changes were needed). + * 2. An array of {@link ZodPathSegments} objects representing all the replaced URL fields, + * with each path segment showing where in the schema the replacement occurred. + */ +export function transformUrlStringsToNumericIds< + T extends z.ZodObject, +>(schema: T): [T, ZodPathSegments[]] { + const shape = schema._def.shape(); + const newShape: Record = {}; + const urlPaths: ZodPathSegments[] = []; + let changed = false; + + for (const [key, value] of Object.entries(shape)) { + const [childTransformed, childPaths] = transformSchema(value, [key]); + newShape[key] = childTransformed; + if (childTransformed !== value) { + changed = true; + } + if (childPaths.length > 0) { + childPaths.forEach((cp) => { + urlPaths.push({ segments: [key, ...cp.segments] }); + }); + } + } + + const finalSchema = changed ? z.object(newShape) : schema; + return [finalSchema as T, urlPaths]; +} + +/** + * Recursively traverses a given Zod schema, scanning for any fields of type `z.string().url()`. + * For each such field, it replaces the `z.string().url()` with `z.number()`. + * + * This function is used internally by higher-level utilities (e.g., transforming entire object schemas) + * and handles nested objects, arrays, unions, intersections, optionals. + * + * @param schema - The Zod schema to transform. + * @param currentPath - An array of string/number keys representing the current schema path (used internally for recursion). + * @returns A two-element tuple: + * 1. The updated Zod schema, with any `.url()` fields replaced by `z.number()`. + * 2. An array of {@link ZodPathSegments} objects representing each replaced field, including the path segments. + */ +export function transformSchema( + schema: ZodTypeAny, + currentPath: Array, +): [ZodTypeAny, ZodPathSegments[]] { + // 1) If it's a string with .url(), convert to z.number() + if (schema instanceof z.ZodString) { + const hasUrlCheck = + schema._def.checks?.some((check) => check.kind === "url") ?? false; + if (hasUrlCheck) { + return [ + z.number().describe("ID of element that points to a URL"), + [{ segments: [] }], + ]; + } + return [schema, []]; + } + + // 2) If it's an object, transform each field + if (schema instanceof z.ZodObject) { + // The shape is a raw object containing fields keyed by string (no symbols): + const shape = schema._def.shape() as Record; + const newShape: Record = {}; + const urlPaths: ZodPathSegments[] = []; + let changed = false; + + const shapeKeys = Object.keys(shape); + + for (const key of shapeKeys) { + const child = shape[key]; + const [transformedChild, childPaths] = transformSchema(child, [ + ...currentPath, + key, + ]); + + if (transformedChild !== child) { + changed = true; + } + newShape[key] = transformedChild; + + if (childPaths.length > 0) { + for (const cp of childPaths) { + urlPaths.push({ segments: [key, ...cp.segments] }); + } + } + } + + if (changed) { + return [z.object(newShape), urlPaths]; + } + return [schema, urlPaths]; + } + + // 3) If it's an array, transform its item type + if (schema instanceof z.ZodArray) { + const itemType = schema._def.type as ZodTypeAny; + const [transformedItem, childPaths] = transformSchema(itemType, [ + ...currentPath, + "*", + ]); + const changed = transformedItem !== itemType; + const arrayPaths: ZodPathSegments[] = childPaths.map((cp) => ({ + segments: ["*", ...cp.segments], + })); + + if (changed) { + return [z.array(transformedItem), arrayPaths]; + } + return [schema, arrayPaths]; + } + + // 4) If it's a union, transform each option + if (schema instanceof z.ZodUnion) { + // Cast the union’s options to an array of ZodTypeAny + const unionOptions = schema._def.options as ZodTypeAny[]; + const newOptions: ZodTypeAny[] = []; + let changed = false; + let allPaths: ZodPathSegments[] = []; + + unionOptions.forEach((option: ZodTypeAny, idx: number) => { + const [newOption, childPaths] = transformSchema(option, [ + ...currentPath, + `union_${idx}`, + ]); + if (newOption !== option) { + changed = true; + } + newOptions.push(newOption); + allPaths = [...allPaths, ...childPaths]; + }); + + if (changed) { + // We assume at least two options remain: + return [ + z.union(newOptions as [ZodTypeAny, ZodTypeAny, ...ZodTypeAny[]]), + allPaths, + ]; + } + return [schema, allPaths]; + } + + // 5) If it's an intersection, transform left and right + if (schema instanceof z.ZodIntersection) { + const leftType = schema._def.left as ZodTypeAny; + const rightType = schema._def.right as ZodTypeAny; + + const [left, leftPaths] = transformSchema(leftType, [ + ...currentPath, + "intersection_left", + ]); + const [right, rightPaths] = transformSchema(rightType, [ + ...currentPath, + "intersection_right", + ]); + const changed = left !== leftType || right !== rightType; + const allPaths = [...leftPaths, ...rightPaths]; + if (changed) { + return [z.intersection(left, right), allPaths]; + } + return [schema, allPaths]; + } + + // 6) If it's optional, transform inner + if (schema instanceof z.ZodOptional) { + const innerType = schema._def.innerType as ZodTypeAny; + const [inner, innerPaths] = transformSchema(innerType, currentPath); + if (inner !== innerType) { + return [z.optional(inner), innerPaths]; + } + return [schema, innerPaths]; + } + + // 7) If it's nullable, transform inner + if (schema instanceof z.ZodNullable) { + const innerType = schema._def.innerType as ZodTypeAny; + const [inner, innerPaths] = transformSchema(innerType, currentPath); + if (inner !== innerType) { + return [z.nullable(inner), innerPaths]; + } + return [schema, innerPaths]; + } + + // 8) If it's an effect, transform base schema + if (schema instanceof z.ZodEffects) { + const baseSchema = schema._def.schema as ZodTypeAny; + const [newBaseSchema, basePaths] = transformSchema(baseSchema, currentPath); + if (newBaseSchema !== baseSchema) { + return [z.effect(newBaseSchema, schema._def.effect), basePaths]; + } + return [schema, basePaths]; + } + + // 9) If none of the above, return as-is + return [schema, []]; +} + +/** + * Once we get the final extracted object that has numeric IDs in place of URLs, + * use `injectUrls` to walk the object and replace numeric IDs + * with the real URL strings from idToUrlMapping. The `path` may include `*` + * for array indices (indicating "all items in the array"). + */ +export function injectUrls( + obj: unknown, + path: Array, + idToUrlMapping: Record, +): void { + if (path.length === 0) return; + const [key, ...rest] = path; + + if (key === "*") { + if (Array.isArray(obj)) { + for (const item of obj) { + injectUrls(item, rest, idToUrlMapping); + } + } + return; + } + + if (obj && typeof obj === "object") { + const record = obj as Record; + if (path.length === 1) { + const fieldValue = record[key]; + if (typeof fieldValue === "number") { + const mappedUrl = idToUrlMapping[String(fieldValue)]; + record[key] = mappedUrl ?? `UNMAPPED_ID_${fieldValue}`; + } + } else { + injectUrls(record[key], rest, idToUrlMapping); + } + } +} diff --git a/types/stagehand.ts b/types/stagehand.ts index 814ab0886..7fdc49a1d 100644 --- a/types/stagehand.ts +++ b/types/stagehand.ts @@ -270,3 +270,21 @@ export interface HistoryEntry { result: unknown; timestamp: string; } + +/** + * Represents a path through a Zod schema from the root object down to a + * particular field. The `segments` array describes the chain of keys/indices. + * + * - **String** segments indicate object property names. + * - **Number** segments indicate array indices. + * + * For example, `["users", 0, "homepage"]` might describe reaching + * the `homepage` field in `schema.users[0].homepage`. + */ +export interface ZodPathSegments { + /** + * The ordered list of keys/indices leading from the schema root + * to the targeted field. + */ + segments: Array; +} From e5115021610e8ecd252a2dae9a73a96d4691a22b Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Apr 2025 12:59:57 -0700 Subject: [PATCH 14/17] rm urlField from metadata schema --- lib/inference.ts | 8 -------- lib/prompt.ts | 3 --- 2 files changed, 11 deletions(-) diff --git a/lib/inference.ts b/lib/inference.ts index c5c6695f6..82565ceaf 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -84,11 +84,6 @@ export async function extract({ .describe( "true if the goal is now accomplished. Use this conservatively, only when sure that the goal has been completed.", ), - url_field: z - .string() - .describe( - "the name of the field within the user defined schema that holds IDs that map to URLs or links that they are trying to extract.", - ), }); type ExtractionResponse = z.infer; @@ -287,7 +282,6 @@ export async function extract({ data: { completed: metadataResponseCompleted, progress: metadataResponseProgress, - url_field: metadataUrlField, }, usage: metadataResponseUsage, } = metadataResponse as LLMParsedResponse; @@ -302,7 +296,6 @@ export async function extract({ modelResponse: "metadata", completed: metadataResponseCompleted, progress: metadataResponseProgress, - urlField: metadataUrlField, }, ); metadataResponseFile = fileName; @@ -339,7 +332,6 @@ export async function extract({ metadata: { completed: metadataResponseCompleted, progress: metadataResponseProgress, - urlField: metadataUrlField, }, prompt_tokens: totalPromptTokens, completion_tokens: totalCompletionTokens, diff --git a/lib/prompt.ts b/lib/prompt.ts index 77ac93da3..1c414d8aa 100644 --- a/lib/prompt.ts +++ b/lib/prompt.ts @@ -120,9 +120,6 @@ Refined content:`, const metadataSystemPrompt = `You are an AI assistant tasked with evaluating the progress and completion status of an extraction task. Analyze the extraction response and determine if the task is completed or if more information is needed. - -If a user's instruction indicates that they are attempting to extract links, you must indicate which field that they wish to have the links populated into. - Strictly abide by the following criteria: 1. Once the instruction has been satisfied by the current extraction response, ALWAYS set completion status to true and stop processing, regardless of remaining chunks. 2. Only set completion status to false if BOTH of these conditions are true: From 42ba19dfc765d23d42c77b8aa506cca31fb89225 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Apr 2025 13:02:12 -0700 Subject: [PATCH 15/17] empty string if no URL found for ID --- lib/handlers/extractHandler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index 9bab1f214..085d09c79 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -887,7 +887,7 @@ export function injectUrls( const fieldValue = record[key]; if (typeof fieldValue === "number") { const mappedUrl = idToUrlMapping[String(fieldValue)]; - record[key] = mappedUrl ?? `UNMAPPED_ID_${fieldValue}`; + record[key] = mappedUrl ?? ``; } } else { injectUrls(record[key], rest, idToUrlMapping); From 87c7ddb66fa0a7bd23cd6bb4dbc3ce0484a38705 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Apr 2025 13:36:22 -0700 Subject: [PATCH 16/17] add eval --- evals/evals.config.json | 4 + evals/tasks/extract_jfk_links.ts | 125 +++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 evals/tasks/extract_jfk_links.ts diff --git a/evals/evals.config.json b/evals/evals.config.json index 5e8d4c1f4..5c2e53753 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -309,6 +309,10 @@ { "name": "google_flights", "categories": ["act"] + }, + { + "name": "extract_jfk_links", + "categories": ["extract"] } ] } diff --git a/evals/tasks/extract_jfk_links.ts b/evals/tasks/extract_jfk_links.ts new file mode 100644 index 000000000..dc735c49b --- /dev/null +++ b/evals/tasks/extract_jfk_links.ts @@ -0,0 +1,125 @@ +import { EvalFunction } from "@/types/evals"; +import { z } from "zod"; + +export const extract_jfk_links: EvalFunction = async ({ + logger, + debugUrl, + sessionUrl, + stagehand, +}) => { + try { + await stagehand.page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/jfk/", + ); + + const extraction = await stagehand.page.extract({ + instruction: + "extract all the record file name and their corresponding links", + schema: z.object({ + records: z.array( + z.object({ + file_name: z.string().describe("the file name of the record"), + link: z.string().url(), + }), + ), + }), + }); + + // The list of records we expect to see + const expectedRecords = [ + { + file_name: "104-10003-10041.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10003-10041.pdf", + }, + { + file_name: "104-10004-10143 (C06932208).pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10004-10143%20(C06932208).pdf", + }, + { + file_name: "104-10004-10143.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10004-10143.pdf", + }, + { + file_name: "104-10004-10156.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10004-10156.pdf", + }, + { + file_name: "104-10004-10213.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10004-10213.pdf", + }, + { + file_name: "104-10005-10321.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10005-10321.pdf", + }, + { + file_name: "104-10006-10247.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10006-10247.pdf", + }, + { + file_name: "104-10007-10345.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10007-10345.pdf", + }, + { + file_name: "104-10009-10021.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10009-10021.pdf", + }, + { + file_name: "104-10009-10222.pdf", + link: "https://www.archives.gov/files/research/jfk/releases/2025/0318/104-10009-10222.pdf", + }, + ]; + + const extractedRecords = extraction.records; + + // Check that all expected records exist in the extraction + const missingRecords = expectedRecords.filter((expected) => { + return !extractedRecords.some( + (r) => r.file_name === expected.file_name && r.link === expected.link, + ); + }); + + // Check that the extraction array is exactly length 10 + if (extractedRecords.length !== 10) { + await stagehand.close(); + return { + _success: false, + reason: `Extraction has ${extractedRecords.length} records (expected 10).`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + if (missingRecords.length > 0) { + await stagehand.close(); + return { + _success: false, + reason: "Missing one or more expected records.", + missingRecords, + extractedRecords, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + + // If we reach here, the number of records is correct, and all are present + await stagehand.close(); + return { + _success: true, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + await stagehand.close(); + + return { + _success: false, + error: JSON.parse(JSON.stringify(error, null, 2)), + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } +}; From a6a1184e9d89b1ab9f131addaaf3aabb7d3ae47a Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Mon, 14 Apr 2025 13:54:28 -0700 Subject: [PATCH 17/17] add another eval --- evals/evals.config.json | 4 +++ evals/tasks/extract_single_link.ts | 52 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 evals/tasks/extract_single_link.ts diff --git a/evals/evals.config.json b/evals/evals.config.json index 5c2e53753..ade809c32 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -313,6 +313,10 @@ { "name": "extract_jfk_links", "categories": ["extract"] + }, + { + "name": "extract_single_link", + "categories": ["extract"] } ] } diff --git a/evals/tasks/extract_single_link.ts b/evals/tasks/extract_single_link.ts new file mode 100644 index 000000000..d2e19957f --- /dev/null +++ b/evals/tasks/extract_single_link.ts @@ -0,0 +1,52 @@ +import { EvalFunction } from "@/types/evals"; +import { z } from "zod"; + +export const extract_single_link: EvalFunction = async ({ + logger, + debugUrl, + sessionUrl, + stagehand, +}) => { + try { + await stagehand.page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/geniusee/", + ); + + const extraction = await stagehand.page.extract({ + instruction: "extract the link to the 'contact us' page", + schema: z.object({ + link: z.string().url(), + }), + }); + + await stagehand.close(); + const extractedLink = extraction.link; + const expectedLink = + "https://browserbase.github.io/stagehand-eval-sites/sites/geniusee/#contact"; + + if (extractedLink === expectedLink) { + return { + _success: true, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + reason: `Extracted link: ${extractedLink} does not match expected link: ${expectedLink}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + await stagehand.close(); + return { + _success: false, + error: JSON.parse(JSON.stringify(error, null, 2)), + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } +};