Skip to content

Commit 8c69013

Browse files
committed
feat(web-integration): enhance element caching by adding XPath support and improving cache hit logic
1 parent fe4e70d commit 8c69013

File tree

9 files changed

+156
-130
lines changed

9 files changed

+156
-130
lines changed

packages/core/src/insight/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ export default class Insight<
173173
const elements: BaseElement[] = [];
174174
(parseResult.elements || []).forEach((item) => {
175175
if ('id' in item) {
176-
const element = elementById(item.id);
176+
const element = elementById(item?.id);
177177

178178
if (!element) {
179179
console.warn(

packages/core/src/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ export interface ExecutionTaskApply<
383383
param: TaskParam,
384384
context: ExecutorContext,
385385
) => // biome-ignore lint/suspicious/noConfusingVoidType: <explanation>
386-
| Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>
386+
| Promise<ExecutionTaskReturn<TaskOutput, TaskLog> | undefined | void>
387387
| undefined
388388
| void;
389389
}

packages/shared/src/extractor/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,8 @@ export { extractTreeNodeAsString as webExtractNodeTreeAsString } from './web-ext
3030

3131
export { setNodeHashCacheListOnWindow } from './util';
3232

33-
export { getXpathsById, getElementInfoByNode } from './locator';
33+
export {
34+
getXpathsById,
35+
getElementInfoByNode,
36+
getElementInfoByXpath,
37+
} from './locator';

packages/shared/src/extractor/locator.ts

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,17 @@ const findFirstAncestorWithId = (element: Element): Element | null => {
3232
};
3333

3434
const getElementXPath = (element: Node): string => {
35+
// Handle text nodes
36+
if (element.nodeType === Node.TEXT_NODE) {
37+
// Get the XPath of the parent element
38+
const parentNode = element.parentNode;
39+
if (parentNode && parentNode.nodeType === Node.ELEMENT_NODE) {
40+
const parentXPath = getElementXPath(parentNode);
41+
return `${parentXPath}/text()`;
42+
}
43+
return '';
44+
}
45+
3546
if (element.nodeType !== Node.ELEMENT_NODE) return '';
3647

3748
const el = element as Element;
@@ -99,19 +110,37 @@ function generateXPaths(node: Node | null): string[] {
99110
}
100111

101112
export function getXpathsById(id: string): string[] | null {
102-
let node = getNodeFromCacheList(id);
113+
const node = getNodeFromCacheList(id);
103114

104115
if (!node) {
105116
return null;
106117
}
107118

108-
if (isTextElement(node)) {
109-
node = node.parentElement;
110-
}
111-
112119
return generateXPaths(node);
113120
}
114121

115122
/**
 * Builds the element info for a DOM node the caller has already resolved.
 *
 * Delegates to `collectElementInfo`, passing the global `window` and
 * `document` as the evaluation context.
 *
 * @param node - The DOM node to describe.
 * @returns Whatever `collectElementInfo` produces for the node, which may be `null`.
 */
export function getElementInfoByNode(node: Node): ElementInfo | null {
  const info = collectElementInfo(node, window, document);
  return info;
}
125+
126+
/**
 * Resolves an XPath expression against the current `document` and returns
 * the element info of the node it identifies.
 *
 * The expression must match exactly one node; zero or several matches are
 * treated as "not found" so that an ambiguous XPath never yields a result.
 *
 * @param xpath - XPath expression evaluated with `document` as context node.
 * @returns The info collected by `collectElementInfo` for the single matching
 *   node, or `null` when the match count is not exactly one.
 */
export function getElementInfoByXpath(xpath: string): ElementInfo | null {
  const matches = document.evaluate(
    xpath,
    document,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null,
  );

  // Only a unique match is acceptable; anything else yields no target.
  const target =
    matches.snapshotLength === 1 ? matches.snapshotItem(0) : null;

  return target ? collectElementInfo(target, window, document) : null;
}

packages/web-integration/src/common/task-cache.ts

Lines changed: 20 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
import { existsSync, readFileSync } from 'node:fs';
22
import { join } from 'node:path';
3-
import type {
4-
AIElementLocatorResponse,
5-
LocateResultElement,
6-
PlanningAIResponse,
7-
} from '@midscene/core';
3+
import type { LocateResultElement, PlanningAIResponse } from '@midscene/core';
84
import type { vlmPlanning } from '@midscene/core/ai-model';
95
import { stringifyDumpData, writeLogFile } from '@midscene/core/utils';
106
import { getMidsceneRunSubDir } from '@midscene/shared/common';
@@ -13,10 +9,12 @@ import { getRunningPkgInfo } from '@midscene/shared/fs';
139
import { getDebug } from '@midscene/shared/logger';
1410
import { ifInBrowser } from '@midscene/shared/utils';
1511
import semver from 'semver';
16-
import type { PlaywrightWebPage } from '../playwright';
17-
import type { PuppeteerWebPage } from '../puppeteer';
1812
import type { WebPage } from './page';
19-
import { type WebUIContext, replaceIllegalPathCharsAndSpace } from './utils';
13+
import {
14+
type WebUIContext,
15+
checkElementExistsByXPath,
16+
replaceIllegalPathCharsAndSpace,
17+
} from './utils';
2018
const debug = getDebug('cache');
2119

2220
export type PlanTask = {
@@ -55,7 +53,9 @@ export type LocateTask = {
5553
height: number;
5654
};
5755
};
58-
response: AIElementLocatorResponse;
56+
response: {
57+
xpaths: string[];
58+
};
5959
element: LocateResultElement;
6060
};
6161

@@ -190,24 +190,6 @@ export class TaskCache {
190190
* @param userPrompt String type, representing user prompt information
191191
* @return Returns a Promise object that resolves to a boolean or object
192192
*/
193-
async matchCache(
194-
pageContext: WebUIContext,
195-
type: 'plan',
196-
userPrompt: string,
197-
cacheGroup: AiTasks,
198-
): Promise<PlanTask['response']>;
199-
async matchCache(
200-
pageContext: WebUIContext,
201-
type: 'ui-tars-plan',
202-
userPrompt: string,
203-
cacheGroup: AiTasks,
204-
): Promise<UITarsPlanTask['response']>;
205-
async matchCache(
206-
pageContext: WebUIContext,
207-
type: 'locate',
208-
userPrompt: string,
209-
cacheGroup: AiTasks,
210-
): Promise<LocateTask['response']>;
211193
async matchCache(
212194
pageContext: WebUIContext,
213195
type: 'plan' | 'locate' | 'ui-tars-plan',
@@ -243,69 +225,24 @@ export class TaskCache {
243225

244226
// The corresponding element cannot be found in the new context
245227
if (taskRes?.type === 'locate') {
246-
const xpaths = taskRes.response?.elements?.[0]?.xpaths;
228+
const xpaths = taskRes.response?.xpaths;
247229

248230
if (!xpaths || !xpaths.length) {
249231
debug('no xpaths in cached response');
250232
return false;
251233
}
252234

253-
for (const xpath of xpaths) {
254-
if (this.page.pageType === 'playwright') {
255-
try {
256-
const playwrightPage = (this.page as PlaywrightWebPage)
257-
.underlyingPage;
258-
const xpathLocator = playwrightPage.locator(`xpath=${xpath}`);
259-
const xpathCount = await xpathLocator.count();
260-
if (xpathCount === 1) {
261-
debug(
262-
'cache hit, type: %s, prompt: %s, xpath: %s',
263-
type,
264-
userPrompt,
265-
xpath,
266-
);
267-
const xpathElement = await xpathLocator.first();
268-
await xpathElement.evaluate((element) => {
269-
element.setAttribute('data-midscene', 'cache-hit');
270-
element.scrollIntoView();
271-
});
272-
return taskRes.response;
273-
}
274-
} catch (error) {
275-
debug('playwright xpath locator error', error);
276-
}
277-
} else if (this.page.pageType === 'puppeteer') {
278-
try {
279-
const puppeteerPage = (this.page as PuppeteerWebPage)
280-
.underlyingPage;
281-
const xpathElements = await puppeteerPage.$$(`xpath=${xpath}`);
282-
if (xpathElements && xpathElements.length === 1) {
283-
debug(
284-
'cache hit, type: %s, prompt: %s, xpath: %s',
285-
type,
286-
userPrompt,
287-
xpath,
288-
);
289-
await xpathElements[0].evaluate((element) => {
290-
element.setAttribute('data-midscene', 'cache-hit');
291-
element.scrollIntoView();
292-
});
293-
return taskRes.response;
294-
}
295-
} catch (error) {
296-
debug('puppeteer xpath locator error', error);
297-
}
298-
} else {
299-
debug('unknown page type, will not match cache', {
300-
pageType: this.page.pageType,
301-
});
302-
}
303-
304-
debug('cannot match element with same id in current page', {
305-
element: taskRes.element,
306-
});
307-
return false;
235+
const elementExists = await checkElementExistsByXPath(
236+
this.page,
237+
xpaths,
238+
{ type, userPrompt, debug },
239+
);
240+
241+
if (elementExists) {
242+
return taskRes.response;
308243
}
244+
245+
return false;
309246
}
310247

311248
if (taskRes && taskRes.type === type && taskRes.prompt === userPrompt) {

packages/web-integration/src/common/tasks.ts

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -196,21 +196,21 @@ export class PageTaskExecutor {
196196
cachePrompt,
197197
);
198198
let elementInfo: ElementInfo | null = null;
199-
const oldId = locateCache?.elements?.[0]?.id;
199+
const xpaths = locateCache?.xpaths;
200200
let newId = null;
201201
try {
202-
if (oldId) {
202+
if (xpaths?.length) {
203203
// hit cache, use new id
204204
const elementInfosScriptContent =
205205
getElementInfosScriptContent();
206206
elementInfo = await this.page.evaluateJavaScript?.(
207-
`${elementInfosScriptContent}midscene_element_inspector.getElementInfoByNode(document.querySelector('[data-midscene="cache-hit"]'))`,
207+
`${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('${xpaths[0]}')`,
208208
);
209209
newId = elementInfo?.id;
210-
debug('get new id by node', newId);
210+
debug('get new id by xpath', elementInfo?.id);
211211
}
212212
} catch (error) {
213-
debug('get element info by node error: ', error);
213+
debug('get element info by xpath error: ', error);
214214
}
215215
let cacheHitFlag = false;
216216

@@ -253,12 +253,7 @@ export class PageTaskExecutor {
253253
},
254254
prompt: cachePrompt,
255255
response: {
256-
elements: [
257-
{
258-
id: newId || element.id,
259-
xpaths,
260-
},
261-
],
256+
xpaths,
262257
},
263258
element,
264259
});

packages/web-integration/src/common/utils.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import { assert, logMsg, uuid } from '@midscene/shared/utils';
1414
import dayjs from 'dayjs';
1515
import type { Page as PlaywrightPage } from 'playwright';
1616
import type { Page as PuppeteerPage } from 'puppeteer';
17+
import type { PlaywrightWebPage } from '../playwright';
18+
import type { PuppeteerWebPage } from '../puppeteer';
1719
import { WebElementInfo } from '../web-element';
1820
import type { WebPage } from './page';
1921

@@ -183,3 +185,76 @@ export function forceClosePopup(
183185
}
184186
});
185187
}
188+
189+
/**
 * Converts an XPath that targets a text node into the XPath of its parent
 * element, because Playwright/Puppeteer locators can only resolve elements.
 *
 * Only a trailing `text()` step is removed (optionally followed by a
 * positional predicate such as `[2]`). Occurrences of `text()` elsewhere in
 * the expression, e.g. inside a predicate, are left untouched so the
 * expression keeps its meaning.
 *
 * @param xpath - XPath expression, possibly ending in `/text()`.
 * @returns The expression without its trailing `text()` step.
 */
function replaceTextInXPath(xpath: string): string {
  return xpath.replace(/\/text\(\)(?:\[\d+\])?$/, '');
}
192+
193+
/**
 * Checks whether any of the cached XPaths still identifies exactly one
 * element on the given page.
 *
 * The XPaths are tried in order. A trailing `text()` step is stripped before
 * querying (see `replaceTextInXPath`), so for text-node XPaths it is the
 * parent element that is looked up. On the first unique match the element is
 * scrolled into view and tagged with `data-midscene="cache-hit"`.
 *
 * Errors raised by the underlying driver for one XPath are logged through
 * `logData.debug` and the next XPath is tried.
 *
 * @param page - Page wrapper; only `playwright` and `puppeteer` page types are supported.
 * @param xpaths - Candidate XPath expressions, in priority order.
 * @param logData - Context used for debug logging (`type`, `userPrompt`) and the logger itself.
 * @returns `true` when one XPath matched exactly one element, otherwise `false`.
 */
export async function checkElementExistsByXPath(
  page: WebPage,
  xpaths: string[],
  logData: {
    type: string;
    userPrompt: string;
    debug: (...args: any[]) => void;
  },
): Promise<boolean> {
  const { type, userPrompt, debug } = logData;

  // No XPath can ever match on an unsupported page type: report once and stop
  // instead of logging the same message for every candidate.
  if (page.pageType !== 'playwright' && page.pageType !== 'puppeteer') {
    debug('unknown page type, will not match cache', {
      pageType: page.pageType,
    });
    return false;
  }

  for (const xpath of xpaths) {
    // Locators resolve elements only, so query the parent of a text node.
    const selector = `xpath=${replaceTextInXPath(xpath)}`;

    if (page.pageType === 'playwright') {
      try {
        const playwrightPage = (page as PlaywrightWebPage).underlyingPage;
        const xpathLocator = playwrightPage.locator(selector);
        if ((await xpathLocator.count()) === 1) {
          debug(
            'cache hit, type: %s, prompt: %s, xpath: %s',
            type,
            userPrompt,
            xpath,
          );
          await xpathLocator.first().evaluate((element: Element) => {
            element.scrollIntoView();
            element.setAttribute('data-midscene', 'cache-hit');
          });
          return true;
        }
      } catch (error) {
        debug('playwright xpath locator error', error);
      }
    } else {
      try {
        const puppeteerPage = (page as PuppeteerWebPage).underlyingPage;
        const xpathElements = await puppeteerPage.$$(selector);
        if (xpathElements && xpathElements.length === 1) {
          debug(
            'cache hit, type: %s, prompt: %s, xpath: %s',
            type,
            userPrompt,
            xpath,
          );
          await xpathElements[0].evaluate((element: Element) => {
            element.scrollIntoView();
            element.setAttribute('data-midscene', 'cache-hit');
          });
          return true;
        }
      } catch (error) {
        debug('puppeteer xpath locator error', error);
      }
    }
  }

  debug('cannot match element with cached xpaths in current page');
  return false;
}

packages/web-integration/tests/ai/web/puppeteer/open-new-tab.test.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { PuppeteerAgent } from '@/puppeteer';
22
import { sleep } from '@midscene/core/utils';
3-
import { afterEach, describe, expect, it, vi } from 'vitest';
3+
import { afterEach, describe, it, vi } from 'vitest';
44
import { launchPage } from './utils';
55

66
vi.setConfig({
@@ -16,7 +16,9 @@ describe('agent with forceSameTabNavigation', () => {
1616
});
1717

1818
it('open new tab', async () => {
19-
const { originPage, reset } = await launchPage('https://www.bing.com/');
19+
const { originPage, reset } = await launchPage('https://www.bing.com/', {
20+
headless: false,
21+
});
2022
resetFn = reset;
2123
const agent = new PuppeteerAgent(originPage, {
2224
cacheId: 'puppeteer-open-new-tab',

0 commit comments

Comments
 (0)