From 33cc8dcccd2bc0494a29bb421721ced303f211fe Mon Sep 17 00:00:00 2001 From: lmmsoft Date: Sat, 6 Jun 2026 03:40:54 +0800 Subject: [PATCH 1/4] fix(doubao): refresh history and detail extraction Generated on: cmcc-i5 --- clis/doubao/history.js | 2 +- clis/doubao/utils.js | 235 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 226 insertions(+), 11 deletions(-) diff --git a/clis/doubao/history.js b/clis/doubao/history.js index 2c6a96977..5d1a7dc06 100644 --- a/clis/doubao/history.js +++ b/clis/doubao/history.js @@ -16,7 +16,7 @@ export const historyCommand = cli({ columns: ['Index', 'Id', 'Title', 'Url'], func: async (page, kwargs) => { const limit = parseInt(kwargs.limit, 10) || 50; - const conversations = await getDoubaoConversationList(page); + const conversations = await getDoubaoConversationList(page, { limit }); if (conversations.length === 0) { return [{ Index: 0, Id: '', Title: 'No conversation history found. Make sure you are logged in.', Url: '' }]; } diff --git a/clis/doubao/utils.js b/clis/doubao/utils.js index 23adcf1e2..7cee03ee5 100644 --- a/clis/doubao/utils.js +++ b/clis/doubao/utils.js @@ -819,22 +819,105 @@ export function collectDoubaoTranscriptAdditions(beforeLines, currentLines, prom .map(({ sanitized }) => sanitized) .join('\n'); } +function getRecentConversationsScript(limit) { + return ` + (async () => { + const clean = (value) => String(value || '').replace(/\\u00a0/g, ' ').replace(/\\s+/g, ' ').trim(); + const requestedLimit = Math.max(1, Math.min(Number(${JSON.stringify(limit)}) || 50, 1000)); + const resources = performance.getEntriesByType('resource') + .map((entry) => entry.name) + .filter((name) => typeof name === 'string'); + const recentUrl = [...resources].reverse().find((name) => name.includes('/im/chain/recent_conv')); + if (!recentUrl) return { ok: false, reason: 'recent_conv resource not found', conversations: [] }; + + const conversations = []; + const seen = new Set(); + let convVersion = 0; + let hasMore = true; + + for (let pageIndex = 0; pageIndex < 60 && hasMore && conversations.length < requestedLimit; pageIndex += 1) { + const batchLimit = Math.max(1, Math.min(50, requestedLimit - conversations.length)); + const body = { + cmd: 3200, + sequence_id: String(Date.now()) + '_' + pageIndex, + channel: 2, + version: '1', + uplink_body: { + pull_recent_conv_chain_uplink_body: { + api_version: 1, + conv_version: Number(convVersion) || 0, + direction: Number(convVersion) === 0 ? 3 : 1, + limit: batchLimit, + message_count_per_conv: 10, + option: { + not_need_message: true, + need_complete_conversation: true, + need_coco_conversation: Number(convVersion) === 0, + need_coco_bot: Number(convVersion) === 0, + need_pc_pin_chain: true, + pc_pin_query_type: 0, + }, + }, + }, + }; + const response = await fetch(recentUrl, { + method: 'POST', + headers: { 'Content-Type': 'application/json; encoding=utf-8' }, + body: JSON.stringify(body), + }); + const json = await response.json().catch(() => ({})); + if (json.status_code !== 0) { + return { + ok: false, + reason: json.status_desc || json.message || 'recent_conv request failed', + conversations, + }; + } + + const downlink = json.downlink_body?.pull_recent_conv_chain_downlink_body || {}; + for (const cell of downlink.cells || []) { + const conversation = cell?.conversation || {}; + const id = clean(conversation.conversation_id || cell?.id || ''); + if (!id || seen.has(id)) continue; + seen.add(id); + conversations.push({ + id, + title: clean(conversation.name || conversation.title || 'Untitled conversation').slice(0, 200), + href: '/chat/' + id, + updateTime: clean(conversation.update_time || ''), + latestIndex: clean(conversation.latest_index || ''), + botId: clean(conversation.bot_id || ''), + botType: conversation.bot_type ?? '', + }); + if (conversations.length >= requestedLimit) break; + } + + hasMore = Boolean(downlink.has_more); + convVersion = downlink.next_conv_version || 0; + if (!convVersion || !(downlink.cells || []).length) break; + } + + return { ok: true, conversations }; + })() + `; +} function getConversationListScript() { return ` (() => { - const sidebar = document.querySelector('[data-testid="flow_chat_sidebar"]'); + const sidebar = document.querySelector('[data-testid="flow_chat_sidebar"], #flow_chat_sidebar'); if (!sidebar) return []; const items = Array.from( - sidebar.querySelectorAll('a[data-testid="chat_list_thread_item"]') + sidebar.querySelectorAll('a[data-testid="chat_list_thread_item"], a[id^="conversation_"], a[href*="/chat/"]') ); return items .map(a => { const href = a.getAttribute('href') || ''; - const match = href.match(/\\/chat\\/(\\d{10,})/); - if (!match) return null; - const id = match[1]; + const idFromAttr = (a.getAttribute('id') || '').match(/^conversation_(\\d{10,})$/)?.[1] || ''; + const idFromHref = href.match(/\\/chat\\/(?:bot\\/chat\\/)?(\\d{10,})/)?.[1] || ''; + const id = idFromAttr || idFromHref; + if (!id) return null; const textContent = (a.textContent || a.innerText || '').trim(); const title = textContent .replace(/\\s+/g, ' ') @@ -845,8 +928,21 @@ function getConversationListScript() { })() `; } -export async function getDoubaoConversationList(page) { +export async function getDoubaoConversationList(page, options = {}) { await ensureDoubaoChatPage(page); + const requestedLimit = Math.max(1, parseInt(String(options.limit || '50'), 10) || 50); + const apiResult = await page.evaluate(getRecentConversationsScript(requestedLimit)).catch(() => null); + if (apiResult?.ok && Array.isArray(apiResult.conversations) && apiResult.conversations.length > 0) { + return apiResult.conversations.map((item) => ({ + Id: item.id, + Title: item.title || 'Untitled conversation', + Url: `${DOUBAO_CHAT_URL}/${item.id}`, + UpdateTime: item.updateTime, + LatestIndex: item.latestIndex, + BotId: item.botId, + BotType: item.botType, + })); + } const raw = await page.evaluate(getConversationListScript()); if (!Array.isArray(raw)) return []; @@ -863,9 +959,22 @@ export function parseDoubaoConversationId(input) { function getConversationDetailScript() { return ` (() => { - const clean = (v) => (v || '').replace(/\\u00a0/g, ' ').replace(/\\n{3,}/g, '\\n\\n').trim(); + const clean = (v) => (v || '') + .replace(/\\u00a0/g, ' ') + .replace(/\\n{3,}/g, '\\n\\n') + .trim(); - const messageList = document.querySelector('[data-testid="message-list"]'); + const isVisible = (el) => { + if (!(el instanceof HTMLElement)) return false; + const style = window.getComputedStyle(el); + if (style.display === 'none' || style.visibility === 'hidden') return false; + const rect = el.getBoundingClientRect(); + return rect.width > 0 && rect.height > 0; + }; + + const messageList = document.querySelector( + '[data-testid="message-list"], .conversation-page-message-host, [class*="message-list-"]' + ); if (!messageList) return { messages: [], meeting: null }; const meetingCard = messageList.querySelector('[data-testid="meeting-minutes-card"]'); @@ -879,8 +988,114 @@ function getConversationDetailScript() { }; } + const roleFor = (root) => { + if ( + root.matches('[data-testid="send_message"], [class*="send-message"], [class*="justify-end"]') + || root.querySelector('[data-testid="send_message"], [class*="send-message"], [class*="bg-g-send-msg-bubble"]') + || root.querySelector('[data-foundation-type="send-message-action-bar"]') + ) { + return 'User'; + } + if ( + root.matches('[data-testid="receive_message"], [data-testid*="receive_message"], [class*="receive-message"]') + || root.querySelector('[data-testid="receive_message"], [data-testid*="receive_message"], [class*="receive-message"]') + || root.querySelector('[data-foundation-type="receive-message-action-bar"]') + || root.querySelector('.md-box-root, [class*="md-box-root"], .flow-markdown-body, [class*="markdown"]') + ) { + return 'Assistant'; + } + return ''; + }; + + const textSelectors = [ + '[data-testid="message_text_content"]', + '[data-testid="message_content"]', + '[data-testid*="message_text"]', + '[data-testid*="message_content"]', + '[class*="bg-g-send-msg-bubble"]', + '[class*="bg-g-receive-msg-bubble"]', + '.md-box-root', + '[class*="md-box-root"]', + '.flow-markdown-body', + '[class*="message-content"]', + ]; + + const extractImageLines = (root) => Array.from(root.querySelectorAll('img')) + .filter((img) => img instanceof HTMLImageElement && isVisible(img)) + .map((img) => { + const width = img.naturalWidth || img.width || 0; + const height = img.naturalHeight || img.height || 0; + if (width > 0 && height > 0 && width <= 48 && height <= 48) return ''; + const url = clean(img.currentSrc || img.src || ''); + return /^https?:\\/\\//i.test(url) ? 'Image: ' + url : ''; + }) + .filter((line, index, lines) => line && lines.indexOf(line) === index); + + const extractText = (root) => { + const chunks = []; + const seenText = new Set(); + for (const selector of textSelectors) { + const nodes = Array.from(root.querySelectorAll(selector)).filter(isVisible); + for (const node of nodes) { + const text = clean(node.innerText || node.textContent || ''); + if (!text || seenText.has(text)) continue; + seenText.add(text); + chunks.push(text); + } + if (chunks.length > 0) break; + } + const text = chunks.length > 0 ? clean(chunks.join('\\n')) : clean(root.innerText || root.textContent || ''); + const imageLines = extractImageLines(root); + return clean([text, ...imageLines].filter(Boolean).join('\\n')); + }; + + const roots = []; + const seenNodes = new Set(); + const selectors = [ + '[data-testid="union_message"]', + '[data-testid="message-block-container"]', + '.v_list_row [data-message-id]', + '[data-message-id]', + ]; + for (const selector of selectors) { + messageList.querySelectorAll(selector).forEach((node) => { + if (!(node instanceof HTMLElement) || seenNodes.has(node)) return; + seenNodes.add(node); + roots.push(node); + }); + } + + const filteredRoots = roots + .filter((node) => isVisible(node) && !node.closest('script, style, noscript')) + .filter((node, index, nodes) => !nodes.some((other, otherIndex) => otherIndex !== index && other.contains(node))); + + filteredRoots.sort((a, b) => { + if (a === b) return 0; + const pos = a.compareDocumentPosition(b); + return pos & Node.DOCUMENT_POSITION_FOLLOWING ? -1 : 1; + }); + + const deduped = []; + const seenMessages = new Set(); + for (const root of filteredRoots) { + const role = roleFor(root) || 'System'; + const text = extractText(root); + if (!text) continue; + const key = role + '::' + text; + if (seenMessages.has(key)) continue; + seenMessages.add(key); + deduped.push({ + role, + text, + hasMeetingCard: !!root.querySelector('[data-testid="meeting-minutes-card"]'), + }); + } + + const messages = deduped.filter((message) => message.text); + if (messages.length > 0) return { messages, meeting }; + const unions = Array.from(messageList.querySelectorAll('[data-testid="union_message"]')); - const messages = unions.map(u => { + const legacyMessages = unions.map(u => { const isSend = !!u.querySelector('[data-testid="send_message"]'); const isReceive = !!u.querySelector('[data-testid="receive_message"]'); const textEl = u.querySelector('[data-testid="message_text_content"]'); @@ -892,7 +1107,7 @@ function getConversationDetailScript() { }; }).filter(m => m.text); - return { messages, meeting }; + return { messages: legacyMessages, meeting }; })() `; } From c00b7f2dd4a1f0a34c8068110f3a04b0dccd41aa Mon Sep 17 00:00:00 2001 From: lmmsoft Date: Sat, 6 Jun 2026 09:08:10 +0800 Subject: [PATCH 2/4] fix(doubao): carry pin query type across history pages Generated on: cmcc-i5 --- clis/doubao/utils.js | 5 ++++- clis/doubao/utils.test.js | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clis/doubao/utils.js b/clis/doubao/utils.js index 7cee03ee5..305dc46d1 100644 --- a/clis/doubao/utils.js +++ b/clis/doubao/utils.js @@ -834,6 +834,7 @@ function getRecentConversationsScript(limit) { const seen = new Set(); let convVersion = 0; let hasMore = true; + let pcPinQueryType = 0; for (let pageIndex = 0; pageIndex < 60 && hasMore && conversations.length < requestedLimit; pageIndex += 1) { const batchLimit = Math.max(1, Math.min(50, requestedLimit - conversations.length)); @@ -855,7 +856,7 @@ function getRecentConversationsScript(limit) { need_coco_conversation: Number(convVersion) === 0, need_coco_bot: Number(convVersion) === 0, need_pc_pin_chain: true, - pc_pin_query_type: 0, + pc_pin_query_type: pcPinQueryType, }, }, }, @@ -894,6 +895,7 @@ function getRecentConversationsScript(limit) { hasMore = Boolean(downlink.has_more); convVersion = downlink.next_conv_version || 0; + pcPinQueryType = downlink.extra?.pc_pin_query_type ?? pcPinQueryType; if (!convVersion || !(downlink.cells || []).length) break; } @@ -1355,6 +1357,7 @@ export const __test__ = { clickSendButtonScript, composerStateScript, detectDoubaoVerificationScript, + getRecentConversationsScript, getTurnsScript, getTranscriptLinesScript, }; diff --git a/clis/doubao/utils.test.js b/clis/doubao/utils.test.js index b7f216ad4..b6cd52da4 100644 --- a/clis/doubao/utils.test.js +++ b/clis/doubao/utils.test.js @@ -275,6 +275,13 @@ describe('doubao receive strategy', () => { expect(transcriptScript).toContain('请仔细甄别'); expect(transcriptScript).toContain('下载电脑版'); }); + + it('carries the server pc_pin_query_type across recent-conversation pages', () => { + const recentScript = __test__.getRecentConversationsScript(100); + expect(recentScript).toContain('let pcPinQueryType = 0'); + expect(recentScript).toContain('pc_pin_query_type: pcPinQueryType'); + expect(recentScript).toContain('pcPinQueryType = downlink.extra?.pc_pin_query_type ?? pcPinQueryType'); + }); }); describe('collectDoubaoTranscriptAdditions', () => { it('ignores landing-page capability chips that are not assistant content', () => { From ddbea1b9ad1e78dd32af7b8fe7ccde094adbafe3 Mon Sep 17 00:00:00 2001 From: lmmsoft Date: Sat, 6 Jun 2026 11:53:15 +0800 Subject: [PATCH 3/4] Add doubao media download command Generated on: cmcc-i5 --- cli-manifest.json | 56 ++++++++++++ clis/doubao/download.js | 95 ++++++++++++++++++++ clis/doubao/download.test.js | 117 ++++++++++++++++++++++++ clis/doubao/utils.js | 168 +++++++++++++++++++++++++++++++++++ clis/doubao/utils.test.js | 69 ++++++++++++++ 5 files changed, 505 insertions(+) create mode 100644 clis/doubao/download.js create mode 100644 clis/doubao/download.test.js diff --git a/cli-manifest.json b/cli-manifest.json index 36d8255f5..9ccf26592 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -9756,6 +9756,62 @@ "navigateBefore": false, "siteSession": "persistent" }, + { + "site": "doubao", + "name": "download", + "description": "Download images and media from a Doubao conversation", + "access": "read", + "domain": "www.doubao.com", + "strategy": "cookie", + "browser": true, + "args": [ + { + "name": "id", + "type": "str", + "required": true, + "positional": true, + "help": "Conversation ID (numeric or full URL)" + }, + { + "name": "output", + "type": "str", + "default": "./doubao-downloads", + "required": false, + "help": "Output directory" + }, + { + "name": "variant", + "type": "str", + "default": "original", + "required": false, + "help": "Image variant: original, raw, preview, or thumb" + }, + { + "name": "limit", + "type": "str", + "default": "0", + "required": false, + "help": "Max media files to download; 0 means all" + }, + { + "name": "timeout", + "type": "str", + "default": "15000", + "required": false, + "help": "Per-file download timeout in milliseconds" + } + ], + "columns": [ + "index", + "type", + "status", + "size" + ], + "type": "js", + "modulePath": "doubao/download.js", + "sourceFile": "doubao/download.js", + "navigateBefore": false + }, { "site": "doubao", "name": "history", diff --git a/clis/doubao/download.js b/clis/doubao/download.js new file mode 100644 index 000000000..b520ae911 --- /dev/null +++ b/clis/doubao/download.js @@ -0,0 +1,95 @@ +import * as path from 'node:path'; +import { cli, Strategy } from '@jackwener/opencli/registry'; +import { formatCookieHeader } from '@jackwener/opencli/download'; +import { downloadMedia } from '@jackwener/opencli/download/media-download'; +import { ArgumentError } from '@jackwener/opencli/errors'; +import { DOUBAO_DOMAIN, getConversationAssets, parseDoubaoConversationId } from './utils.js'; + +const SUPPORTED_VARIANTS = new Set(['original', 'raw', 'preview', 'thumb']); + +function sanitizeFilenamePart(value) { + return String(value || '') + .replace(/[\\/:*?"<>|]+/g, '_') + .replace(/\s+/g, '_') + .replace(/^_+|_+$/g, '') + .slice(0, 120); +} + +function extensionFromAsset(asset) { + const format = String(asset.format || '').toLowerCase().replace(/[^a-z0-9]/g, ''); + if (format) { + if (format === 'jpeg') + return '.jpg'; + if (format === 'heic') + return '.heic'; + return `.${format}`; + } + try { + const ext = path.extname(new URL(asset.url).pathname).toLowerCase(); + if (ext) + return ext; + } + catch { } + return asset.type === 'video' ? '.mp4' : '.jpg'; +} + +function filenameForAsset(conversationId, asset, index) { + const rawStem = String(asset.resourceId || asset.identifier || asset.key || `${conversationId}_${index}`); + const basename = rawStem.split('/').filter(Boolean).pop() || rawStem; + const stem = sanitizeFilenamePart(basename.replace(/\.[a-z0-9]{2,5}$/i, '')); + const ext = extensionFromAsset(asset); + return `${String(index).padStart(3, '0')}_${stem || conversationId}${ext}`; +} + +export const downloadCommand = cli({ + site: 'doubao', + name: 'download', + access: 'read', + description: 'Download images and media from a Doubao conversation', + domain: DOUBAO_DOMAIN, + strategy: Strategy.COOKIE, + browser: true, + navigateBefore: false, + args: [ + { name: 'id', required: true, positional: true, help: 'Conversation ID (numeric or full URL)' }, + { name: 'output', required: false, default: './doubao-downloads', help: 'Output directory' }, + { name: 'variant', required: false, default: 'original', help: 'Image variant: original, raw, preview, or thumb' }, + { name: 'limit', required: false, default: '0', help: 'Max media files to download; 0 means all' }, + { name: 'timeout', required: false, default: '15000', help: 'Per-file download timeout in milliseconds' }, + ], + columns: ['index', 'type', 'status', 'size'], + func: async (page, kwargs) => { + const conversationId = parseDoubaoConversationId(String(kwargs.id || '')); + const output = String(kwargs.output || './doubao-downloads'); + const variant = String(kwargs.variant || 'original'); + if (!SUPPORTED_VARIANTS.has(variant)) { + throw new ArgumentError(`Invalid Doubao image variant: ${variant}`, 'Use original, raw, preview, or thumb.'); + } + const limit = parseInt(String(kwargs.limit || '0'), 10) || 0; + if (limit < 0) { + throw new ArgumentError(`Invalid Doubao media limit: ${kwargs.limit}`, 'Use 0 for all media, or a positive integer.'); + } + const timeout = parseInt(String(kwargs.timeout || '15000'), 10) || 15000; + if (timeout <= 0) { + throw new ArgumentError(`Invalid Doubao download timeout: ${kwargs.timeout}`, 'Use a positive timeout in milliseconds.'); + } + const assets = await getConversationAssets(page, conversationId, { variant }); + const selectedAssets = limit > 0 ? assets.slice(0, limit) : assets; + if (selectedAssets.length === 0) { + return [{ index: 0, type: '-', status: 'failed', size: 'No media found' }]; + } + const cookies = formatCookieHeader(await page.getCookies({ domain: 'doubao.com' })); + const mediaItems = selectedAssets.map((asset, index) => ({ + type: asset.type === 'video' ? 'video' : 'image', + url: asset.url, + filename: filenameForAsset(conversationId, asset, index + 1), + })); + return downloadMedia(mediaItems, { + output, + subdir: conversationId, + cookies, + filenamePrefix: conversationId, + timeout, + }); + }, +}); diff --git a/clis/doubao/download.test.js b/clis/doubao/download.test.js new file mode 100644 index 000000000..84e29f60b --- /dev/null +++ b/clis/doubao/download.test.js @@ -0,0 +1,117 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { mockDownloadMedia, mockFormatCookieHeader, mockGetConversationAssets } = vi.hoisted(() => ({ + mockDownloadMedia: vi.fn(), + mockFormatCookieHeader: vi.fn(() => 'sid=secret'), + mockGetConversationAssets: vi.fn(), +})); + +vi.mock('@jackwener/opencli/download/media-download', () => ({ + downloadMedia: mockDownloadMedia, +})); + +vi.mock('@jackwener/opencli/download', () => ({ + formatCookieHeader: mockFormatCookieHeader, +})); + +vi.mock('./utils.js', async () => { + const actual = await vi.importActual('./utils.js'); + return { + ...actual, + getConversationAssets: mockGetConversationAssets, + }; +}); + +import { getRegistry } from '@jackwener/opencli/registry'; +import './download.js'; + +function createPageMock() { + return { + getCookies: vi.fn().mockResolvedValue([{ name: 'sid', value: 'secret', domain: '.doubao.com' }]), + }; +} + +describe('doubao download', () => { + const download = getRegistry().get('doubao/download'); + + beforeEach(() => { + mockDownloadMedia.mockReset(); + mockFormatCookieHeader.mockClear(); + mockGetConversationAssets.mockReset(); + mockDownloadMedia.mockResolvedValue([{ index: 1, type: 'image', status: 'success', size: '2 MB' }]); + }); + + it('extracts media for a conversation URL and downloads into a conversation subdirectory', async () => { + const page = createPageMock(); + mockGetConversationAssets.mockResolvedValue([ + { + type: 'image', + url: 'https://p3-flow-imagex-sign.byteimg.com/tos-cn/example.jpeg?x-signature=abc', + key: 'tos-cn/example.jpeg', + format: 'jpeg', + }, + ]); + await download.func(page, { + id: 'https://www.doubao.com/chat/1234567890123', + output: './out', + }); + expect(mockGetConversationAssets).toHaveBeenCalledWith(page, '1234567890123', { variant: 'original' }); + expect(page.getCookies).toHaveBeenCalledWith({ domain: 'doubao.com' }); + expect(mockDownloadMedia).toHaveBeenCalledWith([ + { + type: 'image', + url: 'https://p3-flow-imagex-sign.byteimg.com/tos-cn/example.jpeg?x-signature=abc', + filename: '001_example.jpg', + }, + ], expect.objectContaining({ + output: './out', + subdir: '1234567890123', + cookies: 'sid=secret', + filenamePrefix: '1234567890123', + timeout: 15000, + })); + }); + + it('supports limiting the number of downloaded media items', async () => { + const page = createPageMock(); + mockGetConversationAssets.mockResolvedValue([ + { type: 'image', url: 'https://example.com/1.png', key: 'one.png' }, + { type: 'image', url: 'https://example.com/2.png', key: 'two.png' }, + ]); + await download.func(page, { id: '1234567890123', output: './out', limit: '1', timeout: '5000' }); + expect(mockDownloadMedia).toHaveBeenCalledWith([ + expect.objectContaining({ url: 'https://example.com/1.png' }), + ], expect.objectContaining({ + timeout: 5000, + })); + }); + + it('returns an explicit failed row when no media is present', async () => { + const page = createPageMock(); + mockGetConversationAssets.mockResolvedValue([]); + await expect(download.func(page, { id: '1234567890123' })).resolves.toEqual([ + { index: 0, type: '-', status: 'failed', size: 'No media found' }, + ]); + expect(mockDownloadMedia).not.toHaveBeenCalled(); + }); + + it('rejects unsupported image variants before browser work', async () => { + const page = createPageMock(); + await expect(download.func(page, { id: '1234567890123', variant: 'large' })).rejects.toMatchObject({ + code: 'ARGUMENT', + message: expect.stringContaining('Invalid Doubao image variant'), + }); + expect(mockGetConversationAssets).not.toHaveBeenCalled(); + }); + + it('rejects invalid limit and timeout values before browser work', async () => { + const page = createPageMock(); + await expect(download.func(page, { id: '1234567890123', limit: '-1' })).rejects.toMatchObject({ + code: 'ARGUMENT', + }); + await expect(download.func(page, { id: '1234567890123', timeout: '-1' })).rejects.toMatchObject({ + code: 'ARGUMENT', + }); + expect(mockGetConversationAssets).not.toHaveBeenCalled(); + }); +}); diff --git a/clis/doubao/utils.js b/clis/doubao/utils.js index 305dc46d1..24431f75c 100644 --- a/clis/doubao/utils.js +++ b/clis/doubao/utils.js @@ -1133,6 +1133,173 @@ export async function getConversationDetail(page, conversationId) { })); return { messages, meeting: raw.meeting }; } +function getConversationAssetsScript(conversationId, variant) { + return ` + (() => { + const conversationId = ${JSON.stringify(conversationId)}; + const variant = ${JSON.stringify(variant)}; + const assets = []; + const seen = new Set(); + + const clean = (value) => String(value || '').trim(); + const isHttpUrl = (value) => /^https?:\\/\\//i.test(clean(value)); + const push = (item) => { + const url = clean(item.url); + if (!isHttpUrl(url)) return; + const key = item.type + ':' + url; + if (seen.has(key)) return; + seen.add(key); + assets.push({ + type: item.type, + url, + key: clean(item.key), + label: clean(item.label), + width: Number(item.width) || 0, + height: Number(item.height) || 0, + resourceId: clean(item.resourceId), + identifier: clean(item.identifier), + format: clean(item.format), + }); + }; + + const pickUrlObject = (image) => { + const candidates = [ + ['raw', image.image_raw || image.raw_image], + ['original', image.image_ori || image.image_original], + ['preview', image.preview_img || image.image_preview], + ['thumb', image.image_thumb], + ['url', image], + ]; + const preferred = candidates.find(([name, value]) => name === variant && isHttpUrl(value?.url)); + if (preferred) return { label: preferred[0], value: preferred[1] }; + return candidates + .map(([name, value]) => ({ label: name, value })) + .find((item) => isHttpUrl(item.value?.url)); + }; + + const pushImage = (image, owner = {}) => { + if (!image || typeof image !== 'object') return; + const picked = pickUrlObject(image); + if (!picked) return; + push({ + type: 'image', + url: picked.value.url, + key: image.key || owner.key, + label: picked.label, + width: picked.value.width || image.width, + height: picked.value.height || image.height, + resourceId: image.resource_id || owner.resource_id, + identifier: image.identifier || owner.identifier, + format: picked.value.format || image.format, + }); + }; + + const looksLikeVideoUrl = (value) => /\\.(?:mp4|m3u8|webm)(?:[?#]|$)|\\/video\\//i.test(value); + const visit = (value, owner = {}) => { + if (!value) return; + + if (typeof value === 'string') { + const trimmed = value.trim(); + if (trimmed.startsWith('{') || trimmed.startsWith('[')) { + try { + visit(JSON.parse(trimmed), owner); + } catch {} + } + return; + } + + if (Array.isArray(value)) { + for (const item of value) visit(item, owner); + return; + } + + if (typeof value !== 'object') return; + const record = value; + const nextOwner = { + key: record.key || owner.key, + resource_id: record.resource_id || owner.resource_id, + identifier: record.identifier || owner.identifier, + }; + + if (record.entity_content?.image) pushImage(record.entity_content.image, nextOwner); + if (record.image) pushImage(record.image, nextOwner); + if (record.cover) pushImage(record.cover, nextOwner); + if ( + (record.image_ori || record.image_raw || record.raw_image || record.preview_img || record.image_thumb) + && (record.key || record.url || record.image_ori?.url || record.image_raw?.url || record.raw_image?.url) + ) { + pushImage(record, nextOwner); + } + + for (const [key, child] of Object.entries(record)) { + if (key === 'download_url' && isHttpUrl(child)) { + push({ + type: looksLikeVideoUrl(child) ? 'video' : 'image', + url: child, + key: record.vid || record.key || nextOwner.key, + label: key, + width: record.width, + height: record.height, + resourceId: record.resource_id || nextOwner.resource_id, + identifier: record.identifier || nextOwner.identifier, + format: record.video_type || record.format, + }); + continue; + } + + if ((key === 'main_url' || key.startsWith('backup_url')) && typeof child === 'string') { + try { + const decoded = atob(child); + if (isHttpUrl(decoded)) { + push({ + type: looksLikeVideoUrl(decoded) ? 'video' : 'image', + url: decoded, + key: record.file_id || record.vid || nextOwner.key, + label: key, + width: record.vwidth || record.width, + height: record.vheight || record.height, + resourceId: record.resource_id || nextOwner.resource_id, + identifier: record.identifier || nextOwner.identifier, + format: record.vtype || record.video_type || record.format, + }); + continue; + } + } catch {} + } + + visit(child, nextOwner); + } + }; + + const loaderData = window._ROUTER_DATA?.loaderData || {}; + const scoped = Object.entries(loaderData) + .filter(([key, value]) => key.includes(conversationId) || key.includes('chat_') || value?.messageList || value?.messages) + .map(([, value]) => value); + const roots = scoped.length > 0 ? scoped : [loaderData]; + for (const root of roots) visit(root); + + const domSeen = new Set(assets.map((item) => item.url)); + document.querySelectorAll('img').forEach((img) => { + const url = img.currentSrc || img.src || ''; + const width = img.naturalWidth || img.width || 0; + const height = img.naturalHeight || img.height || 0; + if (!isHttpUrl(url) || domSeen.has(url)) return; + if (width > 0 && height > 0 && width <= 64 && height <= 64) return; + push({ type: 'image', url, label: 'dom', width, height }); + }); + + return assets; + })() + `; +} +export async function getConversationAssets(page, conversationId, options = {}) { + const variant = ['original', 'raw', 'preview', 'thumb'].includes(options.variant) + ? options.variant + : 'original'; + await navigateToConversation(page, conversationId); + const assets = await page.evaluate(getConversationAssetsScript(conversationId, variant)); + return Array.isArray(assets) ? assets : []; +} // --------------------------------------------------------------------------- // Meeting minutes panel helpers // --------------------------------------------------------------------------- @@ -1358,6 +1525,7 @@ export const __test__ = { composerStateScript, detectDoubaoVerificationScript, getRecentConversationsScript, + getConversationAssetsScript, getTurnsScript, getTranscriptLinesScript, }; diff --git a/clis/doubao/utils.test.js b/clis/doubao/utils.test.js index b6cd52da4..8928a620d 100644 --- a/clis/doubao/utils.test.js +++ b/clis/doubao/utils.test.js @@ -1,5 +1,6 @@ import { JSDOM } from 'jsdom'; import { describe, expect, it, vi } from 'vitest'; +import { JSDOM } from 'jsdom'; import { CommandExecutionError } from '@jackwener/opencli/errors'; import { __test__, @@ -282,6 +283,74 @@ describe('doubao receive strategy', () => { expect(recentScript).toContain('pc_pin_query_type: pcPinQueryType'); expect(recentScript).toContain('pcPinQueryType = downlink.extra?.pc_pin_query_type ?? pcPinQueryType'); }); + + it('extracts image and video media from Doubao route data', () => { + const dom = new JSDOM('', { + url: 'https://www.doubao.com/chat/1234567890123', + runScripts: 'outside-only', + }); + dom.window._ROUTER_DATA = { + loaderData: { + 'chat_1234567890123/page': { + messageList: [ + { + entities: [ + { + entity_content: { + image: { + key: 'tos-cn-i-a9rns2rl98/example.jpeg', + image_ori: { + url: 'https://p3-flow-imagex-sign.byteimg.com/tos-cn-i-a9rns2rl98/example.jpeg?x-signature=abc', + width: 1440, + height: 1080, + format: 'jpeg', + }, + resource_id: 'resource-1', + }, + }, + identifier: 'identifier-1', + }, + ], + }, + { + content: { + creation_block: { + creations: [ + { + video: { + vid: 'video-1', + download_url: 'https://v.example.com/video/example.mp4', + video_type: 'mp4', + }, + }, + ], + }, + }, + }, + ], + }, + }, + }; + const assets = dom.window.eval(__test__.getConversationAssetsScript('1234567890123', 'original')); + expect(assets).toEqual(expect.arrayContaining([ + expect.objectContaining({ + type: 'image', + url: 'https://p3-flow-imagex-sign.byteimg.com/tos-cn-i-a9rns2rl98/example.jpeg?x-signature=abc', + key: 'tos-cn-i-a9rns2rl98/example.jpeg', + resourceId: 'resource-1', + }), + expect.objectContaining({ + type: 'video', + url: 'https://v.example.com/video/example.mp4', + key: 'video-1', + }), + expect.objectContaining({ + type: 'image', + url: 'https://example.com/dom.png', + label: 'dom', + }), + ])); + }); }); describe('collectDoubaoTranscriptAdditions', () => { it('ignores landing-page capability chips that are not assistant content', () => { From 0c431d29c3b7e0e36b16301b84162ae5824174f1 Mon Sep 17 00:00:00 2001 From: lmmsoft Date: Sat, 6 Jun 2026 13:09:37 +0800 Subject: [PATCH 4/4] Avoid stale Doubao media extraction Generated on: cmcc-i5 --- clis/doubao/utils.js | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/clis/doubao/utils.js b/clis/doubao/utils.js index 24431f75c..8b3987744 100644 --- a/clis/doubao/utils.js +++ b/clis/doubao/utils.js @@ -1272,19 +1272,41 @@ function getConversationAssetsScript(conversationId, variant) { }; const loaderData = window._ROUTER_DATA?.loaderData || {}; - const scoped = Object.entries(loaderData) - .filter(([key, value]) => key.includes(conversationId) || key.includes('chat_') || value?.messageList || value?.messages) - .map(([, value]) => value); - const roots = scoped.length > 0 ? scoped : [loaderData]; + const scoped = []; + const collectScoped = (value, key = '', depth = 0) => { + if (!value || typeof value !== 'object' || depth > 6) return; + if ( + key.includes(conversationId) + || value.conversationId === conversationId + || value.conversation_id === conversationId + || value.conversationInfo?.conversation_id === conversationId + ) { + scoped.push(value); + return; + } + for (const [childKey, childValue] of Object.entries(value)) { + collectScoped(childValue, childKey, depth + 1); + } + }; + collectScoped(loaderData); + + const roots = scoped.flatMap((root) => { + if (root?.messageList) return [root.messageList]; + if (root?.messages) return [root.messages]; + return [root]; + }); for (const root of roots) visit(root); const domSeen = new Set(assets.map((item) => item.url)); - document.querySelectorAll('img').forEach((img) => { + const messageList = document.querySelector('[data-testid="message-list"], .conversation-page-message-host, [class*="message-list-"]'); + const isIgnoredDomImage = (url) => !/^https?:\\/\\//i.test(url) + || /doubao_avatar|user-avatar|passport|FileBizType\\.BIZ_BOT_ICON|\\/chat\\/static\\/image\\/intro/i.test(url); + (messageList || document).querySelectorAll('img').forEach((img) => { const url = img.currentSrc || img.src || ''; const width = img.naturalWidth || img.width || 0; const height = img.naturalHeight || img.height || 0; - if (!isHttpUrl(url) || domSeen.has(url)) return; - if (width > 0 && height > 0 && width <= 64 && height <= 64) return; + if (isIgnoredDomImage(url) || domSeen.has(url)) return; + if (width > 0 && height > 0 && (width <= 256 || height <= 256)) return; push({ type: 'image', url, label: 'dom', width, height }); });