From 53d77543ecd2bacc41cf9c307c96f288dc523bad Mon Sep 17 00:00:00 2001
From: "J.Chen" <nehoo@live.com>
Date: Mon, 11 May 2026 02:38:06 +0800
Subject: [PATCH 1/3] fix(facebook/feed): add fallback extraction for empty
 article nodes

Add fallback extraction for Facebook feed posts when [role=article] nodes exist but contain empty text. Includes diagnostic errors, content/author cleanup, nested-container dedupe, and an evaluate-script syntax regression test.
---
 clis/facebook/feed.js      | 159 +++++++++++++++++++++++++++++--------
 clis/facebook/feed.test.js |  25 ++++++
 2 files changed, 150 insertions(+), 34 deletions(-)
 create mode 100644 clis/facebook/feed.test.js

diff --git a/clis/facebook/feed.js b/clis/facebook/feed.js
index a2bd538e0..ccf4ece5f 100644
--- a/clis/facebook/feed.js
+++ b/clis/facebook/feed.js
@@ -13,47 +13,138 @@ cli({
         { navigate: { url: 'https://www.facebook.com/', settleMs: 4000 } },
         { evaluate: `(() => {
   const limit = \${{ args.limit }};
-  const posts = document.querySelectorAll('[role="article"]');
-  return Array.from(posts)
+
+  // ── Primary extraction via [role="article"] ──────────────────────────
+  const articleNodes = document.querySelectorAll('[role="article"]');
+  const primaryPosts = Array.from(articleNodes)
     .filter(el => {
       const text = el.textContent.trim();
-      // Filter out "People you may know" suggestions (both CN and EN)
       return text.length > 30 &&
         !text.startsWith('可能认识') &&
         !text.startsWith('People you may know') &&
         !text.startsWith('People You May Know');
-    })
-    .slice(0, limit)
-    .map((el, i) => {
-      // Author from header link
-      const headerLink = el.querySelector('h2 a, h3 a, h4 a, strong a');
-      const author = headerLink ? headerLink.textContent.trim() : '';
-
-      // Post text: grab visible spans, filter noise
-      const spans = Array.from(el.querySelectorAll('div[dir="auto"]'))
-        .map(s => s.textContent.trim())
-        .filter(t => t.length > 10 && t.length < 500);
-      const content = spans.length > 0 ? spans[0] : '';
-
-      // Engagement: find like/comment/share counts (CN + EN)
-      const allText = el.textContent;
-      const likesMatch = allText.match(/所有心情：([\\d,.\\s]*[\\d万亿KMk]+)/) ||
-                         allText.match(/All:\\s*([\\d,.KMk]+)/) ||
-                         allText.match(/([\\d,.KMk]+)\\s*(?:likes?|reactions?)/i);
-      const commentsMatch = allText.match(/([\\d,.]+\\s*[万亿]?)\\s*条评论/) ||
-                            allText.match(/([\\d,.KMk]+)\\s*comments?/i);
-      const sharesMatch = allText.match(/([\\d,.]+\\s*[万亿]?)\\s*次分享/) ||
-                          allText.match(/([\\d,.KMk]+)\\s*shares?/i);
-
-      return {
-        index: i + 1,
-        author: author.substring(0, 50),
-        content: content.replace(/\\n/g, ' ').substring(0, 120),
-        likes: likesMatch ? likesMatch[1] : '-',
-        comments: commentsMatch ? commentsMatch[1] : '-',
-        shares: sharesMatch ? sharesMatch[1] : '-',
-      };
     });
+
+  // ── Fallback extraction via action buttons ────────────────────────────
+  // Facebook periodically restructures its DOM so [role="article"] nodes
+  // exist but have empty textContent. When that happens we locate post
+  // boundaries via the Like/Comment action buttons, then walk up the DOM
+  // to the nearest ancestor that contains meaningful text.
+  function fallbackExtract() {
+    const main = document.querySelector('[role="main"]');
+    if (!main) return null;
+
+    const likeSelectors = [
+      '[aria-label="Like"]', '[aria-label="赞"]',
+      '[aria-label="Comment"]', '[aria-label="评论"]',
+    ];
+    const actionButtons = Array.from(
+      main.querySelectorAll(likeSelectors.join(','))
+    );
+
+    const seen = new WeakSet();
+    const containers = [];
+    for (const btn of actionButtons) {
+      let node = btn.parentElement;
+      let found = null;
+      for (let depth = 0; depth < 20 && node; depth++, node = node.parentElement) {
+        if (node.textContent.trim().length >= 80) { found = node; break; }
+      }
+      if (!found || seen.has(found)) continue;
+      seen.add(found);
+      containers.push(found);
+    }
+    return containers.length ? containers : null;
+  }
+
+  // ── Extract fields from a post container ─────────────────────────────
+  function extractPost(el, i) {
+    // Try progressively broader selectors: heading links → role=link → any profile link → first substantial link
+    const authorLink =
+      el.querySelector('h2 a, h3 a, h4 a, strong a') ||
+      el.querySelector('a[href*="/"][role="link"]') ||
+      el.querySelector('a[href*="facebook.com/"]') ||
+      Array.from(el.querySelectorAll('a[href]')).find(a => {
+        const t = a.textContent.trim();
+        return t.length > 2 && t.length < 60 && !/^(like|comment|share|follow|\\d)/i.test(t);
+      });
+    // Fallback for sponsored posts where the advertiser name is not in a link
+    const author = (authorLink ? authorLink.textContent.trim() : '') ||
+      (() => {
+        const short = Array.from(el.querySelectorAll('[dir="auto"]'))
+          .map(s => s.textContent.trim())
+          .find(t => t.length > 2 && t.length <= 60 && !t.startsWith('#'));
+        return short || '';
+      })();
+
+    const seen = new Set();
+    const dirAutos = Array.from(el.querySelectorAll('[dir="auto"]'))
+      .map(s => s.textContent.trim())
+      .filter(t => t.length > 10 && t.length < 600 && !seen.has(t) && seen.add(t));
+    const content = dirAutos.join(' ');
+
+    const allText = el.textContent;
+    const likesMatch = allText.match(/所有心情：([\\d,.\\s]*[\\d万亿KMk]+)/) ||
+                       allText.match(/All:\\s*([\\d,.KMk]+)/) ||
+                       allText.match(/([\\d,.KMk]+)\\s*(?:likes?|reactions?)/i);
+    const commentsMatch = allText.match(/([\\d,.]+\\s*[万亿]?)\\s*条评论/) ||
+                          allText.match(/([\\d,.KMk]+)\\s*comments?/i);
+    const sharesMatch = allText.match(/([\\d,.]+\\s*[万亿]?)\\s*次分享/) ||
+                        allText.match(/([\\d,.KMk]+)\\s*shares?/i);
+
+    return {
+      index: i + 1,
+      author: author.substring(0, 50),
+      content: content.replace(/\\n/g, ' ').substring(0, 120),
+      likes: likesMatch ? likesMatch[1] : '-',
+      comments: commentsMatch ? commentsMatch[1] : '-',
+      shares: sharesMatch ? sharesMatch[1] : '-',
+    };
+  }
+
+  // ── Route: primary alone if sufficient, else supplement with fallback ──
+  const isNotSuggestion = el => {
+    const t = el.textContent.trim();
+    return !t.startsWith('可能认识') && !t.startsWith('People you may know') && !t.startsWith('People You May Know');
+  };
+
+  if (primaryPosts.length >= limit) {
+    return primaryPosts.slice(0, limit).map((el, i) => extractPost(el, i));
+  }
+
+  const fallbackContainers = fallbackExtract();
+  const fallbackPosts = fallbackContainers ? fallbackContainers.filter(isNotSuggestion) : [];
+
+  if (primaryPosts.length > 0 || fallbackPosts.length > 0) {
+    const primarySet = new WeakSet(primaryPosts);
+    const extra = fallbackPosts.filter(el => !primarySet.has(el));
+    const combined = [...primaryPosts, ...extra];
+    // Deduplicate nested containers of the same post: same-post ancestors
+    // share all [dir="auto"] blocks, so joining them gives a stable signature.
+    // Different posts by the same author differ in body text even if they
+    // share an author-name prefix, so they won't collide here.
+    const seenContent = new Set();
+    const deduped = combined.filter(el => {
+      const key = Array.from(el.querySelectorAll('[dir="auto"]'))
+        .map(s => s.textContent.trim()).filter(t => t.length > 5)
+        .join('|').substring(0, 200);
+      if (!key || seenContent.has(key)) return false;
+      seenContent.add(key);
+      return true;
+    });
+    return deduped.slice(0, limit).map((el, i) => extractPost(el, i));
+  }
+
+  // ── Diagnostic when both paths return nothing ─────────────────────────
+  const mainEl = document.querySelector('[role="main"]');
+  const articleCount = articleNodes.length;
+  const mainLen = mainEl ? mainEl.textContent.trim().length : 0;
+  throw new Error(
+    'facebook feed: no posts found. ' +
+    'article nodes=' + articleCount + ' (all empty text), ' +
+    'main textLength=' + mainLen + '. ' +
+    'The page may not be fully loaded or Facebook DOM changed again.'
+  );
 })()
 ` },
     ],
diff --git a/clis/facebook/feed.test.js b/clis/facebook/feed.test.js
new file mode 100644
index 000000000..1ae1f475f
--- /dev/null
+++ b/clis/facebook/feed.test.js
@@ -0,0 +1,25 @@
+/**
+ * Regression test: evaluate scripts inside template literals must produce
+ * syntactically valid JavaScript after framework placeholder substitution.
+ * Catches double-escaping bugs (\d, \s, \n) that typecheck cannot see
+ * because the code lives inside a string passed to page.evaluate.
+ */
+import { describe, expect, it } from 'vitest';
+import { getRegistry } from '@jackwener/opencli/registry';
+import './feed.js';
+
+describe('facebook feed evaluate script', () => {
+  it('produces valid JS after placeholder substitution', () => {
+    const cmd = getRegistry().get('facebook/feed');
+    expect(cmd).toBeDefined();
+
+    const evaluateStep = cmd.pipeline?.find(step => 'evaluate' in step);
+    expect(evaluateStep).toBeDefined();
+
+    // Replace framework placeholders ${{ expr }} with dummy values so
+    // new Function() can parse the script without substitution support.
+    const script = evaluateStep.evaluate.replace(/\$\{\{[^}]*\}\}/g, '10');
+
+    expect(() => new Function(`return (${script})`)).not.toThrow();
+  });
+});

From eba7698f0b24929c50f0c26768f8575f8e18c655 Mon Sep 17 00:00:00 2001
From: jackwener <jakevingoo@gmail.com>
Date: Thu, 14 May 2026 18:30:03 +0800
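Subject: [PATCH 2/3] fix(facebook): bound feed fallback extraction

Replace the declarative navigate/evaluate pipeline with a func-based
command (navigateBefore: false, Strategy.COOKIE). The command validates
--limit (integer between 1 and 50) before navigating, then maps the
in-page extractor's status to typed errors: AuthRequiredError for
login/checkpoint pages, EmptyResultError when the feed is explicitly
empty or nothing is visible, and CommandExecutionError when the page
rendered but no rows could be extracted.

The fallback walk is now bounded: it climbs at most 16 ancestors, stops
at [role="main"], and only accepts a container with at least 80
characters of text, a Like action plus a Comment or Share action, and
post evidence (a permalink or a substantial text block). Suggestion and
sponsored cards are skipped, and containers are deduplicated by
permalink, falling back to their content text.

Tests now run the extractor script against JSDOM fixtures instead of
only checking that it parses.
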
---
 cli-manifest.json          |   2 +-
 clis/facebook/feed.js      | 426 +++++++++++++++++++++++++------------
 clis/facebook/feed.test.js | 144 +++++++++++--
 3 files changed, 419 insertions(+), 153 deletions(-)

diff --git a/cli-manifest.json b/cli-manifest.json
index 17b77d182..20d77a567 100644
--- a/cli-manifest.json
+++ b/cli-manifest.json
@@ -9387,7 +9387,7 @@
     "type": "js",
     "modulePath": "facebook/feed.js",
     "sourceFile": "facebook/feed.js",
-    "navigateBefore": "https://www.facebook.com"
+    "navigateBefore": false
   },
   {
     "site": "facebook",
diff --git a/clis/facebook/feed.js b/clis/facebook/feed.js
index ccf4ece5f..b1db20c34 100644
--- a/clis/facebook/feed.js
+++ b/clis/facebook/feed.js
@@ -1,151 +1,305 @@
-import { cli } from '@jackwener/opencli/registry';
-cli({
-    site: 'facebook',
-    name: 'feed',
-    access: 'read',
-    description: 'Get your Facebook news feed',
-    domain: 'www.facebook.com',
-    args: [
-        { name: 'limit', type: 'int', default: 10, help: 'Number of posts' },
-    ],
-    columns: ['index', 'author', 'content', 'likes', 'comments', 'shares'],
-    pipeline: [
-        { navigate: { url: 'https://www.facebook.com/', settleMs: 4000 } },
-        { evaluate: `(() => {
-  const limit = \${{ args.limit }};
-
-  // ── Primary extraction via [role="article"] ──────────────────────────
-  const articleNodes = document.querySelectorAll('[role="article"]');
-  const primaryPosts = Array.from(articleNodes)
-    .filter(el => {
-      const text = el.textContent.trim();
-      return text.length > 30 &&
-        !text.startsWith('可能认识') &&
-        !text.startsWith('People you may know') &&
-        !text.startsWith('People You May Know');
-    });
-
-  // ── Fallback extraction via action buttons ────────────────────────────
-  // Facebook periodically restructures its DOM so [role="article"] nodes
-  // exist but have empty textContent. When that happens we locate post
-  // boundaries via the Like/Comment action buttons, then walk up the DOM
-  // to the nearest ancestor that contains meaningful text.
-  function fallbackExtract() {
-    const main = document.querySelector('[role="main"]');
-    if (!main) return null;
-
-    const likeSelectors = [
-      '[aria-label="Like"]', '[aria-label="赞"]',
-      '[aria-label="Comment"]', '[aria-label="评论"]',
-    ];
-    const actionButtons = Array.from(
-      main.querySelectorAll(likeSelectors.join(','))
-    );
+import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
+import { cli, Strategy } from '@jackwener/opencli/registry';
+
+const FACEBOOK_HOME = 'https://www.facebook.com/';
+const MAX_LIMIT = 50;
+
+function requireLimit(value) {
+  const n = Number(value);
+  if (!Number.isInteger(n) || n < 1 || n > MAX_LIMIT) {
+    throw new ArgumentError(`facebook feed --limit must be an integer between 1 and ${MAX_LIMIT}`);
+  }
+  return n;
+}
+
+function unwrapBrowserResult(value) {
+  if (value && typeof value === 'object' && 'data' in value) {
+    return value.data;
+  }
+  return value;
+}
+
+function buildFeedExtractScript(limit) {
+  return `(() => {
+    const limit = ${limit};
+
+    function clean(value) {
+      return String(value || '').replace(/\\s+/g, ' ').trim();
+    }
+
+    function textOf(el) {
+      return clean(el && el.textContent);
+    }
+
+    function labelOf(el) {
+      return clean(el && el.getAttribute && el.getAttribute('aria-label'));
+    }
+
+    function isAuthPage() {
+      const path = window.location && window.location.pathname ? window.location.pathname : '';
+      const body = textOf(document.body);
+      return /^\\/(login|checkpoint)(\\/|$|\\.php)/.test(path)
+        || /^(Log in to Facebook|Facebook登录|登录 Facebook)/i.test(body)
+        || /You must log in to continue/i.test(body);
+    }
+
+    function isExplicitEmptyFeed() {
+      const body = textOf(document.body);
+      return /No posts available|Nothing to show|暂无动态|没有更多动态|还没有帖子/i.test(body);
+    }
 
-    const seen = new WeakSet();
-    const containers = [];
-    for (const btn of actionButtons) {
-      let node = btn.parentElement;
-      let found = null;
-      for (let depth = 0; depth < 20 && node; depth++, node = node.parentElement) {
-        if (node.textContent.trim().length >= 80) { found = node; break; }
+    function isSuggestionOrChrome(text) {
+      return /^(People you may know|People You May Know|可能认识的人?|你可能认识的人?)/i.test(text)
+        || /^(Suggested for you|Suggested Groups|推荐小组|推荐内容)/i.test(text);
+    }
+
+    function isSponsored(text) {
+      return /(^|\\s)(Sponsored|赞助|广告)(\\s|$)/i.test(text);
+    }
+
+    function isActionText(text) {
+      return /^(Like|Comment|Share|Send|Follow|赞|评论|分享|发送|关注)$/i.test(text);
+    }
+
+    function isMetricText(text) {
+      return /^(All:|所有心情：)/i.test(text)
+        || /\\b(likes?|reactions?|comments?|shares?)\\b/i.test(text)
+        || /(条评论|次分享)$/.test(text);
+    }
+
+    function isTimestampText(text) {
+      return /^(\\d+\\s*(s|m|h|d|w|mo|yr|min|sec|second|minute|hour|day|week|month|year)s?|Just now|Yesterday|刚刚|昨天|\\d+小时|\\d+天)(\\s*[·•.])?$/i.test(text);
+    }
+
+    function postUrlFrom(root) {
+      const links = Array.from(root.querySelectorAll('a[href]'));
+      for (const link of links) {
+        const href = link.href || link.getAttribute('href') || '';
+        if (/\\/posts\\/|\\/permalink\\.php|\\/story\\.php|\\/photo\\/\\?fbid=|\\/groups\\/[^/]+\\/posts\\//i.test(href)) {
+          return href;
+        }
       }
-      if (!found || seen.has(found)) continue;
-      seen.add(found);
-      containers.push(found);
+      return '';
+    }
+
+    function actionKinds(root) {
+      const kinds = new Set();
+      for (const el of root.querySelectorAll('[aria-label]')) {
+        const label = labelOf(el);
+        if (/^(Like|赞)$/i.test(label)) kinds.add('like');
+        if (/^(Comment|评论)$/i.test(label)) kinds.add('comment');
+        if (/^(Share|分享)$/i.test(label)) kinds.add('share');
+      }
+      return kinds;
+    }
+
+    function visibleBlocks(root) {
+      const seen = new Set();
+      return Array.from(root.querySelectorAll('[dir="auto"]'))
+        .map(textOf)
+        .filter((text) => {
+          if (!text || text.length > 600 || seen.has(text)) return false;
+          seen.add(text);
+          return true;
+        });
+    }
+
+    function findAuthor(root) {
+      const links = [
+        root.querySelector('h2 a[href], h3 a[href], h4 a[href], strong a[href]'),
+        ...Array.from(root.querySelectorAll('a[role="link"][href]')),
+      ].filter(Boolean);
+      for (const link of links) {
+        const text = textOf(link);
+        const href = link.href || link.getAttribute('href') || '';
+        if (text.length > 1 && text.length <= 80
+          && !isActionText(text)
+          && !isMetricText(text)
+          && !isTimestampText(text)
+          && !/\\/groups\\/|\\/watch\\/|\\/reel\\/|\\/events\\/|\\/friends\\//i.test(href)) {
+          return text;
+        }
+      }
+      return '';
     }
-    return containers.length ? containers : null;
-  }
 
-  // ── Extract fields from a post container ─────────────────────────────
-  function extractPost(el, i) {
-    // Try progressively broader selectors: heading links → role=link → any profile link → first substantial link
-    const authorLink =
-      el.querySelector('h2 a, h3 a, h4 a, strong a') ||
-      el.querySelector('a[href*="/"][role="link"]') ||
-      el.querySelector('a[href*="facebook.com/"]') ||
-      Array.from(el.querySelectorAll('a[href]')).find(a => {
-        const t = a.textContent.trim();
-        return t.length > 2 && t.length < 60 && !/^(like|comment|share|follow|\\d)/i.test(t);
+    function contentBlocks(root, author) {
+      return visibleBlocks(root).filter((text) => {
+        if (text === author) return false;
+        if (text.length <= 10) return false;
+        if (isSuggestionOrChrome(text) || isSponsored(text)) return false;
+        if (isActionText(text) || isMetricText(text) || isTimestampText(text)) return false;
+        if (/^(See more|查看更多|更多)$/i.test(text)) return false;
+        return true;
       });
-    // Fallback for sponsored posts where the advertiser name is not in a link
-    const author = (authorLink ? authorLink.textContent.trim() : '') ||
-      (() => {
-        const short = Array.from(el.querySelectorAll('[dir="auto"]'))
-          .map(s => s.textContent.trim())
-          .find(t => t.length > 2 && t.length <= 60 && !t.startsWith('#'));
-        return short || '';
-      })();
-
-    const seen = new Set();
-    const dirAutos = Array.from(el.querySelectorAll('[dir="auto"]'))
-      .map(s => s.textContent.trim())
-      .filter(t => t.length > 10 && t.length < 600 && !seen.has(t) && seen.add(t));
-    const content = dirAutos.join(' ');
-
-    const allText = el.textContent;
-    const likesMatch = allText.match(/所有心情：([\\d,.\\s]*[\\d万亿KMk]+)/) ||
-                       allText.match(/All:\\s*([\\d,.KMk]+)/) ||
-                       allText.match(/([\\d,.KMk]+)\\s*(?:likes?|reactions?)/i);
-    const commentsMatch = allText.match(/([\\d,.]+\\s*[万亿]?)\\s*条评论/) ||
-                          allText.match(/([\\d,.KMk]+)\\s*comments?/i);
-    const sharesMatch = allText.match(/([\\d,.]+\\s*[万亿]?)\\s*次分享/) ||
-                        allText.match(/([\\d,.KMk]+)\\s*shares?/i);
+    }
+
+    function extractPost(root, index) {
+      const fullText = textOf(root);
+      if (!fullText || isSuggestionOrChrome(fullText) || isSponsored(fullText)) return null;
+
+      const author = findAuthor(root);
+      const blocks = contentBlocks(root, author);
+      const content = clean(blocks.join(' '));
+      const postUrl = postUrlFrom(root);
+      const kinds = actionKinds(root);
+
+      if (!author && !content) return null;
+      if (!content && !postUrl && kinds.size < 2) return null;
+
+      const likesMatch = fullText.match(/所有心情：([\\d,.\\s]*[\\d万亿KMk]+)/)
+        || fullText.match(/All:\\s*([\\d,.KMk]+)/)
+        || fullText.match(/([\\d,.KMk]+)\\s*(?:likes?|reactions?)/i);
+      const commentsMatch = fullText.match(/([\\d,.]+\\s*[万亿]?)\\s*条评论/)
+        || fullText.match(/([\\d,.KMk]+)\\s*comments?/i);
+      const sharesMatch = fullText.match(/([\\d,.]+\\s*[万亿]?)\\s*次分享/)
+        || fullText.match(/([\\d,.KMk]+)\\s*shares?/i);
+
+      return {
+        index,
+        author: author.substring(0, 50),
+        content: content.substring(0, 120),
+        likes: likesMatch ? clean(likesMatch[1]) : '-',
+        comments: commentsMatch ? clean(commentsMatch[1]) : '-',
+        shares: sharesMatch ? clean(sharesMatch[1]) : '-',
+      };
+    }
+
+    function primaryContainers() {
+      return Array.from(document.querySelectorAll('[role="article"]'))
+        .filter((el) => textOf(el).length > 30);
+    }
+
+    function fallbackContainers() {
+      const main = document.querySelector('[role="main"]');
+      if (!main) return [];
+      const buttons = Array.from(main.querySelectorAll('[aria-label="Like"], [aria-label="赞"], [aria-label="Comment"], [aria-label="评论"], [aria-label="Share"], [aria-label="分享"]'));
+      const seen = new WeakSet();
+      const containers = [];
+      for (const button of buttons) {
+        let node = button.parentElement;
+        for (let depth = 0; depth < 16 && node && node !== main && node !== document.body; depth += 1, node = node.parentElement) {
+          const text = textOf(node);
+          const kinds = actionKinds(node);
+          const blocks = visibleBlocks(node);
+          const hasPostEvidence = Boolean(postUrlFrom(node)) || blocks.some((block) => block.length > 20 && !isActionText(block) && !isMetricText(block));
+          if (text.length >= 80 && kinds.has('like') && (kinds.has('comment') || kinds.has('share')) && hasPostEvidence) {
+            if (!seen.has(node)) {
+              seen.add(node);
+              containers.push(node);
+            }
+            break;
+          }
+        }
+      }
+      return containers;
+    }
+
+    function dedupe(containers) {
+      const seen = new Set();
+      const result = [];
+      for (const node of containers) {
+        const key = postUrlFrom(node) || contentBlocks(node, findAuthor(node)).join('|').substring(0, 200);
+        if (!key || seen.has(key)) continue;
+        seen.add(key);
+        result.push(node);
+      }
+      return result;
+    }
+
+    if (isAuthPage()) return { status: 'auth', rows: [], diagnostics: {} };
+
+    const primary = primaryContainers();
+    const combined = primary.length >= limit ? primary : dedupe([...primary, ...fallbackContainers()]);
+    const rows = [];
+    for (const container of combined) {
+      const row = extractPost(container, rows.length + 1);
+      if (row) rows.push(row);
+      if (rows.length >= limit) break;
+    }
 
     return {
-      index: i + 1,
-      author: author.substring(0, 50),
-      content: content.replace(/\\n/g, ' ').substring(0, 120),
-      likes: likesMatch ? likesMatch[1] : '-',
-      comments: commentsMatch ? commentsMatch[1] : '-',
-      shares: sharesMatch ? sharesMatch[1] : '-',
+      status: rows.length ? 'ok' : (isExplicitEmptyFeed() ? 'empty' : 'no_rows'),
+      rows,
+      diagnostics: {
+        articleCount: document.querySelectorAll('[role="article"]').length,
+        primaryCount: primary.length,
+        fallbackActionCount: document.querySelectorAll('[role="main"] [aria-label="Like"], [role="main"] [aria-label="赞"], [role="main"] [aria-label="Comment"], [role="main"] [aria-label="评论"]').length,
+        mainTextLength: textOf(document.querySelector('[role="main"]')).length,
+      },
     };
+  })()`;
+}
+
+async function getFacebookFeed(page, kwargs) {
+  const limit = requireLimit(kwargs.limit ?? 10);
+  try {
+    await page.goto(FACEBOOK_HOME, { settleMs: 4000 });
+  } catch (err) {
+    throw new CommandExecutionError(
+      `Failed to navigate to facebook feed: ${err instanceof Error ? err.message : err}`,
+      'Check that facebook.com is reachable and the browser extension is connected.',
+    );
   }
 
-  // ── Route: primary alone if sufficient, else supplement with fallback ──
-  const isNotSuggestion = el => {
-    const t = el.textContent.trim();
-    return !t.startsWith('可能认识') && !t.startsWith('People you may know') && !t.startsWith('People You May Know');
-  };
+  let payload;
+  try {
+    payload = unwrapBrowserResult(await page.evaluate(buildFeedExtractScript(limit)));
+  } catch (err) {
+    throw new CommandExecutionError(
+      `Failed to read facebook feed: ${err instanceof Error ? err.message : err}`,
+      'Facebook may not have rendered or the feed markup may have changed.',
+    );
+  }
+
+  if (!payload || typeof payload !== 'object' || !Array.isArray(payload.rows)) {
+    throw new CommandExecutionError('facebook feed returned malformed extraction payload');
+  }
 
-  if (primaryPosts.length >= limit) {
-    return primaryPosts.slice(0, limit).map((el, i) => extractPost(el, i));
+  if (payload.status === 'auth') {
+    throw new AuthRequiredError('www.facebook.com', 'Open Chrome and log in to Facebook before retrying.');
   }
 
-  const fallbackContainers = fallbackExtract();
-  const fallbackPosts = fallbackContainers ? fallbackContainers.filter(isNotSuggestion) : [];
-
-  if (primaryPosts.length > 0 || fallbackPosts.length > 0) {
-    const primarySet = new WeakSet(primaryPosts);
-    const extra = fallbackPosts.filter(el => !primarySet.has(el));
-    const combined = [...primaryPosts, ...extra];
-    // Deduplicate nested containers of the same post: same-post ancestors
-    // share all [dir="auto"] blocks, so joining them gives a stable signature.
-    // Different posts by the same author differ in body text even if they
-    // share an author-name prefix, so they won't collide here.
-    const seenContent = new Set();
-    const deduped = combined.filter(el => {
-      const key = Array.from(el.querySelectorAll('[dir="auto"]'))
-        .map(s => s.textContent.trim()).filter(t => t.length > 5)
-        .join('|').substring(0, 200);
-      if (!key || seenContent.has(key)) return false;
-      seenContent.add(key);
-      return true;
-    });
-    return deduped.slice(0, limit).map((el, i) => extractPost(el, i));
+  if (payload.rows.length > 0) {
+    return payload.rows;
   }
 
-  // ── Diagnostic when both paths return nothing ─────────────────────────
-  const mainEl = document.querySelector('[role="main"]');
-  const articleCount = articleNodes.length;
-  const mainLen = mainEl ? mainEl.textContent.trim().length : 0;
-  throw new Error(
-    'facebook feed: no posts found. ' +
-    'article nodes=' + articleCount + ' (all empty text), ' +
-    'main textLength=' + mainLen + '. ' +
-    'The page may not be fully loaded or Facebook DOM changed again.'
-  );
-})()
-` },
-    ],
-});
+  if (payload.status === 'empty') {
+    throw new EmptyResultError('facebook feed', 'Facebook did not show any feed posts for this account.');
+  }
+
+  const diagnostics = payload.diagnostics || {};
+  if (diagnostics.articleCount || diagnostics.fallbackActionCount || diagnostics.mainTextLength > 200) {
+    throw new CommandExecutionError(
+      'facebook feed page rendered but no feed rows could be extracted',
+      `Diagnostics: articles=${diagnostics.articleCount || 0}, actions=${diagnostics.fallbackActionCount || 0}, mainTextLength=${diagnostics.mainTextLength || 0}.`,
+    );
+  }
+
+  throw new EmptyResultError('facebook feed', 'No Facebook feed content was visible in the current browser session.');
+}
+
+const command = {
+  site: 'facebook',
+  name: 'feed',
+  access: 'read',
+  description: 'Get your Facebook news feed',
+  domain: 'www.facebook.com',
+  strategy: Strategy.COOKIE,
+  browser: true,
+  navigateBefore: false,
+  args: [
+    { name: 'limit', type: 'int', default: 10, help: 'Number of posts' },
+  ],
+  columns: ['index', 'author', 'content', 'likes', 'comments', 'shares'],
+  func: getFacebookFeed,
+};
+
+cli(command);
+
+export const __test__ = {
+  buildFeedExtractScript,
+  command,
+  getFacebookFeed,
+  requireLimit,
+};
diff --git a/clis/facebook/feed.test.js b/clis/facebook/feed.test.js
index 1ae1f475f..b836480de 100644
--- a/clis/facebook/feed.test.js
+++ b/clis/facebook/feed.test.js
@@ -1,25 +1,137 @@
-/**
- * Regression test: evaluate scripts inside template literals must produce
- * syntactically valid JavaScript after framework placeholder substitution.
- * Catches double-escaping bugs (\d, \s, \n) that typecheck cannot see
- * because the code lives inside a string passed to page.evaluate.
- */
-import { describe, expect, it } from 'vitest';
+import { describe, expect, it, vi } from 'vitest';
+import { JSDOM } from 'jsdom';
+import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
 import { getRegistry } from '@jackwener/opencli/registry';
-import './feed.js';
+import { __test__ } from './feed.js';
 
-describe('facebook feed evaluate script', () => {
-  it('produces valid JS after placeholder substitution', () => {
+function runExtract(html, limit = 10, url = 'https://www.facebook.com/') {
+  const dom = new JSDOM(html, { url });
+  return Function('window', 'document', `return ${__test__.buildFeedExtractScript(limit)};`)(dom.window, dom.window.document);
+}
+
+function createPage(payload) {
+  return {
+    goto: vi.fn().mockResolvedValue(undefined),
+    evaluate: vi.fn().mockResolvedValue(payload),
+  };
+}
+
+describe('facebook feed', () => {
+  it('registers the feed command with the existing row contract', () => {
     const cmd = getRegistry().get('facebook/feed');
     expect(cmd).toBeDefined();
+    expect(cmd.columns).toEqual(['index', 'author', 'content', 'likes', 'comments', 'shares']);
+  });
+
+  it('extracts existing role=article feed rows', () => {
+    const payload = runExtract(`
+      <main role="main">
+        <div role="article">
+          <h2><a href="https://www.facebook.com/alice">Alice Example</a></h2>
+          <div dir="auto">This is a normal Facebook feed post with enough text to extract.</div>
+          <span>All: 12</span>
+          <span>3 comments</span>
+          <span>2 shares</span>
+          <div aria-label="Like"></div><div aria-label="Comment"></div>
+        </div>
+      </main>
+    `);
+
+    expect(payload.status).toBe('ok');
+    expect(payload.rows).toEqual([{
+      index: 1,
+      author: 'Alice Example',
+      content: 'This is a normal Facebook feed post with enough text to extract.',
+      likes: '12',
+      comments: '3',
+      shares: '2',
+    }]);
+  });
+
+  it('falls back from empty article nodes to action-bounded feed containers', () => {
+    const payload = runExtract(`
+      <main role="main">
+        <div role="article"></div>
+        <section>
+          <div>
+            <h2><a href="https://www.facebook.com/bob/posts/123">Bob Builder</a></h2>
+            <div dir="auto">Fallback post body from a Facebook feed card with empty article text.</div>
+            <a href="https://www.facebook.com/bob/posts/123">Permalink</a>
+            <span>All: 1.2K</span>
+            <span>4 comments</span>
+            <span>1 shares</span>
+            <div><button aria-label="Like">Like</button><button aria-label="Comment">Comment</button></div>
+          </div>
+        </section>
+      </main>
+    `);
+
+    expect(payload.status).toBe('ok');
+    expect(payload.rows).toEqual([{
+      index: 1,
+      author: 'Bob Builder',
+      content: 'Fallback post body from a Facebook feed card with empty article text.',
+      likes: '1.2K',
+      comments: '4',
+      shares: '1',
+    }]);
+  });
 
-    const evaluateStep = cmd.pipeline?.find(step => 'evaluate' in step);
-    expect(evaluateStep).toBeDefined();
+  it('does not turn suggestions or side chrome action buttons into feed rows', () => {
+    const payload = runExtract(`
+      <main role="main">
+        <aside>
+          <h2>People you may know</h2>
+          <div dir="auto">Charlie Suggested</div>
+          <div dir="auto">Add friend from suggested people card with plenty of text.</div>
+          <button aria-label="Like">Like</button>
+          <button aria-label="Comment">Comment</button>
+        </aside>
+        <nav>
+          <div dir="auto">Navigation item with a Like button but not a feed post.</div>
+          <button aria-label="Like">Like</button>
+          <button aria-label="Comment">Comment</button>
+        </nav>
+      </main>
+    `);
 
-    // Replace framework placeholders ${{ expr }} with dummy values so
-    // new Function() can parse the script without substitution support.
-    const script = evaluateStep.evaluate.replace(/\$\{\{[^}]*\}\}/g, '10');
+    expect(payload.status).toBe('no_rows');
+    expect(payload.rows).toEqual([]);
+  });
+
+  it('reports auth pages from the browser extractor', () => {
+    const payload = runExtract('<main role="main">Log in to Facebook</main>', 10, 'https://www.facebook.com/login/');
+    expect(payload.status).toBe('auth');
+    expect(payload.rows).toEqual([]);
+  });
+
+  it('validates limit before browser navigation', async () => {
+    const page = createPage({ status: 'ok', rows: [] });
+    await expect(__test__.command.func(page, { limit: 0 })).rejects.toBeInstanceOf(ArgumentError);
+    expect(page.goto).not.toHaveBeenCalled();
+  });
+
+  it('maps browser envelopes and returns extracted rows', async () => {
+    const page = createPage({ session: 'site:facebook', data: { status: 'ok', rows: [{ index: 1, author: 'A', content: 'Body', likes: '-', comments: '-', shares: '-' }] } });
+
+    await expect(__test__.command.func(page, { limit: 1 })).resolves.toEqual([{
+      index: 1,
+      author: 'A',
+      content: 'Body',
+      likes: '-',
+      comments: '-',
+      shares: '-',
+    }]);
+  });
 
-    expect(() => new Function(`return (${script})`)).not.toThrow();
+  it('maps auth, real empty, parser drift, and malformed payloads to typed errors', async () => {
+    await expect(__test__.command.func(createPage({ status: 'auth', rows: [] }), { limit: 1 }))
+      .rejects.toBeInstanceOf(AuthRequiredError);
+    await expect(__test__.command.func(createPage({ status: 'empty', rows: [] }), { limit: 1 }))
+      .rejects.toBeInstanceOf(EmptyResultError);
+    await expect(__test__.command.func(createPage({ status: 'no_rows', rows: [], diagnostics: { articleCount: 1, fallbackActionCount: 2, mainTextLength: 500 } }), { limit: 1 }))
+      .rejects.toBeInstanceOf(CommandExecutionError);
+    await expect(__test__.command.func(createPage({ rows: null }), { limit: 1 }))
+      .rejects.toBeInstanceOf(CommandExecutionError);
   });
 });

From b57c257978ed3dfc92ed9bc55b46666e17015537 Mon Sep 17 00:00:00 2001
From: jackwener <jakevingoo@gmail.com>
Date: Thu, 14 May 2026 18:38:19 +0800
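Subject: [PATCH 3/3] fix(facebook): keep feed fallback available after chrome
 articles

Always merge and dedupe the [role="article"] containers with the
action-bounded fallback containers. Previously the fallback was skipped
whenever the number of article nodes reached --limit, so article nodes
that were suggestion chrome (and are rejected by extractPost) could
leave the result short or empty.

Also require the likes pattern and the English comments/shares patterns
to start with a digit, so a bare K/M or punctuation character is no
longer captured as a count.
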
---
 clis/facebook/feed.js      | 12 ++++++------
 clis/facebook/feed.test.js | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/clis/facebook/feed.js b/clis/facebook/feed.js
index b1db20c34..1690a58d2 100644
--- a/clis/facebook/feed.js
+++ b/clis/facebook/feed.js
@@ -147,13 +147,13 @@ function buildFeedExtractScript(limit) {
       if (!author && !content) return null;
       if (!content && !postUrl && kinds.size < 2) return null;
 
-      const likesMatch = fullText.match(/所有心情：([\\d,.\\s]*[\\d万亿KMk]+)/)
-        || fullText.match(/All:\\s*([\\d,.KMk]+)/)
-        || fullText.match(/([\\d,.KMk]+)\\s*(?:likes?|reactions?)/i);
+      const likesMatch = fullText.match(/所有心情：\\s*(\\d[\\d,.\\s万亿KMk]*)/)
+        || fullText.match(/All:\\s*(\\d[\\d,.KMk]*)/)
+        || fullText.match(/(\\d[\\d,.KMk]*)\\s*(?:likes?|reactions?)/i);
       const commentsMatch = fullText.match(/([\\d,.]+\\s*[万亿]?)\\s*条评论/)
-        || fullText.match(/([\\d,.KMk]+)\\s*comments?/i);
+        || fullText.match(/(\\d[\\d,.KMk]*)\\s*comments?/i);
       const sharesMatch = fullText.match(/([\\d,.]+\\s*[万亿]?)\\s*次分享/)
-        || fullText.match(/([\\d,.KMk]+)\\s*shares?/i);
+        || fullText.match(/(\\d[\\d,.KMk]*)\\s*shares?/i);
 
       return {
         index,
@@ -210,7 +210,7 @@ function buildFeedExtractScript(limit) {
     if (isAuthPage()) return { status: 'auth', rows: [], diagnostics: {} };
 
     const primary = primaryContainers();
-    const combined = primary.length >= limit ? primary : dedupe([...primary, ...fallbackContainers()]);
+    const combined = dedupe([...primary, ...fallbackContainers()]);
     const rows = [];
     for (const container of combined) {
       const row = extractPost(container, rows.length + 1);
diff --git a/clis/facebook/feed.test.js b/clis/facebook/feed.test.js
index b836480de..4bb6fd290 100644
--- a/clis/facebook/feed.test.js
+++ b/clis/facebook/feed.test.js
@@ -99,6 +99,38 @@ describe('facebook feed', () => {
     expect(payload.rows).toEqual([]);
   });
 
+  it('still considers bounded fallback rows when article nodes are suggestion chrome', () => {
+    // Fixture: a "People you may know" article node plus a post-like block with no role="article".
+    const { status, rows } = runExtract(`
+      <main role="main">
+        <div role="article">
+          <h2>People you may know</h2>
+          <div dir="auto">Suggested profile card with enough text to look article-like.</div>
+          <button aria-label="Like">Like</button>
+          <button aria-label="Comment">Comment</button>
+        </div>
+        <section>
+          <div>
+            <h2><a href="https://www.facebook.com/dana/posts/456">Dana Poster</a></h2>
+            <div dir="auto">Fallback feed post should still be extracted after suggestion articles are filtered.</div>
+            <a href="https://www.facebook.com/dana/posts/456">Permalink</a>
+            <button aria-label="Like">Like</button>
+            <button aria-label="Comment">Comment</button>
+          </div>
+        </section>
+      </main>
+    `, 1);
+    const expectedRow = {
+      index: 1,
+      author: 'Dana Poster',
+      content: 'Fallback feed post should still be extracted after suggestion articles are filtered.',
+      likes: '-', comments: '-', shares: '-',
+    };
+
+    expect(status).toBe('ok');
+    expect(rows).toEqual([expectedRow]);
+  });
+
   it('reports auth pages from the browser extractor', () => {
     const payload = runExtract('<main role="main">Log in to Facebook</main>', 10, 'https://www.facebook.com/login/');
     expect(payload.status).toBe('auth');