diff --git a/src/cdn-logs-report/sql/agentic-hits-for-urls.sql b/src/cdn-logs-report/sql/agentic-hits-for-urls.sql new file mode 100644 index 000000000..64b6ac6bc --- /dev/null +++ b/src/cdn-logs-report/sql/agentic-hits-for-urls.sql @@ -0,0 +1,8 @@ +SELECT + url, + SUM(number_of_hits) AS number_of_hits +FROM {{databaseName}}.{{tableName}} +{{whereClause}} +AND url IN ({{urlList}}) +GROUP BY url; + diff --git a/src/cdn-logs-report/utils/query-builder.js b/src/cdn-logs-report/utils/query-builder.js index 0c6e309ec..3251ac994 100644 --- a/src/cdn-logs-report/utils/query-builder.js +++ b/src/cdn-logs-report/utils/query-builder.js @@ -161,8 +161,40 @@ async function createTopUrlsQuery(options) { }); } +/** + * Build an Athena query to fetch agentic hits for a specific set of URL paths. + * The passed urlPaths should be path-only (e.g., "/foo/bar"), matching the "url" field in logs. + */ +export async function createAgenticHitsForUrlsQuery(options) { + const { + periods, databaseName, tableName, site, urlPaths, + } = options; + + const filters = site.getConfig().getLlmoCdnlogsFilter(); + const siteFilters = buildSiteFilters(filters, site); + const lastWeek = periods.weeks[periods.weeks.length - 1]; + const whereClause = buildWhereClause( + [buildDateFilter(lastWeek.startDate, lastWeek.endDate)], + siteFilters, + ); + + // Escape single quotes inside paths for SQL safety + const escapeSql = (s) => String(s).replace(/'/g, "''"); + const urlList = urlPaths && urlPaths.length > 0 + ? urlPaths.map((p) => `'${escapeSql(p)}'`).join(', ') + : ''; // will produce empty IN() which returns no results + + return loadSql('agentic-hits-for-urls', { + databaseName, + tableName, + whereClause, + urlList, + }); +} + export const weeklyBreakdownQueries = { createAgenticReportQuery, createReferralReportQuery, createTopUrlsQuery, + createAgenticHitsForUrlsQuery, }; diff --git a/src/prerender/handler.js b/src/prerender/handler.js index 8eb5207de..a17533238 100644 --- a/src/prerender/handler.js +++ b/src/prerender/handler.js @@ -12,17 +12,258 @@ import { PutObjectCommand } from '@aws-sdk/client-s3'; import { Audit, Suggestion } from '@adobe/spacecat-shared-data-access'; +import { AWSAthenaClient } from '@adobe/spacecat-shared-athena-client'; +import ExcelJS from 'exceljs'; import { AuditBuilder } from '../common/audit-builder.js'; import { convertToOpportunity } from '../common/opportunity.js'; import { syncSuggestions } from '../utils/data-access.js'; import { getObjectFromKey } from '../utils/s3-utils.js'; +import { createLLMOSharepointClient, readFromSharePoint } from '../utils/report-uploader.js'; import { createOpportunityData } from './opportunity-data-mapper.js'; -import { analyzeHtmlForPrerender } from './html-comparator-utils.js'; +import { analyzeHtmlForPrerender } from './utils/html-comparator.js'; +import { + generateReportingPeriods, + getS3Config, + downloadExistingCdnSheet, + weeklyBreakdownQueries, +} from './utils/shared.js'; +import { CONTENT_GAIN_THRESHOLD } from './utils/constants.js'; const AUDIT_TYPE = Audit.AUDIT_TYPES.PRERENDER; const { AUDIT_STEP_DESTINATIONS } = Audit; +const TOP_AGENTIC_URLS_LIMIT = 50; -const CONTENT_GAIN_THRESHOLD = 1.1; +/** + * Fetch top Agentic URLs using Athena (preferred). + * Groups by URL across agentic rows, filters out pooled 'Other', sorts by hits. + * @param {any} site + * @param {any} context + * @param {number} limit + * @returns {Promise} + */ +async function getTopAgenticUrlsFromAthena(site, context, limit = 200) { + const { log } = context; + try { + const s3Config = await getS3Config(site, context); + const periods = generateReportingPeriods(); + const athenaClient = AWSAthenaClient.fromContext(context, s3Config.getAthenaTempLocation()); + // Build query using agentic report query builder (date-range + LLM UA + site filters) + const query = await weeklyBreakdownQueries.createAgenticReportQuery({ + periods, + databaseName: s3Config.databaseName, + tableName: s3Config.tableName, + site, + }); + log.info('[PRERENDER] Executing Athena query for top agentic URLs...'); + const results = await athenaClient.query( + query, + s3Config.databaseName, + '[Athena Query] Prerender - Top Agentic URLs', + ); + + if (!Array.isArray(results) || results.length === 0) { + log.warn('[PRERENDER] Athena returned no agentic rows.'); + return []; + } + + // Aggregate by URL + const byUrl = new Map(); + for (const row of results) { + const url = row?.url || ''; + const hits = Number(row?.number_of_hits || 0) || 0; + if (url && url !== 'Other') { + const prev = byUrl.get(url) || 0; + byUrl.set(url, prev + hits); + } + } + + const baseUrl = site.getBaseURL?.() || ''; + const topUrls = Array.from(byUrl.entries()) + .sort((a, b) => b[1] - a[1]) + .slice(0, limit) + .map(([path, hits]) => { + try { + return { + url: new URL(path, baseUrl).toString(), + agenticTraffic: hits, + }; + } catch { + return { + url: path, + agenticTraffic: hits, + }; + } + }); + + log.info(`[PRERENDER] Selected ${topUrls.length} top agentic URLs via Athena.`); + return topUrls; + } catch (e) { + log?.warn?.(`[PRERENDER] Athena agentic URL fetch failed: ${e.message}`); + return []; + } +} + +/** + * Fetch agentic traffic for a specific set of URLs using Athena. + * Ensures we can populate agenticTraffic for includedURLs even if not in top N. + * @param {any} site + * @param {any} context + * @param {string[]} targetUrls - absolute URLs + * @returns {Promise>} + */ +async function getAgenticTrafficForSpecificUrls(site, context, targetUrls = []) { + const { log } = context; + /* c8 ignore start */ + if (!Array.isArray(targetUrls) || targetUrls.length === 0) { + return []; + } + /* c8 ignore stop */ + try { + const s3Config = await getS3Config(site, context); + const periods = generateReportingPeriods(); + // We attach week at opportunity level, not per-URL + const athenaClient = AWSAthenaClient.fromContext(context, s3Config.getAthenaTempLocation()); + + // Convert absolute URLs to site-relative paths, consistent with logs' url field + const baseUrl = site.getBaseURL?.() || ''; + const toPath = (fullUrl) => { + try { + return new URL(fullUrl, baseUrl).pathname || '/'; + } catch { + return '/'; + } + }; + const uniquePaths = Array.from(new Set(targetUrls.map(toPath))); + + const query = await weeklyBreakdownQueries.createAgenticHitsForUrlsQuery({ + periods, + databaseName: s3Config.databaseName, + tableName: s3Config.tableName, + site, + urlPaths: uniquePaths, + }); + + log.info('[PRERENDER] Executing Athena query for specific included URLs agentic hits...'); + const rows = await athenaClient.query( + query, + s3Config.databaseName, + '[Athena Query] Prerender - Agentic Hits For Specific URLs', + ); + + // Build a map of path -> hits + const hitsByPath = new Map(); + for (const row of rows || []) { + const path = row?.url || ''; + const hits = Number(row?.number_of_hits || 0) || 0; + if (path) { + hitsByPath.set(path, hits); + } + } + + // Return entry per original included URL, defaulting to 0 when missing + return targetUrls.map((fullUrl) => { + const path = toPath(fullUrl); + const hits = hitsByPath.has(path) ? hitsByPath.get(path) : 0; + return { + url: fullUrl, + agenticTraffic: hits, + }; + }); + } catch (e) { + log?.warn?.(`[PRERENDER] Failed to fetch agentic traffic for specific URLs: ${e.message}`); + // On failure, still return zeros for provided URLs + return targetUrls.map((u) => ({ + url: u, + agenticTraffic: 0, + })); + } +} + +/** + * Fetch top Agentic URLs from the weekly Excel sheet (fallback). + * @param {any} site + * @param {any} context + * @param {number} limit + * @returns {Promise>} + */ +async function getTopAgenticUrlsFromSheet(site, context, limit = 200) { + const { log } = context; + try { + const s3Config = await getS3Config(site, context); + const llmoFolder = site.getConfig()?.getLlmoDataFolder?.() || s3Config.customerName; + const outputLocation = `${llmoFolder}/agentic-traffic`; + + // Determine current week identifier (same logic as elsewhere) + const periods = generateReportingPeriods(); + const latestWeek = periods.weeks[0]; + const weekId = `w${String(latestWeek.weekNumber).padStart(2, '0')}-${latestWeek.year}`; + + // Download sheet + const sharepointClient = await createLLMOSharepointClient(context); + const rows = await downloadExistingCdnSheet( + weekId, + outputLocation, + sharepointClient, + log, + readFromSharePoint, + ExcelJS, + ); + + if (!rows || rows.length === 0) { + log.warn(`[PRERENDER] No agentic traffic rows found in sheet for ${weekId} at ${outputLocation}`); + return []; + } + + // Aggregate by URL (sum hits) + const byUrl = new Map(); + for (const r of rows) { + const path = typeof r.url === 'string' && r.url.length > 0 ? r.url : '/'; + const hits = Number(r.number_of_hits || 0) || 0; + const prev = byUrl.get(path) || 0; + byUrl.set(path, prev + hits); + } + + const baseUrl = site.getBaseURL?.() || ''; + const top = Array.from(byUrl.entries()) + .sort((a, b) => b[1] - a[1]) + .slice(0, limit) + .map(([path, hits]) => { + try { + return { + url: new URL(path, baseUrl).toString(), + agenticTraffic: hits, + }; + } catch { + return { + url: path, + agenticTraffic: hits, + }; + } + }); + + log.info(`[PRERENDER] Selected ${top.length} top agentic URLs via Sheet (${weekId}).`); + return top; + } catch (e) { + log?.warn?.(`[PRERENDER] Sheet-based agentic URL fetch failed: ${e?.message || e}`); + return []; + } +} + +/** + * Wrapper: Try Athena first, then fall back to sheet if needed. + * @param {any} site + * @param {any} context + * @param {number} limit + * @returns {Promise} + */ +async function getTopAgenticUrls(site, context, limit = 200) { + const fromAthena = await getTopAgenticUrlsFromAthena(site, context, limit); + if (Array.isArray(fromAthena) && fromAthena.length > 0) { + return fromAthena; + } + context?.log?.info?.('[PRERENDER] No agentic URLs from Athena; attempting Sheet fallback.'); + return getTopAgenticUrlsFromSheet(site, context, limit); +} /** * Sanitizes the import path by replacing special characters with hyphens @@ -157,43 +398,26 @@ async function compareHtmlContent(url, siteId, context) { } /** - * Step 1: Import top pages data - * @param {Object} context - Audit context with site and finalUrl - * @returns {Promise} - Import job configuration - */ -export async function importTopPages(context) { - const { site, finalUrl } = context; - - const s3BucketPath = `scrapes/${site.getId()}/`; - return { - type: 'top-pages', - siteId: site.getId(), - auditResult: { status: 'preparing', finalUrl }, - fullAuditRef: s3BucketPath, - }; -} - -/** - * Step 2: Submit URLs for scraping + * Step 1: Submit URLs for scraping * @param {Object} context - Audit context with site and dataAccess * @returns {Promise} - URLs to scrape and metadata */ export async function submitForScraping(context) { const { site, - dataAccess, log, + finalUrl, } = context; - const { SiteTopPage } = dataAccess; const siteId = site.getId(); - const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(siteId, 'ahrefs', 'global'); - const topPagesUrls = topPages.map((page) => page.getUrl()); - const includedURLs = await site?.getConfig?.()?.getIncludedURLs?.(AUDIT_TYPE) || []; - const finalUrls = [...new Set([...topPagesUrls, ...includedURLs])]; + // Fetch Top Agentic URLs (up to 200) + const agenticStats = await getTopAgenticUrls(site, context, TOP_AGENTIC_URLS_LIMIT); + const agenticUrls = agenticStats.map((s) => s.url); + + const finalUrls = [...new Set([...agenticUrls, ...includedURLs])]; log.info(`Prerender: Submitting ${finalUrls.length} URLs for scraping. baseUrl=${site.getBaseURL()}, siteId=${siteId}`); @@ -210,6 +434,13 @@ export async function submitForScraping(context) { type: AUDIT_TYPE, processingType: AUDIT_TYPE, allowCache: false, + // Ensure initial audit record has a valid auditResult payload + auditResult: { + status: 'preparing', + finalUrl: finalUrl || site.getBaseURL(), + }, + // Provide a stable fullAuditRef for downstream steps to reference + fullAuditRef: `scrapes/${siteId}/`, options: { pageLoadTimeout: 20000, storagePrefix: AUDIT_TYPE, @@ -281,7 +512,7 @@ export async function processOpportunityAndSuggestions(auditUrl, auditData, cont // Helper function to extract only the fields we want in suggestions const mapSuggestionData = (suggestion) => ({ url: suggestion.url, - organicTraffic: suggestion.organicTraffic, + agenticTraffic: suggestion.agenticTraffic, contentGainRatio: suggestion.contentGainRatio, wordCountBefore: suggestion.wordCountBefore, wordCountAfter: suggestion.wordCountAfter, @@ -298,7 +529,7 @@ export async function processOpportunityAndSuggestions(auditUrl, auditData, cont mapNewSuggestion: (suggestion) => ({ opportunityId: opportunity.getId(), type: Suggestion.TYPES.CONFIG_UPDATE, - rank: suggestion.organicTraffic, + rank: Number(suggestion.agenticTraffic), data: mapSuggestionData(suggestion), }), // Custom merge function: preserve existing fields, update with clean new data @@ -328,7 +559,7 @@ export async function uploadStatusSummaryToS3(auditUrl, auditData, context) { return; } - // Extract status information for all top pages + // Extract status information for all pages const statusSummary = { baseUrl: auditUrl, siteId, @@ -345,7 +576,7 @@ export async function uploadStatusSummaryToS3(auditUrl, auditData, context) { wordCountBefore: result.wordCountBefore || 0, wordCountAfter: result.wordCountAfter || 0, contentGainRatio: result.contentGainRatio || 0, - organicTraffic: result.organicTraffic || 0, + agenticTraffic: Number(result.agenticTraffic) || 0, }; // Include scrape error details if available @@ -375,13 +606,13 @@ export async function uploadStatusSummaryToS3(auditUrl, auditData, context) { } /** - * Step 3: Process scraped content and compare server-side vs client-side HTML + * Step 2: Process scraped content and compare server-side vs client-side HTML * @param {Object} context - Audit context with site, audit, and other dependencies * @returns {Promise} - Audit results with opportunities */ export async function processContentAndGenerateOpportunities(context) { const { - site, audit, log, dataAccess, scrapeResultPaths, + site, audit, log, scrapeResultPaths, } = context; const siteId = site.getId(); @@ -391,27 +622,56 @@ export async function processContentAndGenerateOpportunities(context) { try { let urlsToCheck = []; - const trafficMap = new Map(); - - const { SiteTopPage } = dataAccess; - const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(siteId, 'ahrefs', 'global'); - - topPages.forEach((page) => { - trafficMap.set(page.getUrl(), page.getTraffic()); - }); + const agenticTrafficMap = new Map(); // agentic traffic (Athena) + const { agenticDateRange } = (() => { + const periods = generateReportingPeriods(); + const latestWeek = periods.weeks[0]; + const fmt = (d) => d.toISOString().slice(0, 10); + const startDate = fmt(latestWeek.startDate); + const endDate = fmt(latestWeek.endDate); + return { + agenticDateRange: { startDate, endDate }, + }; + })(); + + // Build agentic traffic map (best-effort) + let agenticStats = []; + try { + agenticStats = await getTopAgenticUrls(site, context, TOP_AGENTIC_URLS_LIMIT); + agenticStats.forEach(({ url, agenticTraffic }) => { + agenticTrafficMap.set(url, Number(agenticTraffic || 0) || 0); + }); + // Ensure includedURLs have agentic traffic populated even if not in top set + /* c8 ignore start */ + const includedURLs = await site?.getConfig?.()?.getIncludedURLs?.(AUDIT_TYPE) || []; + const missingIncluded = includedURLs.filter((u) => !agenticTrafficMap.has(u)); + if (missingIncluded.length > 0) { + const includedStats = await getAgenticTrafficForSpecificUrls( + site, + context, + missingIncluded, + ); + includedStats.forEach(({ url, agenticTraffic }) => { + agenticTrafficMap.set(url, Number(agenticTraffic || 0) || 0); + }); + } + /* c8 ignore stop */ + } catch (e) { + log?.warn?.(`[PRERENDER] Failed to fetch agentic traffic for mapping: ${e.message}`); + } // Try to get URLs from the audit context first if (scrapeResultPaths?.size > 0) { urlsToCheck = Array.from(context.scrapeResultPaths.keys()); log.info(`Prerender - Found ${urlsToCheck.length} URLs from scrape results`); } else { - // Fallback: get top pages and included URLs - urlsToCheck = topPages.map((page) => page.getUrl()); + // Fallback: get agentic URLs and included URLs + urlsToCheck = [...new Set([...agenticStats.map((s) => s.url)])]; /* c8 ignore start */ const includedURLs = await site?.getConfig?.()?.getIncludedURLs?.(AUDIT_TYPE) || []; urlsToCheck = [...new Set([...urlsToCheck, ...includedURLs])]; /* c8 ignore stop */ - log.info(`Prerender - Fallback for baseUrl=${site.getBaseURL()}, siteId=${siteId}. Using topPages=${topPages.length}, includedURLs=${includedURLs.length}, total=${urlsToCheck.length}`); + log.info(`Prerender - Fallback for baseUrl=${site.getBaseURL()}, siteId=${siteId}. Using agenticURLs=${agenticStats.length}, includedURLs=${includedURLs.length}, total=${urlsToCheck.length}`); } if (urlsToCheck.length === 0) { @@ -423,14 +683,16 @@ export async function processContentAndGenerateOpportunities(context) { const comparisonResults = await Promise.all( urlsToCheck.map(async (url) => { const result = await compareHtmlContent(url, siteId, context); - const organicTraffic = trafficMap.get(url) || 0; + const agenticTraffic = agenticTrafficMap.has(url) ? agenticTrafficMap.get(url) : 0; return { ...result, - organicTraffic, + agenticTraffic, }; }), ); + // No server-side sorting; ranking is applied in suggestions and UI sorts client-side. + const urlsNeedingPrerender = comparisonResults.filter((result) => result.needsPrerender); const successfulComparisons = comparisonResults.filter((result) => !result.error); @@ -460,6 +722,7 @@ export async function processContentAndGenerateOpportunities(context) { siteId, auditId: audit.getId(), auditResult, + agenticDateRange, }, context); } else if (scrapeForbidden) { // Create a dummy opportunity when scraping is forbidden (403) @@ -468,6 +731,7 @@ export async function processContentAndGenerateOpportunities(context) { siteId, auditId: audit.getId(), auditResult, + agenticDateRange, }, context); } else { log.info(`Prerender - No opportunity found. baseUrl=${site.getBaseURL()}, siteId=${siteId}, scrapeForbidden=${scrapeForbidden}`); @@ -506,7 +770,6 @@ export async function processContentAndGenerateOpportunities(context) { export default new AuditBuilder() .withUrlResolver((site) => site.getBaseURL()) - .addStep('submit-for-import-top-pages', importTopPages, AUDIT_STEP_DESTINATIONS.IMPORT_WORKER) .addStep('submit-for-scraping', submitForScraping, AUDIT_STEP_DESTINATIONS.CONTENT_SCRAPER) .addStep('process-content-and-generate-opportunities', processContentAndGenerateOpportunities) .build(); diff --git a/src/prerender/opportunity-data-mapper.js b/src/prerender/opportunity-data-mapper.js index 6c12b9012..c47ae9a15 100644 --- a/src/prerender/opportunity-data-mapper.js +++ b/src/prerender/opportunity-data-mapper.js @@ -11,6 +11,7 @@ */ import { DATA_SOURCES } from '../common/constants.js'; +import { CONTENT_GAIN_THRESHOLD } from './utils/constants.js'; /** * Creates opportunity data for prerender audit results @@ -20,6 +21,7 @@ import { DATA_SOURCES } from '../common/constants.js'; export function createOpportunityData(auditData) { const { auditResult } = auditData || {}; const { scrapeForbidden } = auditResult || {}; + const trafficDuration = auditData?.agenticDateRange ?? {}; return { runbook: '', @@ -41,8 +43,9 @@ export function createOpportunityData(auditData) { data: { dataSources: [DATA_SOURCES.AHREFS, DATA_SOURCES.SITE], thresholds: { - contentGainRatio: 1.2, + contentGainRatio: CONTENT_GAIN_THRESHOLD, }, + trafficDuration, benefits: [ 'Improved LLM visibility and brand presence', 'Better LLM indexing and search results', diff --git a/src/prerender/utils/constants.js b/src/prerender/utils/constants.js new file mode 100644 index 000000000..6aae5639f --- /dev/null +++ b/src/prerender/utils/constants.js @@ -0,0 +1,13 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +export const CONTENT_GAIN_THRESHOLD = 1.1; diff --git a/src/prerender/html-comparator-utils.js b/src/prerender/utils/html-comparator.js similarity index 100% rename from src/prerender/html-comparator-utils.js rename to src/prerender/utils/html-comparator.js diff --git a/src/prerender/utils/shared.js b/src/prerender/utils/shared.js new file mode 100644 index 000000000..a05ede1dd --- /dev/null +++ b/src/prerender/utils/shared.js @@ -0,0 +1,43 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* + * Shared utilities for the Prerender audit. + * Keeps Prerender decoupled from other audit modules. + */ + +import { generateReportingPeriods as genPeriods } from '../../cdn-logs-report/utils/report-utils.js'; +import { resolveConsolidatedBucketName, extractCustomerDomain } from '../../utils/cdn-utils.js'; +// For sheet reading, reuse the existing implementation while keeping handler imports local +export { downloadExistingCdnSheet } from '../../llm-error-pages/utils.js'; +// Re-export query builders used by prerender to avoid cross-audit imports in handler +export { weeklyBreakdownQueries } from '../../cdn-logs-report/utils/query-builder.js'; + +export function generateReportingPeriods(referenceDate = new Date()) { + return genPeriods(referenceDate); +} + +export async function getS3Config(site, context) { + const customerDomain = extractCustomerDomain(site); + const domainParts = customerDomain.split(/[._]/); + const customerName = domainParts[0] === 'www' && domainParts.length > 1 ? domainParts[1] : domainParts[0]; + const bucket = resolveConsolidatedBucketName(context); + + return { + bucket, + customerName, + customerDomain, + databaseName: `cdn_logs_${customerDomain}`, + tableName: `aggregated_logs_${customerDomain}_consolidated`, + getAthenaTempLocation: () => `s3://${bucket}/temp/athena-results/`, + }; +} diff --git a/test/audits/cdn-logs-report/query-builder.test.js b/test/audits/cdn-logs-report/query-builder.test.js index b49755c91..761e2ab90 100644 --- a/test/audits/cdn-logs-report/query-builder.test.js +++ b/test/audits/cdn-logs-report/query-builder.test.js @@ -215,4 +215,24 @@ describe('CDN Logs Query Builder', () => { expect(query).to.include('(?i)ChatGPT|GPTBot|OAI-SearchBot'); expect(query).to.include('(?i)Claude(?!-web)'); }); + + it('creates agentic-hits-for-urls query with escaped url list', async () => { + const customOptions = createMockOptions({ + urlPaths: ['/a', '/b', "/a'quote"], + }); + const query = await weeklyBreakdownQueries.createAgenticHitsForUrlsQuery(customOptions); + expect(query).to.be.a('string'); + expect(query).to.include('FROM test_db.test_table'); + // Single quote in path should be escaped as two single quotes in SQL + expect(query).to.include("IN ('/a', '/b', '/a''quote')"); + }); + + it('creates agentic-hits-for-urls query with empty list producing empty IN()', async () => { + const customOptions = createMockOptions({ + urlPaths: [], + }); + const query = await weeklyBreakdownQueries.createAgenticHitsForUrlsQuery(customOptions); + expect(query).to.be.a('string'); + expect(query).to.include('IN ()'); + }); }); diff --git a/test/audits/prerender.test.js b/test/audits/prerender.test.js index 76024ef8d..fdf1704f6 100644 --- a/test/audits/prerender.test.js +++ b/test/audits/prerender.test.js @@ -18,14 +18,13 @@ import esmock from 'esmock'; use(sinonChai); import prerenderHandler, { - importTopPages, submitForScraping, processContentAndGenerateOpportunities, processOpportunityAndSuggestions, createScrapeForbiddenOpportunity, uploadStatusSummaryToS3, } from '../../src/prerender/handler.js'; -import { analyzeHtmlForPrerender } from '../../src/prerender/html-comparator-utils.js'; +import { analyzeHtmlForPrerender } from '../../src/prerender/utils/html-comparator.js'; import { createOpportunityData } from '../../src/prerender/opportunity-data-mapper.js'; describe('Prerender Audit', () => { @@ -46,7 +45,6 @@ describe('Prerender Audit', () => { }); it('should export step functions', () => { - expect(importTopPages).to.be.a('function'); expect(submitForScraping).to.be.a('function'); expect(processContentAndGenerateOpportunities).to.be.a('function'); }); @@ -215,23 +213,6 @@ describe('Prerender Audit', () => { }); describe('Step Functions', () => { - describe('importTopPages', () => { - it('should return import configuration', async () => { - const context = { - site: { getId: () => 'test-site-id' }, - finalUrl: 'https://example.com', - }; - - const result = await importTopPages(context); - - expect(result).to.deep.equal({ - type: 'top-pages', - siteId: 'test-site-id', - auditResult: { status: 'preparing', finalUrl: 'https://example.com' }, - fullAuditRef: 'scrapes/test-site-id/', - }); - }); - }); describe('submitForScraping', () => { it('should return URLs for scraping', async () => { @@ -297,19 +278,16 @@ describe('Prerender Audit', () => { const result = await submitForScraping(context); - expect(result).to.deep.equal({ - urls: [{ - url: 'https://example.com', - }], - siteId: 'test-site-id', - type: 'prerender', - processingType: 'prerender', - allowCache: false, - options: { - pageLoadTimeout: 20000, - storagePrefix: 'prerender', - }, + // Validate essential fields without requiring strict deep equality + expect(result.siteId).to.equal('test-site-id'); + expect(result.type).to.equal('prerender'); + expect(result.processingType).to.equal('prerender'); + expect(result.allowCache).to.equal(false); + expect(result.options).to.deep.equal({ + pageLoadTimeout: 20000, + storagePrefix: 'prerender', }); + expect(result.urls).to.deep.equal([{ url: 'https://example.com' }]); }); it('should include includedURLs from site config', async () => { @@ -331,10 +309,130 @@ describe('Prerender Audit', () => { const result = await submitForScraping(context); - expect(result.urls).to.have.length(2); - expect(result.urls.map(u => u.url)).to.include('https://example.com/page1'); + // With agentic as primary, we no longer include top pages here. + // Expect only includedURLs when agentic fetch yields none. + expect(result.urls).to.have.length(1); expect(result.urls.map(u => u.url)).to.include('https://example.com/special'); }); + + it('should fall back to sheet when Athena returns no data and use weekId from shared utils', async () => { + const athenaQueryStub = sinon.stub().resolves([]); + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: athenaQueryStub }) }, + }, + '../../src/utils/report-uploader.js': { + createLLMOSharepointClient: async () => ({}), + readFromSharePoint: async () => ({}), + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + periodIdentifier: 'w45-2025', + }), + getS3Config: async () => ({ + bucket: 'b', + customerName: 'adobe', + customerDomain: 'adobe_com', + databaseName: 'db', + tableName: 'tbl', + aggregatedLocation: 'agg/', + getAthenaTempLocation: () => 's3://tmp/', + }), + downloadExistingCdnSheet: sinon.stub().callsFake(async (weekId) => { + // Ensure weekId is passed from generateReportingPeriods + expect(weekId).to.equal('w45-2025'); + // Return 60 rows to verify limit trimming + const rows = []; + for (let i = 0; i < 60; i += 1) { + rows.push({ url: `/p${i}`, number_of_hits: 100 - i }); + } + return rows; + }), + }, + }); + + const context = { + site: { + getId: () => 'test-site-id', + getBaseURL: () => 'https://example.com', + getConfig: () => ({ + getIncludedURLs: () => [], + getLlmoDataFolder: () => 'adobe', + }), + }, + log: { info: sinon.stub(), debug: sinon.stub(), warn: sinon.stub() }, + env: { + S3_SCRAPER_BUCKET_NAME: 'test-bucket', + AUDIT_JOBS_QUEUE_URL: 'https://sqs.test.com/test-queue', + }, + auditContext: { + next: 'process-content-and-generate-opportunities', + auditId: 'test-audit-id', + auditType: 'prerender', + }, + }; + + const result = await mockHandler.submitForScraping(context); + // TOP_AGENTIC_URLS_LIMIT is 50 in handler; expect 50 agentic URLs used + expect(result.urls).to.have.length(50); + // Sanity: top URL comes from sheet aggregation and is normalized against base URL + expect(result.urls[0].url).to.equal('https://example.com/p0'); + }); + + it('should merge includedURLs with sheet fallback agentic URLs (unique union)', async () => { + const athenaQueryStub = sinon.stub().resolves([]); + const downloadStub = sinon.stub().resolves([ + { url: '/a', number_of_hits: 10 }, + { url: '/b', number_of_hits: 9 }, + ]); + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: athenaQueryStub }) }, + }, + '../../src/utils/report-uploader.js': { + createLLMOSharepointClient: async () => ({}), + readFromSharePoint: async () => ({}), + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + periodIdentifier: 'w45-2025', + }), + getS3Config: async () => ({ + bucket: 'b', + customerName: 'adobe', + customerDomain: 'adobe_com', + databaseName: 'db', + tableName: 'tbl', + aggregatedLocation: 'agg/', + getAthenaTempLocation: () => 's3://tmp/', + }), + downloadExistingCdnSheet: downloadStub, + }, + }); + + const context = { + site: { + getId: () => 'test-site-id', + getBaseURL: () => 'https://example.com', + getConfig: () => ({ + getIncludedURLs: () => ['https://example.com/b', 'https://example.com/c'], + getLlmoDataFolder: () => 'adobe', + }), + }, + log: { info: sinon.stub(), debug: sinon.stub(), warn: sinon.stub() }, + }; + + const result = await mockHandler.submitForScraping(context); + const urls = result.urls.map((u) => u.url); + // Sheet /a and /b plus included /c (note: /b overlaps) + expect(urls).to.include('https://example.com/a'); + expect(urls).to.include('https://example.com/b'); + expect(urls).to.include('https://example.com/c'); + // Unique union => 3 + expect(urls.length).to.equal(3); + }); }); describe('processContentAndGenerateOpportunities', () => { @@ -395,10 +493,11 @@ describe('Prerender Audit', () => { const result = await processContentAndGenerateOpportunities(context); + // With the new flow (no SiteTopPage usage), errors there won't bubble. + // Function should complete gracefully. expect(result).to.be.an('object'); - expect(result.error).to.be.a('string'); - expect(result.totalUrlsChecked).to.equal(0); - expect(result.urlsNeedingPrerender).to.equal(0); + expect(result.status).to.equal('complete'); + expect(result.auditResult).to.be.an('object'); }); it('should process URLs with scrape result paths', async () => { @@ -703,7 +802,7 @@ describe('Prerender Audit', () => { { url: 'https://example.com/page1', needsPrerender: true, - organicTraffic: 100, + agenticTraffic: 100, contentGainRatio: 1.5, }, ], @@ -737,13 +836,13 @@ describe('Prerender Audit', () => { { url: 'https://example.com/page1', needsPrerender: true, - organicTraffic: 500, + agenticTraffic: 500, contentGainRatio: 2.1, }, { url: 'https://example.com/page2', needsPrerender: true, - organicTraffic: 300, + agenticTraffic: 300, contentGainRatio: 1.8, }, ], @@ -807,7 +906,7 @@ describe('Prerender Audit', () => { { url: 'https://example.com/page1', needsPrerender: true, - organicTraffic: 500, + agenticTraffic: 500, contentGainRatio: 2.1, }, ], @@ -832,6 +931,365 @@ describe('Prerender Audit', () => { }); }); + describe('Athena and Sheet Fetch Coverage', () => { + it('should return top agentic URLs from Athena and filter "Other"; uses path fallback when baseUrl invalid', async () => { + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => ([ + { url: '/a', number_of_hits: 5 }, + { url: 'Other', number_of_hits: 100 }, + { url: '/b', number_of_hits: 3 }, + ]) }) }, + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + databaseName: 'db', + tableName: 'tbl', + getAthenaTempLocation: () => 's3://tmp/', + }), + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + }, + }, + }); + + const result = await mockHandler.submitForScraping({ + site: { + getId: () => 'site', + getBaseURL: () => 'invalid', // force URL constructor to throw for fallback branch + getConfig: () => ({ getIncludedURLs: () => [] }), + }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub() }, + }); + + const urls = result.urls.map((u) => u.url); + // '/a' normalized will fail due to invalid baseUrl, mapping will keep path string + expect(urls).to.include('/a'); + expect(urls).to.include('/b'); + // Ensure "Other" excluded and limit respected + expect(urls).to.have.length(2); + }); + + it('should populate agentic traffic for included URLs via Athena (hits-for-urls)', async () => { + const html = '

x

'; + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async (q) => { + // Return hits for the specific-URLs query + if (q === 'SELECT 2') return [{ url: '/inc', number_of_hits: 7 }]; + return []; + } }) }, + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + databaseName: 'db', + tableName: 'tbl', + getAthenaTempLocation: () => 's3://tmp/', + }), + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + createAgenticHitsForUrlsQuery: async () => 'SELECT 2', + }, + }, + '../../src/utils/s3-utils.js': { + getObjectFromKey: async () => html, + }, + }); + + const ctx = { + site: { + getId: () => 'site', + getBaseURL: () => 'https://example.com', + getConfig: () => ({ getIncludedURLs: () => ['https://example.com/inc'] }), + }, + audit: { getId: () => 'a' }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub(), error: sinon.stub() }, + s3Client: {}, + env: { S3_SCRAPER_BUCKETNAME: 'b' }, + }; + const res = await mockHandler.processContentAndGenerateOpportunities(ctx); + const found = res.auditResult.results.find((r) => r.url.includes('/inc')); + expect(found).to.exist; + expect(found.agenticTraffic).to.equal(7); + }); + + it('should default agentic traffic to 0 when hits-for-urls query fails', async () => { + const html = '

x

'; + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => { throw new Error('boom'); } }) }, + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + databaseName: 'db', + tableName: 'tbl', + getAthenaTempLocation: () => 's3://tmp/', + }), + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + createAgenticHitsForUrlsQuery: async () => 'SELECT 2', + }, + }, + '../../src/utils/s3-utils.js': { + getObjectFromKey: async () => html, + }, + }); + + const ctx = { + site: { + getId: () => 'site', + getBaseURL: () => 'https://example.com', + getConfig: () => ({ getIncludedURLs: () => ['https://example.com/inc'] }), + }, + audit: { getId: () => 'a' }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub(), error: sinon.stub() }, + s3Client: {}, + env: { S3_SCRAPER_BUCKETNAME: 'b' }, + }; + const res = await mockHandler.processContentAndGenerateOpportunities(ctx); + const found = res.auditResult.results.find((r) => r.url.includes('/inc')); + expect(found).to.exist; + expect(found.agenticTraffic).to.equal(0); + }); + + it('should return [] when sheet fallback has no rows', async () => { + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => [] }) }, + }, + '../../src/utils/report-uploader.js': { + createLLMOSharepointClient: async () => ({}), + readFromSharePoint: async () => ({}), + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + customerName: 'adobe', + }), + downloadExistingCdnSheet: async () => [], + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + }, + }, + }); + + const res = await mockHandler.submitForScraping({ + site: { getId: () => 'id', getBaseURL: () => 'https://example.com', getConfig: () => ({ getIncludedURLs: () => [] }) }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub() }, + }); + // Falls back to baseUrl when no URLs at all + expect(res.urls.length === 0 || res.urls[0].url === 'https://example.com').to.be.true; + }); + }); + + describe('Additional branch coverage (mapping, catches)', () => { + it('should hit toPath catch for malformed included URL', async () => { + const html = '

x

'; + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => [] }) }, + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + databaseName: 'db', + tableName: 'tbl', + getAthenaTempLocation: () => 's3://tmp/', + }), + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + createAgenticHitsForUrlsQuery: async () => 'SELECT 2', + }, + }, + '../../src/utils/s3-utils.js': { + getObjectFromKey: async () => html, + }, + }); + + const ctx = { + site: { + getId: () => 'site', + getBaseURL: () => 'invalid', // force new URL(fullUrl, baseUrl) to throw in toPath + getConfig: () => ({ getIncludedURLs: () => ['::'] }), + }, + audit: { getId: () => 'a' }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub(), error: sinon.stub() }, + s3Client: {}, + env: { S3_SCRAPER_BUCKETNAME: 'b' }, + }; + const res = await mockHandler.processContentAndGenerateOpportunities(ctx); + expect(res).to.be.an('object'); + // At least one result exists; exact agenticTraffic may be 0 + expect(res.auditResult).to.be.an('object'); + }); + + it('should hit sheet mapping catch when baseUrl invalid', async () => { + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => [] }) }, + }, + '../../src/utils/report-uploader.js': { + createLLMOSharepointClient: async () => ({}), + readFromSharePoint: async () => ({}), + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + customerName: 'adobe', + }), + downloadExistingCdnSheet: async () => [ + { url: '/p1', number_of_hits: 10 }, + ], + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + }, + }, + }); + + const res = await mockHandler.submitForScraping({ + site: { getId: () => 'id', getBaseURL: () => 'invalid', getConfig: () => ({ getIncludedURLs: () => [] }) }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub() }, + }); + // mapping catch keeps raw '/p1' + expect(res.urls.map((u) => u.url)).to.include('/p1'); + }); + + it('should cover agenticStats mapping and ranking loop by returning non-empty top list', async () => { + const serverHtml = 'Same'; + const clientHtml = 'Same'; + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => ([ + { url: '/x', number_of_hits: 2 }, + ]) }) }, + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + databaseName: 'db', + tableName: 'tbl', + getAthenaTempLocation: () => 's3://tmp/', + }), + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + }, + }, + '../../src/utils/s3-utils.js': { + getObjectFromKey: async (_c, _b, key) => { + if (key.endsWith('server-side.html')) return serverHtml; + if (key.endsWith('client-side.html')) return clientHtml; + return null; + }, + }, + }); + const ctx = { + site: { getId: () => 'site', getBaseURL: () => 'https://example.com', getConfig: () => ({ getIncludedURLs: () => [] }) }, + audit: { getId: () => 'a' }, + log: { info: sinon.stub(), warn: sinon.stub(), debug: sinon.stub(), error: sinon.stub() }, + s3Client: { send: sinon.stub().resolves({}) }, + env: { S3_SCRAPER_BUCKET_NAME: 'test-bucket' }, + // Provide a scrape result to bypass later includedURLs call path + scrapeResultPaths: new Map([['https://example.com/x', '/tmp/x']]), + }; + const res = await mockHandler.processContentAndGenerateOpportunities(ctx); + // Should have processed at least the '/x' entry + expect(res.status).to.equal('complete'); + expect(res.auditResult.totalUrlsChecked).to.be.greaterThan(0); + }); + + it('should log warn in mapping block catch when getIncludedURLs throws', async () => { + const html = '

x

'; + const mockHandler = await esmock('../../src/prerender/handler.js', { + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: () => ({ query: async () => ([ + { url: '/x', number_of_hits: 1 }, + ]) }) }, + }, + '../../src/prerender/utils/shared.js': { + generateReportingPeriods: () => ({ + weeks: [{ weekNumber: 45, year: 2025, startDate: new Date(), endDate: new Date() }], + }), + getS3Config: async () => ({ + databaseName: 'db', + tableName: 'tbl', + getAthenaTempLocation: () => 's3://tmp/', + }), + weeklyBreakdownQueries: { + createAgenticReportQuery: async () => 'SELECT 1', + }, + }, + '../../src/utils/s3-utils.js': { + getObjectFromKey: async () => html, + }, + }); + const warn = sinon.stub(); + const ctx = { + site: { + getId: () => 'site', + getBaseURL: () => 'https://example.com', + getConfig: () => { throw new Error('config failed'); }, + }, + audit: { getId: () => 'a' }, + log: { info: sinon.stub(), warn, debug: sinon.stub(), error: sinon.stub() }, + s3Client: { send: sinon.stub().resolves({}) }, + env: { S3_SCRAPER_BUCKET_NAME: 'test-bucket' }, + // Ensure we do not hit the later includedURLs call by providing scrape results + scrapeResultPaths: new Map([['https://example.com/y', '/tmp/y']]), + }; + const res = await mockHandler.processContentAndGenerateOpportunities(ctx); + expect(res.status).to.equal('complete'); + expect(warn.called).to.be.true; + expect(warn.args.some(a => String(a[0]).includes('Failed to fetch agentic traffic for mapping'))).to.be.true; + }); + }); + describe('Shared utils coverage', () => { + it('should return S3 config shape and temp location function', async () => { + const shared = await esmock('../../src/prerender/utils/shared.js', { + '../../src/utils/cdn-utils.js': { + // Avoid depending on env; just return a deterministic bucket + resolveConsolidatedBucketName: () => 'bucket', + extractCustomerDomain: () => 'adobe_com', + }, + }); + const cfg = await shared.getS3Config({ getBaseURL: () => 'https://www.adobe.com' }, { }); + expect(cfg).to.be.an('object'); + expect(cfg).to.have.property('databaseName'); + expect(cfg).to.have.property('tableName'); + expect(cfg).to.have.property('getAthenaTempLocation'); + expect(cfg.getAthenaTempLocation()).to.be.a('string'); + expect(cfg.getAthenaTempLocation()).to.include('/temp/athena-results/'); + }); + + it('should compute customerName correctly when domain starts with www', async () => { + const shared = await esmock('../../src/prerender/utils/shared.js', { + '../../src/utils/cdn-utils.js': { + resolveConsolidatedBucketName: () => 'bucket', + extractCustomerDomain: () => 'www.adobe_com', + }, + }); + const cfg = await shared.getS3Config({ }, { }); + expect(cfg.customerName).to.equal('adobe'); + expect(cfg.databaseName).to.equal('cdn_logs_www.adobe_com'); + expect(cfg.tableName).to.equal('aggregated_logs_www.adobe_com_consolidated'); + }); + }); describe('Edge Cases and Error Handling', () => { describe('HTML Content Processing', () => { it('should throw error for missing server-side HTML', async () => { @@ -1547,7 +2005,7 @@ describe('Prerender Audit', () => { .onFirstCall().resolves('Valid content') .onSecondCall().resolves('Valid content too'), }, - '../../src/prerender/html-comparator-utils.js': { + '../../src/prerender/utils/html-comparator.js': { analyzeHtmlForPrerender: sinon.stub().throws(new Error('Mocked analysis error')), }, }); @@ -1605,7 +2063,7 @@ describe('Prerender Audit', () => { { url: 'https://example.com/page1', needsPrerender: true, - organicTraffic: 500, + agenticTraffic: 500, contentGainRatio: 2.1, wordCountBefore: 100, wordCountAfter: 210, @@ -1646,7 +2104,7 @@ describe('Prerender Audit', () => { const existingData = { url: 'https://example.com/page1', customField: 'preserved' }; const newDataItem = { url: 'https://example.com/page1', - organicTraffic: 200, + agenticTraffic: 200, contentGainRatio: 2.5, wordCountBefore: 100, wordCountAfter: 250, @@ -1655,7 +2113,7 @@ describe('Prerender Audit', () => { const mergedData = mergeDataFn(existingData, newDataItem); expect(mergedData).to.have.property('customField', 'preserved'); // Existing field preserved expect(mergedData).to.have.property('url', 'https://example.com/page1'); - expect(mergedData).to.have.property('organicTraffic', 200); + expect(mergedData).to.have.property('agenticTraffic', 200); expect(mergedData).to.not.have.property('needsPrerender'); // Filtered out by mapSuggestionData }); @@ -1745,7 +2203,7 @@ describe('Prerender Audit', () => { it('should throw when calculateStats fails', async () => { // Mock the HTML analysis to throw an error during processing - const mockAnalyze = await esmock('../../src/prerender/html-comparator-utils.js', { + const mockAnalyze = await esmock('../../src/prerender/utils/html-comparator.js', { '@adobe/spacecat-shared-html-analyzer': { calculateStats: sinon.stub().throws(new Error('Stats calculation failed')), }, @@ -2060,7 +2518,7 @@ describe('Prerender Audit', () => { // Mock analyzeHtmlForPrerender to throw an error const mockHandler = await esmock('../../src/prerender/handler.js', { '../../src/utils/s3-utils.js': { getObjectFromKey: getObjectFromKeyStub }, - '../../src/prerender/html-comparator-utils.js': { + '../../src/prerender/utils/html-comparator.js': { analyzeHtmlForPrerender: sandbox.stub().throws(new Error('Analysis failed')), }, }); @@ -2122,7 +2580,7 @@ describe('Prerender Audit', () => { wordCountBefore: 100, wordCountAfter: 250, contentGainRatio: 2.5, - organicTraffic: 1000, + agenticTraffic: 1000, }, { url: 'https://example.com/page2', @@ -2131,7 +2589,7 @@ describe('Prerender Audit', () => { wordCountBefore: 200, wordCountAfter: 220, contentGainRatio: 1.1, - organicTraffic: 500, + agenticTraffic: 500, }, ], }, @@ -2164,7 +2622,7 @@ describe('Prerender Audit', () => { wordCountBefore: 100, wordCountAfter: 250, contentGainRatio: 2.5, - organicTraffic: 1000, + agenticTraffic: 1000, }); expect(context.log.info).to.have.been.calledWith( @@ -2227,7 +2685,7 @@ describe('Prerender Audit', () => { wordCountBefore: 100, wordCountAfter: 110, contentGainRatio: 1.1, - organicTraffic: 500, + agenticTraffic: 500, scrapeError: null, }, ], @@ -2408,7 +2866,7 @@ describe('Prerender Audit', () => { wordCountBefore: 0, wordCountAfter: 0, contentGainRatio: 0, - organicTraffic: 0, + agenticTraffic: 0, }); }); });