diff --git a/src/redirect-chains/handler.js b/src/redirect-chains/handler.js index 1d0bef877..424f8bf54 100644 --- a/src/redirect-chains/handler.js +++ b/src/redirect-chains/handler.js @@ -15,11 +15,9 @@ import { prependSchema, tracingFetch as fetch, } from '@adobe/spacecat-shared-utils'; -import { - extractDomainAndProtocol, - getUrlWithoutPath, -} from '../support/utils.js'; +import { getUrlWithoutPath } from '../support/utils.js'; import { AuditBuilder } from '../common/audit-builder.js'; +import { noopUrlResolver } from '../common/base-audit.js'; import { syncSuggestions } from '../utils/data-access.js'; import { convertToOpportunity } from '../common/opportunity.js'; import { createOpportunityData } from './opportunity-data-mapper.js'; @@ -85,7 +83,7 @@ function buildUniqueKey(result) { * } */ async function countRedirects(url, maxRedirects = STOP_AFTER_N_REDIRECTS) { - const domain = new URL(url).hostname; + const domain = new URL(url).hostname; // Given: https://www.example.com/subpath --> www.example.com let redirectUrl = url; let redirectCount = 0; @@ -153,7 +151,7 @@ async function countRedirects(url, maxRedirects = STOP_AFTER_N_REDIRECTS) { * tooQualified: boolean, // if true, this entry unnecessarily uses fully qualified URLs * hasSameSrcDest: boolean, // if true, the source and destination URLs are the same * } - * @param domain - Optional. The domain to prepend if missing from the URL + * @param fullBaseUrl - Optional. The base URL to use for relative URLs. * * @returns {Promise<{}>} An object with the following properties: * { @@ -176,12 +174,12 @@ async function countRedirects(url, maxRedirects = STOP_AFTER_N_REDIRECTS) { * error: string, // can be an empty string * } */ -async function followAnyRedirectForUrl(urlStruct, domain = '') { +async function followAnyRedirectForUrl(urlStruct, fullBaseUrl = '') { // ensure full URLs - const srcUrl = ensureFullUrl(urlStruct.origSrc, domain); + const srcUrl = ensureFullUrl(urlStruct.origSrc, fullBaseUrl); let fullDest = urlStruct.origDest; if (fullDest) { - fullDest = ensureFullUrl(fullDest, domain); + fullDest = ensureFullUrl(fullDest, fullBaseUrl); } else { fullDest = srcUrl; } @@ -359,24 +357,44 @@ export async function getJsonData(url, log) { /** * Processes the /redirects.json file and returns a sorted array of all the page URLs referenced. + * * Filters out entries to only include those source URLs that start with the audit scope. * * Duplicates are marked. The last occurrence of a duplicate is kept as the original. * * Marks any entries that are fully qualified (vs relative) as 'tooQualified'. * * Marks any entries that have the same source and destination URLs as 'hasSameSrcDest'. + * * If there is no /redirects.json file, then an empty array is returned. * - * @param {string} baseUrl - The site's base URL for the /redirects.json file. - * Ex: https://www.example.com + * The function tries to find /redirects.json using the full auditScopeUrl (including subpaths). + * If not found, it falls back to trying the base URL without any subpaths. + * + * @param {string} auditScopeUrl - The audit scope URL defining which Source URLs to check. + * Ex: https://www.example.com or https://www.example.com/fr * @param {Object} log - The logger object to use for logging. * @returns {Promise} An array of page URLs. Might be empty. */ -export async function processRedirectsFile(baseUrl, log) { - // create the URL for the /redirects.json file - const redirectsUrl = `${baseUrl}/redirects.json`; - // retrieve the entire /redirects.json file +export async function processRedirectsFile(auditScopeUrl, log) { + // Try to find /redirects.json at the full audit scope URL first (with subpaths if present) + // Ensure that the very end of our URL does not have a trailing slash. Defensive coding. + const cleanedAuditScopeUrl = auditScopeUrl.endsWith('/') ? auditScopeUrl.slice(0, -1) : auditScopeUrl; + let redirectsUrl = `${cleanedAuditScopeUrl}/redirects.json`; + log.info(`${AUDIT_LOGGING_NAME} - Looking for redirects file at: ${redirectsUrl}`); let redirectsJson = await getJsonData(redirectsUrl, log); + // If not found and auditScopeUrl has subpaths, try without subpaths as fallback + if ((!redirectsJson || !redirectsJson.data || !redirectsJson.data.length)) { + const urlWithoutPath = getUrlWithoutPath(cleanedAuditScopeUrl); + // Only try this fallback URL is different from the cleanedAuditScopeUrl + if (urlWithoutPath !== cleanedAuditScopeUrl) { + redirectsUrl = `${urlWithoutPath}/redirects.json`; + log.info(`${AUDIT_LOGGING_NAME} - Redirects file not found with subpaths, trying fallback at: ${redirectsUrl}`); + redirectsJson = await getJsonData(redirectsUrl, log); + } + } if (!redirectsJson || !redirectsJson.data || !redirectsJson.data.length) { - return []; // no /redirects.json file found, or there are no entries in the file + log.info(`${AUDIT_LOGGING_NAME} - No redirects file found or file is empty`); + return []; // no /redirects.json file found anywhere, or there are no entries in the file } + + log.info(`${AUDIT_LOGGING_NAME} - Successfully loaded redirects file from: ${redirectsUrl}`); // if we only received part of the entries that are available, then ask for the entire file const totalEntries = redirectsJson.total; if (redirectsJson.data.length < totalEntries) { @@ -417,7 +435,8 @@ export async function processRedirectsFile(baseUrl, log) { ordinalDuplicate = 0; // reset for some future set of duplicate source URLs } // is this entry needlessly fully qualified? - if (pageUrls[i].origSrc.startsWith(baseUrl) || pageUrls[i].origDest.startsWith(baseUrl)) { + if (pageUrls[i].origSrc.startsWith(cleanedAuditScopeUrl) + || pageUrls[i].origDest.startsWith(cleanedAuditScopeUrl)) { pageUrls[i].tooQualified = true; } // does this entry have the same source and destination URLs? @@ -428,14 +447,49 @@ export async function processRedirectsFile(baseUrl, log) { // special case: inspect the ~last~ entry for any problems // ^^^ This is a needed due to the way we iterated over the list looking for duplicates: // we did ~not~ inspect the last entry for ~additional~ problems. - if (pageUrls[pageUrls.length - 1].origSrc.startsWith(baseUrl) - || pageUrls[pageUrls.length - 1].origDest.startsWith(baseUrl)) { + if (pageUrls[pageUrls.length - 1].origSrc.startsWith(cleanedAuditScopeUrl) + || pageUrls[pageUrls.length - 1].origDest.startsWith(cleanedAuditScopeUrl)) { pageUrls[pageUrls.length - 1].tooQualified = true; // "too qualified" } if (pageUrls[pageUrls.length - 1].origSrc === pageUrls[pageUrls.length - 1].origDest) { pageUrls[pageUrls.length - 1].hasSameSrcDest = true; // "has same source and destination URLs" } - return pageUrls; + + // Filter pageUrls to only include entries that match the audit scope + const parsedAuditScopeUrl = new URL(cleanedAuditScopeUrl); + const scopePath = parsedAuditScopeUrl.pathname; + const hasScopePath = scopePath && scopePath !== '/'; + + // If auditScopeUrl has no subpath, return all pageUrls + if (!hasScopePath) { + log.info(`${AUDIT_LOGGING_NAME} - No subpath in audit scope URL, returning all ${pageUrls.length} entries`); + return pageUrls; + } + + // Filter to only include entries whose origSrc starts with the scope path + // Ensure we match with a trailing slash to avoid false positives (e.g., /fr/ not /french) + const scopePathWithSlash = `${scopePath}/`; + const auditScopeUrlWithSlash = `${cleanedAuditScopeUrl}/`; + + const filteredPageUrls = pageUrls.filter((entry) => { + const { origSrc } = entry; + + // Check if origSrc is a relative path starting with the scope path + if (origSrc.startsWith(scopePathWithSlash)) { + return true; + } + + // Check if origSrc is a fully qualified URL containing the audit scope URL + if (origSrc.startsWith('http://') || origSrc.startsWith('https://')) { + // For fully qualified URLs, check if they start with the full auditScopeUrl + return origSrc.startsWith(auditScopeUrlWithSlash); + } + + return false; + }); + + log.info(`${AUDIT_LOGGING_NAME} - Filtered entries from ${pageUrls.length} to ${filteredPageUrls.length} based on audit scope: ${scopePathWithSlash}`); + return filteredPageUrls; } /** @@ -448,14 +502,19 @@ export async function processRedirectsFile(baseUrl, log) { * * @param {Object[]} pageUrls - An array of page URL objects to process. * See the structure returned by `processRedirectsFile`. - * @param {string} baseUrl - The base URL to use for relative URLs. + * @param {string} fullBaseUrl - The base URL to use for relative URLs. * @param {Object} log - The logger object to use for logging. * @param {number} maxConcurrency - The maximum number of concurrent requests to process. * * @returns {Promise} An array of results for each processed page URL. * Each result object has the structure as returned by `followAnyRedirectForUrl` */ -export async function processEntriesInParallel(pageUrls, baseUrl, log, maxConcurrency = 1000) { +export async function processEntriesInParallel( + pageUrls, + fullBaseUrl, + log, + maxConcurrency = 1000, +) { const BATCH_SIZE = maxConcurrency; // processing takes about 0.015 seconds per entry const allResults = []; @@ -464,7 +523,7 @@ export async function processEntriesInParallel(pageUrls, baseUrl, log, maxConcur const batch = pageUrls.slice(i, i + BATCH_SIZE); // Process current batch in parallel - const batchPromises = batch.map(async (row) => followAnyRedirectForUrl(row, baseUrl)); + const batchPromises = batch.map(async (row) => followAnyRedirectForUrl(row, fullBaseUrl)); // eslint-disable-next-line no-await-in-loop const batchResults = await Promise.all(batchPromises); @@ -610,7 +669,7 @@ export function filterIssuesToFitIntoSpace(issues, log) { log.info(`${AUDIT_LOGGING_NAME} - Total size of all the issues exceed space limit (${Math.round(totalSize / 1024)} KB > ${MAX_DB_OBJ_SIZE_BYTES / 1024} KB) ... filtering issues to fit within limit`); - // Categorize issues. Later we will re-create them into specific suggestions. + // Categorize issues. Later we will re-create these issues into specific suggestions. const categorizedIssues = { 'duplicate-src': [], // 'duplicate-src' 'too-qualified': [], // 'too-qualified' @@ -619,7 +678,6 @@ export function filterIssuesToFitIntoSpace(issues, log) { 'final-mismatch': [], // '404-page', 'src-is-final', 'final-mismatch' 'too-many-redirects': [], // 'msx-redirects-exceeded', 'high-redirect-count' }; - issues.forEach((issue) => { const category = categorizeIssue(issue); if (categorizedIssues[category]) { @@ -627,7 +685,7 @@ export function filterIssuesToFitIntoSpace(issues, log) { } }); - // Calculate size statistics for conservative estimation + // Based on all the issues, calculate a conservative size to represent a randomly large issue const issueSizes = issues.map((issue) => { const issueJson = JSON.stringify(issue); return getStringByteLength(issueJson); @@ -636,11 +694,12 @@ export function filterIssuesToFitIntoSpace(issues, log) { const largestSize = Math.max(...issueSizes); const conservativeSizeEstimate = Math.max(averageSize, largestSize * 0.8); // 80% of max size - // Calculate how many issues we can fit + // Calculate how many issues can fit into the database space const maxIssues = Math.floor(MAX_DB_OBJ_SIZE_BYTES / conservativeSizeEstimate); + + // Determine the set of categories that have at least one issue in them // eslint-disable-next-line max-len,no-shadow const nonEmptyCategories = Object.entries(categorizedIssues).filter(([_, issues]) => issues.length > 0); - if (nonEmptyCategories.length === 0) { return { filteredIssues: [], wasReduced: true }; // "should never happen" } @@ -680,15 +739,15 @@ export function filterIssuesToFitIntoSpace(issues, log) { * * @param {Object[]} pageUrls - An array of page URL objects to process. * See the structure returned by `processRedirectsFile`. - * @param {string} baseUrl - The base URL to use for relative URLs. + * @param {string} fullBaseUrl - The base URL to use for relative URLs. * @param {Object} log - The logger object to use for logging. * * @returns {Promise<{counts: {}, entriesWithProblems: *[]}>} * See the structures returned by `analyzeResults` for details. */ -async function processEntries(pageUrls, baseUrl, log) { +async function processEntries(pageUrls, fullBaseUrl, log) { // Step 1: Process all HTTP requests in parallel (the slow part) - const results = await processEntriesInParallel(pageUrls, baseUrl, log); + const results = await processEntriesInParallel(pageUrls, fullBaseUrl, log); // Step 2: Analyze results synchronously (the fast part) const { counts, entriesWithProblems } = analyzeResults(results); @@ -721,19 +780,85 @@ function calculateProjectedMetrics(totalIssues) { }; } +// ----- URL resolution --------------------------------------------------------------------------- + +/** + * Determines the audit scope URL by following redirects and finding the longest common path prefix. + * This URL defines which Source URLs in /redirects.json should be checked during the audit. + * The function intelligently handles URL paths by finding the longest common prefix path: + * - If the original baseUrl has NO subpath, strips all paths from the resolved URL + * - If the original baseUrl HAS a subpath, keeps as many matching path segments as possible + * + * Examples: + * - https://bulk.com → www.bulk.com/uk → returns https://www.bulk.com (strips all paths) + * - https://bulk.com/fr → www.bulk.com/fr → returns https://www.bulk.com/fr (all segments match) + * - https://kpmg.com → kpmg.com/xx/en.html → returns https://kpmg.com (strips all paths) + * - https://realmadrid.com/area-vip → www.realmadrid.com/sites/area-vip → returns https://www.realmadrid.com (no matching segments) + * - https://westjet.com/en-ca/book-trip/flights → www.westjet.com/en-ca/best-of-travel → returns https://www.westjet.com/en-ca (first segment matches) + * + * @param {string} baseUrl - The site's original base URL from site.getBaseURL() + * @returns {Promise} The audit scope URL that defines which Source URLs to check + */ +export async function determineAuditScope(baseUrl) { + // Parse the original base URL to extract its path + const originalUrl = new URL(prependSchema(baseUrl)); + const originalPath = originalUrl.pathname; + const hasOriginalPath = originalPath && originalPath !== '/'; + + // Get the audit scope URL by following redirects + const auditScopeUrl = await composeAuditURL(baseUrl); // ex: www.example.com/fr + const auditScopeUrlWithSchema = prependSchema(auditScopeUrl); // ex: https://www.example.com/fr + const parsedAuditScopeUrl = new URL(auditScopeUrlWithSchema); + + // If the original URL has no subpath, strip all paths from the audit scope URL + if (!hasOriginalPath) { + return getUrlWithoutPath(auditScopeUrlWithSchema); // ex: https://www.example.com:4321 + } + + // Since the original URL has a subpath, find the longest common prefix path + const auditScopePath = parsedAuditScopeUrl.pathname; + + // Split paths into segments (filter out empty strings from leading/trailing slashes) + const originalSegments = originalPath.split('/').filter((seg) => seg.length > 0); + const scopedSegments = auditScopePath.split('/').filter((seg) => seg.length > 0); + + // Find the longest common prefix by comparing segments from the beginning + const commonSegments = []; + const maxDepthAvailable = Math.min(originalSegments.length, scopedSegments.length); + + // eslint-disable-next-line no-plusplus + for (let i = 0; i < maxDepthAvailable; i++) { + if (originalSegments[i] === scopedSegments[i]) { + commonSegments.push(originalSegments[i]); + } else { + // Stop at the first non-matching segment + break; + } + } + + // If we found matching segments, construct the path with them + if (commonSegments.length > 0) { + const commonPath = `/${commonSegments.join('/')}`; + return `${parsedAuditScopeUrl.protocol}//${parsedAuditScopeUrl.host}${commonPath}`; + } + + // No matching segments, strip all paths + return getUrlWithoutPath(auditScopeUrlWithSchema); +} + // ----- audit runner ----------------------------------------------------------------------------- /** * Runs the audit for the /redirects.json file. - * @param baseUrl - * @param context + * @param baseUrl - The base URL from site.getBaseURL() + * @param context - The audit context containing logger, data access, etc. * @returns {Promise<{ * fullAuditRef, - * url, * auditResult: { * success: boolean, * reasons: [{value: string}], - * details: {issues: *[]} // see `followAnyRedirectForUrl` for structure + * details: {issues: *[]}, // see `followAnyRedirectForUrl` for structure + * auditScopeUrl: string, * } * }>} */ @@ -745,29 +870,39 @@ export async function redirectsAuditRunner(baseUrl, context) { success: true, // default reasons: [{ value: 'File /redirects.json checked.' }], details: { issues: [] }, + auditScopeUrl: baseUrl, // use the baseUrl as the fallback URL for the audit's scope }; + log.info(`${AUDIT_LOGGING_NAME} - Original base URL: ${baseUrl}`); - // run the audit - log.info(`${AUDIT_LOGGING_NAME} - STARTED running audit worker for /redirects.json for ${baseUrl}`); - if (!extractDomainAndProtocol(baseUrl)) { + // Determine the audit scope URL to establish which Source URLs to check + let auditScopeUrl; + try { + auditScopeUrl = await determineAuditScope(baseUrl); + auditResult.auditScopeUrl = auditScopeUrl; // update with actual audit scope URL + log.info(`${AUDIT_LOGGING_NAME} - Audit's scope URL determined: ${auditScopeUrl}`); + } catch (error) { + log.error(`${AUDIT_LOGGING_NAME} - Failed to determine the audit scope URL: ${error.message}`); auditResult.success = false; auditResult.reasons = [{ value: baseUrl, error: 'INVALID URL' }]; + return { + fullAuditRef: baseUrl, + auditResult, + }; } // get a pre-processed array of page URLs from the /redirects.json file - let pageUrls = []; - if (auditResult.success) { - log.info(`${AUDIT_LOGGING_NAME} - PROCESSING /redirects.json for ${baseUrl}`); - pageUrls = await processRedirectsFile(baseUrl, log); - } + log.info(`${AUDIT_LOGGING_NAME} - STARTED running audit worker for /redirects.json for ${auditScopeUrl}`); + const pageUrls = await processRedirectsFile(auditScopeUrl, log); // process the entries & remember the results - const { counts, entriesWithProblems } = await processEntries(pageUrls, baseUrl, log); + const fullBaseUrl = getUrlWithoutPath(auditScopeUrl); + log.info(`${AUDIT_LOGGING_NAME} - Using the fullBaseUrl := ${fullBaseUrl}`); // TODO: !!REMOVE!! + const { counts, entriesWithProblems } = await processEntries(pageUrls, fullBaseUrl, log); if (counts.countTotalEntriesWithProblems > 0) { // Filter issues to fit within size limit const { filteredIssues } = filterIssuesToFitIntoSpace(entriesWithProblems, log); auditResult.details.issues = filteredIssues; - log.warn(`${AUDIT_LOGGING_NAME} - Issues might be reduced from ${entriesWithProblems.length} to ${filteredIssues.length} to fit within space limit`); + log.warn(`${AUDIT_LOGGING_NAME} - Issues could be reduced from ${entriesWithProblems.length} to ${filteredIssues.length} to fit within space limit`); } // end timer @@ -779,7 +914,7 @@ export async function redirectsAuditRunner(baseUrl, context) { log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json has total number of entries checked: ${pageUrls.length}`); log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json has total number of entries with problems: ${counts.countTotalEntriesWithProblems}`); if (counts.countTotalEntriesWithProblems > 0) { - log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json .. duplicate Source URLs: ${counts.countDuplicateSourceUrls}`); + log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json .. duplicated Source URLs: ${counts.countDuplicateSourceUrls}`); log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json .. too qualified URLs: ${counts.countTooQualifiedUrls}`); log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json .. same Source and Dest URLs: ${counts.countHasSameSrcDest}`); log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json .. resulted in HTTP error: ${counts.countHttpErrors}`); @@ -787,10 +922,9 @@ export async function redirectsAuditRunner(baseUrl, context) { log.info(`${AUDIT_LOGGING_NAME} - STATS: /redirects.json .. with too many redirects: ${counts.countTooManyRedirects}`); } - log.info(`${AUDIT_LOGGING_NAME} - DONE with audit worker for processing /redirects.json for ${baseUrl}. Completed in ${formattedElapsed} seconds.`); + log.info(`${AUDIT_LOGGING_NAME} - DONE with audit worker for processing /redirects.json for ${auditScopeUrl}. Completed in ${formattedElapsed} seconds.`); return { - fullAuditRef: baseUrl, - url: baseUrl, + fullAuditRef: auditScopeUrl, auditResult, }; } @@ -892,19 +1026,23 @@ export function getSuggestedFix(result) { * For each entry in the audit data, generates a suggested fix based on the issues found for * that entry. Note that some entries may be skipped based on certain conditions. * - * @param auditUrl - The URL of the audit + * @param auditUrl - The URL of the audit (original base URL from site.getBaseURL()) * @param auditData - The audit data containing the audit result and additional details * @param context - The context object containing the logger - * @returns {Array} The new "auditData" object containing an array of suggestions. + * @returns {Object} The new "auditData" object containing an array of suggestions. */ export function generateSuggestedFixes(auditUrl, auditData, context) { const { log } = context; + // Use the audit scope URL from the audit result + const auditScopeUrl = auditData?.auditResult?.auditScopeUrl || auditUrl; + log.info(`${AUDIT_LOGGING_NAME} - Using audit scope URL: ${auditScopeUrl}`); + const suggestedFixes = []; // {key: '...', fix: '...'} const entriesWithIssues = auditData?.auditResult?.details?.issues ?? []; let skippedEntriesCount = 0; // Counter for skipped entries - log.info(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has ${entriesWithIssues.length} affected entries.`); + log.info(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditScopeUrl} which has ${entriesWithIssues.length} affected entries.`); log.debug(`${AUDIT_LOGGING_NAME} - Audit data: ${JSON.stringify(auditData)}`); for (const row of entriesWithIssues) { @@ -1002,14 +1140,18 @@ export function generateSuggestedFixes(auditUrl, auditData, context) { * Synchronizes existing suggestions with new data by removing outdated suggestions and adding * new ones. * - * @param auditUrl - The URL of the audit + * @param auditUrl - The URL of the audit (original base URL from site.getBaseURL()) * @param auditData - The audit data containing the audit result and additional details * @param context - The context object containing the logger - * @returns {Promise<*>} + * @returns {Promise} The updated auditData object */ export async function generateOpportunities(auditUrl, auditData, context) { const { log } = context; + // Use the audit scope URL from the audit result + const auditScopeUrl = auditData?.auditResult?.auditScopeUrl || auditUrl; + log.info(`${AUDIT_LOGGING_NAME} - Using audit scope URL for opportunity generation: ${auditScopeUrl}`); + // check if audit itself ran successfully if (auditData.auditResult.success === false) { log.info(`${AUDIT_LOGGING_NAME} - Audit itself failed, skipping opportunity creation`); @@ -1037,6 +1179,7 @@ export async function generateOpportunities(auditUrl, auditData, context) { { projectedTrafficLost, projectedTrafficValue, + auditScopeUrl, }, ); @@ -1060,8 +1203,7 @@ export async function generateOpportunities(auditUrl, auditData, context) { } export default new AuditBuilder() + .withUrlResolver(noopUrlResolver) // == site.getBaseURL() .withRunner(redirectsAuditRunner) - .withUrlResolver((site) => composeAuditURL(site.getBaseURL()) - .then((url) => getUrlWithoutPath(prependSchema(url)))) .withPostProcessors([generateSuggestedFixes, generateOpportunities]) .build(); diff --git a/src/redirect-chains/opportunity-data-mapper.js b/src/redirect-chains/opportunity-data-mapper.js index c9fbed008..2bee6453a 100644 --- a/src/redirect-chains/opportunity-data-mapper.js +++ b/src/redirect-chains/opportunity-data-mapper.js @@ -12,8 +12,8 @@ import { DATA_SOURCES } from '../common/constants.js'; -export function createOpportunityData(projectedTrafficMetrics = {}) { - const { projectedTrafficLost, projectedTrafficValue } = projectedTrafficMetrics; +export function createOpportunityData(params = {}) { + const { projectedTrafficLost, projectedTrafficValue, auditScopeUrl } = params; return { runbook: 'https://adobe.sharepoint.com/:w:/r/sites/aemsites-engineering/Shared%20Documents/3%20-%20Experience%20Success/SpaceCat/Runbooks/Acquisition%20-%20SEO/Experience_Success_Studio_Redirect_Chains_Runbook.docx?d=w15b25d46a5124cf29543ed08acf6caae&csf=1&web=1&e=Kiosk9', @@ -30,6 +30,7 @@ export function createOpportunityData(projectedTrafficMetrics = {}) { dataSources: [DATA_SOURCES.SITE], projectedTrafficLost: projectedTrafficLost || 0, projectedTrafficValue: projectedTrafficValue || 0, + auditScopeUrl: auditScopeUrl || '', }, }; } diff --git a/src/redirect-chains/opportunity-utils.js b/src/redirect-chains/opportunity-utils.js index df9350422..6fe1e69cc 100644 --- a/src/redirect-chains/opportunity-utils.js +++ b/src/redirect-chains/opportunity-utils.js @@ -134,7 +134,7 @@ export function ensureFullUrl(url, domain = '') { // Neither has a slash, so add one reasonableUrl = `${domain}/${reasonableUrl}`; } else { - // One has a slash, so concatenate directly + // Only one has a slash, so concatenate directly reasonableUrl = domain + reasonableUrl; } addWwwSubdomain = true; // add 'www.' if needed diff --git a/test/audits/redirect-chains.test.js b/test/audits/redirect-chains.test.js index 00a46b2b6..bcf427b38 100644 --- a/test/audits/redirect-chains.test.js +++ b/test/audits/redirect-chains.test.js @@ -19,6 +19,7 @@ import chaiAsPromised from 'chai-as-promised'; import esmock from 'esmock'; import { redirectsAuditRunner, + determineAuditScope, getJsonData, processRedirectsFile, processEntriesInParallel, @@ -96,6 +97,180 @@ describe('Redirect Chains Audit', () => { }); describe('URL Utilities', () => { + describe('determineAuditScope', () => { + it('should strip all paths when baseUrl has no subpath - bulk.com example', async () => { + // baseUrl: https://bulk.com (no subpath) + // composedAuditURL resolves to: www.bulk.com/uk + // expected result: www.bulk.com (strip all paths) + nock('https://bulk.com') + .get('/') + .reply(301, '', { Location: 'https://www.bulk.com/uk' }); + + nock('https://www.bulk.com') + .get('/uk') + .reply(200); + + const result = await determineAuditScope('https://bulk.com'); + expect(result).to.equal('https://www.bulk.com'); + }); + + it('should keep matching subpath when baseUrl has subpath - bulk.com/fr example', async () => { + // baseUrl: https://bulk.com/fr (has subpath /fr) + // composedAuditURL resolves to: www.bulk.com/fr + // expected result: www.bulk.com/fr (keep matching path) + nock('https://bulk.com') + .get('/fr') + .reply(301, '', { Location: 'https://www.bulk.com/fr' }); + + nock('https://www.bulk.com') + .get('/fr') + .reply(200); + + const result = await determineAuditScope('https://bulk.com/fr'); + expect(result).to.equal('https://www.bulk.com/fr'); + }); + + it('should strip all paths when baseUrl has no subpath - kpmg.com example', async () => { + // baseUrl: https://kpmg.com (no subpath) + // composedAuditURL resolves to: kpmg.com/xx/en.html + // expected result: kpmg.com (strip all paths) + nock('https://kpmg.com') + .get('/') + .reply(301, '', { Location: 'https://kpmg.com/xx/en.html' }); + + nock('https://kpmg.com') + .get('/xx/en.html') + .reply(200); + + const result = await determineAuditScope('https://kpmg.com'); + expect(result).to.equal('https://kpmg.com'); + }); + + it('should strip non-matching paths when baseUrl subpath does not match - realmadrid.com example', async () => { + // baseUrl: https://realmadrid.com/area-vip (has subpath /area-vip) + // composedAuditURL resolves to: www.realmadrid.com/sites/area-vip + // expected result: www.realmadrid.com (paths don't match, strip all) + nock('https://realmadrid.com') + .get('/area-vip') + .reply(301, '', { Location: 'https://www.realmadrid.com/sites/area-vip' }); + + nock('https://www.realmadrid.com') + .get('/sites/area-vip') + .reply(200); + + const result = await determineAuditScope('https://realmadrid.com/area-vip'); + expect(result).to.equal('https://www.realmadrid.com'); + }); + + it('should handle simple redirect with no path change', async () => { + // baseUrl: https://example.com + // composedAuditURL resolves to: www.example.com + // expected result: www.example.com + nock('https://example.com') + .get('/') + .reply(301, '', { Location: 'https://www.example.com/' }); + + nock('https://www.example.com') + .get('/') + .reply(200); + + const result = await determineAuditScope('https://example.com'); + expect(result).to.equal('https://www.example.com'); + }); + + it('should handle baseUrl with path that gets redirected but path is preserved', async () => { + // baseUrl: https://example.com/blog + // composedAuditURL resolves to: www.example.com/blog + // expected result: www.example.com/blog (path matches) + nock('https://example.com') + .get('/blog') + .reply(301, '', { Location: 'https://www.example.com/blog' }); + + nock('https://www.example.com') + .get('/blog') + .reply(200); + + const result = await determineAuditScope('https://example.com/blog'); + expect(result).to.equal('https://www.example.com/blog'); + }); + + it('should handle baseUrl with path that gets redirected to different path', async () => { + // baseUrl: https://example.com/old-path + // composedAuditURL resolves to: www.example.com/new-path + // expected result: www.example.com (paths don't match, strip all) + nock('https://example.com') + .get('/old-path') + .reply(301, '', { Location: 'https://www.example.com/new-path' }); + + nock('https://www.example.com') + .get('/new-path') + .reply(200); + + const result = await determineAuditScope('https://example.com/old-path'); + expect(result).to.equal('https://www.example.com'); + }); + + it('should keep longest common prefix path - westjet.com example', async () => { + // baseUrl: https://westjet.com/en-ca/book-trip/flights/from-calgary + // composedAuditURL resolves to: www.westjet.com/en-ca/best-of-travel/canada/from-calgary + // Path segments comparison: + // original: ['en-ca', 'book-trip', 'flights', 'from-calgary'] + // resolved: ['en-ca', 'best-of-travel', 'canada', 'from-calgary'] + // common prefix: ['en-ca'] (first segment matches, second doesn't) + // expected result: www.westjet.com/en-ca + nock('https://westjet.com') + .get('/en-ca/book-trip/flights/from-calgary') + .reply(301, '', { Location: 'https://www.westjet.com/en-ca/best-of-travel/canada/from-calgary' }); + + nock('https://www.westjet.com') + .get('/en-ca/best-of-travel/canada/from-calgary') + .reply(200); + + const result = await determineAuditScope('https://westjet.com/en-ca/book-trip/flights/from-calgary'); + expect(result).to.equal('https://www.westjet.com/en-ca'); + }); + + it('should keep multiple matching path segments', async () => { + // baseUrl: https://example.com/en/us/products/item + // composedAuditURL resolves to: www.example.com/en/us/services/feature + // Path segments comparison: + // original: ['en', 'us', 'products', 'item'] + // resolved: ['en', 'us', 'services', 'feature'] + // common prefix: ['en', 'us'] (first two match, third doesn't) + // expected result: www.example.com/en/us + nock('https://example.com') + .get('/en/us/products/item') + .reply(301, '', { Location: 'https://www.example.com/en/us/services/feature' }); + + nock('https://www.example.com') + .get('/en/us/services/feature') + .reply(200); + + const result = await determineAuditScope('https://example.com/en/us/products/item'); + expect(result).to.equal('https://www.example.com/en/us'); + }); + + it('should strip all paths when no segments match from the beginning', async () => { + // baseUrl: https://example.com/products/item + // composedAuditURL resolves to: www.example.com/services/feature + // Path segments comparison: + // original: ['products', 'item'] + // resolved: ['services', 'feature'] + // common prefix: [] (first segment doesn't match) + // expected result: www.example.com + nock('https://example.com') + .get('/products/item') + .reply(301, '', { Location: 'https://www.example.com/services/feature' }); + + nock('https://www.example.com') + .get('/services/feature') + .reply(200); + + const result = await determineAuditScope('https://example.com/products/item'); + expect(result).to.equal('https://www.example.com'); + }); + }); + describe('hasProtocol', () => { it('should return true for URLs with protocol', () => { expect(hasProtocol('https://example.com')).to.be.true; @@ -660,6 +835,244 @@ describe('Redirect Chains Audit', () => { expect(result[4].isDuplicateSrc).to.be.false; expect(result[4].ordinalDuplicate).to.equal(0); }); + + it('should try fallback URL without subpath when redirects.json not found with subpath', async () => { + const auditScopeUrl = 'https://www.example.com/fr'; + const fallbackUrl = 'https://www.example.com'; + + const redirectsJson = { + total: 2, + offset: 0, + limit: 2, + data: [ + { Source: '/fr/test', Destination: '/fr/test-dest' }, + { Source: '/en/other', Destination: '/en/other-dest' }, + ], + }; + + // First try fails (with subpath) + nock(auditScopeUrl) + .get('/redirects.json') + .reply(404); + + // Fallback succeeds (without subpath) + nock(fallbackUrl) + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array'); + expect(result).to.have.lengthOf(1); // Only /fr/test matches the scope + expect(result[0].origSrc).to.equal('/fr/test'); + expect(result[0].origDest).to.equal('/fr/test-dest'); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Looking for redirects file at: https:\/\/www\.example\.com\/fr\/redirects\.json/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Redirects file not found with subpaths, trying fallback/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Successfully loaded redirects file from: https:\/\/www\.example\.com\/redirects\.json/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Filtered entries from 2 to 1 based on audit scope: \/fr/)); + }); + + it('should not try fallback when URL without path is same as auditScopeUrl', async () => { + const auditScopeUrl = 'https://www.example.com'; + + // Only one request should be made + nock(auditScopeUrl) + .get('/redirects.json') + .reply(404); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array').that.is.empty; + expect(context.log.info).to.have.been.calledWith(sinon.match(/Looking for redirects file at:/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/No redirects file found or file is empty/)); + // Should NOT log the fallback message since URLs are the same + expect(context.log.info).to.not.have.been.calledWith(sinon.match(/trying fallback/)); + }); + + it('should return empty array when both original and fallback fail', async () => { + const auditScopeUrl = 'https://www.example.com/en'; + const fallbackUrl = 'https://www.example.com'; + + // First try fails (with subpath) + nock(auditScopeUrl) + .get('/redirects.json') + .reply(404); + + // Fallback also fails (without subpath) + nock(fallbackUrl) + .get('/redirects.json') + .reply(404); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array').that.is.empty; + expect(context.log.info).to.have.been.calledWith(sinon.match(/Looking for redirects file at:/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/trying fallback/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/No redirects file found or file is empty/)); + }); + + it('should return all entries when auditScopeUrl has no subpath', async () => { + const auditScopeUrl = 'https://www.example.com'; + + const redirectsJson = { + total: 3, + offset: 0, + limit: 3, + data: [ + { Source: '/en/page1', Destination: '/en/new1' }, + { Source: '/fr/page2', Destination: '/fr/new2' }, + { Source: '/de/page3', Destination: '/de/new3' }, + ], + }; + + nock(auditScopeUrl) + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array'); + expect(result).to.have.lengthOf(3); + expect(context.log.info).to.have.been.calledWith(sinon.match(/No subpath in audit scope URL, returning all 3 entries/)); + }); + + it('should filter entries to only include those matching auditScopeUrl subpath with relative URLs', async () => { + const auditScopeUrl = 'https://www.example.com/fr'; + + const redirectsJson = { + total: 4, + offset: 0, + limit: 4, + data: [ + { Source: '/fr/page1', Destination: '/fr/new1' }, + { Source: '/fr/page2', Destination: '/fr/new2' }, + { Source: '/en/page3', Destination: '/en/new3' }, + { Source: '/de/page4', Destination: '/de/new4' }, + ], + }; + + nock(auditScopeUrl) + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array'); + expect(result).to.have.lengthOf(2); + expect(result[0].origSrc).to.equal('/fr/page1'); + expect(result[1].origSrc).to.equal('/fr/page2'); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Filtered entries from 4 to 2 based on audit scope: \/fr/)); + }); + + it('should filter entries with fully qualified URLs matching auditScopeUrl', async () => { + const auditScopeUrl = 'https://www.example.com/fr'; + + const redirectsJson = { + total: 3, + offset: 0, + limit: 3, + data: [ + { Source: 'https://www.example.com/fr/page1', Destination: '/fr/new1' }, + { Source: 'https://www.example.com/en/page2', Destination: '/en/new2' }, + { Source: '/fr/page3', Destination: '/fr/new3' }, + ], + }; + + nock(auditScopeUrl) + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array'); + expect(result).to.have.lengthOf(2); + // Results are sorted alphabetically, so /fr/page3 comes before https://www.example.com/fr/page1 + expect(result[0].origSrc).to.equal('/fr/page3'); + expect(result[1].origSrc).to.equal('https://www.example.com/fr/page1'); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Filtered entries from 3 to 2 based on audit scope: \/fr/)); + }); + + it('should return empty array when no entries match auditScopeUrl subpath', async () => { + const auditScopeUrl = 'https://www.example.com/fr'; + + const redirectsJson = { + total: 2, + offset: 0, + limit: 2, + data: [ + { Source: '/en/page1', Destination: '/en/new1' }, + { Source: '/de/page2', Destination: '/de/new2' }, + ], + }; + + nock(auditScopeUrl) + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array').that.is.empty; + expect(context.log.info).to.have.been.calledWith(sinon.match(/Filtered entries from 2 to 0 based on audit scope: \/fr/)); + }); + + it('should handle mixed relative and fully qualified URLs correctly', async () => { + const auditScopeUrl = 'https://www.example.com/en'; + + const redirectsJson = { + total: 5, + offset: 0, + limit: 5, + data: [ + { Source: '/en/page1', Destination: '/en/new1' }, + { Source: 'https://www.example.com/en/page2', Destination: '/en/new2' }, + { Source: '/fr/page3', Destination: '/fr/new3' }, + { Source: 'https://www.example.com/fr/page4', Destination: '/fr/new4' }, + { Source: '/en/us/page5', Destination: '/en/us/new5' }, + ], + }; + + nock(auditScopeUrl) + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrl, context.log); + + expect(result).to.be.an('array'); + expect(result).to.have.lengthOf(3); + // Results are sorted alphabetically: /en/page1, /en/us/page5, https://www.example.com/en/page2 + expect(result[0].origSrc).to.equal('/en/page1'); + expect(result[1].origSrc).to.equal('/en/us/page5'); + expect(result[2].origSrc).to.equal('https://www.example.com/en/page2'); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Filtered entries from 5 to 3 based on audit scope: \/en/)); + }); + + it('should handle auditScopeUrl with trailing slash by removing it', async () => { + const auditScopeUrlWithSlash = 'https://www.example.com/'; + + const redirectsJson = { + total: 2, + offset: 0, + limit: 2, + data: [ + { Source: '/page1', Destination: '/new1' }, + { Source: '/page2', Destination: '/new2' }, + ], + }; + + // The nock should expect the URL without the trailing slash + nock('https://www.example.com') + .get('/redirects.json') + .reply(200, redirectsJson); + + const result = await processRedirectsFile(auditScopeUrlWithSlash, context.log); + + expect(result).to.be.an('array'); + expect(result).to.have.lengthOf(2); + expect(result[0].origSrc).to.equal('/page1'); + expect(result[1].origSrc).to.equal('/page2'); + // Verify that the trailing slash was removed in the log message + expect(context.log.info).to.have.been.calledWith(sinon.match(/Looking for redirects file at: https:\/\/www\.example\.com\/redirects\.json/)); + }); }); }); @@ -1553,9 +1966,15 @@ describe('Redirect Chains Audit', () => { expect(fixResult.canApplyFixAutomatically).to.be.true; }); - it('should generate suggested fixes for multiple entries with issues', () => { + it('should generate suggested fixes for multiple entries with issues', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1611,13 +2030,19 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[1]).to.have.property('httpStatusCode'); expect(result.suggestions[1].fix).to.include('is the same as'); - expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has 2 affected entries.`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 2 affected entries/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generated 2 suggested fixes.`); }); - it('should generate suggested fixes for src-is-final fix type', () => { + it('should generate suggested fixes for src-is-final fix type', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1657,13 +2082,19 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[0]).to.have.property('httpStatusCode', 200); expect(result.suggestions[0].fix).to.include('Remove this entry since the Source URL redirects to itself'); - expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has 1 affected entries.`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 1 affected entries/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generated 1 suggested fixes.`); }); - it('should generate suggested fixes for src-is-final with query parameters', () => { + it('should generate suggested fixes for src-is-final with query parameters', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1698,9 +2129,15 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[0].fix).to.include('Remove this entry since the Source URL redirects to itself'); }); - it('should generate suggested fixes for src-is-final with hash fragments', () => { + it('should generate suggested fixes for src-is-final with hash fragments', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1735,9 +2172,15 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[0].fix).to.include('Remove this entry since the Source URL redirects to itself'); }); - it('should generate suggested fixes for src-is-final with multiple redirects', () => { + it('should generate suggested fixes for src-is-final with multiple redirects', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1772,9 +2215,15 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[0].fix).to.include('Remove this entry since the Source URL redirects to itself'); }); - it('should skip suggestions with HTTP 418 + "unexpected end of file" + source equals final', () => { + it('should skip suggestions with HTTP 418 + "unexpected end of file" + source equals final', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1822,15 +2271,21 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[0]).to.have.property('destinationUrl', '/normal-destination'); // Verify logging - expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has 2 affected entries.`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 2 affected entries/)); expect(context.log.debug).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Skipping suggestion for network error case: /network-error-page -> /destination-page`); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Skipped 1 entries due to exclusion criteria.`); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generated 1 suggested fixes.`); }); - it('should not skip suggestions when only some conditions are met', () => { + it('should not skip suggestions when only some conditions are met', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1894,16 +2349,22 @@ describe('Redirect Chains Audit', () => { expect(result.suggestions[2]).to.have.property('sourceUrl', '/partial-match-3'); // Verify logging - no skip messages should be present - expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has 3 affected entries.`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 3 affected entries/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generated 3 suggested fixes.`); // Verify that the correct number of suggestions were generated expect(result.suggestions).to.have.lengthOf(3); }); - it('should handle empty or missing issues array', () => { + it('should handle empty or missing issues array', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + + // Mock URL resolution + nock(baseUrl) + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -1916,23 +2377,48 @@ describe('Redirect Chains Audit', () => { expect(result).to.have.property('suggestions'); expect(result.suggestions).to.be.an('array').that.is.empty; - expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has 0 affected entries.`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 0 affected entries/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generated 0 suggested fixes.`); }); it('should handle missing audit data', () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; + const auditData = {}; const result = generateSuggestedFixes(auditUrl, auditData, context); expect(result).to.have.property('suggestions'); expect(result.suggestions).to.be.an('array').that.is.empty; - expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generating suggestions for URL ${auditUrl} which has 0 affected entries.`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Using audit scope URL/)); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 0 affected entries/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Generated 0 suggested fixes.`); }); + it('should use cached resolved URL when available in auditData', () => { + const baseUrl = 'https://www.example.com'; + const auditUrl = `${baseUrl}/redirects.json`; + const cachedResolvedUrl = 'https://www.example.com'; + + const auditData = { + auditResult: { + success: true, + auditScopeUrl: cachedResolvedUrl, + details: { + issues: [], + }, + }, + }; + + const result = generateSuggestedFixes(auditUrl, auditData, context); + + expect(result).to.have.property('suggestions'); + expect(result.suggestions).to.be.an('array').that.is.empty; + expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Using audit scope URL: ${cachedResolvedUrl}`); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Generating suggestions for URL.*which has 0 affected entries/)); + }); + it('should skip opportunity creation when audit fails', async () => { const baseUrl = 'https://www.example.com'; const auditUrl = `${baseUrl}/redirects.json`; @@ -1946,6 +2432,7 @@ describe('Redirect Chains Audit', () => { const result = await generateOpportunities(auditUrl, auditData, context); expect(result).to.deep.equal(auditData); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Using audit scope URL/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Audit itself failed, skipping opportunity creation`); }); @@ -1961,9 +2448,30 @@ describe('Redirect Chains Audit', () => { const result = await generateOpportunities(auditUrl, auditData, context); expect(result).to.deep.equal(auditData); + expect(context.log.info).to.have.been.calledWith(sinon.match(/Using audit scope URL/)); expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - No suggested fixes found, skipping opportunity creation`); }); + it('should use cached resolved URL when creating opportunities', async () => { + const baseUrl = 'https://www.example.com'; + const auditUrl = `${baseUrl}/redirects.json`; + const cachedResolvedUrl = 'https://www.example.com'; + + const auditData = { + auditResult: { + success: true, + auditScopeUrl: cachedResolvedUrl, + }, + suggestions: [{ key: 'test-key', fix: 'Test fix' }], + }; + + const result = await handlerModule.generateOpportunities(auditUrl, auditData, context); + + expect(result).to.deep.equal(auditData); + expect(context.log.info).to.have.been.calledWith(`${AUDIT_LOGGING_NAME} - Using audit scope URL for opportunity generation: ${cachedResolvedUrl}`); + expect(convertToOpportunityStub).to.have.been.calledOnce; + }); + it('should create opportunities for valid suggestions', async () => { const auditUrl = 'https://www.example.com/redirects.json'; const auditData = { @@ -2044,6 +2552,11 @@ describe('Redirect Chains Audit', () => { describe('Main Audit Runner', () => { it('should run audit successfully', async () => { + // Mock URL resolution for composeAuditURL + nock(url) + .get('/') + .reply(200); + nock(url) .get('/redirects.json') .reply(200, sampleRedirectsJson); @@ -2072,6 +2585,11 @@ describe('Redirect Chains Audit', () => { }); it('should handle missing redirects file', async () => { + // Mock URL resolution for composeAuditURL + nock(url) + .get('/') + .reply(200); + nock(url) .get('/redirects.json') .reply(404); @@ -2082,6 +2600,11 @@ describe('Redirect Chains Audit', () => { }); it('should handle network errors', async () => { + // Mock URL resolution for composeAuditURL + nock(url) + .get('/') + .reply(200); + nock(url) .get('/redirects.json') .replyWithError('Network error'); @@ -2490,6 +3013,11 @@ describe('Redirect Chains Audit', () => { describe('Main Audit Runner Integration', () => { it('should not filter issues when they fit within size limit', async () => { + // Mock URL resolution for composeAuditURL + nock(url) + .get('/') + .reply(200); + nock(url) .get('/redirects.json') .reply(200, { @@ -2505,7 +3033,7 @@ describe('Redirect Chains Audit', () => { expect(result).to.have.property('auditResult'); expect(result.auditResult).to.have.property('success'); expect(context.log.warn).to.have.been.calledWith( - sinon.match(/Issues might be reduced/) + sinon.match(/Issues could be reduced/) ); }); @@ -2535,8 +3063,14 @@ describe('Redirect Chains Audit', () => { }); describe('Size Analysis Logging', () => { - it('should log suggestion size analysis', () => { + it('should log suggestion size analysis', async () => { const auditUrl = 'https://www.example.com/redirects.json'; + + // Mock URL resolution for composeAuditURL + nock('https://www.example.com') + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -2584,8 +3118,14 @@ describe('Redirect Chains Audit', () => { ); }); - it('should log warning when suggestions exceed 400 KB', () => { + it('should log warning when suggestions exceed 400 KB', async () => { const auditUrl = 'https://www.example.com/redirects.json'; + + // Mock URL resolution + nock('https://www.example.com') + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -2616,8 +3156,14 @@ describe('Redirect Chains Audit', () => { ); }); - it('should log suggestion size analysis with individual metrics', () => { + it('should log suggestion size analysis with individual metrics', async () => { const auditUrl = 'https://www.example.com/redirects.json'; + + // Mock URL resolution + nock('https://www.example.com') + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: { @@ -2668,8 +3214,14 @@ describe('Redirect Chains Audit', () => { ); }); - it('should handle suggestions with zero-byte serialization (covers smallestSize = 0 branch)', () => { + it('should handle suggestions with zero-byte serialization (covers smallestSize = 0 branch)', async () => { const auditUrl = 'https://www.example.com/redirects.json'; + + // Mock URL resolution + nock('https://www.example.com') + .get('/redirects.json') + .reply(200); + const auditData = { auditResult: { details: {