Skip to content

Commit 0733a90

Browse files
radhikagpt1208radhikagpt1208
andauthored
feat: introduce headings audit in preflight (#1562)
This PR adds heading issue detection to the preflight audit. When a webpage is scraped, all heading elements are validated and reported with SEO impact, actionable recommendations, and AI-generated suggestions. The headings audit in preflight will now return opportunities in this format: ``` { "result": [ { "pageUrl": "https://author-p125761-e1234605.adobeaemcloud.com/content/aso-preflight/index/preflight-headings-test.html", "step": "suggest", "audits": [ { "name": "headings", "type": "seo", "opportunities": [ { "check": "heading-missing-h1", "seoRecommendation": "Pages should have exactly one H1 element for SEO and accessibility.", "issue": "Missing H1 Heading", "issueDetails": "Page does not have an H1 element", "seoImpact": "High", "aiSuggestion": "Explore Pain Perception: Insights and Reflections" }, { "check": "heading-order-invalid", "seoRecommendation": "Heading levels should increase by one (example: H1→H2), not jump levels (example: H1→H3).", "issue": "Invalid Heading Order", "issueDetails": "Heading hierarchy skips levels.", "seoImpact": "Moderate", "suggestion": "Adjust heading levels to maintain proper hierarchy." }, { "check": "heading-order-invalid", "seoRecommendation": "Heading levels should increase by one (example: H1→H2), not jump levels (example: H1→H3).", "issue": "Invalid Heading Order", "issueDetails": "Heading hierarchy skips levels.", "seoImpact": "Moderate", "suggestion": "Adjust heading levels to maintain proper hierarchy." } ] } ] } ] } ``` Please ensure your pull request adheres to the following guidelines: - [ ] make sure to link the related issues in this description - [ ] when merging / squashing, make sure the fixed issue references are visible in the commits, for easy compilation of release notes - [ ] If the data sources for any opportunity have been updated/added, please update the [wiki](https://wiki.corp.adobe.com/display/AEMSites/Data+Sources+for+Opportunities) for that opportunity. ## Related Issues Thanks for contributing! 
--------- Co-authored-by: radhikagpt1208 <[email protected]>
1 parent 008f5f5 commit 0733a90

File tree

8 files changed

+787
-74
lines changed

8 files changed

+787
-74
lines changed

src/headings/handler.js

Lines changed: 69 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -31,36 +31,42 @@ export const HEADINGS_CHECKS = Object.freeze({
3131
HEADING_EMPTY: {
3232
check: 'heading-empty',
3333
title: 'Empty Heading',
34+
description: '{tagName} heading is empty.',
3435
explanation: 'Heading elements (H2–H6) should not be empty.',
3536
suggestion: 'Add descriptive text or remove the empty heading.',
3637
},
3738
HEADING_MISSING_H1: {
3839
check: 'heading-missing-h1',
3940
title: 'Missing H1 Heading',
41+
description: 'Page does not have an H1 element',
4042
explanation: 'Pages should have exactly one H1 element for SEO and accessibility.',
4143
suggestion: 'Add an H1 element describing the main content.',
4244
},
4345
HEADING_H1_LENGTH: {
4446
check: 'heading-h1-length',
4547
title: 'H1 Length',
48+
description: `H1 element is either empty or exceeds ${H1_LENGTH_CHARS} characters.`,
4649
explanation: `H1 elements should be less than ${H1_LENGTH_CHARS} characters.`,
4750
suggestion: `Update the H1 to be less than ${H1_LENGTH_CHARS} characters`,
4851
},
4952
HEADING_MULTIPLE_H1: {
5053
check: 'heading-multiple-h1',
5154
title: 'Multiple H1 Headings',
55+
description: 'Page has more than one H1 element.',
5256
explanation: 'Pages should have only one H1 element.',
5357
suggestion: 'Change additional H1 elements to H2 or appropriate levels.',
5458
},
5559
HEADING_ORDER_INVALID: {
5660
check: 'heading-order-invalid',
5761
title: 'Invalid Heading Order',
62+
description: 'Heading hierarchy skips levels.',
5863
explanation: 'Heading levels should increase by one (example: H1→H2), not jump levels (example: H1→H3).',
5964
suggestion: 'Adjust heading levels to maintain proper hierarchy.',
6065
},
6166
TOPPAGES: {
6267
check: 'top-pages',
6368
title: 'Top Pages',
69+
description: 'No top pages available for audit',
6470
explanation: 'No top pages found',
6571
},
6672
});
@@ -199,7 +205,7 @@ export async function getH1HeadingASuggestion(url, log, pageTags, context, brand
199205
}
200206
}
201207

202-
async function getBrandGuidelines(healthyTagsObject, log, context) {
208+
export async function getBrandGuidelines(healthyTagsObject, log, context) {
203209
const azureOpenAIClient = AzureOpenAIClient.createFrom(context);
204210
const prompt = await getPrompt(
205211
{
@@ -218,48 +224,30 @@ async function getBrandGuidelines(healthyTagsObject, log, context) {
218224
}
219225

220226
/**
221-
* Validate heading semantics for a single page.
227+
* Validate heading semantics for a single page from a scrapeJsonObject.
222228
* - Ensure heading level increases by at most 1 when going deeper (no jumps, e.g., h1 → h3)
223229
* - Ensure headings are not empty
224230
*
225-
* @param {string} url
226-
* @param {Object} log
231+
* @param {string} url - The URL being validated
232+
* @param {Object} scrapeJsonObject - The scraped page data from S3
233+
* @param {Object} log - Logger instance
234+
* @param {Object} context - Audit context
235+
* @param {Object} seoChecks - SeoChecks instance for tracking healthy tags
227236
* @returns {Promise<{url: string, checks: Array}>}
228237
*/
229-
export async function validatePageHeadings(
238+
export async function validatePageHeadingFromScrapeJson(
230239
url,
240+
scrapeJsonObject,
231241
log,
232-
site,
233-
allKeys,
234-
s3Client,
235-
S3_SCRAPER_BUCKET_NAME,
236-
context,
237242
seoChecks,
238243
) {
239-
if (!url) {
240-
log.error('URL is undefined or null, cannot validate headings');
241-
return {
242-
url,
243-
checks: [],
244-
};
245-
}
246-
247244
try {
248-
const scrapeJsonPath = getScrapeJsonPath(url, site.getId());
249-
const s3Key = allKeys.find((key) => key.includes(scrapeJsonPath));
250245
let document = null;
251-
let scrapeJsonObject = null;
252-
if (!s3Key) {
253-
log.error(`Scrape JSON path not found for ${url}, skipping headings audit`);
246+
if (!scrapeJsonObject) {
247+
log.error(`Scrape JSON object not found for ${url}, skipping headings audit`);
254248
return null;
255249
} else {
256-
scrapeJsonObject = await getObjectFromKey(s3Client, S3_SCRAPER_BUCKET_NAME, s3Key, log);
257-
if (!scrapeJsonObject) {
258-
log.error(`Scrape JSON object not found for ${url}, skipping headings audit`);
259-
return null;
260-
} else {
261-
document = new JSDOM(scrapeJsonObject.scrapeResult.rawBody).window.document;
262-
}
250+
document = new JSDOM(scrapeJsonObject.scrapeResult.rawBody).window.document;
263251
}
264252

265253
const pageTags = {
@@ -282,6 +270,7 @@ export async function validatePageHeadings(
282270
checks.push({
283271
check: HEADINGS_CHECKS.HEADING_MISSING_H1.check,
284272
checkTitle: HEADINGS_CHECKS.HEADING_MISSING_H1.title,
273+
description: HEADINGS_CHECKS.HEADING_MISSING_H1.description,
285274
success: false,
286275
explanation: HEADINGS_CHECKS.HEADING_MISSING_H1.explanation,
287276
suggestion: HEADINGS_CHECKS.HEADING_MISSING_H1.suggestion,
@@ -298,6 +287,7 @@ export async function validatePageHeadings(
298287
checks.push({
299288
check: HEADINGS_CHECKS.HEADING_MULTIPLE_H1.check,
300289
checkTitle: HEADINGS_CHECKS.HEADING_MULTIPLE_H1.title,
290+
description: HEADINGS_CHECKS.HEADING_MULTIPLE_H1.description,
301291
success: false,
302292
explanation: `Found ${h1Elements.length} h1 elements: ${HEADINGS_CHECKS.HEADING_MULTIPLE_H1.explanation}`,
303293
suggestion: HEADINGS_CHECKS.HEADING_MULTIPLE_H1.suggestion,
@@ -312,6 +302,7 @@ export async function validatePageHeadings(
312302
checks.push({
313303
check: HEADINGS_CHECKS.HEADING_H1_LENGTH.check,
314304
checkTitle: HEADINGS_CHECKS.HEADING_H1_LENGTH.title,
305+
description: HEADINGS_CHECKS.HEADING_H1_LENGTH.description,
315306
success: false,
316307
explanation: HEADINGS_CHECKS.HEADING_H1_LENGTH.explanation,
317308
suggestion: HEADINGS_CHECKS.HEADING_H1_LENGTH.suggestion,
@@ -334,6 +325,7 @@ export async function validatePageHeadings(
334325
return {
335326
check: HEADINGS_CHECKS.HEADING_EMPTY.check,
336327
checkTitle: HEADINGS_CHECKS.HEADING_EMPTY.title,
328+
description: HEADINGS_CHECKS.HEADING_EMPTY.description.replace('{tagName}', heading.tagName),
337329
success: false,
338330
explanation: `Found empty text for ${heading.tagName}: ${HEADINGS_CHECKS.HEADING_EMPTY.explanation}`,
339331
suggestion: HEADINGS_CHECKS.HEADING_EMPTY.suggestion,
@@ -390,6 +382,53 @@ export async function validatePageHeadings(
390382
}
391383
}
392384

/**
 * Validate heading semantics for a single page.
 *
 * Looks up the page's scrape JSON key in `allKeys`, loads that object via
 * `getObjectFromKey`, and delegates the heading checks to
 * `validatePageHeadingFromScrapeJson`.
 *
 * @param {string} url - URL of the page to validate
 * @param {Object} log - Logger; `log.error` is called on every failure path
 * @param {Object} site - Site object; `site.getId()` is used to build the scrape JSON path
 * @param {Array} allKeys - Keys searched for the scrape JSON path (presumably S3 key strings)
 * @param {Object} s3Client - Client passed through to `getObjectFromKey`
 * @param {string} S3_SCRAPER_BUCKET_NAME - Bucket name passed through to `getObjectFromKey`
 * @param {Object} context - Not used by this function; kept so the signature stays unchanged
 * @param {Object} seoChecks - Forwarded to `validatePageHeadingFromScrapeJson`
 * @returns {Promise<{url: string, checks: Array}|null>} The delegate's result;
 *   `null` when no matching key is found; `{ url, checks: [] }` when `url` is
 *   falsy or any step throws.
 */
export async function validatePageHeadings(
  url,
  log,
  site,
  allKeys,
  s3Client,
  S3_SCRAPER_BUCKET_NAME,
  context,
  seoChecks,
) {
  if (!url) {
    log.error('URL is undefined or null, cannot validate headings');
    return { url, checks: [] };
  }

  try {
    const scrapeJsonPath = getScrapeJsonPath(url, site.getId());
    const s3Key = allKeys.find((key) => key.includes(scrapeJsonPath));
    if (!s3Key) {
      log.error(`Scrape JSON path not found for ${url}, skipping headings audit`);
      return null;
    }

    const scrapeJsonObject = await getObjectFromKey(s3Client, S3_SCRAPER_BUCKET_NAME, s3Key, log);
    // `return await` (not a bare `return`) so that a rejection from the delegate
    // is handled by the catch below instead of escaping to the caller.
    return await validatePageHeadingFromScrapeJson(url, scrapeJsonObject, log, seoChecks);
  } catch (error) {
    log.error(`Error validating headings for ${url}: ${error.message}`);
    return { url, checks: [] };
  }
}
431+
393432
/**
394433
* Main headings audit runner
395434
* @param {string} baseURL

src/preflight/handler.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import metatags from './metatags.js';
2525
import links from './links.js';
2626
import readability from '../readability/handler.js';
2727
import accessibility from './accessibility.js';
28+
import headings from './headings.js';
2829

2930
const { AUDIT_STEP_DESTINATIONS } = Audit;
3031
export const PREFLIGHT_STEP_IDENTIFY = 'identify';
@@ -46,6 +47,7 @@ export const AUDIT_LOREM_IPSUM = 'lorem-ipsum';
4647
export const AUDIT_H1_COUNT = 'h1-count';
4748
export const AUDIT_ACCESSIBILITY = 'accessibility';
4849
export const AUDIT_READABILITY = 'readability';
50+
export const AUDIT_HEADINGS = 'headings';
4951

5052
const AVAILABLE_CHECKS = [
5153
AUDIT_CANONICAL,
@@ -56,12 +58,14 @@ const AVAILABLE_CHECKS = [
5658
AUDIT_H1_COUNT,
5759
AUDIT_ACCESSIBILITY,
5860
AUDIT_READABILITY,
61+
AUDIT_HEADINGS,
5962
];
6063

6164
export const PREFLIGHT_HANDLERS = {
6265
canonical,
6366
metatags,
6467
links,
68+
headings,
6569
readability,
6670
accessibility,
6771
};

0 commit comments

Comments
 (0)