Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[chore] Add intra-repo non-doc-page links to refcache #6177

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions .htmltest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,14 @@ IgnoreURLs: # list of regexs of paths or URLs to be ignored
# Ignore Docsy-generated GitHub links for now, until
# https://github.com/google/docsy/issues/1432 is fixed
- ^https?://github\.com/.*?/.*?/(new|edit|issues/new\?title)/ # view-page, edit-source etc
# Here's an approximate regex to avoid the "View page source" links. TODO: fix this in Docsy
- ^https?://github\.com/open-telemetry/opentelemetry.io/tree/
# FIXME: A patch until we can get Docsy to mark "View page source" links as excluded from link checking.
# A better fix would be to pin the version of the OTel spec.
- ^https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/logs/event-(api|sdk)\.md
# Ignore "View page source" links, except for spec pages, i.e., links starting with
# https://github.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/specs
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/[^e]
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/es
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/.*?/_index.md$
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/[^d]
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/[^s]
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/security
# FIXME: same issue as for the OTel spec mentioned above:
- ^https://github.com/open-telemetry/semantic-conventions/tree/main

Expand Down
2 changes: 1 addition & 1 deletion scripts/double-check-refcache-400s.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ async function retry400sAndUpdateCache() {
}

process.stdout.write(`Checking: ${url} (was ${StatusCode})... `);
const status = await getUrlStatus(url);
const status = await getUrlStatus(url, true);
console.log(`${status}.`);

if (!isHttp2XX(status)) continue;
Expand Down
25 changes: 20 additions & 5 deletions scripts/get-url-status.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import puppeteer from 'puppeteer';

const cratesIoURL = 'https://crates.io/crates/';
let verbose = false;

function log(...args) {
Expand All @@ -24,8 +25,20 @@ async function getUrlHeadless(url) {

if (!response) throw new Error('No response from server.');

const status = response.status();
log(` Headless fetch returned HTTP status code: ${status}`);
let status = response.status();
const title = await page.title();

// Handles special case of crates.io. For details, see:
// https://github.com/rust-lang/crates.io/issues/788
if (url.startsWith(cratesIoURL)) {
const crateName = url.split('/').pop();
// Crate found iff title is `${crateName} - crates.io: Rust Package Registry`
if (!title.startsWith(crateName)) status = 404;
}

log(
`Headless fetch returned HTTP status code: ${status}; page title: '${title}'`,
);

return status;
} catch (error) {
Expand Down Expand Up @@ -66,9 +79,11 @@ export function isHttp2XX(status) {
return status && status >= 200 && status < 300;
}

export async function getUrlStatus(url) {
export async function getUrlStatus(url, _verbose = false) {
verbose = _verbose;
let status = await getUrlHeadless(url);
if (!isHttp2XX(status)) {
// If headless fetch fails, try in browser for non-404 statuses
if (!isHttp2XX(status) && status !== 404) {
status = await getUrlInBrowser(url);
}
return status;
Expand All @@ -83,7 +98,7 @@ async function mainCLI() {
process.exit(1);
}

const status = await getUrlStatus(url);
const status = await getUrlStatus(url, verbose);
process.exit(isHttp2XX(status) ? 0 : 1);
}

Expand Down
44 changes: 44 additions & 0 deletions static/refcache.json
Original file line number Diff line number Diff line change
Expand Up @@ -10007,6 +10007,50 @@
"StatusCode": 206,
"LastSeen": "2025-02-01T07:12:04.503997-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/archetypes/": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:11:00.765175-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/archetypes/blog.md": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:10:58.811797-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/content-modules": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:11:01.039226-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/specs/status.md": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:11:42.468045-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:10:57.073038-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem/adopters.yaml": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:10:58.803368-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem/distributions.yaml": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:11:03.290407-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem/vendors.yaml": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:11:04.640959-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/registry": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:10:58.178169-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/layouts/shortcodes/docs": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:10:59.64214-05:00"
},
"https://github.com/open-telemetry/opentelemetry.io/tree/main/templates/registry-entry.yml": {
"StatusCode": 206,
"LastSeen": "2025-02-02T12:11:00.926611-05:00"
},
"https://github.com/open-telemetry/otel-arrow": {
"StatusCode": 206,
"LastSeen": "2025-01-30T17:00:10.089894-05:00"
Expand Down