Skip to content

Commit 01d3e2e

Browse files
authored
[chore] Add intra-repo non-doc-page links to refcache (#6177)
1 parent 52225b7 commit 01d3e2e

File tree

4 files changed

+73
-11
lines changed

4 files changed

+73
-11
lines changed

.htmltest.yml

+8-5
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,14 @@ IgnoreURLs: # list of regexes of paths or URLs to be ignored
5555
# Ignore Docsy-generated GitHub links for now, until
5656
# https://github.com/google/docsy/issues/1432 is fixed
5757
- ^https?://github\.com/.*?/.*?/(new|edit|issues/new\?title)/ # view-page, edit-source etc
58-
# Here's an approximate regex to avoid the "View page source" links. TODO: fix this in Docsy
59-
- ^https?://github\.com/open-telemetry/opentelemetry.io/tree/
60-
# FIXME: A patch until we can get Docsy to mark "View page source" links as excluded from link checking.
61-
# Actually, it would be better to pin the version of the OTel spec.
62-
- ^https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/logs/event-(api|sdk)\.md
58+
# Ignore "View page source" links, except for spec pages, i.e., links starting with
59+
# https://github.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/specs
60+
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/[^e]
61+
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/es
62+
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/.*?/_index.md$
63+
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/[^d]
64+
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/[^s]
65+
- ^https://github\.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/security
6366
# FIXME: same issue as for the OTel spec mentioned above:
6467
- ^https://github.com/open-telemetry/semantic-conventions/tree/main
6568

scripts/double-check-refcache-400s.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ async function retry400sAndUpdateCache() {
3434
}
3535

3636
process.stdout.write(`Checking: ${url} (was ${StatusCode})... `);
37-
const status = await getUrlStatus(url);
37+
const status = await getUrlStatus(url, true);
3838
console.log(`${status}.`);
3939

4040
if (!isHttp2XX(status)) continue;

scripts/get-url-status.mjs

+20-5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import puppeteer from 'puppeteer';
44

5+
const cratesIoURL = 'https://crates.io/crates/';
56
let verbose = false;
67

78
function log(...args) {
@@ -24,8 +25,20 @@ async function getUrlHeadless(url) {
2425

2526
if (!response) throw new Error('No response from server.');
2627

27-
const status = response.status();
28-
log(` Headless fetch returned HTTP status code: ${status}`);
28+
let status = response.status();
29+
const title = await page.title();
30+
31+
// Handles special case of crates.io. For details, see:
32+
// https://github.com/rust-lang/crates.io/issues/788
33+
if (url.startsWith(cratesIoURL)) {
34+
const crateName = url.split('/').pop();
35+
// An existing crate's page title is `${crateName} - crates.io: Rust Package Registry`; only the crate-name prefix is checked
36+
if (!title.startsWith(crateName)) status = 404;
37+
}
38+
39+
log(
40+
`Headless fetch returned HTTP status code: ${status}; page title: '${title}'`,
41+
);
2942

3043
return status;
3144
} catch (error) {
@@ -66,9 +79,11 @@ export function isHttp2XX(status) {
6679
return status && status >= 200 && status < 300;
6780
}
6881

69-
export async function getUrlStatus(url) {
82+
export async function getUrlStatus(url, _verbose = false) {
83+
verbose = _verbose;
7084
let status = await getUrlHeadless(url);
71-
if (!isHttp2XX(status)) {
85+
// If headless fetch fails, try in browser for non-404 statuses
86+
if (!isHttp2XX(status) && status !== 404) {
7287
status = await getUrlInBrowser(url);
7388
}
7489
return status;
@@ -83,7 +98,7 @@ async function mainCLI() {
8398
process.exit(1);
8499
}
85100

86-
const status = await getUrlStatus(url);
101+
const status = await getUrlStatus(url, verbose);
87102
process.exit(isHttp2XX(status) ? 0 : 1);
88103
}
89104

static/refcache.json

+44
Original file line numberDiff line numberDiff line change
@@ -10007,6 +10007,50 @@
1000710007
"StatusCode": 206,
1000810008
"LastSeen": "2025-02-01T07:12:04.503997-05:00"
1000910009
},
10010+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/archetypes/": {
10011+
"StatusCode": 206,
10012+
"LastSeen": "2025-02-02T12:11:00.765175-05:00"
10013+
},
10014+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/archetypes/blog.md": {
10015+
"StatusCode": 206,
10016+
"LastSeen": "2025-02-02T12:10:58.811797-05:00"
10017+
},
10018+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/content-modules": {
10019+
"StatusCode": 206,
10020+
"LastSeen": "2025-02-02T12:11:01.039226-05:00"
10021+
},
10022+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/content/en/docs/specs/status.md": {
10023+
"StatusCode": 206,
10024+
"LastSeen": "2025-02-02T12:11:42.468045-05:00"
10025+
},
10026+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem": {
10027+
"StatusCode": 206,
10028+
"LastSeen": "2025-02-02T12:10:57.073038-05:00"
10029+
},
10030+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem/adopters.yaml": {
10031+
"StatusCode": 206,
10032+
"LastSeen": "2025-02-02T12:10:58.803368-05:00"
10033+
},
10034+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem/distributions.yaml": {
10035+
"StatusCode": 206,
10036+
"LastSeen": "2025-02-02T12:11:03.290407-05:00"
10037+
},
10038+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/ecosystem/vendors.yaml": {
10039+
"StatusCode": 206,
10040+
"LastSeen": "2025-02-02T12:11:04.640959-05:00"
10041+
},
10042+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/data/registry": {
10043+
"StatusCode": 206,
10044+
"LastSeen": "2025-02-02T12:10:58.178169-05:00"
10045+
},
10046+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/layouts/shortcodes/docs": {
10047+
"StatusCode": 206,
10048+
"LastSeen": "2025-02-02T12:10:59.64214-05:00"
10049+
},
10050+
"https://github.com/open-telemetry/opentelemetry.io/tree/main/templates/registry-entry.yml": {
10051+
"StatusCode": 206,
10052+
"LastSeen": "2025-02-02T12:11:00.926611-05:00"
10053+
},
1001010054
"https://github.com/open-telemetry/otel-arrow": {
1001110055
"StatusCode": 206,
1001210056
"LastSeen": "2025-01-30T17:00:10.089894-05:00"

0 commit comments

Comments
 (0)