Skip to content

Commit 423cce0

Browse files
chalin and tiffany76 authored
[CI] Check links to crates.io, and link fix (#6184)
Co-authored-by: Tiffany Hrabusa <[email protected]>
1 parent cbf96ed commit 423cce0

5 files changed

+144
-18
lines changed

.htmltest.yml

-2
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,6 @@ IgnoreURLs: # list of regexs of paths or URLs to be ignored
7070
# e.g.: https://www.microsoft.com/en-ca/sql-server.
7171
- ^https://www.microsoft.com/sql-server$
7272

73-
# TODO: drop after fix to https://github.com/rust-lang/crates.io/issues/788
74-
- ^https://crates\.io/crates
7573
# TODO move into content/en/blog/2023/humans-of-otel.md once https://github.com/open-telemetry/opentelemetry.io/issues/3889 is implemented
7674
- ^https://shorturl.at/osHRX$
7775
# TODO move into content/en/blog/2023/contributing-to-otel/index.md once https://github.com/open-telemetry/opentelemetry.io/issues/3889 is implemented

data/registry/instrumentation-rust-trillium.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@ urls:
1515
createdAt: 2021-04-25
1616
package:
1717
registry: crates
18-
name: opentelemetry-trillium-opentelemetry
18+
name: trillium-opentelemetry
1919
version: 0.2.16
2020
isFirstParty: true

scripts/double-check-refcache-400s.mjs

+5-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import fs from 'fs/promises';
44
import { getUrlStatus, isHttp2XX } from './get-url-status.mjs';
55

66
const CACHE_FILE = 'static/refcache.json';
7+
const cratesIoURL = 'https://crates.io/crates/';
78

89
async function readRefcache() {
910
try {
@@ -28,13 +29,14 @@ async function retry400sAndUpdateCache() {
2829
for (const [url, details] of Object.entries(cache)) {
2930
const { StatusCode, LastSeen } = details;
3031
if (isHttp2XX(StatusCode)) continue;
31-
if (StatusCode === 404) {
32+
if (StatusCode === 404 && !url.startsWith(cratesIoURL)) {
3233
console.log(`Skipping 404: ${url} (last seen ${LastSeen}).`);
3334
continue;
3435
}
3536

36-
process.stdout.write(`Checking: ${url} (was ${StatusCode})... `);
37-
const status = await getUrlStatus(url, true);
37+
process.stdout.write(`Checking: ${url} (was ${StatusCode}) ... `);
38+
const verbose = false;
39+
const status = await getUrlStatus(url, verbose);
3840
console.log(`${status}.`);
3941

4042
if (!isHttp2XX(status)) continue;

scripts/get-url-status.mjs

+38-12
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,50 @@
11
#!/usr/bin/env node
22

3-
import puppeteer from 'puppeteer';
3+
import puppeteer from 'puppeteer'; // Consider using puppeteer-core
44

55
const cratesIoURL = 'https://crates.io/crates/';
66
let verbose = false;
77

88
function log(...args) {
9-
if (verbose) console.log(...args);
9+
if (!verbose) return;
10+
const lastArg = args[args.length - 1];
11+
if (typeof lastArg === 'string' && lastArg.endsWith(' ')) {
12+
process.stdout.write(args.join(' '));
13+
} else {
14+
console.log(...args);
15+
}
1016
}
1117

1218
async function getUrlHeadless(url) {
13-
let browser;
19+
// Get the URL, headless, while trying our best to avoid triggering
20+
// bot-rejection from some servers. Returns the HTTP status code.
1421

15-
log(`Trying headless fetch of ${url}`);
22+
log(`Headless fetch of ${url} ... `);
1623

24+
let browser;
1725
try {
18-
browser = await puppeteer.launch();
26+
// cSpell:ignore KHTML
27+
const userAgent =
28+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' +
29+
'(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
30+
31+
browser = await puppeteer.launch({
32+
headless: true,
33+
args: [
34+
'--no-sandbox',
35+
'--disable-setuid-sandbox',
36+
`--user-agent=${userAgent}`,
37+
],
38+
});
1939
const page = await browser.newPage();
40+
await page.setUserAgent(userAgent);
41+
await page.setExtraHTTPHeaders({
42+
'Accept-Language': 'en-US,en;q=0.9',
43+
});
2044

2145
const response = await page.goto(url, {
2246
waitUntil: 'networkidle2',
23-
timeout: 9000,
47+
timeout: 10_000,
2448
});
2549

2650
if (!response) throw new Error('No response from server.');
@@ -32,13 +56,13 @@ async function getUrlHeadless(url) {
3256
// https://github.com/rust-lang/crates.io/issues/788
3357
if (url.startsWith(cratesIoURL)) {
3458
const crateName = url.split('/').pop();
35-
// Crate found iff title is `${crateName} - crates.io: Rust Package Registry`
36-
if (!title.startsWith(crateName)) status = 404;
59+
// E.g. 'https://crates.io/crates/opentelemetry-sdk' -> 'opentelemetry-sdk'
60+
const crateNameRegex = new RegExp(crateName.replace(/-/g, '[-_]'));
61+
// Crate found if title starts with crateName (in kebab or snake case)
62+
if (!crateNameRegex.test(title)) status = 404;
3763
}
3864

39-
log(
40-
`Headless fetch returned HTTP status code: ${status}; page title: '${title}'`,
41-
);
65+
log(`${status}; page title: '${title}'`);
4266

4367
return status;
4468
} catch (error) {
@@ -91,14 +115,16 @@ export async function getUrlStatus(url, _verbose = false) {
91115

92116
async function mainCLI() {
93117
const url = process.argv[2];
94-
verbose = true; // process.argv.includes('--verbose');
118+
verbose = !process.argv.includes('--quiet');
95119

96120
if (!url) {
97121
console.error(`Usage: ${process.argv[1]} URL`);
98122
process.exit(1);
99123
}
100124

101125
const status = await getUrlStatus(url, verbose);
126+
if (!verbose) console.log(status);
127+
102128
process.exit(isHttp2XX(status) ? 0 : 1);
103129
}
104130

static/refcache.json

+100
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,106 @@
535535
"StatusCode": 206,
536536
"LastSeen": "2025-01-13T12:41:25.904834-05:00"
537537
},
538+
"https://crates.io/crates/actix-web-opentelemetry": {
539+
"StatusCode": 200,
540+
"LastSeen": "2025-02-02T17:57:45.084Z"
541+
},
542+
"https://crates.io/crates/axum-tracing-opentelemetry": {
543+
"StatusCode": 200,
544+
"LastSeen": "2025-02-02T17:57:47.339Z"
545+
},
546+
"https://crates.io/crates/opentelemetry": {
547+
"StatusCode": 200,
548+
"LastSeen": "2025-02-02T17:57:49.61Z"
549+
},
550+
"https://crates.io/crates/opentelemetry-api": {
551+
"StatusCode": 200,
552+
"LastSeen": "2025-02-02T18:05:36.945Z"
553+
},
554+
"https://crates.io/crates/opentelemetry-application-insights": {
555+
"StatusCode": 200,
556+
"LastSeen": "2025-02-02T17:57:54.019Z"
557+
},
558+
"https://crates.io/crates/opentelemetry-aws": {
559+
"StatusCode": 200,
560+
"LastSeen": "2025-02-02T17:57:56.112Z"
561+
},
562+
"https://crates.io/crates/opentelemetry-contrib": {
563+
"StatusCode": 200,
564+
"LastSeen": "2025-02-02T17:57:58.245Z"
565+
},
566+
"https://crates.io/crates/opentelemetry-datadog": {
567+
"StatusCode": 200,
568+
"LastSeen": "2025-02-02T17:58:00.377Z"
569+
},
570+
"https://crates.io/crates/opentelemetry-dynatrace": {
571+
"StatusCode": 200,
572+
"LastSeen": "2025-02-02T17:58:02.494Z"
573+
},
574+
"https://crates.io/crates/opentelemetry-http": {
575+
"StatusCode": 200,
576+
"LastSeen": "2025-02-02T17:58:04.819Z"
577+
},
578+
"https://crates.io/crates/opentelemetry-jaeger": {
579+
"StatusCode": 200,
580+
"LastSeen": "2025-02-02T17:58:07.016Z"
581+
},
582+
"https://crates.io/crates/opentelemetry-jaeger-propagator": {
583+
"StatusCode": 200,
584+
"LastSeen": "2025-02-02T17:58:09.226Z"
585+
},
586+
"https://crates.io/crates/opentelemetry-otlp": {
587+
"StatusCode": 200,
588+
"LastSeen": "2025-02-02T17:58:11.386Z"
589+
},
590+
"https://crates.io/crates/opentelemetry-prometheus": {
591+
"StatusCode": 200,
592+
"LastSeen": "2025-02-02T17:58:13.549Z"
593+
},
594+
"https://crates.io/crates/opentelemetry-sdk": {
595+
"StatusCode": 200,
596+
"LastSeen": "2025-02-02T18:05:39.108Z"
597+
},
598+
"https://crates.io/crates/opentelemetry-semantic-conventions": {
599+
"StatusCode": 200,
600+
"LastSeen": "2025-02-02T17:58:17.816Z"
601+
},
602+
"https://crates.io/crates/opentelemetry-stackdriver": {
603+
"StatusCode": 200,
604+
"LastSeen": "2025-02-02T17:58:19.861Z"
605+
},
606+
"https://crates.io/crates/opentelemetry-stdout": {
607+
"StatusCode": 200,
608+
"LastSeen": "2025-02-02T17:58:22.05Z"
609+
},
610+
"https://crates.io/crates/opentelemetry-tide": {
611+
"StatusCode": 200,
612+
"LastSeen": "2025-02-02T17:58:24.207Z"
613+
},
614+
"https://crates.io/crates/opentelemetry-user-events-logs": {
615+
"StatusCode": 200,
616+
"LastSeen": "2025-02-02T17:58:28.107Z"
617+
},
618+
"https://crates.io/crates/opentelemetry-user-events-metrics": {
619+
"StatusCode": 200,
620+
"LastSeen": "2025-02-02T17:58:30.247Z"
621+
},
622+
"https://crates.io/crates/opentelemetry-zipkin": {
623+
"StatusCode": 200,
624+
"LastSeen": "2025-02-02T17:58:32.438Z"
625+
},
626+
"https://crates.io/crates/opentelemetry_sdk": {
627+
"StatusCode": 200,
628+
"LastSeen": "2025-02-02T17:58:34.619Z"
629+
},
630+
"https://crates.io/crates/tracing-opentelemetry": {
631+
"StatusCode": 200,
632+
"LastSeen": "2025-02-02T17:58:36.89Z"
633+
},
634+
"https://crates.io/crates/trillium-opentelemetry": {
635+
"StatusCode": 200,
636+
"LastSeen": "2025-02-02T18:16:27.365Z"
637+
},
538638
"https://creativecommons.org/licenses/by/4.0": {
539639
"StatusCode": 206,
540640
"LastSeen": "2025-02-01T06:38:25.294134-05:00"

0 commit comments

Comments
 (0)