Skip to content

Commit e224717

Browse files
committed
Update double-check-refcache-400s.mjs
1 parent e6e3a21 commit e224717

File tree

1 file changed

+94
-27
lines changed

1 file changed

+94
-27
lines changed

scripts/double-check-refcache-400s.mjs

+94-27
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,33 @@ import { exit } from 'process';
66

77
const CACHE_FILE = 'static/refcache.json';
88
const GOOGLE_DOCS_URL = 'https://docs.google.com/';
9-
let maxFragEntries = 3;
9+
let checkForFragments = false;
10+
let maxNumEntriesToUpdate = 3;
1011
const cratesIoURL = 'https://crates.io/crates/';
1112

13+
// Magic numbers that we use to determine if a URL with a fragment has been
14+
// checked with this script. Since we can't add new fields to the cache, we
15+
// encode "magic" values in the LastSeen field.
16+
const fragSecondsOk = 12;
17+
const fragMillisecondsOk = 345;
18+
const fragSecondsInvalid = 59;
19+
const fragMillisecondsInvalid = 999;
20+
21+
function isHttp2XXForFragments(StatusCode, lastSeenDate) {
22+
return (
23+
isHttp2XX(StatusCode) &&
24+
lastSeenDate.getSeconds() === fragSecondsOk &&
25+
lastSeenDate.getMilliseconds() === fragMillisecondsOk
26+
);
27+
}
28+
29+
function is4XXForFragments(StatusCode, lastSeenDate) {
30+
return (
31+
lastSeenDate.getSeconds() === fragSecondsInvalid &&
32+
lastSeenDate.getMilliseconds() === fragMillisecondsInvalid
33+
);
34+
}
35+
1236
async function readRefcache() {
1337
try {
1438
const data = await fs.readFile(CACHE_FILE, 'utf8');
@@ -21,38 +45,59 @@ async function readRefcache() {
2145

2246
async function writeRefcache(cache) {
2347
await fs.writeFile(CACHE_FILE, JSON.stringify(cache, null, 2) + '\n', 'utf8');
24-
console.log(`Updated ${CACHE_FILE} with fixed links.`);
48+
console.log(`Wrote updated ${CACHE_FILE}.`);
2549
}
2650

2751
// Retry the HTTP status check for refcache URLs whose status is neither 2XX nor 404
2852
async function retry400sAndUpdateCache() {
2953
console.log(`Checking ${CACHE_FILE} for 4XX status URLs ...`);
3054
const cache = await readRefcache();
31-
let updated = false;
55+
let updatedCount = 0;
3256
let entriesCount = 0;
3357
let urlWithFragmentCount = 0;
58+
let urlWithInvalidFragCount = 0;
59+
let statusCounts = {};
3460

3561
for (const [url, details] of Object.entries(cache)) {
3662
entriesCount++;
3763
const parsedUrl = new URL(url);
64+
if (parsedUrl.hash) urlWithFragmentCount++;
3865
const { StatusCode, LastSeen } = details;
66+
const lastSeenDate = new Date(LastSeen);
67+
68+
const fragOk =
69+
checkForFragments &&
70+
parsedUrl.hash &&
71+
isHttp2XXForFragments(StatusCode, lastSeenDate);
72+
const sc = StatusCode + (fragOk ? ' (frag ok)' : '');
73+
statusCounts[sc] = (statusCounts[sc] || 0) + 1;
3974

40-
if (isHttp2XX(StatusCode)) continue;
41-
if (isHttp2XX(StatusCode) && (!parsedUrl.hash || StatusCode >= 210))
75+
if (
76+
checkForFragments && parsedUrl.hash
77+
? isHttp2XXForFragments(StatusCode, lastSeenDate)
78+
: isHttp2XX(StatusCode)
79+
) {
80+
// process.stdout.write('.');
4281
continue;
82+
}
4383

4484
if (
45-
(StatusCode === 404 && !url.startsWith(cratesIoURL)) ||
46-
StatusCode === 422
85+
(StatusCode === 404 &&
86+
// Handles special case of crates.io. For details, see:
87+
// https://github.com/rust-lang/crates.io/issues/788
88+
!url.startsWith(cratesIoURL)) ||
89+
(parsedUrl.hash && is4XXForFragments(StatusCode, lastSeenDate))
4790
) {
48-
const lastSeenDate = new Date(LastSeen).toLocaleString();
4991
console.log(
50-
`Skipping ${StatusCode}: ${url} (last seen ${lastSeenDate}).`,
92+
`Skipping ${StatusCode}: ${url} (last seen ${lastSeenDate.toLocaleString()}).`,
5193
);
94+
if(parsedUrl.hash) urlWithInvalidFragCount++;
5295
continue;
5396
}
97+
5498
if (url.startsWith(GOOGLE_DOCS_URL)) {
5599
// console.log(`Skipping Google Docs URL (for now): ${url}.`);
100+
// process.stdout.write('.');
56101
continue;
57102
/*
58103
URLs are of the form:
@@ -62,40 +107,59 @@ async function retry400sAndUpdateCache() {
62107
*/
63108
}
64109

65-
if (
66-
parsedUrl.hash &&
67-
StatusCode < 210 &&
68-
++urlWithFragmentCount > maxFragEntries
69-
)
110+
if (maxNumEntriesToUpdate && updatedCount >= maxNumEntriesToUpdate) {
111+
console.log(`Updated max of ${maxNumEntriesToUpdate} entries, exiting.`);
70112
break;
113+
}
71114

72115
process.stdout.write(
73116
`Checking${
74117
parsedUrl.hash ? ` for fragment in` : `:`
75118
} ${url} (was ${StatusCode}) ... `,
76119
);
77120

78-
const verbose = false;
79-
let status = await getUrlStatus(url, verbose);
80-
if (parsedUrl.hash && isHttp2XX(status)) status += 10;
81-
121+
let status = await getUrlStatus(url);
82122
console.log(`${status}.`);
83123

84-
if (!isHttp2XX(status)) continue;
124+
let now = new Date();
125+
if (parsedUrl.hash) {
126+
if (isHttp2XX(status)) {
127+
// Encode that the fragment was checked and is valid.
128+
now.setSeconds(fragSecondsOk);
129+
now.setMilliseconds(fragMillisecondsOk);
130+
} else {
131+
status = StatusCode; // Keep the original status, rather than our custom 4XX status.
132+
now.setSeconds(fragSecondsInvalid);
133+
now.setMilliseconds(fragMillisecondsInvalid);
134+
urlWithInvalidFragCount++;
135+
}
136+
} else if (!isHttp2XX(status)) {
137+
continue;
138+
}
85139

86140
cache[url] = {
87141
StatusCode: status,
88-
LastSeen: new Date().toISOString(),
142+
LastSeen: now.toISOString(),
89143
};
90-
91-
updated = true;
144+
updatedCount++;
92145
}
93146

94-
if (updated) {
147+
if (updatedCount) {
95148
await writeRefcache(cache);
96149
} else {
97150
console.log(`No updates needed.`);
98151
}
152+
153+
console.log(
154+
`Processed ${entriesCount} URLs${
155+
checkForFragments
156+
? ` (${urlWithFragmentCount} with fragments, ${urlWithInvalidFragCount} are invalid)`
157+
: ''
158+
}`,
159+
);
160+
for (const [status, count] of Object.entries(statusCounts)) {
161+
console.log(`Status ${status}: ${count}`);
162+
}
99163
}
100164

101165
function getNumericFlagValue(flagName) {
@@ -107,17 +171,20 @@ function getNumericFlagValue(flagName) {
107171
: process.argv[process.argv.indexOf(flagName) + 1];
108172
let value = parseInt(valueArg);
109173

110-
if (!value) {
174+
if (value < 0) {
111175
console.error(
112176
`ERROR: invalid value for ${flagName}: ${valueArg}. ` +
113-
`Must be a number > 0. Using default ${maxFragEntries}.`,
177+
`Must be a number > 0. Using default ${maxNumEntriesToUpdate}.`,
114178
);
115179
exit(1);
116180
}
117181
return value;
118182
}
119183

120-
const _maxFragEntriesFlag = getNumericFlagValue('--max-frag-entries');
121-
if (_maxFragEntriesFlag) maxFragEntries = _maxFragEntriesFlag;
184+
const _maxNumEntriesToUpdateFlag = getNumericFlagValue('--max-num-to-update');
185+
if (_maxNumEntriesToUpdateFlag >= 0)
186+
maxNumEntriesToUpdate = _maxNumEntriesToUpdateFlag;
187+
checkForFragments =
188+
process.argv.includes('--check-for-fragments') || process.argv.includes('-f');
122189

123190
await retry400sAndUpdateCache();

0 commit comments

Comments
 (0)