@@ -6,9 +6,33 @@ import { exit } from 'process';
6
6
7
7
// Path of the JSON reference cache that this script reads and rewrites.
const CACHE_FILE = 'static/refcache.json' ;
8
8
// Cached URLs starting with this prefix are skipped by the retry loop.
const GOOGLE_DOCS_URL = 'https://docs.google.com/' ;
9
- let maxFragEntries = 3 ;
9
// When true, a 2XX entry for a URL with a fragment only counts as verified if
// its LastSeen carries the "fragment ok" marker defined below. Enabled by the
// --check-for-fragments / -f command-line flags.
+ let checkForFragments = false ;
10
// Maximum number of cache entries updated in one run; 0 disables the limit.
// Can be overridden with the --max-num-to-update command-line flag.
+ let maxNumEntriesToUpdate = 3 ;
10
11
// Cached 404s for URLs under this prefix are retried instead of skipped.
const cratesIoURL = 'https://crates.io/crates/' ;
11
12
13
+ // Magic numbers that we use to determine if a URL with a fragment has been
14
+ // checked with this script. Since we can't add new fields to the cache, we
15
+ // encode "magic" values in the LastSeen field.
16
// Seconds/milliseconds written into LastSeen when the fragment check succeeded.
+ const fragSecondsOk = 12 ;
17
+ const fragMillisecondsOk = 345 ;
18
// Seconds/milliseconds written into LastSeen when the fragment check failed.
+ const fragSecondsInvalid = 59 ;
19
+ const fragMillisecondsInvalid = 999 ;
20
+
21
/**
 * Tells whether a cache entry for a URL with a fragment is marked as
 * "fragment checked and valid".
 *
 * An entry qualifies when its status code passes `isHttp2XX` and the seconds
 * and milliseconds of its last-seen timestamp equal the magic values
 * `fragSecondsOk` and `fragMillisecondsOk`.
 *
 * @param {number} StatusCode - Cached HTTP status code of the entry.
 * @param {Date} lastSeenDate - Cached last-seen timestamp of the entry.
 * @returns {boolean} Truthy only when the status is 2XX and both magic values
 *   match.
 */
function isHttp2XXForFragments(StatusCode, lastSeenDate) {
  const statusIs2XX = isHttp2XX(StatusCode);
  // Hand back the falsy result as-is so callers see the same value as before.
  if (!statusIs2XX) return statusIs2XX;

  if (lastSeenDate.getSeconds() !== fragSecondsOk) return false;
  return lastSeenDate.getMilliseconds() === fragMillisecondsOk;
}
28
+
29
/**
 * Tells whether a cache entry is marked as "fragment checked and invalid".
 *
 * The marker is the pair of magic values `fragSecondsInvalid` and
 * `fragMillisecondsInvalid` in the seconds and milliseconds of the entry's
 * last-seen timestamp.
 *
 * @param {number} StatusCode - Cached HTTP status code of the entry. Accepted
 *   for symmetry with `isHttp2XXForFragments`, but not consulted.
 * @param {Date} lastSeenDate - Cached last-seen timestamp of the entry.
 * @returns {boolean} `true` when both magic values match.
 */
function is4XXForFragments(StatusCode, lastSeenDate) {
  const seconds = lastSeenDate.getSeconds();
  if (seconds !== fragSecondsInvalid) return false;

  const milliseconds = lastSeenDate.getMilliseconds();
  return milliseconds === fragMillisecondsInvalid;
}
35
+
12
36
async function readRefcache ( ) {
13
37
try {
14
38
const data = await fs . readFile ( CACHE_FILE , 'utf8' ) ;
@@ -21,38 +45,59 @@ async function readRefcache() {
21
45
22
46
/**
 * Writes the reference cache to `CACHE_FILE` and logs that it was written.
 *
 * The cache is serialized as JSON with 2-space indentation and a trailing
 * newline, encoded as UTF-8.
 *
 * @param {object} cache - Cache contents to serialize.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeRefcache(cache) {
  const serialized = `${JSON.stringify(cache, null, 2)}\n`;
  await fs.writeFile(CACHE_FILE, serialized, 'utf8');
  console.log(`Wrote updated ${ CACHE_FILE } .`);
}
26
50
27
51
// Retry the HTTP status check for refcache URLs whose cached status is neither 2XX nor 404.
28
52
async function retry400sAndUpdateCache ( ) {
29
53
console . log ( `Checking ${ CACHE_FILE } for 4XX status URLs ...` ) ;
30
54
const cache = await readRefcache ( ) ;
31
- let updated = false ;
55
+ let updatedCount = 0 ;
32
56
let entriesCount = 0 ;
33
57
let urlWithFragmentCount = 0 ;
58
+ let urlWithInvalidFragCount = 0 ;
59
+ let statusCounts = { } ;
34
60
35
61
for ( const [ url , details ] of Object . entries ( cache ) ) {
36
62
entriesCount ++ ;
37
63
const parsedUrl = new URL ( url ) ;
64
+ if ( parsedUrl . hash ) urlWithFragmentCount ++ ;
38
65
const { StatusCode, LastSeen } = details ;
66
+ const lastSeenDate = new Date ( LastSeen ) ;
67
+
68
+ const fragOk =
69
+ checkForFragments &&
70
+ parsedUrl . hash &&
71
+ isHttp2XXForFragments ( StatusCode , lastSeenDate ) ;
72
+ const sc = StatusCode + ( fragOk ? ' (frag ok)' : '' ) ;
73
+ statusCounts [ sc ] = ( statusCounts [ sc ] || 0 ) + 1 ;
39
74
40
- if ( isHttp2XX ( StatusCode ) ) continue ;
41
- if ( isHttp2XX ( StatusCode ) && ( ! parsedUrl . hash || StatusCode >= 210 ) )
75
+ if (
76
+ checkForFragments && parsedUrl . hash
77
+ ? isHttp2XXForFragments ( StatusCode , lastSeenDate )
78
+ : isHttp2XX ( StatusCode )
79
+ ) {
80
+ // process.stdout.write('.');
42
81
continue ;
82
+ }
43
83
44
84
if (
45
- ( StatusCode === 404 && ! url . startsWith ( cratesIoURL ) ) ||
46
- StatusCode === 422
85
+ ( StatusCode === 404 &&
86
+ // Handles special case of crates.io. For details, see:
87
+ // https://github.com/rust-lang/crates.io/issues/788
88
+ ! url . startsWith ( cratesIoURL ) ) ||
89
+ ( parsedUrl . hash && is4XXForFragments ( StatusCode , lastSeenDate ) )
47
90
) {
48
- const lastSeenDate = new Date ( LastSeen ) . toLocaleString ( ) ;
49
91
console . log (
50
- `Skipping ${ StatusCode } : ${ url } (last seen ${ lastSeenDate } ).` ,
92
+ `Skipping ${ StatusCode } : ${ url } (last seen ${ lastSeenDate . toLocaleString ( ) } ).` ,
51
93
) ;
94
+ if ( parsedUrl . hash ) urlWithInvalidFragCount ++ ;
52
95
continue ;
53
96
}
97
+
54
98
if ( url . startsWith ( GOOGLE_DOCS_URL ) ) {
55
99
// console.log(`Skipping Google Docs URL (for now): ${url}.`);
100
+ // process.stdout.write('.');
56
101
continue ;
57
102
/*
58
103
URLs are of the form:
@@ -62,40 +107,59 @@ async function retry400sAndUpdateCache() {
62
107
*/
63
108
}
64
109
65
- if (
66
- parsedUrl . hash &&
67
- StatusCode < 210 &&
68
- ++ urlWithFragmentCount > maxFragEntries
69
- )
110
+ if ( maxNumEntriesToUpdate && updatedCount >= maxNumEntriesToUpdate ) {
111
+ console . log ( `Updated max of ${ maxNumEntriesToUpdate } entries, exiting.` ) ;
70
112
break ;
113
+ }
71
114
72
115
process . stdout . write (
73
116
`Checking${
74
117
parsedUrl . hash ? ` for fragment in` : `:`
75
118
} ${ url } (was ${ StatusCode } ) ... `,
76
119
) ;
77
120
78
- const verbose = false ;
79
- let status = await getUrlStatus ( url , verbose ) ;
80
- if ( parsedUrl . hash && isHttp2XX ( status ) ) status += 10 ;
81
-
121
+ let status = await getUrlStatus ( url ) ;
82
122
console . log ( `${ status } .` ) ;
83
123
84
- if ( ! isHttp2XX ( status ) ) continue ;
124
+ let now = new Date ( ) ;
125
+ if ( parsedUrl . hash ) {
126
+ if ( isHttp2XX ( status ) ) {
127
+ // Encode that the fragment was checked and is valid.
128
+ now . setSeconds ( fragSecondsOk ) ;
129
+ now . setMilliseconds ( fragMillisecondsOk ) ;
130
+ } else {
131
+ status = StatusCode ; // Keep the original status, rather than our custom 4XX status.
132
+ now . setSeconds ( fragSecondsInvalid ) ;
133
+ now . setMilliseconds ( fragMillisecondsInvalid ) ;
134
+ urlWithInvalidFragCount ++ ;
135
+ }
136
+ } else if ( ! isHttp2XX ( status ) ) {
137
+ continue ;
138
+ }
85
139
86
140
cache [ url ] = {
87
141
StatusCode : status ,
88
- LastSeen : new Date ( ) . toISOString ( ) ,
142
+ LastSeen : now . toISOString ( ) ,
89
143
} ;
90
-
91
- updated = true ;
144
+ updatedCount ++ ;
92
145
}
93
146
94
- if ( updated ) {
147
+ if ( updatedCount ) {
95
148
await writeRefcache ( cache ) ;
96
149
} else {
97
150
console . log ( `No updates needed.` ) ;
98
151
}
152
+
153
+ console . log (
154
+ `Processed ${ entriesCount } URLs${
155
+ checkForFragments
156
+ ? ` (${ urlWithFragmentCount } with fragments, ${ urlWithInvalidFragCount } are invalid)`
157
+ : ''
158
+ } `,
159
+ ) ;
160
+ for ( const [ status , count ] of Object . entries ( statusCounts ) ) {
161
+ console . log ( `Status ${ status } : ${ count } ` ) ;
162
+ }
99
163
}
100
164
101
165
function getNumericFlagValue ( flagName ) {
@@ -107,17 +171,20 @@ function getNumericFlagValue(flagName) {
107
171
: process . argv [ process . argv . indexOf ( flagName ) + 1 ] ;
108
172
let value = parseInt ( valueArg ) ;
109
173
110
- if ( ! value ) {
174
+ if ( value < 0 ) {
111
175
console . error (
112
176
`ERROR: invalid value for ${ flagName } : ${ valueArg } . ` +
113
- `Must be a number > 0. Using default ${ maxFragEntries } .` ,
177
+ `Must be a number > 0. Using default ${ maxNumEntriesToUpdate } .` ,
114
178
) ;
115
179
exit ( 1 ) ;
116
180
}
117
181
return value ;
118
182
}
119
183
120
// Apply command-line overrides to the module-level settings, then run the
// cache update.
const _maxNumEntriesToUpdateFlag = getNumericFlagValue('--max-num-to-update');
// Only a number >= 0 overrides the default; anything else (e.g. NaN) fails
// this comparison and leaves the default in place.
if (_maxNumEntriesToUpdateFlag >= 0) {
  maxNumEntriesToUpdate = _maxNumEntriesToUpdateFlag;
}

// Either spelling of the flag enables fragment checking.
checkForFragments = ['--check-for-fragments', '-f'].some((flag) =>
  process.argv.includes(flag),
);

await retry400sAndUpdateCache();
0 commit comments