Skip to content

Commit 0338068

Browse files
authored
fix(stac-validate): cache json schema objects to reduce network failures TDE-1212 (#1029)
#### Motivation _What does this change aim to achieve?_ We are seeing quite a few network-related failures when validating JSON schema objects. #### Modification Cache JSON schema objects locally when the Docker container is built. _Why is this change being made? What implications or other considerations are there?_ #### Checklist _If not applicable, provide explanation of why._ - [ ] Tests updated - [ ] Docs updated - [ ] Issue linked in Title
1 parent f40e69a commit 0338068

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
node_modules
2-
build
2+
build
3+
json-schema-cache

Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,7 @@ ADD package.json package-lock.json /app/
1818
RUN npm install --omit=dev
1919
ADD build/src /app/
2020

21+
# Cache a copy of the STAC JSON schemas by triggering a validation run
22+
RUN node /app/index.js stac-validate https://nz-imagery.s3-ap-southeast-2.amazonaws.com/new-zealand/new-zealand_2020-2021_10m/rgb/2193/collection.json
23+
2124
ENTRYPOINT ["node", "/app/index.js"]

src/commands/stac-validate/stac.validate.ts

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { fsa } from '@chunkd/fs';
22
import Ajv, { DefinedError, SchemaObject, ValidateFunction } from 'ajv';
33
import { fastFormats } from 'ajv-formats/dist/formats.js';
44
import { boolean, command, flag, number, option, restPositionals, string } from 'cmd-ts';
5+
import { createHash } from 'crypto';
56
import { dirname, join } from 'path';
67
import { performance } from 'perf_hooks';
78
import * as st from 'stac-ts';
@@ -13,6 +14,27 @@ import { hashStream } from '../../utils/hash.js';
1314
import { Sha256Prefix } from '../../utils/hash.js';
1415
import { config, registerCli, verbose } from '../common.js';
1516

17+
/**
18+
* Load a JSON schema, storing a local copy at ./json-schema-cache/<sha256 hex digest of the url>.json
19+
*
20+
* This is to prevent overloading the remote hosts as stac validation can trigger lots of schema requests
21+
* Any error while reading the cached copy falls back to reading the url; an error while writing the cache entry rejects the returned promise
22+
* @param url location of the JSON schema to load, also hashed to build the cache file name
23+
* @returns the object read from the cache file if that read succeeds, otherwise the object read from the url after it has been written to the cache
24+
*/
25+
async function readSchema(url: string): Promise<object> {
26+
const cacheId = createHash('sha256').update(url).digest('hex');
27+
const cachePath = `./json-schema-cache/${cacheId}.json`;
28+
try {
29+
return await fsa.readJson<object>(cachePath);
30+
} catch (e) {
31+
return fsa.readJson<object>(url).then(async (obj) => {
32+
await fsa.write(cachePath, JSON.stringify(obj));
33+
return obj;
34+
});
35+
}
36+
}
37+
1638
export const commandStacValidate = command({
1739
name: 'stac-validate',
1840
description: 'Validate STAC files',
@@ -78,8 +100,9 @@ export const commandStacValidate = command({
78100
strict: args.strict,
79101
loadSchema: (uri: string): Promise<SchemaObject> => {
80102
let existing = Schemas.get(uri);
103+
81104
if (existing == null) {
82-
existing = fsa.readJson(uri);
105+
existing = readSchema(uri);
83106
Schemas.set(uri, existing);
84107
}
85108
return existing;
@@ -99,7 +122,7 @@ export const commandStacValidate = command({
99122
if (schema != null) return schema;
100123
let existing = ajvSchema.get(uri);
101124
if (existing == null) {
102-
existing = fsa.readJson<object>(uri).then((json) => ajv.compileAsync(json));
125+
existing = readSchema(uri).then((json) => ajv.compileAsync(json));
103126
ajvSchema.set(uri, existing);
104127
}
105128
return existing;

0 commit comments

Comments
 (0)