Skip to content

Commit c9f8fe0

Browse files
Emma DicksonEmma Dickson
andauthored
add collection name validation (#37)
* add collection name validation * linter fix * add tests and optimize * linter fix * move to validateargs * properly reference collection * Update regex and error message Co-authored-by: Emma Dickson <[email protected]>
1 parent 24e2c4d commit c9f8fe0

File tree

2 files changed

+46
-3
lines changed

2 files changed

+46
-3
lines changed

crawler.js

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class Crawler {
103103
}
104104
}
105105
}
106-
106+
107107
bootstrap() {
108108
let opts = {};
109109
if (this.params.logging.includes("pywb")) {
@@ -114,7 +114,7 @@ class Crawler {
114114
}
115115

116116
this.configureUA();
117-
117+
118118
this.headers = {"User-Agent": this.userAgent};
119119

120120
child_process.spawn("redis-server", {...opts, cwd: "/tmp/"});
@@ -304,7 +304,13 @@ class Crawler {
304304
//argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
305305
argv.scope = [new RegExp("^" + this.rxEscape(argv.url.slice(0, argv.url.lastIndexOf("/") + 1)))];
306306
}
307-
307+
308+
309+
// Check that the collection name is valid.
310+
if (argv.collection.search(/^[\w][\w-]*$/) === -1){
311+
throw new Error(`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`);
312+
}
313+
308314
argv.timeout *= 1000;
309315

310316
// waitUntil condition must be: load, domcontentloaded, networkidle0, networkidle2

tests/collection_name.test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
const util = require("util");
2+
const exec = util.promisify(require("child_process").exec);
3+
4+
// A crawl started with a collection name made only of word characters and
// hyphens must run to completion.
//
// The 30s timeout is passed as the third argument to test(): calling
// jest.setTimeout() from inside the test body is too late to reliably extend
// the timeout of the test that is already running.
test("check that the collection name is properly validated", async () => {
  // If the command exits non-zero, exec() rejects and the test fails with the
  // real error (including captured stdout/stderr) instead of a bare
  // "expected false to be true".
  const { stdout } = await exec("docker-compose run crawler crawl --url http://www.example.com/ --collection valid_collection-nameisvalid");

  // The crawler prints this line just before it shuts down.
  expect(stdout).toContain("Waiting 5s to ensure WARCs are finished");
}, 30000);
22+
23+
24+
// A crawl started with a collection name containing characters outside
// [A-Za-z0-9_-] must be rejected by the argument validation.
//
// The 30s timeout is passed as the third argument to test(): calling
// jest.setTimeout() from inside the test body is too late to reliably extend
// the timeout of the test that is already running.
test("check that the collection name is not accepted if it doesn't meet our standards", async () => {
  let failure = null;

  try {
    await exec("docker-compose run crawler crawl --url http://www.example.com/ --collection invalid_c!!ollection-nameisvalid");
  } catch (e) {
    failure = e;
  }

  // The command must fail...
  expect(failure).not.toBeNull();

  // ...and it must fail because of the collection name, not because e.g.
  // docker-compose is missing or the image failed to start. A promisified
  // exec() attaches the captured stdout/stderr to the rejection.
  // NOTE(review): assumes the validation message reaches the captured output
  // of `docker-compose run` (either stream) - confirm in CI.
  const output = [failure.stdout, failure.stderr].join("");
  expect(output).toContain("is an invalid collection name");
}, 30000);

0 commit comments

Comments
 (0)