Skip to content

Commit 1024564

Browse files
committed
[skip ci] usage examples and docs
1 parent 5f1ebcb commit 1024564

File tree

6 files changed

+242
-36
lines changed

6 files changed

+242
-36
lines changed

README.md

Lines changed: 2 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,42 +20,8 @@ This repository is designed to be used with file-based data programmatically; fo
2020

2121
## Usage
2222

23-
```ts
24-
const REGION = 'NWS';
25-
const CONFIG_PATH = join(__dirname, './config.toml');
26-
27-
const INPUT_PATH = join(__dirname, 'files', 'input_data.csv');
28-
const OUTPUT_PATH = join(__dirname, 'output', 'output.csv');
29-
const VALIDATION_ERRORS_PATH = join(__dirname, 'output', 'validation_errors.csv');
30-
31-
// load configuration file
32-
const configLoadResult = loadConfig(CONFIG_PATH, REGION);
33-
if (!configLoadResult.success) throw new Error('unable to load configuration file.');
34-
35-
const config = configLoadResult.config;
36-
37-
// validate the input file against all configured validation rules.
38-
const preprocessResult = await preprocessFile({
39-
config: config,
40-
inputFilePath: INPUT_PATH,
41-
errorFileOutputPath: VALIDATION_ERRORS_PATH,
42-
limit: 2,
43-
});
44-
45-
if (!preprocessResult.isValid)
46-
throw new Error('Validation errors found in input file, review error file output.');
47-
48-
// validate the input file against all configured validation rules.
49-
const processFileResult = await processFile({
50-
config: config,
51-
inputFilePath: INPUT_PATH,
52-
outputPath: OUTPUT_PATH,
53-
hasherFactory: makeHasher,
54-
limit: 2,
55-
});
56-
// print the result, save the result, etc.
57-
console.dir(processFileResult, { depth: 3 });
58-
```
23+
1. If using file-based data, see [this example](./examples/file_based_usage.ts)
24+
2. If using programmatic data (i.e. like an array in memory), see [this example](./examples/programmatic_usage.ts)
5925

6026
## 🧪 Data Processing Pipeline (file-based data)
6127

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* This file is part of Building Blocks CommonID Tool
3+
* Copyright (c) 2024 WFP
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, version 3.
8+
*
9+
* This program is distributed in the hope that it will be useful, but
10+
* WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12+
* General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU General Public License
15+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
18+
import { joinFieldsForHash, cleanValueList, extractAlgoColumnsFromObject, BaseHasher } from 'common-identifier-algorithm-shared';
19+
import type { Config, Validator, makeHasherFunction } from 'common-identifier-algorithm-shared';
20+
21+
class GenericHasher extends BaseHasher {
22+
constructor(config: Config.Options["algorithm"]) {
23+
super(config);
24+
}
25+
26+
private composeHashSource = (extractedObj: Config.AlgorithmColumns) => {
27+
let staticValues = extractedObj.static;
28+
let concatenated = joinFieldsForHash(cleanValueList(staticValues));
29+
return concatenated;
30+
}
31+
32+
generateHashForObject(obj: Validator.InputData["row"]) {
33+
const extractedObj = extractAlgoColumnsFromObject(this.config.columns, obj);
34+
const toBeHashed = this.composeHashSource(extractedObj);
35+
return {
36+
hashed_id: toBeHashed.length > 0 ? this.generateHashForValue(toBeHashed) : "",
37+
}
38+
}
39+
}
40+
41+
export const REGION = "ANY";
42+
export const makeHasher: makeHasherFunction = (config: Config.Options["algorithm"]) => {
43+
switch (config.hash.strategy.toLowerCase()) {
44+
case 'sha256':
45+
return new GenericHasher(config);
46+
default:
47+
throw new Error(`Unknown hash strategy in config: '${config.hash.strategy}'`);
48+
}
49+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
[meta]
# Region short-code; must match the REGION exported by the algorithm in use.
region="ANY"
version="1.0.0"
# Integrity signature of this file, verified by the config loader.
signature="a5da7472f72da37a149ca37668c308ff"

# Schema of the input data: maps file column names to internal aliases.
[source]
columns = [
    { name = "ID", alias = "id" },
    { name = "Column 2", alias = "col2" },
    { name = "Column 3", alias = "col3" },
]

# Per-column validation rules; "*" applies to every column.
[validations]

"*" = [ { op = "max_field_length", value = 200 } ]

id = [
    { op = "field_type", value = "string" },
    { op = "regex_match", value = '(\d{11})', message="must be 11 numeric digits" }
]

[algorithm]

# Which columns feed the hash: only `static` is used by the generic hasher.
[algorithm.columns]
process = []
static = [
    "id",
]
reference = []

[algorithm.hash]
strategy = "SHA256"

# Salt mixed into the hash; replace the placeholder with a real secret value.
[algorithm.salt]
source = "STRING"
value = "{{ some_random_salt_value }}"

# Schema of the main output file.
[destination]
columns = [
    { name = "Common Identifier", alias = "hashed_id" },
]
postfix = "-OUTPUT-{{yyyy-MM-dd--HH-mm-ss}}"

# Schema of the id -> common-identifier mapping file.
[destination_map]
columns = [
    { name = "ID", alias = "id" },
    { name = "Common Identifier", alias = "hashed_id" }
]
postfix = "-MAPPING-{{yyyy-MM-dd--HH-mm-ss}}"

# Schema of the validation-error report file.
[destination_errors]
columns = [
    { name = "Errors", alias = "errors" },
    { name = "ID", alias = "id" },
]
postfix = "-ERRORS-{{yyyy-MM-dd--HH-mm-ss}}"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
ID,Column 2,Column 3
2+
43294300000,bar0,baz0
3+
38591500000,bar1,baz1
4+
17386300000,bar2,baz2
5+
54598700000,bar3,baz3
6+
56552200000,bar4,baz4
7+
59893200000,bar5,baz5
8+
98099300000,bar6,baz6
9+
43920900000,bar7,baz7
10+
83307400000,bar8,baz8
11+
89685800000,bar9,baz9

examples/file_based_usage.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// REPLACE ALL REFERENCES TO "_generic_hasher" WITH THE DESIRED ALGORITHM IN THE ALGORITHMS DIRECTORY.
2+
3+
import { loadConfig, preprocessFile, processFile } from 'common-identifier-algorithm-shared';
4+
import { makeHasher, REGION } from './example_algorithm/_generic_hasher';
5+
import { dirname, join } from 'node:path';
6+
import { fileURLToPath } from 'node:url';
7+
8+
const __dirname = dirname(fileURLToPath(import.meta.url));
9+
10+
const CONFIG_PATH = join(__dirname, 'example_algorithm', 'config.toml');
11+
const INPUT_PATH = join(__dirname, 'example_algorithm', 'input_10.csv');
12+
const OUTPUT_PATH = join(__dirname, 'output', 'output_10.csv');
13+
const VALIDATION_ERRORS_PATH = join(__dirname, 'output', 'validation_errors.csv');
14+
15+
// load configuration from file
16+
const configLoadResult = loadConfig(CONFIG_PATH, REGION);
17+
if (!configLoadResult.success) throw new Error(`ERROR: Unable to load configuration file >> ${configLoadResult.error}`);
18+
19+
// validate the input file against all configured validation rules.
20+
const preprocessResult = await preprocessFile({
21+
config: configLoadResult.config,
22+
inputFilePath: INPUT_PATH,
23+
errorFileOutputPath: VALIDATION_ERRORS_PATH,
24+
});
25+
26+
if (!preprocessResult.isValid)
27+
throw new Error('Validation errors found in input file, review error file output.');
28+
29+
// validate the input file against all configured validation rules.
30+
const processFileResult = await processFile({
31+
config: configLoadResult.config,
32+
inputFilePath: INPUT_PATH,
33+
outputPath: OUTPUT_PATH,
34+
hasherFactory: makeHasher,
35+
});
36+
// print the result, save the result, etc.
37+
console.dir(processFileResult, { depth: 3 });

examples/programmatic_usage.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// REPLACE ALL REFERENCES TO "_generic_hasher" WITH THE DESIRED ALGORITHM IN THE ALGORITHMS DIRECTORY.
2+
3+
import { dirname, join } from 'node:path';
4+
import { fileURLToPath } from 'node:url';
5+
6+
import { generateHashesForDocument, validateDocument, type CidDocument, type Config } from '../src/index';
7+
import { SUPPORTED_VALIDATORS } from '../src/validation/Validation';
8+
import { makeHasher } from './example_algorithm/_generic_hasher';
9+
10+
const __dirname = dirname(fileURLToPath(import.meta.url));
11+
12+
/*
13+
Construct a config object instructing the algorithm HOW to process the data being passed
14+
in. This contains rules related to source schema, target schemas, validation rules, and
15+
algorithm specifications. Normally, this will be read from file using loadConfig()
16+
17+
see ../docs/configuration-files.md for more detail on the relevant config fields.
18+
*/
19+
const config: Config.Options = {
20+
meta: {
21+
region: "UNKONWN", // this must match the shortCode of the algorithm being used
22+
version: "",
23+
signature: ""
24+
},
25+
// the schema information for the source data
26+
source: {
27+
columns: [
28+
{ name: "ID", alias: "id" },
29+
{ name: "Column 2", alias: "col2" },
30+
{ name: "Column 3", alias: "col3" },
31+
]
32+
},
33+
// [OPTIONAL] validation rules per column: see ../docs/validators.md
34+
validations: {
35+
id: [
36+
{ op: SUPPORTED_VALIDATORS.FIELD_TYPE, value: 'string' },
37+
{ op: SUPPORTED_VALIDATORS.MAX_FIELD_LENGTH, value: 11 }
38+
// { op: SUPPORTED_VALIDATORS.LINKED_FIELD, target: "col2" }
39+
]
40+
},
41+
algorithm: {
42+
hash: { strategy: "SHA256" },
43+
salt: { source: "STRING", value: "testSalt" },
44+
columns: {
45+
process: [],
46+
reference: [],
47+
static: [ "id" ]
48+
},
49+
},
50+
// schema for main output file, skipping for brevity
51+
destination: { columns: [] },
52+
// schema for mapping output file, skipping for brevity
53+
destination_map: { columns: [] },
54+
// schema for error files, skipping for brevity
55+
destination_errors: { columns: [] }
56+
}
57+
58+
/*
59+
Construct a `document` containing the data to process.
60+
*/
61+
const doc: CidDocument = {
62+
name: "Input Data",
63+
data: [
64+
{ "id": "43294300000", "col2": "bar0", "col3": "baz0" },
65+
{ "id": "38591500000", "col2": "bar1", "col3": "baz1" },
66+
{ "id": "17386300000", "col2": "bar2", "col3": "baz2" },
67+
]
68+
}
69+
70+
function main() {
71+
// validate the input data against all configured validation rules.
72+
const validationResult = validateDocument(config, doc, false);
73+
if (!validationResult.ok) {
74+
console.dir(validationResult.results, { depth: 5 });
75+
throw new Error("Data contains validation errors, check input");
76+
}
77+
78+
// initialise the selected algorithm
79+
const hasher = makeHasher(config.algorithm);
80+
// run the algorithm against the input data
81+
const result = generateHashesForDocument(hasher, doc);
82+
83+
// print the results, save the results, up to you.
84+
console.dir(result, { depth: 5 });
85+
}
86+
87+
main();

0 commit comments

Comments
 (0)