|
14 | 14 | // You should have received a copy of the GNU Affero General Public License |
15 | 15 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
16 | 16 |
|
17 | | -import path from 'node:path'; |
18 | | -import os from 'node:os'; |
19 | | - |
20 | | -import { SUPPORTED_FILE_TYPES } from '../document.js'; |
21 | | -import type { CidDocument } from '../document.js'; |
22 | | - |
23 | | -import { encoderForFile } from '../encoding/index.js'; |
24 | | -import { decoderForFile, fileTypeOf } from '../decoding/index.js'; |
25 | | - |
26 | | -import { |
27 | | - makeValidationResultDocument, |
28 | | - makeValidatorListDict, |
29 | | - validateDocumentWithListDict, |
30 | | -} from '../validation/index.js'; |
31 | | - |
32 | | -import { keepOutputColumns, isMappingOnlyDocument, keepValidatorsForColumns } from './mapping.js'; |
33 | | -import type { Config } from '../config/Config.js'; |
34 | | -import { BaseHasher } from '../hashing/base.js'; |
35 | | -import type { makeHasherFunction } from '../hashing/base.js'; |
36 | | -import type { Validation } from '../validation/Validation.js'; |
37 | | - |
38 | | -import Debug from 'debug'; |
39 | | -const log = Debug('CID:Processing'); |
40 | | - |
/**
 * Decodes the file at `filePath` into a `CidDocument`.
 *
 * The decoder is obtained from `decoderForFile(fileType)` and constructed with
 * the given column configuration and optional limit.
 *
 * @param fileType - file type used to select the decoder factory
 * @param columnConfig - column map handed to the decoder factory
 * @param filePath - path of the file to decode
 * @param limit - forwarded to the decoder factory as-is (presumably caps the
 *   number of rows read — confirm against the decoder implementations)
 * @returns the document produced by the decoder's `decodeFile`
 */
export async function readFile(
  fileType: SUPPORTED_FILE_TYPES,
  columnConfig: Config.ColumnMap,
  filePath: string,
  limit?: number | undefined,
): Promise<CidDocument> {
  const fileDecoder = decoderForFile(fileType)(columnConfig, limit);
  return await fileDecoder.decodeFile(filePath);
}
54 | | - |
/**
 * Validates a decoded document against the validators described in `config.validations`.
 *
 * @param config - full configuration; `config.validations` is turned into the validator dictionary
 * @param decoded - the document to validate
 * @param isMapping - when true, the validator dictionary is first passed through
 *   `keepValidatorsForColumns` so only the validators relevant to a mapping document remain
 * @returns the result of `validateDocumentWithListDict`
 */
export function validateDocument(
  config: Config.Options,
  decoded: CidDocument,
  isMapping: boolean = false,
): Validation.DocumentResult {
  const allValidators = makeValidatorListDict(config.validations);

  // mapping documents are only checked against the algorithm columns' validators
  const activeValidators = isMapping ? keepValidatorsForColumns(config, allValidators) : allValidators;

  return validateDocumentWithListDict(activeValidators, decoded);
}
68 | | - |
/**
 * Produces a new document in which every row is a shallow copy of the input row
 * merged with whatever `hasher.generateHashForObject(row)` returns for it.
 *
 * Input rows are not mutated by this function; properties returned by the hasher
 * take precedence over same-named row properties.
 *
 * @param hasher - object providing `generateHashForObject`
 * @param document - source document whose `data` rows are hashed
 * @returns a document named `'hashedDocument'` holding the merged rows
 */
export const generateHashesForDocument = (hasher: BaseHasher, document: CidDocument): CidDocument => ({
  name: 'hashedDocument',
  data: document.data.map((row) => Object.assign({}, row, hasher.generateHashForObject(row))),
});
80 | | - |
/**
 * Helper that outputs a document using a specific column configuration.
 *
 * @param fileType - file type used to select the encoder factory
 * @param columnConfig - column map handed to the encoder factory
 * @param document - the document to encode
 * @param filePath - path passed to the encoder's `encodeDocument`
 * @returns whatever the encoder's `encodeDocument` returns (callers in this
 *   module treat it as the path of the written file)
 */
export function writeFileWithConfig(
  fileType: SUPPORTED_FILE_TYPES,
  columnConfig: Config.ColumnMap,
  document: CidDocument,
  filePath: string,
) {
  const fileEncoder = encoderForFile(fileType)(columnConfig);
  return fileEncoder.encodeDocument(document, filePath);
}
93 | | - |
94 | | -// PRE-PROCESSING |
95 | | -// -------------- |
96 | | - |
/** Outcome of `preprocessFile`. */
export interface PreprocessFileResult {
  /** Mirrors the `ok` flag of the validation result. */
  isValid: boolean;
  /** Result of the `isMappingOnlyDocument` check on the decoded input. */
  isMappingDocument: boolean;
  /** Either the legitimate decoded document, or the validation-error document when validation failed. */
  document: CidDocument;
  /** The input path the caller supplied, echoed back. */
  inputFilePath: string;
  /** Set only when validation failed and an error file was written. */
  errorFilePath?: string;
}
104 | | - |
/** Arguments accepted by `preprocessFile`. */
interface PreProcessFileInput {
  /** Configuration supplying the source columns, validations and error-output columns. */
  config: Config.Options;
  /** Path of the file to decode and validate. */
  inputFilePath: string;
  /** Where to write the error file; when omitted a path in the OS temporary directory is used. */
  errorFileOutputPath?: string;
  /** Forwarded to the decoder when reading the input file. */
  limit?: number;
}
111 | | - |
/**
 * Decodes and validates an input file, writing an error file when validation fails.
 *
 * Steps:
 *  1. decode the file using the decoder matching its file type and `config.source`;
 *  2. decide whether it is a mapping-only document;
 *  3. validate it (mapping documents use a reduced validator set);
 *  4. on failure, build the validation result document and write it out using
 *     `config.destination_errors`.
 *
 * @param input.config - the configuration to decode and validate with
 * @param input.inputFilePath - path of the file to pre-process
 * @param input.errorFileOutputPath - target for the error file; when falsy, the
 *   input file's base name inside `os.tmpdir()` is used
 * @param input.limit - forwarded to the decoder
 * @returns validity flag, mapping flag, the decoded document (or the error
 *   document on failure), the input path and the error file path if one was written
 */
export async function preprocessFile({
  config,
  inputFilePath,
  errorFileOutputPath = undefined,
  limit = undefined,
}: PreProcessFileInput): Promise<PreprocessFileResult> {
  log('------------ preprocessFile -----------------');

  // DECODE
  // ======
  const sourceFileType = fileTypeOf(inputFilePath);
  const decoded = await readFile(sourceFileType, config.source, inputFilePath, limit);

  // VALIDATION
  // ==========
  const isMappingDocument = isMappingOnlyDocument(
    config.algorithm.columns,
    config.source,
    config.destination_map,
    decoded,
  );
  const validation = validateDocument(config, decoded, isMappingDocument);

  // nothing to report: hand back the decoded document untouched
  if (validation.ok) {
    return {
      isValid: validation.ok,
      isMappingDocument,
      document: decoded,
      inputFilePath,
      errorFilePath: undefined,
    };
  }

  // ERROR REPORT
  // ============
  // the report shows the "source" columns, narrowed to the mapping columns for mapping documents
  const reportColumns = isMappingDocument ? keepOutputColumns(config, config.source) : config.source;
  const errorDocument: CidDocument | undefined = makeValidationResultDocument(reportColumns, validation);

  // without an explicit target the error file goes to the OS's temporary directory
  const errorTarget = errorFileOutputPath
    ? errorFileOutputPath
    : path.join(os.tmpdir(), path.basename(inputFilePath));

  const writtenErrorFile: string | undefined = writeFileWithConfig(
    sourceFileType,
    config.destination_errors,
    errorDocument,
    errorTarget,
  );

  return {
    isValid: validation.ok,
    isMappingDocument,
    document: errorDocument ? errorDocument : decoded,
    inputFilePath,
    errorFilePath: writtenErrorFile,
  };
}
168 | | - |
169 | | -// PROCESSING |
170 | | -// ---------- |
171 | | - |
/** Outcome of `processFile`. */
export interface ProcessFileResult {
  /** Result of the `isMappingOnlyDocument` check on the decoded input. */
  isMappingDocument: boolean;
  /** The decoded document with the generated hashes merged into every row. */
  document: CidDocument;
  /** Result of writing the main output; left undefined for mapping-only documents. */
  outputFilePath?: string;
  /** Result of writing the mapping output (always written). */
  mappingFilePath: string;
}
178 | | - |
/** Arguments accepted by `processFile`. */
interface ProcessFileInput {
  /** Configuration supplying the source/destination columns and the algorithm settings. */
  config: Config.Options;
  /** Path handed to the encoders for both the main and the mapping output. */
  outputPath: string;
  /** Path of the file to decode and hash. */
  inputFilePath: string;
  /** Factory invoked with `config.algorithm` to build the hasher. */
  hasherFactory: makeHasherFunction;
  /** Output file type; when omitted the input file's type is used. */
  format?: SUPPORTED_FILE_TYPES;
  /** Forwarded to the decoder when reading the input file. */
  limit?: number;
}
187 | | - |
/**
 * Decodes an input file, generates hashes for every row and writes the results.
 *
 * The mapping output (using `config.destination_map`) is always written. The main
 * output (using `config.destination`) is skipped when the input is a mapping-only
 * document. Both writes receive the same `outputPath`.
 * NOTE(review): presumably the encoder derives distinct file names from the column
 * configuration — confirm against the encoder implementations.
 *
 * @param input.config - the configuration to decode, hash and encode with
 * @param input.outputPath - path handed to the encoders
 * @param input.inputFilePath - path of the file to process
 * @param input.hasherFactory - builds the hasher from `config.algorithm`
 * @param input.format - output file type; falls back to the input file's type when falsy
 * @param input.limit - forwarded to the decoder
 * @returns mapping flag, the hashed document and the results of the write calls
 */
export async function processFile({
  config,
  outputPath,
  inputFilePath,
  hasherFactory,
  format = undefined,
  limit = undefined,
}: ProcessFileInput): Promise<ProcessFileResult> {
  log('------------ processFile -----------------');

  // DECODE
  // ======
  const sourceFileType = fileTypeOf(inputFilePath);
  const decoded = await readFile(sourceFileType, config.source, inputFilePath, limit);

  // HASHING
  // =======
  const hashedDocument = generateHashesForDocument(hasherFactory(config.algorithm), decoded);

  // OUTPUT
  // ------

  // an explicitly requested format wins over the input file's own format
  const targetFileType = format || sourceFileType;

  const isMappingDocument = isMappingOnlyDocument(
    config.algorithm.columns,
    config.source,
    config.destination_map,
    decoded,
  );

  // the base document is only written for non-mapping inputs
  let outputFilePath: string | undefined = undefined;
  if (!isMappingDocument) {
    outputFilePath = writeFileWithConfig(targetFileType, config.destination, hashedDocument, outputPath);
  }

  // the mapping document is written in every case
  const mappingFilePath = writeFileWithConfig(targetFileType, config.destination_map, hashedDocument, outputPath);

  return {
    isMappingDocument,
    document: hashedDocument,
    outputFilePath,
    mappingFilePath,
  };
}
| 17 | +export { |
| 18 | + generateHashesForDocument, |
| 19 | + preprocessFile, |
| 20 | + processFile, |
| 21 | + validateDocument, |
| 22 | +} from './processing.js'; |
| 23 | +export { keepOutputColumns } from './mapping.js'; |
0 commit comments