Skip to content

Commit ab0f402

Browse files
committed
fix: handle large JSON files with compiler outputs as streams
1 parent a352f4b commit ab0f402

File tree

7 files changed

+382
-84
lines changed

7 files changed

+382
-84
lines changed

pnpm-lock.yaml

+25-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

v-next/hardhat-utils/package.json

+2
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,12 @@
8080
"typescript-eslint": "7.7.1"
8181
},
8282
"dependencies": {
83+
"@streamparser/json-node": "^0.0.22",
8384
"debug": "^4.3.2",
8485
"env-paths": "^2.2.0",
8586
"ethereum-cryptography": "^2.2.1",
8687
"fast-equals": "^5.0.1",
88+
"json-stream-stringify": "^3.1.6",
8789
"rfdc": "^1.3.1",
8890
"undici": "^6.16.1"
8991
}

v-next/hardhat-utils/src/fs.ts

+126-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1+
import type { JsonTypes, ParsedElementInfo } from "@streamparser/json-node";
2+
import type { FileHandle } from "node:fs/promises";
3+
14
import fsPromises from "node:fs/promises";
25
import path from "node:path";
6+
import { pipeline } from "node:stream/promises";
7+
8+
import { JSONParser } from "@streamparser/json-node";
9+
import { JsonStreamStringify } from "json-stream-stringify";
310

411
import { ensureError } from "./error.js";
512
import {
@@ -187,6 +194,73 @@ export async function readJsonFile<T>(absolutePathToFile: string): Promise<T> {
187194
}
188195
}
189196

197+
/**
 * Reads a JSON file as a stream and parses it. The encoding used is "utf8".
 * This function should be used when parsing very large JSON files.
 *
 * @param absolutePathToFile The path to the file.
 * @returns The parsed JSON object.
 * @throws FileNotFoundError if the file doesn't exist.
 * @throws InvalidFileFormatError if the file is not a valid JSON file, or if
 * no value could be parsed from it (e.g. the file is empty).
 * @throws IsDirectoryError if the path is a directory instead of a file.
 * @throws FileSystemAccessError for any other error.
 */
export async function readJsonFileAsStream<T>(
  absolutePathToFile: string,
): Promise<T> {
  let fileHandle: FileHandle | undefined;

  try {
    fileHandle = await fsPromises.open(absolutePathToFile, "r");

    const fileReadStream = fileHandle.createReadStream();

    // NOTE: We set a separator to disable self-closing to be able to use the parser
    // in the stream.pipeline context; see https://github.com/juanjoDiaz/streamparser-json/issues/47
    const jsonParser = new JSONParser({
      separator: "",
    });

    const result: T | undefined = await pipeline(
      fileReadStream,
      jsonParser,
      async (
        elements: AsyncIterable<ParsedElementInfo.ParsedElementInfo>,
      ): Promise<any | undefined> => {
        // Only the value of the last element yielded by the parser is kept;
        // presumably that is the completed top-level value (confirm against
        // the parser's documentation).
        let value: JsonTypes.JsonPrimitive | JsonTypes.JsonStruct | undefined;
        for await (const element of elements) {
          value = element.value;
        }
        return value;
      },
    );

    // This error carries no `code`, so the catch block below reports it as an
    // InvalidFileFormatError.
    if (result === undefined) {
      throw new Error("No data");
    }

    return result;
  } catch (e) {
    ensureError<NodeJS.ErrnoException>(e);

    if (e.code === "ENOENT") {
      throw new FileNotFoundError(absolutePathToFile, e);
    }

    if (e.code === "EISDIR") {
      throw new IsDirectoryError(absolutePathToFile, e);
    }

    // If the code is defined, we assume the error is related to the file
    // system. The underlying message is forwarded, as the other
    // FileSystemAccessError call sites in this file do.
    if (e.code !== undefined) {
      throw new FileSystemAccessError(e.message, e);
    }

    // Otherwise, we assume the error is related to the file's contents.
    throw new InvalidFileFormatError(absolutePathToFile, e);
  } finally {
    // Explicitly close the handle so it is released on every code path.
    await fileHandle?.close();
  }
}
263+
190264
/**
191265
* Writes an object to a JSON file. The encoding used is "utf8" and the file is overwritten.
192266
* If part of the path doesn't exist, it will be created.
@@ -211,6 +285,55 @@ export async function writeJsonFile<T>(
211285
await writeUtf8File(absolutePathToFile, content);
212286
}
213287

288+
/**
 * Writes an object to a JSON file as a stream. The encoding used is "utf8" and the file is overwritten.
 * If part of the path doesn't exist, it will be created.
 * This function should be used when stringifying very large JSON objects.
 *
 * If writing fails and the parent directory was created by this call, the
 * directory is removed again (best effort) before the error is thrown.
 *
 * @param absolutePathToFile The path to the file. If the file exists, it will be overwritten.
 * @param object The object to write.
 * @throws JsonSerializationError if the object can't be serialized to JSON.
 * @throws FileSystemAccessError for any other error.
 */
export async function writeJsonFileAsStream<T>(
  absolutePathToFile: string,
  object: T,
): Promise<void> {
  const dirPath = path.dirname(absolutePathToFile);
  const dirExists = await exists(dirPath);
  if (!dirExists) {
    await mkdir(dirPath);
  }

  try {
    let fileHandle: FileHandle | undefined;

    try {
      fileHandle = await fsPromises.open(absolutePathToFile, "w");

      const jsonStream = new JsonStreamStringify(object);
      const fileWriteStream = fileHandle.createWriteStream();

      await pipeline(jsonStream, fileWriteStream);
    } finally {
      // Close the handle here, before the outer catch runs, so the file is
      // no longer open when the directory cleanup below is attempted.
      await fileHandle?.close();
    }
  } catch (e) {
    ensureError<NodeJS.ErrnoException>(e);

    // If the directory was created by this call, we should remove it.
    if (!dirExists) {
      try {
        await remove(dirPath);
      } catch {
        // Best-effort cleanup: we don't want to override the original error.
      }
    }

    // If the code is defined, we assume the error is related to the file system.
    if (e.code !== undefined) {
      throw new FileSystemAccessError(e.message, e);
    }

    // Otherwise, we assume the object could not be serialized.
    throw new JsonSerializationError(absolutePathToFile, e);
  }
}
336+
214337
/**
215338
* Reads a file and returns its content as a string. The encoding used is "utf8".
216339
*
@@ -537,7 +660,9 @@ export async function move(source: string, destination: string): Promise<void> {
537660
// On linux, trying to move a non-empty directory will throw ENOTEMPTY,
538661
// while on Windows it will throw EPERM.
539662
if (e.code === "ENOTEMPTY" || e.code === "EPERM") {
540-
throw new DirectoryNotEmptyError(destination, e);
663+
if (await isDirectory(source)) {
664+
throw new DirectoryNotEmptyError(destination, e);
665+
}
541666
}
542667

543668
throw new FileSystemAccessError(e.message, e);

v-next/hardhat-utils/test/fs.ts

+133
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import {
2828
readBinaryFile,
2929
getAccessTime,
3030
getFileSize,
31+
readJsonFileAsStream,
32+
writeJsonFileAsStream,
3133
} from "../src/fs.js";
3234

3335
import { useTmpDir } from "./helpers/fs.js";
@@ -464,6 +466,137 @@ describe("File system utils", () => {
464466
});
465467
});
466468

469+
describe("readJsonFileAsStream", () => {
  // Resolves a path inside the temporary directory of the running test.
  const tmpPath = (...segments: string[]): string =>
    path.join(getTmpDir(), ...segments);

  it("Should read and parse a JSON file", async () => {
    const expected = { a: 1, b: 2 };
    const jsonPath = tmpPath("file.json");
    await writeUtf8File(jsonPath, JSON.stringify(expected));

    // Runtime check: the parsed content matches what was written.
    const parsed = await readJsonFileAsStream(jsonPath);
    assert.deepEqual(parsed, expected);

    // Type-level checks: unknown by default, narrowed by the type argument.
    const untyped = await readJsonFileAsStream(jsonPath);
    expectTypeOf(untyped).toBeUnknown();

    const typed = await readJsonFileAsStream<{ a: number; b: number }>(
      jsonPath,
    );
    expectTypeOf(typed).toMatchTypeOf<{ a: number; b: number }>();
  });

  it("Should throw InvalidFileFormatError if the file is not valid JSON", async () => {
    const jsonPath = tmpPath("file.json");
    await writeUtf8File(jsonPath, "not-json");

    const expectedError = {
      name: "InvalidFileFormatError",
      message: `Invalid file format: ${jsonPath}`,
    };
    await assert.rejects(readJsonFileAsStream(jsonPath), expectedError);
  });

  it("Should throw InvalidFileFormatError if the file is empty", async () => {
    const jsonPath = tmpPath("file.json");
    await writeUtf8File(jsonPath, "");

    const expectedError = {
      name: "InvalidFileFormatError",
      message: `Invalid file format: ${jsonPath}`,
    };
    await assert.rejects(readJsonFileAsStream(jsonPath), expectedError);
  });

  it("Should throw FileNotFoundError if the file doesn't exist", async () => {
    const missingPath = tmpPath("not-exists.json");

    const expectedError = {
      name: "FileNotFoundError",
      message: `File ${missingPath} not found`,
    };
    await assert.rejects(readJsonFileAsStream(missingPath), expectedError);
  });

  it("Should throw IsDirectoryError if the file is a directory", async () => {
    // The temporary directory itself is used as the "file" to read.
    const directoryPath = tmpPath();

    const expectedError = {
      name: "IsDirectoryError",
      message: `Path ${directoryPath} is a directory`,
    };
    await assert.rejects(readJsonFileAsStream(directoryPath), expectedError);
  });

  it("Should throw FileSystemAccessError if a different error is thrown", async () => {
    // A path consisting of a null byte is used to provoke a different failure.
    const badPath = "\0";

    await assert.rejects(readJsonFileAsStream(badPath), {
      name: "FileSystemAccessError",
    });
  });
});
528+
529+
describe("writeJsonFileAsStream", () => {
  it("Should write an object to a JSON file", async () => {
    const expectedObject = { a: 1, b: 2 };
    const filePath = path.join(getTmpDir(), "file.json");

    await writeJsonFileAsStream(filePath, expectedObject);

    assert.deepEqual(
      JSON.parse(await readUtf8File(filePath)),
      expectedObject,
    );

    // Type-level check on the function under test: it accepts an explicit
    // type argument and resolves to void. The call is awaited so no promise
    // is left floating.
    expectTypeOf(
      await writeJsonFileAsStream<{ a: number; b: number }>(
        filePath,
        expectedObject,
      ),
    ).toBeVoid();
  });

  it("Should write an object to a JSON file even if part of the path doesn't exist", async () => {
    const expectedObject = { a: 1, b: 2 };
    const filePath = path.join(getTmpDir(), "not-exists", "file.json");

    await writeJsonFileAsStream(filePath, expectedObject);

    assert.deepEqual(
      JSON.parse(await readUtf8File(filePath)),
      expectedObject,
    );
  });

  it("Should throw JsonSerializationError if the object can't be serialized to JSON", async () => {
    const filePath = path.join(getTmpDir(), "file.json");
    // create an object with a circular reference
    const circularObject: { self?: {} } = {};
    circularObject.self = circularObject;

    await assert.rejects(writeJsonFileAsStream(filePath, circularObject), {
      name: "JsonSerializationError",
      message: `Error serializing JSON file ${filePath}`,
    });
  });

  it("Should throw FileSystemAccessError if a different error is thrown", async () => {
    const filePath = path.join(getTmpDir(), "protected-file.json");
    await createFile(filePath);

    try {
      // Make the file read-only so opening it for writing fails.
      await chmod(filePath, 0o444);

      await assert.rejects(writeJsonFileAsStream(filePath, {}), {
        name: "FileSystemAccessError",
      });
    } finally {
      // Restore write permissions so the file can be modified again.
      await chmod(filePath, 0o666);
    }
  });

  it("Should remove the part of the path that didn't exist before if an error is thrown", async () => {
    const dirPath = path.join(getTmpDir(), "not-exists");
    const filePath = path.join(dirPath, "protected-file.json");
    // create an object with a circular reference
    const circularObject: { self?: {} } = {};
    circularObject.self = circularObject;

    await assert.rejects(writeJsonFileAsStream(filePath, circularObject), {
      name: "JsonSerializationError",
      message: `Error serializing JSON file ${filePath}`,
    });

    assert.ok(!(await exists(dirPath)), "The directory should not exist");
  });
});
599+
467600
describe("readUtf8File", () => {
468601
it("Should read a file and return its content as a string", async () => {
469602
const content = "hello";

0 commit comments

Comments
 (0)