Skip to content

Commit 15a0d2e

Browse files
Race condition in compiler download when more than one Hardhat process is compiling (#4771)
1 parent 277b6dc commit 15a0d2e

File tree

4 files changed

+238
-2
lines changed

4 files changed

+238
-2
lines changed

.changeset/lazy-jobs-turn.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"hardhat": patch
3+
---
4+
5+
Fixed a race condition that occurred when multiple Hardhat processes ran a compilation at the same time.

packages/hardhat-core/src/internal/solidity/compiler/downloader.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { promisify } from "util";
88
import { download } from "../../util/download";
99
import { assertHardhatInvariant, HardhatError } from "../../core/errors";
1010
import { ERRORS } from "../../core/errors-list";
11-
import { Mutex } from "../../vendor/await-semaphore";
11+
import { MultiProcessMutex } from "../../util/multi-process-mutex";
1212

1313
const log = debug("hardhat:core:solidity:downloader");
1414

@@ -126,7 +126,7 @@ export class CompilerDownloader implements ICompilerDownloader {
126126
}
127127

128128
public static defaultCompilerListCachePeriod = 3_600_00;
129-
private readonly _mutex = new Mutex();
129+
private readonly _mutex = new MultiProcessMutex("compiler-download");
130130

131131
/**
132132
* Use CompilerDownloader.getConcurrencySafeDownloader instead
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/* eslint-disable @nomicfoundation/hardhat-internal-rules/only-hardhat-error */
2+
import debug from "debug";
3+
import fs from "node:fs";
4+
import path from "node:path";
5+
import os from "node:os";
6+
7+
// Logic explanation: the fs.writeFile function, when used with the wx+ flag, performs an atomic operation to create a file.
8+
// If multiple processes try to create the same file simultaneously, only one will succeed.
9+
// This logic can be utilized to implement a mutex.
10+
// ATTENTION: in the current implementation, there's still a risk of two processes running simultaneously.
11+
// For example, if processA has locked the mutex and is running, processB will wait.
12+
// During this wait, processB continuously checks the elapsed time since the mutex lock file was created.
13+
// If an excessive amount of time has passed, processB will assume ownership of the mutex to avoid stale locks.
14+
// However, there's a possibility that processB might take ownership because the mutex creation file is outdated, even though processA is still running
15+
// For more info check the Nomic Notion page (internal link).
16+
17+
const log = debug("hardhat:util:multi-process-mutex");
18+
const DEFAULT_MAX_MUTEX_LIFESPAN_IN_MS = 60000;
19+
const MUTEX_LOOP_WAITING_TIME_IN_MS = 100;
20+
21+
export class MultiProcessMutex {
22+
private _mutexFilePath: string;
23+
private _mutexLifespanInMs: number;
24+
25+
constructor(mutexName: string, maxMutexLifespanInMs?: number) {
26+
log(`Creating mutex with name '${mutexName}'`);
27+
28+
this._mutexFilePath = path.join(os.tmpdir(), `${mutexName}.txt`);
29+
this._mutexLifespanInMs =
30+
maxMutexLifespanInMs ?? DEFAULT_MAX_MUTEX_LIFESPAN_IN_MS;
31+
}
32+
33+
public async use<T>(f: () => Promise<T>): Promise<T> {
34+
log(`Starting mutex process with mutex file '${this._mutexFilePath}'`);
35+
36+
while (true) {
37+
if (await this._tryToAcquireMutex()) {
38+
// Mutex has been acquired
39+
return this._executeFunctionAndReleaseMutex(f);
40+
}
41+
42+
// Mutex not acquired
43+
if (this._isMutexFileTooOld()) {
44+
// If the mutex file is too old, it likely indicates a stale lock, so the file should be removed
45+
log(
46+
`Current mutex file is too old, removing it at path '${this._mutexFilePath}'`
47+
);
48+
this._deleteMutexFile();
49+
} else {
50+
// wait
51+
await this._waitMs();
52+
}
53+
}
54+
}
55+
56+
private async _tryToAcquireMutex() {
57+
try {
58+
// Create a file only if it does not exist
59+
fs.writeFileSync(this._mutexFilePath, "", { flag: "wx+" });
60+
return true;
61+
} catch (error: any) {
62+
if (error.code === "EEXIST") {
63+
// File already exists, so the mutex is already acquired
64+
return false;
65+
}
66+
67+
throw error;
68+
}
69+
}
70+
71+
private async _executeFunctionAndReleaseMutex<T>(
72+
f: () => Promise<T>
73+
): Promise<T> {
74+
log(`Mutex acquired at path '${this._mutexFilePath}'`);
75+
76+
try {
77+
const res = await f();
78+
79+
// Release the mutex
80+
log(`Mutex released at path '${this._mutexFilePath}'`);
81+
this._deleteMutexFile();
82+
83+
log(`Mutex released at path '${this._mutexFilePath}'`);
84+
85+
return res;
86+
} catch (error: any) {
87+
// Catch any error to avoid stale locks.
88+
// Remove the mutex file and re-throw the error
89+
this._deleteMutexFile();
90+
throw error;
91+
}
92+
}
93+
94+
private _isMutexFileTooOld(): boolean {
95+
let fileStat;
96+
try {
97+
fileStat = fs.statSync(this._mutexFilePath);
98+
} catch (error: any) {
99+
if (error.code === "ENOENT") {
100+
// The file might have been deleted by another process while this function was trying to access it.
101+
return false;
102+
}
103+
104+
throw error;
105+
}
106+
107+
const now = new Date();
108+
const fileDate = new Date(fileStat.ctime);
109+
const diff = now.getTime() - fileDate.getTime();
110+
111+
return diff > this._mutexLifespanInMs;
112+
}
113+
114+
private _deleteMutexFile() {
115+
try {
116+
log(`Deleting mutex file at path '${this._mutexFilePath}'`);
117+
fs.unlinkSync(this._mutexFilePath);
118+
} catch (error: any) {
119+
if (error.code === "ENOENT") {
120+
// The file might have been deleted by another process while this function was trying to access it.
121+
return;
122+
}
123+
124+
throw error;
125+
}
126+
}
127+
128+
private async _waitMs() {
129+
return new Promise((resolve) =>
130+
setTimeout(resolve, MUTEX_LOOP_WAITING_TIME_IN_MS)
131+
);
132+
}
133+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import { assert, expect } from "chai";
2+
import { MultiProcessMutex } from "../../src/internal/util/multi-process-mutex";
3+
4+
describe("multi-process-mutex", () => {
5+
const mutexName = "test-mutex";
6+
7+
it("should execute all the function in a sequential order, not in parallel", async () => {
8+
// Since all the functions cannot be executed in parallel because of the mutex,
9+
// the total execution time should be bigger than the sum of the execution times of each function.
10+
const mutex = new MultiProcessMutex(mutexName); // Use default max mutex lifespan
11+
const start = performance.now();
12+
13+
const ms = [500, 700, 1000];
14+
await Promise.all([
15+
mutex.use(async () => {
16+
await waitMs(ms[0]);
17+
}),
18+
mutex.use(async () => {
19+
await waitMs(ms[1]);
20+
}),
21+
22+
mutex.use(async () => {
23+
await waitMs(ms[2]);
24+
}),
25+
]);
26+
27+
const end = performance.now();
28+
const duration = end - start;
29+
30+
expect(duration).to.be.greaterThan(ms[0] + ms[1] + ms[2]);
31+
});
32+
33+
it("should overwrite the current mutex locked by another function because the function took to long to finish", async () => {
34+
const mutexLifeSpanMs = 500;
35+
const mutex = new MultiProcessMutex(mutexName, mutexLifeSpanMs);
36+
37+
const res: number[] = [];
38+
39+
await Promise.all([
40+
mutex.use(async () => {
41+
await waitMs(2000);
42+
res.push(1);
43+
}),
44+
new Promise((resolve) =>
45+
setTimeout(async () => {
46+
await mutex.use(async () => {
47+
await waitMs(200);
48+
res.push(2);
49+
});
50+
resolve(true);
51+
}, 200)
52+
),
53+
]);
54+
55+
expect(res).to.deep.equal([2, 1]);
56+
});
57+
58+
it("should get the mutex lock because the first function to own it failed", async () => {
59+
const mutexLifeSpanMs = 20000; // The mutex should be released and not hit timeout because the first function failed
60+
const mutex = new MultiProcessMutex(mutexName, mutexLifeSpanMs);
61+
62+
const start = performance.now();
63+
64+
const res: number[] = [];
65+
let errThrown = false;
66+
67+
await Promise.all([
68+
mutex
69+
.use(async () => {
70+
throw new Error("Expected error");
71+
})
72+
.catch(() => {
73+
errThrown = true;
74+
}),
75+
new Promise((resolve) =>
76+
setTimeout(async () => {
77+
await mutex.use(async () => {
78+
res.push(2);
79+
});
80+
resolve(true);
81+
}, 200)
82+
),
83+
]);
84+
85+
const end = performance.now();
86+
const duration = end - start;
87+
88+
assert.isTrue(errThrown);
89+
expect(res).to.deep.equal([2]);
90+
// Since the first function that obtained the mutex failed, the function waiting for it will acquire it instantly.
91+
// Therefore, the mutex timeout will not be reached, and the test should complete in less time than the mutex timeout.
92+
expect(duration).to.be.lessThan(1000);
93+
});
94+
});
95+
96+
async function waitMs(timeout: number) {
97+
return new Promise((resolve) => setTimeout(resolve, timeout));
98+
}

0 commit comments

Comments
 (0)