Skip to content

Commit d8ec4d1

Browse files
authored
ci: calibrate performance alert threshold (#5070)
1 parent 44b0bd2 commit d8ec4d1

39 files changed

+2918
-1550
lines changed

crates/tools/js/benchmark/index.js

+74-30
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ const {
1515

1616
const SCENARIOS_DIR = "../../scenarios/";
1717
const SCENARIO_SNAPSHOT_NAME = "snapshot.json";
18+
const OZ_MAX_MIN_FAILURES = 1.05;
1819

1920
async function main() {
2021
const parser = new ArgumentParser({
@@ -89,11 +90,27 @@ async function verify(benchmarkResultPath) {
8990
));
9091

9192
for (let scenarioName in snapshotResult) {
92-
// Snapshot testing is unreliable for these scenarios
93-
if (
94-
scenarioName.includes("openzeppelin") ||
95-
scenarioName.includes("neptune-mutual")
96-
) {
93+
// TODO https://github.com/NomicFoundation/edr/issues/365
94+
if (scenarioName.includes("neptune-mutual")) {
95+
continue;
96+
}
97+
98+
// TODO https://github.com/NomicFoundation/edr/issues/365
99+
if (scenarioName.includes("openzeppelin")) {
100+
const snapshotCount = snapshotResult[scenarioName].failures.length;
101+
const actualCount = benchmarkResult[scenarioName].failures.length;
102+
const ratio =
103+
Math.max(snapshotCount, actualCount) /
104+
Math.min(snapshotCount, actualCount);
105+
106+
if (ratio > OZ_MAX_MIN_FAILURES) {
107+
console.error(
108+
`Snapshot failure for ${scenarioName} with max/min failure ratio`,
109+
ratio
110+
);
111+
success = false;
112+
}
113+
97114
continue;
98115
}
99116

@@ -143,30 +160,35 @@ async function benchmarkAllScenarios(outPath) {
143160
let totalTime = 0;
144161
let totalFailures = 0;
145162
for (let scenarioFileName of getScenarioFileNames()) {
146-
// Run in subprocess with grep to simulate Hardhat test runner behaviour
147-
// where there is one provider per process
148-
const processResult = child_process.spawnSync(
149-
process.argv[0],
150-
[
151-
"--noconcurrent_sweeping",
152-
"--noconcurrent_recompilation",
153-
"--max-old-space-size=28000",
154-
"index.js",
155-
"benchmark",
156-
"-g",
157-
scenarioFileName,
158-
],
159-
{
160-
shell: true,
161-
timeout: 60 * 60 * 1000,
162-
// Pipe stdout, proxy the rest
163-
stdio: [process.stdin, "pipe", process.stderr],
164-
encoding: "utf-8",
165-
}
166-
);
167-
168163
try {
169-
const scenarioResult = JSON.parse(processResult.stdout);
164+
const scenarioResults = [];
165+
const iterations = numIterations(scenarioFileName);
166+
for (let i = 0; i < iterations; i++) {
167+
// Run in subprocess with grep to simulate Hardhat test runner behaviour
168+
// where there is one provider per process
169+
const processResult = child_process.spawnSync(
170+
process.argv[0],
171+
[
172+
"--noconcurrent_sweeping",
173+
"--noconcurrent_recompilation",
174+
"--max-old-space-size=28000",
175+
"index.js",
176+
"benchmark",
177+
"-g",
178+
scenarioFileName,
179+
],
180+
{
181+
shell: true,
182+
timeout: 60 * 60 * 1000,
183+
// Pipe stdout, proxy the rest
184+
stdio: [process.stdin, "pipe", process.stderr],
185+
encoding: "utf-8",
186+
}
187+
);
188+
const scenarioResult = JSON.parse(processResult.stdout);
189+
scenarioResults.push(scenarioResult);
190+
}
191+
const scenarioResult = medianOfResults(scenarioResults);
170192
totalTime += scenarioResult.result.timeMs;
171193
totalFailures += scenarioResult.result.failures.length;
172194
result[scenarioResult.name] = scenarioResult.result;
@@ -189,8 +211,28 @@ async function benchmarkAllScenarios(outPath) {
189211
console.error(`Benchmark results written to ${outPath}`);
190212
}
191213

214+
/**
 * Decides how many times a scenario should be benchmarked.
 *
 * The choice is made by substring match on the given name: names containing
 * "safe-contracts" get 15 runs, names containing "seaport" get 5 runs, and
 * everything else gets a single run.
 *
 * @param {string} scenarioName - scenario name (the caller in this file passes
 *   the scenario file name)
 * @returns {number} number of iterations to run: 15, 5 or 1
 */
function numIterations(scenarioName) {
215+
// Run fast scenarios repeatedly to get more reliable results
216+
if (scenarioName.includes("safe-contracts")) {
217+
return 15;
218+
} else if (scenarioName.includes("seaport")) {
219+
return 5;
220+
} else {
221+
return 1;
222+
}
223+
}
224+
225+
/**
 * Picks the run with the median `result.timeMs` from a list of benchmark runs.
 *
 * One of the input elements is returned as-is (no averaging). For an even
 * number of runs, the upper of the two middle elements is chosen (index
 * `Math.floor(length / 2)` of the runs ordered by ascending `timeMs`).
 * The input array is not modified.
 *
 * @param {Array<{result: {timeMs: number}}>} results - benchmark runs to pick from
 * @returns {{result: {timeMs: number}}} the run whose `timeMs` is the median
 * @throws {Error} if `results` is empty
 */
function medianOfResults(results) {
  if (results.length === 0) {
    throw new Error("No results to calculate median");
  }
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array as a side effect.
  const sorted = [...results].sort((a, b) => a.result.timeMs - b.result.timeMs);
  const middle = Math.floor(sorted.length / 2);
  return sorted[middle];
}
233+
192234
async function benchmarkScenario(scenarioFileName) {
193-
const { config, requests } = await loadScenario(scenarioFileName);
235+
let { config, requests } = await loadScenario(scenarioFileName);
194236
const name = path.basename(scenarioFileName).split(".")[0];
195237
console.error(`Running ${name} scenario`);
196238

@@ -202,12 +244,14 @@ async function benchmarkScenario(scenarioFileName) {
202244

203245
const failures = [];
204246
const rpcCallResults = [];
247+
const rpcCallErrors = [];
205248

206249
for (let i = 0; i < requests.length; i += 1) {
207250
try {
208251
const result = await provider.request(requests[i]);
209252
rpcCallResults.push(result);
210253
} catch (e) {
254+
rpcCallErrors.push(e);
211255
failures.push(i);
212256
}
213257
}
@@ -230,7 +274,7 @@ async function benchmarkScenario(scenarioFileName) {
230274
console.log(JSON.stringify(result));
231275

232276
// Return this to avoid gc
233-
return rpcCallResults;
277+
return { rpcCallResults, rpcCallErrors };
234278
}
235279

236280
async function loadScenario(scenarioFileName) {

crates/tools/python/benchmark-variance.ipynb

+273-19
Large diffs are not rendered by default.

crates/tools/python/ejs-baseline.ipynb

+364
Large diffs are not rendered by default.

crates/tools/python/ejs-benchmark/1.json

+1
Large diffs are not rendered by default.

crates/tools/python/ejs-benchmark/2.json

+1
Large diffs are not rendered by default.

crates/tools/python/ejs-benchmark/3.json

+1
Large diffs are not rendered by default.

crates/tools/python/ejs-benchmark/4.json

+1
Large diffs are not rendered by default.

crates/tools/python/ejs-benchmark/5.json

+1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/1.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/10.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/11.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/12.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/13.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/14.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/15.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/16.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/17.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/18.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/19.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/2.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/20.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/21.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/22.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/23.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/24.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/25.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/26.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/27.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/28.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/29.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/3.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/30.json

-1
This file was deleted.

crates/tools/python/js-benchmark-variance/4.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/5.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/6.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/7.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/8.json

+1-1
Large diffs are not rendered by default.

crates/tools/python/js-benchmark-variance/9.json

+1-1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)