# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Optional benchmarks-do-not-regress test"""

import contextlib
import logging
import platform
import re
import shutil
from pathlib import Path
from typing import Callable, List

import pytest

from framework import utils
from framework.ab_test import binary_ab_test, git_clone_ab_dirs
from host_tools.cargo_build import cargo

LOGGER = logging.getLogger(__name__)
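
# Class-scoped fixture: the A/B clone directories are created once for the
# whole TestBenchMarks class instead of once per parametrized benchmark name.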
git_clone_ab_dirs_one_time = pytest.fixture(git_clone_ab_dirs, scope="class")


def get_benchmark_names() -> List[str]:
    """
    Get a list of benchmark test names
    """
    _, stdout, _ = cargo(
        "bench",
        f"--workspace --quiet --target {platform.machine()}-unknown-linux-musl",
        "--list",
    )

    # Format a string like `page_fault #2: benchmark` to a string like `page_fault`.
    benchmark_names = [
        re.sub(r"\s#([0-9]*)", "", i.split(":")[0])
        for i in stdout.split("\n")
        if i.endswith(": benchmark")
    ]

    return list(set(benchmark_names))


class TestBenchMarks:
    """
    This class is used to prevent fixtures from being executed for each parameter
    in a parametrized test.
    """
    @pytest.mark.no_block_pr
    @pytest.mark.parametrize("benchname", get_benchmark_names())
    def test_no_regression_relative_to_target_branch(
        self, benchname, git_clone_ab_dirs_one_time
    ):
        """
        Run the microbenchmarks in this repository, comparing the results from
        the pull request's target branch against what is achieved on HEAD
        """
        dir_a = git_clone_ab_dirs_one_time[0]
        dir_b = git_clone_ab_dirs_one_time[1]
        run_criterion = get_run_criterion(benchname)
        compare_results = get_compare_results(benchname)

        binary_ab_test(
            test_runner=run_criterion,
            comparator=compare_results,
            a_directory=dir_a,
            b_directory=dir_b,
        )


def get_run_criterion(benchmark_name) -> Callable[[Path, bool], Path]:
    """
    Get a function that executes the specified benchmark, pinning it to one CPU
    """

    def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
        baseline_name = "a_baseline" if is_a else "b_baseline"
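
        # Pin the run to CPU 1 with taskset to reduce scheduling noise, and
        # have criterion store the measurements as a named baseline so the A
        # and B runs can be compared afterwards without re-running anything.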
        with contextlib.chdir(firecracker_checkout):
            utils.check_output(
                f"taskset -c 1 cargo bench --workspace --quiet -- {benchmark_name} --exact --save-baseline {baseline_name}"
            )
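
        # criterion writes its reports and saved baselines below
        # build/cargo_target/criterion/<benchmark_target>/<baseline_name>.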
        return firecracker_checkout / "build" / "cargo_target" / "criterion"

    return _run_criterion


def get_compare_results(benchmark_name) -> Callable[[Path, Path], None]:
    """
    Get a function that compares the two recorded criterion baselines for
    regressions, assuming that "A" is the baseline from main
    """

    def _compare_results(location_a_baselines: Path, location_b_baselines: Path):
        _, stdout, _ = cargo(
            "bench",
            f"--workspace --target {platform.machine()}-unknown-linux-musl --quiet",
            f"--exact {benchmark_name} --list",
        )

        # Format a string like `page_fault #2: benchmark` to a string like
        # `page_fault_2`, because criterion creates a directory of that name
        # under `cargo_target/criterion/` for each such target.
        bench_mark_targets = [
            re.sub(r"\s#(?P<sub_id>[0-9]*)", r"_\g<sub_id>", i.split(":")[0])
            for i in stdout.split("\n")
            if i.endswith(": benchmark")
        ]

        # If a benchmark test has multiple targets, the results of that single
        # benchmark test are output to multiple directories, for example
        # `page_fault` and `page_fault_2`, so we need to copy the "A" baseline
        # into each of those directories.
        for bench_mark_target in bench_mark_targets:
            shutil.copytree(
                location_a_baselines / bench_mark_target / "a_baseline",
                location_b_baselines / bench_mark_target / "a_baseline",
            )
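
        # `--load-baseline b_baseline` makes criterion reuse the measurements
        # recorded for HEAD instead of re-running the benchmark, and compare
        # them against `a_baseline`, the target branch's measurements.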
        _, stdout, _ = cargo(
            "bench",
            f"--workspace --target {platform.machine()}-unknown-linux-musl",
            f"{benchmark_name} --baseline a_baseline --load-baseline b_baseline",
        )

        regressions_only = "\n\n".join(
            result
            for result in stdout.split("\n\n")
            if "Performance has regressed." in result
        )

        # If this string is anywhere in stdout, then at least one of our
        # benchmarks is now performing worse with the PR changes.
        assert not regressions_only, "\n" + regressions_only

    return _compare_results