Skip to content

Commit 7c8ed6b

Browse files
xdje42 and dmah42 authored
[FR] Add API to provide custom profilers #1807 (#1809)
This API is akin to the MemoryManager API and lets tools provide their own profiler, which is wrapped in the same way MemoryManager is wrapped. Namely, the profiler provides start/stop hooks (`AfterSetupStart` and `BeforeTeardownStop`) that are called at the start and end of running the benchmark in a separate pass. Co-authored-by: dominic <[email protected]>
1 parent d2cd246 commit 7c8ed6b

8 files changed

+134
-16
lines changed

CONTRIBUTORS

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Dominic Hamon <[email protected]> <[email protected]>
4242
Dominik Czarnota <[email protected]>
4343
Dominik Korman <[email protected]>
4444
Donald Aingworth <[email protected]>
45+
Doug Evans <[email protected]>
4546
Eric Backus <[email protected]>
4647
Eric Fiselier <[email protected]>
4748
Eugene Zhuk <[email protected]>

docs/user_guide.md

+15
Original file line numberDiff line numberDiff line change
@@ -1139,6 +1139,21 @@ a report on the number of allocations, bytes used, etc.
11391139
This data will then be reported alongside other performance data, currently
11401140
only when using JSON output.
11411141
1142+
<a name="profiling" />
1143+
1144+
## Profiling
1145+
1146+
It's often useful to also profile benchmarks in particular ways, in addition to
1147+
CPU performance. For this reason, benchmark offers the `RegisterProfilerManager`
1148+
function that allows a custom `ProfilerManager` to be injected.
1149+
1150+
If set, the `ProfilerManager::AfterSetupStart` and
1151+
`ProfilerManager::BeforeTeardownStop` methods will be called at the start and
1152+
end of a separate benchmark run to allow user code to collect and report
1153+
user-provided profile metrics.
1154+
1155+
Output collected from this profiling run is not included in the benchmark's own reports; user code must report it separately.
1156+
11421157
<a name="using-register-benchmark" />
11431158
11441159
## Using RegisterBenchmark(name, fn, args...)

include/benchmark/benchmark.h

+20
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,26 @@ class MemoryManager {
416416
BENCHMARK_EXPORT
417417
void RegisterMemoryManager(MemoryManager* memory_manager);
418418

419+
// If a ProfilerManager is registered (via RegisterProfilerManager()), the
420+
// benchmark will be run an additional time under the profiler to collect and
421+
// report profile metrics for the run of the benchmark.
422+
class ProfilerManager {
423+
public:
424+
virtual ~ProfilerManager() {}
425+
426+
// This is called after `Setup()` code and right before the benchmark is run.
427+
virtual void AfterSetupStart() = 0;
428+
429+
// This is called before `Teardown()` code and right after the benchmark
430+
// completes.
431+
virtual void BeforeTeardownStop() = 0;
432+
};
433+
434+
// Register a ProfilerManager instance that will be used to collect and report
435+
// profile measurements for benchmark runs.
436+
BENCHMARK_EXPORT
437+
void RegisterProfilerManager(ProfilerManager* profiler_manager);
438+
419439
// Add a key-value pair to output as part of the context stanza in the report.
420440
BENCHMARK_EXPORT
421441
void AddCustomContext(const std::string& key, const std::string& value);

src/benchmark.cc

+4
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,10 @@ void RegisterMemoryManager(MemoryManager* manager) {
656656
internal::memory_manager = manager;
657657
}
658658

659+
void RegisterProfilerManager(ProfilerManager* manager) {
660+
internal::profiler_manager = manager;
661+
}
662+
659663
void AddCustomContext(const std::string& key, const std::string& value) {
660664
if (internal::global_context == nullptr) {
661665
internal::global_context = new std::map<std::string, std::string>();

src/benchmark_runner.cc

+43-16
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ namespace internal {
6262

6363
MemoryManager* memory_manager = nullptr;
6464

65+
ProfilerManager* profiler_manager = nullptr;
66+
6567
namespace {
6668

6769
static constexpr IterationCount kMaxIterations = 1000000000000;
@@ -401,6 +403,41 @@ void BenchmarkRunner::RunWarmUp() {
401403
}
402404
}
403405

406+
MemoryManager::Result* BenchmarkRunner::RunMemoryManager(
407+
IterationCount memory_iterations) {
408+
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
409+
// optional so we don't have to own the Result here.
410+
// Can't do it now due to cxx03.
411+
memory_results.push_back(MemoryManager::Result());
412+
MemoryManager::Result* memory_result = &memory_results.back();
413+
memory_manager->Start();
414+
std::unique_ptr<internal::ThreadManager> manager;
415+
manager.reset(new internal::ThreadManager(1));
416+
b.Setup();
417+
RunInThread(&b, memory_iterations, 0, manager.get(),
418+
perf_counters_measurement_ptr);
419+
manager->WaitForAllThreads();
420+
manager.reset();
421+
b.Teardown();
422+
memory_manager->Stop(*memory_result);
423+
return memory_result;
424+
}
425+
426+
void BenchmarkRunner::RunProfilerManager() {
427+
// TODO: Provide a way to specify the number of iterations.
428+
IterationCount profile_iterations = 1;
429+
std::unique_ptr<internal::ThreadManager> manager;
430+
manager.reset(new internal::ThreadManager(1));
431+
b.Setup();
432+
profiler_manager->AfterSetupStart();
433+
RunInThread(&b, profile_iterations, 0, manager.get(),
434+
/*perf_counters_measurement_ptr=*/nullptr);
435+
manager->WaitForAllThreads();
436+
profiler_manager->BeforeTeardownStop();
437+
manager.reset();
438+
b.Teardown();
439+
}
440+
404441
void BenchmarkRunner::DoOneRepetition() {
405442
assert(HasRepeatsRemaining() && "Already done all repetitions?");
406443

@@ -445,28 +482,18 @@ void BenchmarkRunner::DoOneRepetition() {
445482
"then we should have accepted the current iteration run.");
446483
}
447484

448-
// Oh, one last thing, we need to also produce the 'memory measurements'..
485+
// Produce memory measurements if requested.
449486
MemoryManager::Result* memory_result = nullptr;
450487
IterationCount memory_iterations = 0;
451488
if (memory_manager != nullptr) {
452-
// TODO(vyng): Consider making BenchmarkReporter::Run::memory_result an
453-
// optional so we don't have to own the Result here.
454-
// Can't do it now due to cxx03.
455-
memory_results.push_back(MemoryManager::Result());
456-
memory_result = &memory_results.back();
457489
// Only run a few iterations to reduce the impact of one-time
458490
// allocations in benchmarks that are not properly managed.
459491
memory_iterations = std::min<IterationCount>(16, iters);
460-
memory_manager->Start();
461-
std::unique_ptr<internal::ThreadManager> manager;
462-
manager.reset(new internal::ThreadManager(1));
463-
b.Setup();
464-
RunInThread(&b, memory_iterations, 0, manager.get(),
465-
perf_counters_measurement_ptr);
466-
manager->WaitForAllThreads();
467-
manager.reset();
468-
b.Teardown();
469-
memory_manager->Stop(*memory_result);
492+
memory_result = RunMemoryManager(memory_iterations);
493+
}
494+
495+
if (profiler_manager != nullptr) {
496+
RunProfilerManager();
470497
}
471498

472499
// Ok, now actually report.

src/benchmark_runner.h

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ BM_DECLARE_string(benchmark_perf_counters);
3535
namespace internal {
3636

3737
extern MemoryManager* memory_manager;
38+
extern ProfilerManager* profiler_manager;
3839

3940
struct RunResults {
4041
std::vector<BenchmarkReporter::Run> non_aggregates;
@@ -113,6 +114,10 @@ class BenchmarkRunner {
113114
};
114115
IterationResults DoNIterations();
115116

117+
MemoryManager::Result* RunMemoryManager(IterationCount memory_iterations);
118+
119+
void RunProfilerManager();
120+
116121
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
117122

118123
bool ShouldReportIterationResults(const IterationResults& i) const;

test/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ benchmark_add_test(NAME user_counters_thousands_test COMMAND user_counters_thous
192192
compile_output_test(memory_manager_test)
193193
benchmark_add_test(NAME memory_manager_test COMMAND memory_manager_test --benchmark_min_time=0.01s)
194194

195+
compile_output_test(profiler_manager_test)
196+
benchmark_add_test(NAME profiler_manager_test COMMAND profiler_manager_test --benchmark_min_time=0.01s)
197+
195198
# MSVC does not allow to set the language standard to C++98/03.
196199
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
197200
compile_benchmark_test(cxx03_test)

test/profiler_manager_test.cc

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
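// FIXME: WIP - only checks the normal output with a no-op ProfilerManager registered; hook invocation is not verified.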
2+
3+
#include <memory>
4+
5+
#include "benchmark/benchmark.h"
6+
#include "output_test.h"
7+
8+
class TestProfilerManager : public benchmark::ProfilerManager {
9+
void AfterSetupStart() override {}
10+
void BeforeTeardownStop() override {}
11+
};
12+
13+
void BM_empty(benchmark::State& state) {
14+
for (auto _ : state) {
15+
auto iterations = state.iterations();
16+
benchmark::DoNotOptimize(iterations);
17+
}
18+
}
19+
BENCHMARK(BM_empty);
20+
21+
ADD_CASES(TC_ConsoleOut, {{"^BM_empty %console_report$"}});
22+
ADD_CASES(TC_JSONOut, {{"\"name\": \"BM_empty\",$"},
23+
{"\"family_index\": 0,$", MR_Next},
24+
{"\"per_family_instance_index\": 0,$", MR_Next},
25+
{"\"run_name\": \"BM_empty\",$", MR_Next},
26+
{"\"run_type\": \"iteration\",$", MR_Next},
27+
{"\"repetitions\": 1,$", MR_Next},
28+
{"\"repetition_index\": 0,$", MR_Next},
29+
{"\"threads\": 1,$", MR_Next},
30+
{"\"iterations\": %int,$", MR_Next},
31+
{"\"real_time\": %float,$", MR_Next},
32+
{"\"cpu_time\": %float,$", MR_Next},
33+
{"\"time_unit\": \"ns\"$", MR_Next},
34+
{"}", MR_Next}});
35+
ADD_CASES(TC_CSVOut, {{"^\"BM_empty\",%csv_report$"}});
36+
37+
int main(int argc, char* argv[]) {
38+
std::unique_ptr<benchmark::ProfilerManager> pm(new TestProfilerManager());
39+
40+
benchmark::RegisterProfilerManager(pm.get());
41+
RunOutputTests(argc, argv);
42+
benchmark::RegisterProfilerManager(nullptr);
43+
}

0 commit comments

Comments
 (0)