Skip to content

Commit 68978f3

Browse files
zonzha01 authored and meta-codesync[bot] committed
Chm: Maximize CPU utilization (#299)
Summary: The worker threads are in a starved state because the producer’s production speed cannot keep up with the consumer’s consumption speed. The goal of the CHM benchmark is to maximize CPU utilization while minimizing interference from producers. This patch introduces a new parameter, worker_loop_count, which sets the number of times a worker thread loops over CHM operations. By increasing the worker thread’s active time, CPU utilization can reach 100%. Before this change, CPU utilization was around 20%. Test results show that throughput has significantly improved on some platforms. Alternatively, we could increase the number of producer threads to keep consumers busy, but our primary goal is to measure the throughput of consumer threads without adding extra producer-side overhead.

Pull Request resolved: #299

Reviewed By: charles-typ

Differential Revision: D87083803

Pulled By: YifanYuan3

fbshipit-source-id: a6f9ecc8f5016469b9864c4915f29f5b2b2345f9
1 parent cbd654a commit 68978f3

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

benchpress/config/jobs_ai.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,15 @@
169169
- '--duration_seconds={duration_seconds}'
170170
- '--batch_size={batch_size}'
171171
- '--num_batch_threads={num_batch_threads}'
172+
- '--worker_loop_count={worker_loop_count}'
172173

173174
vars:
174175
- 'distribution_file=benchmarks/chm/model_a.dist'
175176
- 'num_threads=80'
176177
- 'duration_seconds=360'
177178
- 'batch_size=10000000'
178179
- 'num_batch_threads=4'
180+
- 'worker_loop_count=10'
179181

180182

181183
- benchmark: chm
@@ -187,13 +189,15 @@
187189
- '--duration_seconds={duration_seconds}'
188190
- '--batch_size={batch_size}'
189191
- '--num_batch_threads={num_batch_threads}'
192+
- '--worker_loop_count={worker_loop_count}'
190193

191194
vars:
192195
- 'distribution_file=benchmarks/chm/model_b.dist'
193196
- 'num_threads=80'
194197
- 'duration_seconds=360'
195198
- 'batch_size=10000000'
196199
- 'num_batch_threads=4'
200+
- 'worker_loop_count=10'
197201

198202

199203
- benchmark: deser

packages/ai_wdl/chm/ChmBenchmark.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ DEFINE_int32(num_batch_threads, 2, "Number of parallel batch threads");
4040
DEFINE_int32(duration_seconds, 10, "Benchmark duration in seconds");
4141
DEFINE_int32(initial_capacity, 0, "Initial hash map capacity hint");
4242
DEFINE_int32(batch_size, 1000, "Operations per batch");
43+
DEFINE_int32(worker_loop_count, 1, "The loop count worker do its job");
4344
DEFINE_int32(hit_ratio, 40, "Desired hit ratio (0-100)");
4445
DEFINE_bool(verbose, false, "Enable verbose output");
4546

@@ -307,6 +308,7 @@ class ChmBenchmark {
307308
int durationSeconds; // Benchmark duration
308309
int initialCapacity; // Hash map initial capacity hint
309310
int batchSize; // Operations per batch
311+
int worker_loop_count; // The loop count worker do its job
310312
int hitRatio; // Desired hit ratio (0-100)
311313
bool verbose; // Enable detailed output
312314
};
@@ -599,14 +601,16 @@ class ChmBenchmark {
599601
uint64_t localOps = 0;
600602
uint64_t localSuccessfulOps = 0;
601603

602-
// Process assigned range of operations
603-
for (int i = startIdx; i < endIdx; i++) {
604-
AdId key = preGeneratedKeys[i];
605-
auto result = map.getValue(key); // Core benchmark operation
604+
for (int loop = 0; loop < config_.worker_loop_count; loop++) {
605+
// Process assigned range of operations
606+
for (int i = startIdx; i < endIdx; i++) {
607+
AdId key = preGeneratedKeys[i];
608+
auto result = map.getValue(key); // Core benchmark operation
606609

607-
localOps++;
608-
if (result.second) { // Check if key was found
609-
localSuccessfulOps++;
610+
localOps++;
611+
if (result.second) { // Check if key was found
612+
localSuccessfulOps++;
613+
}
610614
}
611615
}
612616

@@ -654,6 +658,7 @@ int main(int argc, char* argv[]) {
654658
.durationSeconds = FLAGS_duration_seconds,
655659
.initialCapacity = FLAGS_initial_capacity,
656660
.batchSize = FLAGS_batch_size,
661+
.worker_loop_count = FLAGS_worker_loop_count,
657662
.hitRatio = FLAGS_hit_ratio,
658663
.verbose = FLAGS_verbose};
659664

0 commit comments

Comments
 (0)