@@ -173,6 +173,14 @@ class provider_allocator : public allocator_interface {
173
173
return argPos;
174
174
}
175
175
176
+ void preBench (::benchmark::State &state) override {
177
+ provider.preBench (state);
178
+ }
179
+
180
+ void postBench (::benchmark::State &state) override {
181
+ provider.postBench (state);
182
+ }
183
+
176
184
// Teardown hook: delegates to the wrapped provider's TearDown().
void TearDown(::benchmark::State &state) override { provider.TearDown(state); }
@@ -204,13 +212,18 @@ template <typename Pool> class pool_allocator : public allocator_interface {
204
212
return argPos;
205
213
}
206
214
215
+ void preBench (::benchmark::State &state) override { pool.preBench (state); }
216
+ void postBench (::benchmark::State &state) override {
217
+ pool.postBench (state);
218
+ }
219
+
207
220
// Teardown hook: delegates to the wrapped pool's TearDown().
void TearDown(::benchmark::State &state) override {
    pool.TearDown(state);
}
208
221
209
- virtual void *benchAlloc (size_t size) override {
222
+ void *benchAlloc (size_t size) override {
210
223
return umfPoolMalloc (pool.pool , size);
211
224
}
212
225
213
- virtual void benchFree (void *ptr, [[maybe_unused]] size_t size) override {
226
+ void benchFree (void *ptr, [[maybe_unused]] size_t size) override {
214
227
umfPoolFree (pool.pool , ptr);
215
228
}
216
229
@@ -241,7 +254,7 @@ struct benchmark_interface : public benchmark::Fixture {
241
254
allocator.TearDown (state);
242
255
}
243
256
244
- virtual void bench (::benchmark::State &state) = 0 ;
257
+ void bench ([[maybe_unused]] ::benchmark::State &state){} ;
245
258
246
259
virtual std::vector<std::string> argsName () {
247
260
auto s = Size::argsName ();
@@ -260,6 +273,9 @@ struct benchmark_interface : public benchmark::Fixture {
260
273
benchmark->ArgNames (bench->argsName ())->Name (bench->name ());
261
274
}
262
275
276
+ void custom_counters (::benchmark::State &state) {
277
+ allocator.custom_counters (state);
278
+ }
263
279
std::vector<Size > alloc_sizes;
264
280
Allocator allocator;
265
281
};
@@ -282,7 +298,8 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
282
298
283
299
vector2d<alloc_data> allocations;
284
300
std::vector<unsigned > iters;
285
-
301
+ std::vector<size_t > memused;
302
+ std::vector<size_t > peakmemory;
286
303
vector2d<next_alloc_data> next;
287
304
std::vector<std::vector<next_alloc_data>::const_iterator> next_iter;
288
305
int64_t iterations;
@@ -302,6 +319,12 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
302
319
allocations.resize (state.threads ());
303
320
next.resize (state.threads ());
304
321
next_iter.resize (state.threads ());
322
+ memused.resize (state.threads ());
323
+ peakmemory.resize (state.threads ());
324
+ for (int i = 0 ; i < state.threads (); i++) {
325
+ memused[i] = 0 ;
326
+ peakmemory[i] = 0 ;
327
+ }
305
328
306
329
#ifndef WIN32
307
330
// Ensure that system malloc does not have memory pooled on the heap
@@ -323,13 +346,49 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
323
346
waitForAllThreads (state);
324
347
// prepare workload for actual benchmark.
325
348
freeAllocs (state);
349
+ // reset peak memory before real benchmark
350
+ for (auto &i : peakmemory) {
351
+ i = 0 ;
352
+ }
353
+
326
354
prealloc (state);
327
355
prepareWorkload (state);
356
+ waitForAllThreads (state);
357
+ base::allocator.preBench (state);
328
358
}
329
359
330
360
void TearDown (::benchmark::State &state) override {
361
+ base::allocator.postBench (state);
331
362
auto tid = state.thread_index ();
363
+ if (tid == 0 ) {
364
+ size_t current_memory_allocated = 0 ;
365
+ for (int i = 0 ; i < state.threads (); i++) {
366
+ current_memory_allocated += memused[i];
367
+ }
368
+ size_t peak_memory_allocated =
369
+ *std::max_element (peakmemory.begin (), peakmemory.end ());
370
+
371
+ size_t peak_memory_used = state.counters [" peak_memory_use" ];
372
+ size_t current_memory_used = state.counters [" current_memory_use" ];
373
+
374
+ if (peak_memory_used != 0 ) {
375
+ state.counters [" peak_memory_fragmentation" ] =
376
+ 100.0 * (peak_memory_used - peak_memory_allocated) /
377
+ peak_memory_used;
378
+ }
379
+
380
+ if (current_memory_used != 0 ) {
381
+ state.counters [" current_memory_fragmentation" ] =
382
+ 100.0 * (current_memory_used - current_memory_allocated) /
383
+ current_memory_used;
384
+ }
385
+
386
+ state.counters [" current_memory_allocated" ] =
387
+ current_memory_allocated;
388
+ state.counters [" peak_memory_allocated" ] = peak_memory_allocated;
389
+ }
332
390
391
+ waitForAllThreads (state);
333
392
freeAllocs (state);
334
393
waitForAllThreads (state);
335
394
if (tid == 0 ) {
@@ -342,20 +401,24 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
342
401
base::TearDown (state);
343
402
}
344
403
345
- void bench (benchmark::State &state) override {
404
+ void bench (benchmark::State &state) {
346
405
auto tid = state.thread_index ();
347
406
auto &allocation = allocations[tid];
407
+ auto &memuse = memused[tid];
408
+ auto &peak = peakmemory[tid];
348
409
for (int i = 0 ; i < allocsPerIterations; i++) {
349
410
auto &n = *next_iter[tid]++;
350
411
auto &alloc = allocation[n.offset ];
351
412
base::allocator.benchFree (alloc.ptr , alloc.size );
352
-
413
+ memuse -= alloc. size ;
353
414
alloc.size = n.size ;
354
415
alloc.ptr = base::allocator.benchAlloc (alloc.size );
355
416
356
417
if (alloc.ptr == NULL ) {
357
418
state.SkipWithError (" allocation failed" );
358
419
}
420
+ memuse += alloc.size ;
421
+ peak = std::max (peak, memuse);
359
422
}
360
423
}
361
424
@@ -376,7 +439,10 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
376
439
auto tid = state.thread_index ();
377
440
auto &i = allocations[tid];
378
441
i.resize (max_allocs);
442
+ auto &memuse = memused[tid];
443
+ auto &peak = peakmemory[tid];
379
444
auto sizeGenerator = base::alloc_sizes[tid];
445
+
380
446
for (size_t j = 0 ; j < max_allocs; j++) {
381
447
auto size = sizeGenerator.nextSize ();
382
448
i[j].ptr = base::allocator.benchAlloc (size);
@@ -385,6 +451,8 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
385
451
return ;
386
452
}
387
453
i[j].size = size;
454
+ memuse += size;
455
+ peak = std::max (peak, memuse);
388
456
}
389
457
}
390
458
@@ -394,6 +462,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface<Size, Alloc> {
394
462
for (auto &j : i) {
395
463
if (j.ptr != NULL ) {
396
464
base::allocator.benchFree (j.ptr , j.size );
465
+ memused[tid] -= j.size ;
397
466
j.ptr = NULL ;
398
467
j.size = 0 ;
399
468
}
0 commit comments