-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
Copy pathgpu_object_lifetime_tracker.h
107 lines (91 loc) · 3.78 KB
/
gpu_object_lifetime_tracker.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#ifndef GPU_OBJECT_LIFETIME_H
#define GPU_OBJECT_LIFETIME_H
#include <array>
#include <stdio.h>
#include <string.h>
namespace Halide {
namespace Internal {
// Tracks creation and destruction of GPU-related objects by scanning lines of
// debug output for known marker strings. Feed each line to record_gpu_debug(),
// then call validate_gpu_object_lifetime() to check that everything that was
// created has also been destroyed.
class GpuObjectLifetimeTracker {
    // One tracked kind of object, identified by the text that marks its
    // creation and the text that marks its destruction.
    struct ObjectType {
        // Substring that marks creation of an object of this type.
        const char *const created;
        // Substring that marks destruction of an object of this type.
        const char *const destroyed;
        // Global objects may be left live when validation is asked to allow
        // it, and are subject to the max_globals limit.
        bool const is_global;

        // Number of creation markers seen so far.
        int total_created;
        // Creation markers seen minus destruction markers seen. This goes
        // negative if more destructions than creations were recorded.
        int live_count;

        ObjectType(const char *created, const char *destroyed, bool is_global = false)
            : created(created), destroyed(destroyed),
              is_global(is_global), total_created(0), live_count(0) {
        }
    };

    // Table of every object type this tracker recognizes.
    std::array<ObjectType, 22> object_types = {{
        {"Caching compiled kernel:", "Releasing cached compilation:"},

        // OpenCL objects
        {"clCreateContext", "clReleaseContext", true},
        {"clCreateCommandQueue", "clReleaseCommandQueue", true},
        // NOTE(review): there is no clCreateProgram*/clReleaseProgram entry in
        // this table, so OpenCL program objects are not tracked directly.
        // Presumably they are covered by the compiled-kernel entry at the top
        // of the table; confirm before adding one.
        {"clCreateBuffer", "clReleaseMemObject"},
        {"clCreateKernel", "clReleaseKernel"},

        // CUDA objects
        {"cuCtxCreate", "cuCtxDestroy", true},
        {"cuMemAlloc", "cuMemFree"},

        // Metal objects
        {"Allocating: MTLCreateSystemDefaultDevice", "Releasing: MTLCreateSystemDefaultDevice", true},
        {"Allocating: new_command_queue", "Releasing: new_command_queue"},

        // Hexagon objects
        {"halide_remote_load_library", "halide_remote_release_library"},
        {"ion_alloc", "ion_free"},

        // Vulkan objects
        {"vk_create_context", "vk_destroy_context", true},
        {"vk_create_command_pool", "vk_destroy_command_pool"},
        {"vk_create_command_buffer", "vk_destroy_command_buffer"},
        {"vk_create_pipeline_layout", "vk_destroy_pipeline_layout"},
        {"vk_create_compute_pipeline", "vk_destroy_compute_pipeline"},
        {"vk_create_descriptor_pool", "vk_destroy_descriptor_pool"},
        {"Vulkan: Reserved memory for device region", "Vulkan: Released memory for device region"},
        {"vkCreateBuffer: Created buffer for device region", "vkDestroyBuffer: Destroyed buffer for device region"},

        // WebGPU objects
        {"wgpuCreateInstance", "wgpuInstanceRelease", true},
        {"wgpuDeviceCreateBuffer", "wgpuBufferRelease"},
        {"wgpuDeviceCreateComputePipeline", "wgpuComputePipelineRelease"},
    }};

public:
    // Parse a line of output from gpu_debug and update object counts.
    //
    // Every object type is tested against the line. The markers are matched
    // as substrings anywhere in `str`; for a given type, a line that contains
    // the creation marker is counted as a creation only, even if it also
    // contains the destruction marker. A null `str` is ignored.
    void record_gpu_debug(const char *str) {
        if (str == nullptr) {
            // strstr() must not be called with a null haystack.
            return;
        }
        for (auto &o : object_types) {
            if (strstr(str, o.created)) {
                o.total_created++;
                o.live_count++;
            } else if (strstr(str, o.destroyed)) {
                o.live_count--;
            }
        }
    }

    // Check that there are no live objects remaining, and we created at least one object.
    //
    // allow_globals: when true, object types marked global may have a nonzero
    //                live count without causing a failure.
    // allow_none:    when true, it is not an error if no object of any type
    //                was ever created.
    // max_globals:   upper bound on the number of creations recorded for each
    //                individual global object type; enforced regardless of
    //                allow_globals.
    //
    // Returns 0 if all checks pass. Otherwise prints a message describing the
    // first failing check to stdout and returns 1.
    int validate_gpu_object_lifetime(bool allow_globals, bool allow_none, int max_globals) const {
        int total = 0;
        for (const auto &o : object_types) {
            if (o.live_count != 0 &&
                !(allow_globals && o.is_global)) {
                printf("Error! %d objects created by %s still live\n",
                       o.live_count, o.created);
                return 1;
            }
            if (o.is_global && o.total_created > max_globals) {
                printf("Error! %d global objects created by %s, max is %d\n",
                       o.total_created, o.created, max_globals);
                return 1;
            }

            total += o.total_created;
        }
        if (!allow_none && total == 0) {
            printf("Error! No objects created. Ensure gpu_debug is set, ");
            printf("and record_gpu_debug is called from halide_print.\n");
            return 1;
        }
        return 0;
    }
};
} // namespace Internal
} // namespace Halide
#endif