Skip to content

Commit 2929a36

Browse files
authored
More stats and MPI rank for rocm profiles (#635)
* Add min/max/avg time/instance in rocm-activity-profile
* Add mpi.rank to rocprofiler activity records
1 parent 1bfdfb2 commit 2929a36

File tree

2 files changed

+37
-12
lines changed

2 files changed

+37
-12
lines changed

src/caliper/controllers/ROCmActivityProfileController.cpp

+21-10
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,19 @@ class RocmActivityProfileController : public cali::ChannelController
4141
output.append(ext);
4242
}
4343

44+
const char* local_let =
45+
"act_count=first(sum#count,count) if rocm.activity"
46+
",dmin=scale(min#rocm.activity.duration,1e-9)"
47+
",davg=scale(avg#rocm.activity.duration,1e-9)"
48+
",dmax=scale(max#rocm.activity.duration,1e-9)";
49+
const char* local_select =
50+
"*,scale(sum#time.duration.ns,1e-9) as time"
51+
",scale(sum#rocm.activity.duration,1e-9) as \"time (gpu)\""
52+
",min(dmin) as \"min time/inst\""
53+
",avg(davg) as \"avg time/inst\""
54+
",max(dmax) as \"max time/inst\""
55+
",sum(act_count) as count";
56+
4457
auto avail_services = services::get_available_services();
4558
bool have_mpi = std::find(avail_services.begin(), avail_services.end(), "mpireport") != avail_services.end();
4659
bool have_adiak =
@@ -63,22 +76,20 @@ class RocmActivityProfileController : public cali::ChannelController
6376
config()["CALI_MPIREPORT_WRITE_ON_FINALIZE"] = "false";
6477
config()["CALI_MPIREPORT_CONFIG"] = opts.build_query(
6578
"local",
66-
{ { "select",
67-
"*,scale(sum#rocm.activity.duration,1e-9) as \"time (gpu)\" unit sec"
68-
" ,scale(sum#time.duration.ns,1e-9) as \"time\" unit sec" },
69-
{ "group by", "path,rocm.kernel.name,rocm.activity.kind,mpi.rank" },
70-
{ "format", format } }
79+
{ { "let", local_let },
80+
{ "select", local_select },
81+
{ "group by", "path,rocm.kernel.name,rocm.activity.kind,mpi.rank" },
82+
{ "format", format } }
7183
);
7284
} else {
7385
config()["CALI_SERVICES_ENABLE"].append(",report");
7486
config()["CALI_REPORT_FILENAME"] = output;
7587
config()["CALI_REPORT_CONFIG"] = opts.build_query(
7688
"local",
77-
{ { "select",
78-
"*,scale(sum#rocm.activity.duration,1e-9) as \"time (gpu)\" unit sec"
79-
" ,scale(sum#time.duration.ns,1e-9) as \"time\" unit sec" },
80-
{ "group by", "path,rocm.kernel.name,rocm.activity.kind" },
81-
{ "format", format } }
89+
{ { "let", local_let },
90+
{ "select", local_select },
91+
{ "group by", "path,rocm.kernel.name,rocm.activity.kind" },
92+
{ "format", format } }
8293
);
8394
}
8495

src/services/rocprofiler/RocProfiler.cpp

+16-2
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,14 @@ class RocProfilerService
180180
Caliper c;
181181
c.begin(s_instance->m_flush_region_attr, Variant("ROCPROFILER FLUSH"));
182182

183+
Entry mpi_rank_entry;
184+
185+
{
186+
Attribute mpi_rank_attr = c.get_attribute("mpi.rank");
187+
if (mpi_rank_attr)
188+
mpi_rank_entry = c.get(mpi_rank_attr);
189+
}
190+
183191
for (size_t i = 0; i < num_headers; ++i) {
184192
auto* header = headers[i];
185193

@@ -216,8 +224,11 @@ class RocProfilerService
216224

217225
cali::Node* correlation = static_cast<cali::Node*>(record->correlation_id.external.ptr);
218226

219-
FixedSizeSnapshotRecord<6> snapshot;
227+
FixedSizeSnapshotRecord<8> snapshot;
220228
c.make_record(6, attr, data, snapshot.builder(), correlation);
229+
if (!mpi_rank_entry.empty())
230+
snapshot.builder().append(mpi_rank_entry);
231+
221232
s_instance->m_channel.events()
222233
.process_snapshot(&c, &s_instance->m_channel, SnapshotView(), snapshot.view());
223234

@@ -253,8 +264,11 @@ class RocProfilerService
253264

254265
cali::Node* correlation = static_cast<cali::Node*>(record->correlation_id.external.ptr);
255266

256-
FixedSizeSnapshotRecord<6> snapshot;
267+
FixedSizeSnapshotRecord<8> snapshot;
257268
c.make_record(6, attr, data, snapshot.builder(), correlation);
269+
if (!mpi_rank_entry.empty())
270+
snapshot.builder().append(mpi_rank_entry);
271+
258272
s_instance->m_channel.events()
259273
.process_snapshot(&c, &s_instance->m_channel, SnapshotView(), snapshot.view());
260274

0 commit comments

Comments
 (0)