Skip to content

Commit 06e41ae

Browse files
committed
fix: address review comments, especially about setting up metric unit classes
1 parent 82b524f commit 06e41ae

File tree

5 files changed

+87
-9
lines changed

5 files changed

+87
-9
lines changed

aiperf/common/enums/metric_enums.py

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,19 +198,97 @@ class GenericMetricUnit(BaseMetricUnit):
198198
PERCENT = _unit("%")
199199

200200

201+
class PowerMetricUnitInfo(BaseMetricUnitInfo):
202+
"""Information about a power unit for metrics."""
203+
204+
long_name: str
205+
watts: float
206+
207+
def convert_to(self, other_unit: "MetricUnitT", value: int | float) -> float:
208+
"""Convert a value from this unit to another unit."""
209+
if not isinstance(other_unit, PowerMetricUnit | PowerMetricUnitInfo):
210+
return super().convert_to(other_unit, value)
211+
212+
return value * (self.watts / other_unit.watts)
213+
214+
201215
class PowerMetricUnit(BaseMetricUnit):
202216
"""Defines power units for metrics."""
203217

204-
WATT = _unit("W")
205-
MILLIWATT = _unit("mW")
218+
WATT = PowerMetricUnitInfo(
219+
tag="W",
220+
long_name="watts",
221+
watts=1.0,
222+
)
223+
MILLIWATT = PowerMetricUnitInfo(
224+
tag="mW",
225+
long_name="milliwatts",
226+
watts=0.001,
227+
)
228+
229+
@cached_property
230+
def info(self) -> PowerMetricUnitInfo:
231+
"""Get the info for the power unit."""
232+
return self._info # type: ignore
233+
234+
@cached_property
235+
def watts(self) -> float:
236+
"""The number of watts in the power unit."""
237+
return self.info.watts
238+
239+
@cached_property
240+
def long_name(self) -> str:
241+
"""The long name of the power unit."""
242+
return self.info.long_name
243+
244+
245+
class EnergyMetricUnitInfo(BaseMetricUnitInfo):
246+
"""Information about an energy unit for metrics."""
247+
248+
long_name: str
249+
joules: float
250+
251+
def convert_to(self, other_unit: "MetricUnitT", value: int | float) -> float:
252+
"""Convert a value from this unit to another unit."""
253+
if not isinstance(other_unit, EnergyMetricUnit | EnergyMetricUnitInfo):
254+
return super().convert_to(other_unit, value)
255+
256+
return value * (self.joules / other_unit.joules)
206257

207258

208259
class EnergyMetricUnit(BaseMetricUnit):
209260
"""Defines energy units for metrics."""
210261

211-
JOULE = _unit("J")
212-
MILLIJOULE = _unit("mJ")
213-
MEGAJOULE = _unit("MJ")
262+
JOULE = EnergyMetricUnitInfo(
263+
tag="J",
264+
long_name="joules",
265+
joules=1.0,
266+
)
267+
MILLIJOULE = EnergyMetricUnitInfo(
268+
tag="mJ",
269+
long_name="millijoules",
270+
joules=0.001,
271+
)
272+
MEGAJOULE = EnergyMetricUnitInfo(
273+
tag="MJ",
274+
long_name="megajoules",
275+
joules=1_000_000.0,
276+
)
277+
278+
@cached_property
279+
def info(self) -> EnergyMetricUnitInfo:
280+
"""Get the info for the energy unit."""
281+
return self._info # type: ignore
282+
283+
@cached_property
284+
def joules(self) -> float:
285+
"""The number of joules in the energy unit."""
286+
return self.info.joules
287+
288+
@cached_property
289+
def long_name(self) -> str:
290+
"""The long name of the energy unit."""
291+
return self.info.long_name
214292

215293

216294
class MetricDateTimeUnit(BaseMetricUnit):

aiperf/common/enums/service_enums.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class ServiceType(CaseInsensitiveStrEnum):
4646
WORKER = "worker"
4747
TELEMETRY_MANAGER = "telemetry_manager"
4848

49-
# [AIP-331] TODO: Remove when CLI argument is added to AI perf (–server-metrics-url) and telemetry_data_collector.py no longer requires local testing via main().
49+
# [AIP-331] TODO: @ilana-n Remove when CLI argument is added to AI perf (–server-metrics-url) and telemetry_data_collector.py no longer requires local testing via main().
5050
TELEMETRY_COLLECTOR = "telemetry_collector"
5151

5252
# For testing purposes only

aiperf/common/models/telemetry_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class TelemetryRecord(AIPerfBaseModel):
1818
"""
1919

2020
timestamp_ns: int = Field(
21-
description="Nanosecond timestamp when telemetry was collected (perf_counter_ns)"
21+
description="Nanosecond wall-clock timestamp when telemetry was collected (time_ns)"
2222
)
2323

2424
dcgm_url: str = Field(

aiperf/gpu_telemetry/telemetry_data_collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def _parse_metrics_to_records(self, metrics_data: str) -> list[TelemetryRecord]:
155155
self._logger.warning("Response from DCGM metrics endpoint is empty")
156156
return []
157157

158-
current_timestamp = time.perf_counter_ns()
158+
current_timestamp = time.time_ns()
159159
gpu_data = {}
160160
gpu_metadata = {}
161161

aiperf/gpu_telemetry/telemetry_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def __init__(
6262

6363
self._collectors: dict[str, TelemetryDataCollector] = {}
6464

65-
# TODO: Get from user_config when telemetry config is added
65+
# [AIP-280] TODO: @ilana-n Get from user_config when telemetry config is added to CLI argument
6666
self._dcgm_endpoints = [DEFAULT_DCGM_ENDPOINT]
6767
self._collection_interval = DEFAULT_COLLECTION_INTERVAL
6868

0 commit comments

Comments
 (0)