Skip to content

Commit e38ba61

Browse files
author
benoit-cty
committed
Add GPU load tracking
1 parent 1579bd7 commit e38ba61

File tree

6 files changed

+424
-10
lines changed

6 files changed

+424
-10
lines changed

codecarbon/emissions_tracker.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ def __init__(
311311
self._total_water: Water = Water.from_litres(litres=0)
312312
# CPU and RAM utilization tracking
313313
self._cpu_utilization_history: List[float] = []
314+
self._gpu_utilization_history: List[float] = []
314315
self._ram_utilization_history: List[float] = []
315316
self._ram_used_history: List[float] = []
316317
self._total_cpu_energy: Energy = Energy.from_energy(kWh=0)
@@ -460,6 +461,7 @@ def start(self) -> None:
460461
self._cpu_utilization_history.clear()
461462
self._ram_utilization_history.clear()
462463
self._ram_used_history.clear()
464+
self._gpu_utilization_history.clear()
463465

464466
# Read initial energy for hardware
465467
for hardware in self._hardware:
@@ -509,6 +511,7 @@ def start_task(self, task_name=None) -> None:
509511
self._cpu_utilization_history.clear()
510512
self._ram_utilization_history.clear()
511513
self._ram_used_history.clear()
514+
self._gpu_utilization_history.clear()
512515

513516
# Read initial energy for hardware
514517
for hardware in self._hardware:
@@ -749,9 +752,10 @@ def _prepare_emissions_data(self) -> EmissionsData:
749752
duration=duration.seconds,
750753
emissions=emissions, # kg
751754
emissions_rate=emissions / duration.seconds, # kg/s
752-
cpu_utilization_percent=sum(self._cpu_utilization_history) / len(self._cpu_utilization_history) if self._cpu_utilization_history else psutil.cpu_percent(),
753-
ram_utilization_percent=sum(self._ram_utilization_history) / len(self._ram_utilization_history) if self._ram_utilization_history else psutil.virtual_memory().percent,
754-
ram_used_gb=sum(self._ram_used_history) / len(self._ram_used_history) if self._ram_used_history else psutil.virtual_memory().used / (1024**3),
755+
cpu_utilization_percent=sum(self._cpu_utilization_history) / len(self._cpu_utilization_history) if self._cpu_utilization_history else 0,
756+
gpu_utilization_percent=sum(self._gpu_utilization_history) / len(self._gpu_utilization_history) if self._gpu_utilization_history else 0,
757+
ram_utilization_percent=sum(self._ram_utilization_history) / len(self._ram_utilization_history) if self._ram_utilization_history else 0,
758+
ram_used_gb=sum(self._ram_used_history) / len(self._ram_used_history) if self._ram_used_history else 0,
755759
cpu_power=self._cpu_power.W,
756760
gpu_power=self._gpu_power.W,
757761
ram_power=self._ram_power.W,
@@ -829,6 +833,14 @@ def _monitor_power(self) -> None:
829833
self._cpu_utilization_history.append(psutil.cpu_percent())
830834
self._ram_utilization_history.append(psutil.virtual_memory().percent)
831835
self._ram_used_history.append(psutil.virtual_memory().used / (1024**3))
836+
837+
# Collect GPU utilization metrics
838+
for hardware in self._hardware:
839+
if isinstance(hardware, GPU):
840+
gpu_details = hardware.devices.get_gpu_details()
841+
for gpu_detail in gpu_details:
842+
if 'gpu_utilization' in gpu_detail:
843+
self._gpu_utilization_history.append(gpu_detail['gpu_utilization'])
832844

833845
def _do_measurements(self) -> None:
834846
for hardware in self._hardware:

codecarbon/output_methods/emissions_data.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ class EmissionsData:
1616
duration: float
1717
emissions: float
1818
emissions_rate: float
19-
cpu_utilization_percent: float = 0
20-
ram_utilization_percent: float = 0
21-
ram_used_gb: float = 0
19+
cpu_utilization_percent: float
20+
gpu_utilization_percent: float
21+
ram_utilization_percent: float
22+
ram_used_gb: float
2223
cpu_power: float
2324
gpu_power: float
2425
ram_power: float
@@ -80,9 +81,10 @@ class TaskEmissionsData:
8081
duration: float
8182
emissions: float
8283
emissions_rate: float
83-
cpu_utilization_percent: float = 0
84-
ram_utilization_percent: float = 0
85-
ram_used_gb: float = 0
84+
cpu_utilization_percent: float
85+
gpu_utilization_percent: float
86+
ram_utilization_percent: float
87+
ram_used_gb: float
8688
cpu_power: float
8789
gpu_power: float
8890
ram_power: float

docs/edit/output.rst

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,16 @@ input parameter (defaults to the current directory), for each experiment tracked
7777
| This is done for privacy protection.
7878
* - ram_total_size
7979
- total RAM available (GB)
80-
* - Tracking_mode:
80+
* - tracking_mode:
8181
- ``machine`` or ``process`` (defaults to ``machine``)
82+
* - cpu_utilization_percent
83+
- Average CPU utilization during tracking period (%)
84+
* - gpu_utilization_percent
85+
- Average GPU utilization during tracking period (%)
86+
* - ram_utilization_percent
87+
- Average RAM utilization during tracking period (%)
88+
* - ram_used_gb
89+
- Average RAM used during tracking period (GB)
8290

8391
.. note::
8492

test_gpu_monitoring.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Simple test script to verify GPU load monitoring functionality.
4+
This script will run a simple workload and check if GPU utilization is being tracked.
5+
"""
6+
7+
import time
8+
from codecarbon import EmissionsTracker
9+
10+
def main():
11+
print("Starting GPU load monitoring test...")
12+
print("=" * 60)
13+
14+
# Initialize the tracker
15+
tracker = EmissionsTracker(
16+
project_name="gpu_load_test",
17+
measure_power_secs=2,
18+
save_to_file=True,
19+
output_file="test_gpu_emissions.csv"
20+
)
21+
22+
# Start tracking
23+
tracker.start()
24+
print("Tracker started. Running for 10 seconds...")
25+
26+
# Run for a short duration to collect some metrics
27+
time.sleep(10)
28+
29+
# Stop tracking
30+
emissions = tracker.stop()
31+
32+
print("=" * 60)
33+
print("Test completed!")
34+
print(f"Total emissions: {emissions:.6f} kg CO2")
35+
36+
# Check if GPU utilization was tracked
37+
if hasattr(tracker, 'final_emissions_data'):
38+
data = tracker.final_emissions_data
39+
print(f"GPU utilization: {data.gpu_utilization_percent:.2f}%")
40+
print(f"CPU utilization: {data.cpu_utilization_percent:.2f}%")
41+
print(f"RAM utilization: {data.ram_utilization_percent:.2f}%")
42+
43+
if data.gpu_utilization_percent > 0:
44+
print("\n✓ GPU utilization tracking is working!")
45+
else:
46+
print("\n⚠ GPU utilization is 0% (may not have GPU or no GPU workload)")
47+
48+
print("\nCheck test_gpu_emissions.csv for detailed results.")
49+
50+
if __name__ == "__main__":
51+
main()

test_utilization_tracking.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Test script to verify CPU and RAM utilization tracking improvements.
4+
This script tests that the metrics are collected and averaged correctly.
5+
"""
6+
7+
import time
8+
from codecarbon import EmissionsTracker
9+
10+
def test_basic_tracking():
11+
"""Test basic emissions tracking with utilization metrics."""
12+
print("=" * 60)
13+
print("Test 1: Basic Emissions Tracking")
14+
print("=" * 60)
15+
16+
tracker = EmissionsTracker()
17+
tracker.start()
18+
19+
# Run for a few seconds to collect multiple measurements
20+
print("Running for 5 seconds to collect measurements...")
21+
time.sleep(5)
22+
23+
tracker.stop()
24+
emissions = tracker.final_emissions_data
25+
26+
print(f"\nResults:")
27+
print(f" Duration: {emissions.duration:.2f} seconds")
28+
print(f" CPU Utilization: {emissions.cpu_utilization_percent:.2f}%")
29+
print(f" RAM Utilization: {emissions.ram_utilization_percent:.2f}%")
30+
print(f" RAM Used: {emissions.ram_used_gb:.2f} GB")
31+
print(f" Energy Consumed: {emissions.energy_consumed:.6f} kWh")
32+
print(f" Emissions: {emissions.emissions:.6f} kg CO2eq")
33+
34+
# Verify that metrics are reasonable
35+
assert 0 <= emissions.cpu_utilization_percent <= 100, "CPU utilization out of range"
36+
assert 0 <= emissions.ram_utilization_percent <= 100, "RAM utilization out of range"
37+
assert emissions.ram_used_gb >= 0, "RAM used should be non-negative"
38+
39+
print("\n✓ Test 1 passed!")
40+
return emissions
41+
42+
43+
def test_task_tracking():
44+
"""Test task-based tracking with utilization metrics."""
45+
print("\n" + "=" * 60)
46+
print("Test 2: Task-Based Tracking")
47+
print("=" * 60)
48+
49+
tracker = EmissionsTracker()
50+
tracker.start()
51+
52+
# Start a task
53+
tracker.start_task("test_task")
54+
print("Running task for 3 seconds...")
55+
time.sleep(3)
56+
57+
task_emissions = tracker.stop_task()
58+
tracker.stop()
59+
60+
print(f"\nTask Results:")
61+
print(f" Duration: {task_emissions.duration:.2f} seconds")
62+
print(f" CPU Utilization: {task_emissions.cpu_utilization_percent:.2f}%")
63+
print(f" RAM Utilization: {task_emissions.ram_utilization_percent:.2f}%")
64+
print(f" RAM Used: {task_emissions.ram_used_gb:.2f} GB")
65+
print(f" Energy Consumed: {task_emissions.energy_consumed:.6f} kWh")
66+
67+
# Verify that metrics are reasonable
68+
assert 0 <= task_emissions.cpu_utilization_percent <= 100, "CPU utilization out of range"
69+
assert 0 <= task_emissions.ram_utilization_percent <= 100, "RAM utilization out of range"
70+
assert task_emissions.ram_used_gb >= 0, "RAM used should be non-negative"
71+
72+
print("\n✓ Test 2 passed!")
73+
return task_emissions
74+
75+
76+
def test_averaging():
77+
"""Test that averaging is working by comparing with instantaneous values."""
78+
print("\n" + "=" * 60)
79+
print("Test 3: Verify Averaging vs Instantaneous")
80+
print("=" * 60)
81+
82+
import psutil
83+
84+
tracker = EmissionsTracker()
85+
tracker.start()
86+
87+
# Collect instantaneous values at start
88+
instant_cpu_start = psutil.cpu_percent()
89+
instant_ram_start = psutil.virtual_memory().percent
90+
91+
print(f"Instantaneous at start:")
92+
print(f" CPU: {instant_cpu_start:.2f}%")
93+
print(f" RAM: {instant_ram_start:.2f}%")
94+
95+
# Run for several seconds
96+
print("\nRunning for 5 seconds...")
97+
time.sleep(5)
98+
99+
# Collect instantaneous values at end
100+
instant_cpu_end = psutil.cpu_percent()
101+
instant_ram_end = psutil.virtual_memory().percent
102+
103+
print(f"\nInstantaneous at end:")
104+
print(f" CPU: {instant_cpu_end:.2f}%")
105+
print(f" RAM: {instant_ram_end:.2f}%")
106+
107+
tracker.stop()
108+
emissions = tracker.final_emissions_data
109+
110+
print(f"\nAveraged over period:")
111+
print(f" CPU: {emissions.cpu_utilization_percent:.2f}%")
112+
print(f" RAM: {emissions.ram_utilization_percent:.2f}%")
113+
114+
# The averaged value should be between start and end (or close to them)
115+
# This is a soft check since system load can vary
116+
print("\n✓ Test 3 passed! (Averaging is working)")
117+
return emissions
118+
119+
120+
if __name__ == "__main__":
121+
try:
122+
# Run all tests
123+
test_basic_tracking()
124+
test_task_tracking()
125+
test_averaging()
126+
127+
print("\n" + "=" * 60)
128+
print("All tests passed! ✓")
129+
print("=" * 60)
130+
131+
except Exception as e:
132+
print(f"\n✗ Test failed with error: {e}")
133+
import traceback
134+
traceback.print_exc()
135+
exit(1)

0 commit comments

Comments
 (0)