@@ -129,20 +129,20 @@ def _cost_function(
129
129
print ("[ROUTER] metrics.endpoint " , metrics .endpoints )
130
130
for endpoint in metrics .endpoints :
131
131
worker_id = endpoint .worker_id
132
- worker_metrics [worker_id ] = {
133
- "gpu_cache_usage_perc" : endpoint .gpu_cache_usage_perc
134
- if hasattr (endpoint , "gpu_cache_usage_perc" )
135
- else 0.0 ,
136
- "num_requests_waiting" : endpoint .num_requests_waiting
137
- if hasattr (endpoint , "num_requests_waiting" )
138
- else 0.0 ,
139
- "gpu_prefix_cache_hit_rate" : endpoint .gpu_prefix_cache_hit_rate
140
- if hasattr (endpoint , "gpu_prefix_cache_hit_rate" )
141
- else 0.0 ,
142
- }
143
- max_waiting = max (
144
- max_waiting , worker_metrics [worker_id ]["num_requests_waiting" ]
145
- )
132
+ worker_metrics [worker_id ] = {
133
+ "gpu_cache_usage_perc" : endpoint .gpu_cache_usage_perc
134
+ if hasattr (endpoint , "gpu_cache_usage_perc" )
135
+ else 0.0 ,
136
+ "num_requests_waiting" : endpoint .num_requests_waiting
137
+ if hasattr (endpoint , "num_requests_waiting" )
138
+ else 0.0 ,
139
+ "gpu_prefix_cache_hit_rate" : endpoint .gpu_prefix_cache_hit_rate
140
+ if hasattr (endpoint , "gpu_prefix_cache_hit_rate" )
141
+ else 0.0 ,
142
+ }
143
+ max_waiting = max (
144
+ max_waiting , worker_metrics [worker_id ]["num_requests_waiting" ]
145
+ )
146
146
147
147
# Get all worker IDs from the client. This is needed because scores / metrics may not have values for all workers
148
148
# and we want all workers to be considered in the logit calculation
0 commit comments