@@ -68,7 +68,6 @@ class LMCacheStats:
6868 interval_local_cpu_evict_count : int # evict count
6969 interval_local_cpu_evict_keys_count : int # evict keys count
7070 interval_local_cpu_evict_failed_count : int # evict failed count
71- interval_local_hit_tokens : int = 0 # local tier hit tokens
7271 interval_remote_hit_tokens : int = 0 # remote tier hit tokens
7372 interval_cpu_hit_tokens : int = 0
7473 interval_disk_hit_tokens : int = 0
@@ -123,7 +122,9 @@ class LMCacheStats:
123122 interval_lookup_hit_rates : List [float ] = field (default_factory = list )
124123 interval_lookup_0_hit_requests : int = 0
125124
126- interval_request_cache_lifespan : List [float ] = field (default_factory = list ) # cache lifespan in minutes
125+ interval_request_cache_lifespan : List [float ] = field (
126+ default_factory = list
127+ ) # cache lifespan in minutes
127128
128129
129130@dataclass
@@ -306,13 +307,26 @@ def __init__(self):
306307 self .interval_local_cpu_evict_count = 0
307308 self .interval_local_cpu_evict_keys_count = 0
308309 self .interval_local_cpu_evict_failed_count = 0
309- self .interval_local_hit_tokens = 0
310310 self .interval_remote_hit_tokens = 0
311311 self .interval_cpu_hit_tokens = 0
312312 self .interval_disk_hit_tokens = 0
313- self .per_tier_get_latencies : Dict [str , List [float ]] = {"cpu" : [], "disk" : [], "remote" : []}
314- self .interval_request_tier_served : Dict [str , int ] = {"cpu" : 0 , "disk" : 0 , "remote" : 0 , "mixed" : 0 , "miss" : 0 }
315- self .per_tier_request_hit_tokens : Dict [str , List [int ]] = {"cpu" : [], "disk" : [], "remote" : []}
313+ self .per_tier_get_latencies : Dict [str , List [float ]] = {
314+ "cpu" : [],
315+ "disk" : [],
316+ "remote" : [],
317+ }
318+ self .interval_request_tier_served : Dict [str , int ] = {
319+ "cpu" : 0 ,
320+ "disk" : 0 ,
321+ "remote" : 0 ,
322+ "mixed" : 0 ,
323+ "miss" : 0 ,
324+ }
325+ self .per_tier_request_hit_tokens : Dict [str , List [int ]] = {
326+ "cpu" : [],
327+ "disk" : [],
328+ "remote" : [],
329+ }
316330 self .interval_local_disk_read_bytes = 0
317331 self .interval_local_disk_write_bytes = 0
318332 self .local_disk_read_latencies : List [float ] = []
@@ -432,31 +446,31 @@ def on_retrieve_finished(
432446 if retrieve_stats .end_time == 0 :
433447 retrieve_stats .end_time = curr_time
434448 self .interval_hit_tokens += num_retrieved_tokens
435- self .interval_local_hit_tokens += retrieve_stats .local_hit_tokens
436449 self .interval_remote_hit_tokens += retrieve_stats .remote_hit_tokens
437450 self .interval_cpu_hit_tokens += retrieve_stats .cpu_hit_tokens
438451 self .interval_disk_hit_tokens += retrieve_stats .disk_hit_tokens
439452 # Per-tier get latency from detailed_metrics
440- per_backend_time = retrieve_stats .detailed_metrics .get ("per_backend_get_time" , {})
453+ per_backend_time = retrieve_stats .detailed_metrics .get (
454+ "per_backend_get_time" , {}
455+ )
441456 for backend , latency in per_backend_time .items ():
442457 if backend == "LocalCPUBackend" :
443458 self .per_tier_get_latencies ["cpu" ].append (latency )
444459 elif backend == "LocalDiskBackend" :
445460 self .per_tier_get_latencies ["disk" ].append (latency )
446461 elif "Remote" in backend :
447462 self .per_tier_get_latencies ["remote" ].append (latency )
448- # Per-request tier attribution
449463 cpu = retrieve_stats .cpu_hit_tokens
450464 disk = retrieve_stats .disk_hit_tokens
451465 remote = retrieve_stats .remote_hit_tokens
452466 total = cpu + disk + remote
453467 if total == 0 :
454468 self .interval_request_tier_served ["miss" ] += 1
455- elif cpu >= disk and cpu >= remote and cpu > total * 0.5 :
469+ elif cpu > total * 0.5 :
456470 self .interval_request_tier_served ["cpu" ] += 1
457- elif disk >= cpu and disk >= remote and disk > total * 0.5 :
471+ elif disk > total * 0.5 :
458472 self .interval_request_tier_served ["disk" ] += 1
459- elif remote >= cpu and remote >= disk and remote > total * 0.5 :
473+ elif remote > total * 0.5 :
460474 self .interval_request_tier_served ["remote" ] += 1
461475 else :
462476 self .interval_request_tier_served ["mixed" ] += 1
@@ -704,12 +718,17 @@ def _clear(self):
704718 self .interval_local_cpu_evict_count = 0
705719 self .interval_local_cpu_evict_keys_count = 0
706720 self .interval_local_cpu_evict_failed_count = 0
707- self .interval_local_hit_tokens = 0
708721 self .interval_remote_hit_tokens = 0
709722 self .interval_cpu_hit_tokens = 0
710723 self .interval_disk_hit_tokens = 0
711724 self .per_tier_get_latencies = {"cpu" : [], "disk" : [], "remote" : []}
712- self .interval_request_tier_served = {"cpu" : 0 , "disk" : 0 , "remote" : 0 , "mixed" : 0 , "miss" : 0 }
725+ self .interval_request_tier_served = {
726+ "cpu" : 0 ,
727+ "disk" : 0 ,
728+ "remote" : 0 ,
729+ "mixed" : 0 ,
730+ "miss" : 0 ,
731+ }
713732 self .per_tier_request_hit_tokens = {"cpu" : [], "disk" : [], "remote" : []}
714733 self .interval_local_disk_read_bytes = 0
715734 self .interval_local_disk_write_bytes = 0
@@ -881,13 +900,16 @@ def filter_out_zeros(stats: Iterable[float]) -> List[float]:
881900 interval_local_cpu_evict_count = self .interval_local_cpu_evict_count ,
882901 interval_local_cpu_evict_keys_count = self .interval_local_cpu_evict_keys_count ,
883902 interval_local_cpu_evict_failed_count = self .interval_local_cpu_evict_failed_count ,
884- interval_local_hit_tokens = self .interval_local_hit_tokens ,
885903 interval_remote_hit_tokens = self .interval_remote_hit_tokens ,
886904 interval_cpu_hit_tokens = self .interval_cpu_hit_tokens ,
887905 interval_disk_hit_tokens = self .interval_disk_hit_tokens ,
888- per_tier_get_latencies = {k : list (v ) for k , v in self .per_tier_get_latencies .items ()},
906+ per_tier_get_latencies = {
907+ k : list (v ) for k , v in self .per_tier_get_latencies .items ()
908+ },
889909 interval_request_tier_served = dict (self .interval_request_tier_served ),
890- per_tier_request_hit_tokens = {k : list (v ) for k , v in self .per_tier_request_hit_tokens .items ()},
910+ per_tier_request_hit_tokens = {
911+ k : list (v ) for k , v in self .per_tier_request_hit_tokens .items ()
912+ },
891913 interval_local_disk_read_bytes = self .interval_local_disk_read_bytes ,
892914 interval_local_disk_write_bytes = self .interval_local_disk_write_bytes ,
893915 local_disk_read_latencies = self .local_disk_read_latencies .copy (),
@@ -1051,7 +1073,7 @@ def __init__(
10511073 self .counter_num_hit_tokens = self ._create_counter (
10521074 name = "lmcache:num_hit_tokens" ,
10531075 documentation = "Total number of tokens hit in lmcache" ,
1054- labelnames = labelnames ,
1076+ labelnames = labelnames + [ "tier" ] ,
10551077 )
10561078
10571079 self .counter_num_stored_tokens = self ._create_counter (
@@ -1130,7 +1152,6 @@ def __init__(
11301152 labelnames = labelnames ,
11311153 )
11321154
1133- # Local disk I/O metrics (mirrors remote_* pattern)
11341155 self .counter_local_disk_read_bytes = self ._create_counter (
11351156 name = "lmcache:local_disk_read_bytes_total" ,
11361157 documentation = "Total bytes read from local disk backend" ,
@@ -1142,21 +1163,32 @@ def __init__(
11421163 labelnames = labelnames ,
11431164 )
11441165
1145- disk_latency_buckets = [
1146- 0.001 , 0.005 , 0.01 , 0.02 , 0.04 , 0.06 , 0.08 , 0.1 ,
1147- 0.25 , 0.5 , 0.75 , 1.0 , 2.5 , 5.0 , 7.5 , 10.0 ,
1166+ self .disk_latency_buckets = [
1167+ 0.001 ,
1168+ 0.002 ,
1169+ 0.005 ,
1170+ 0.01 ,
1171+ 0.02 ,
1172+ 0.05 ,
1173+ 0.1 ,
1174+ 0.2 ,
1175+ 0.5 ,
1176+ 1.0 ,
1177+ 2.0 ,
1178+ 5.0 ,
1179+ 10.0 ,
11481180 ]
11491181 self .histogram_local_disk_read_latency = self ._create_histogram (
11501182 name = "lmcache:local_disk_read_latency" ,
11511183 documentation = "Local disk read latency (seconds)" ,
11521184 labelnames = labelnames ,
1153- buckets = disk_latency_buckets ,
1185+ buckets = self . disk_latency_buckets ,
11541186 )
11551187 self .histogram_local_disk_write_latency = self ._create_histogram (
11561188 name = "lmcache:local_disk_write_latency" ,
11571189 documentation = "Local disk write latency (seconds)" ,
11581190 labelnames = labelnames ,
1159- buckets = disk_latency_buckets ,
1191+ buckets = self . disk_latency_buckets ,
11601192 )
11611193
11621194 self .counter_local_disk_evict_count = self ._create_counter (
@@ -1165,15 +1197,13 @@ def __init__(
11651197 labelnames = labelnames ,
11661198 )
11671199
1168- # Per-tier retrieve get latency histograms
11691200 self .histogram_tier_get_latency = self ._create_histogram (
11701201 name = "lmcache:tier_get_latency" ,
11711202 documentation = "Per-tier batched_get latency (seconds)" ,
11721203 labelnames = labelnames + ["tier" ],
1173- buckets = disk_latency_buckets ,
1204+ buckets = self . disk_latency_buckets ,
11741205 )
11751206
1176- # Per-request tier attribution
11771207 self .counter_request_tier_served = self ._create_counter (
11781208 name = "lmcache:request_tier_served" ,
11791209 documentation = "Number of retrieve requests served by each tier" ,
@@ -1828,7 +1858,6 @@ def log_prometheus(self, stats: LMCacheStats):
18281858 self ._log_counter (
18291859 self .counter_num_requested_tokens , stats .interval_requested_tokens
18301860 )
1831- # Per-tier hit token logging
18321861 if stats .interval_cpu_hit_tokens > 0 :
18331862 self .counter_num_hit_tokens .labels (** self .labels , tier = "cpu" ).inc (
18341863 stats .interval_cpu_hit_tokens
@@ -1898,13 +1927,19 @@ def log_prometheus(self, stats: LMCacheStats):
18981927 )
18991928 for tier , latencies in stats .per_tier_get_latencies .items ():
19001929 for latency in latencies :
1901- self .histogram_tier_get_latency .labels (** self .labels , tier = tier ).observe (latency )
1930+ self .histogram_tier_get_latency .labels (
1931+ ** self .labels , tier = tier
1932+ ).observe (latency )
19021933 for tier , count in stats .interval_request_tier_served .items ():
19031934 if count > 0 :
1904- self .counter_request_tier_served .labels (** self .labels , tier = tier ).inc (count )
1935+ self .counter_request_tier_served .labels (** self .labels , tier = tier ).inc (
1936+ count
1937+ )
19051938 for tier , tokens_list in stats .per_tier_request_hit_tokens .items ():
19061939 for tokens in tokens_list :
1907- self .histogram_request_tier_hit_tokens .labels (** self .labels , tier = tier ).observe (tokens )
1940+ self .histogram_request_tier_hit_tokens .labels (
1941+ ** self .labels , tier = tier
1942+ ).observe (tokens )
19081943 self ._log_counter (
19091944 self .counter_forced_unpin_count ,
19101945 stats .interval_forced_unpin_count ,
0 commit comments