Skip to content

Commit 3e84e60

Browse files
committed
Enable Prometheus metrics by default
Change DEFAULT_PROMETHEUS_CONFIG to enabled=True so metrics are collected out of the box.
1 parent 52afd9c commit 3e84e60

7 files changed

Lines changed: 152 additions & 83 deletions

File tree

lmcache/observability.py

Lines changed: 66 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ class LMCacheStats:
6868
interval_local_cpu_evict_count: int # evict count
6969
interval_local_cpu_evict_keys_count: int # evict keys count
7070
interval_local_cpu_evict_failed_count: int # evict failed count
71-
interval_local_hit_tokens: int = 0 # local tier hit tokens
7271
interval_remote_hit_tokens: int = 0 # remote tier hit tokens
7372
interval_cpu_hit_tokens: int = 0
7473
interval_disk_hit_tokens: int = 0
@@ -123,7 +122,9 @@ class LMCacheStats:
123122
interval_lookup_hit_rates: List[float] = field(default_factory=list)
124123
interval_lookup_0_hit_requests: int = 0
125124

126-
interval_request_cache_lifespan: List[float] = field(default_factory=list) # cache lifespan in minutes
125+
interval_request_cache_lifespan: List[float] = field(
126+
default_factory=list
127+
) # cache lifespan in minutes
127128

128129

129130
@dataclass
@@ -306,13 +307,26 @@ def __init__(self):
306307
self.interval_local_cpu_evict_count = 0
307308
self.interval_local_cpu_evict_keys_count = 0
308309
self.interval_local_cpu_evict_failed_count = 0
309-
self.interval_local_hit_tokens = 0
310310
self.interval_remote_hit_tokens = 0
311311
self.interval_cpu_hit_tokens = 0
312312
self.interval_disk_hit_tokens = 0
313-
self.per_tier_get_latencies: Dict[str, List[float]] = {"cpu": [], "disk": [], "remote": []}
314-
self.interval_request_tier_served: Dict[str, int] = {"cpu": 0, "disk": 0, "remote": 0, "mixed": 0, "miss": 0}
315-
self.per_tier_request_hit_tokens: Dict[str, List[int]] = {"cpu": [], "disk": [], "remote": []}
313+
self.per_tier_get_latencies: Dict[str, List[float]] = {
314+
"cpu": [],
315+
"disk": [],
316+
"remote": [],
317+
}
318+
self.interval_request_tier_served: Dict[str, int] = {
319+
"cpu": 0,
320+
"disk": 0,
321+
"remote": 0,
322+
"mixed": 0,
323+
"miss": 0,
324+
}
325+
self.per_tier_request_hit_tokens: Dict[str, List[int]] = {
326+
"cpu": [],
327+
"disk": [],
328+
"remote": [],
329+
}
316330
self.interval_local_disk_read_bytes = 0
317331
self.interval_local_disk_write_bytes = 0
318332
self.local_disk_read_latencies: List[float] = []
@@ -432,31 +446,31 @@ def on_retrieve_finished(
432446
if retrieve_stats.end_time == 0:
433447
retrieve_stats.end_time = curr_time
434448
self.interval_hit_tokens += num_retrieved_tokens
435-
self.interval_local_hit_tokens += retrieve_stats.local_hit_tokens
436449
self.interval_remote_hit_tokens += retrieve_stats.remote_hit_tokens
437450
self.interval_cpu_hit_tokens += retrieve_stats.cpu_hit_tokens
438451
self.interval_disk_hit_tokens += retrieve_stats.disk_hit_tokens
439452
# Per-tier get latency from detailed_metrics
440-
per_backend_time = retrieve_stats.detailed_metrics.get("per_backend_get_time", {})
453+
per_backend_time = retrieve_stats.detailed_metrics.get(
454+
"per_backend_get_time", {}
455+
)
441456
for backend, latency in per_backend_time.items():
442457
if backend == "LocalCPUBackend":
443458
self.per_tier_get_latencies["cpu"].append(latency)
444459
elif backend == "LocalDiskBackend":
445460
self.per_tier_get_latencies["disk"].append(latency)
446461
elif "Remote" in backend:
447462
self.per_tier_get_latencies["remote"].append(latency)
448-
# Per-request tier attribution
449463
cpu = retrieve_stats.cpu_hit_tokens
450464
disk = retrieve_stats.disk_hit_tokens
451465
remote = retrieve_stats.remote_hit_tokens
452466
total = cpu + disk + remote
453467
if total == 0:
454468
self.interval_request_tier_served["miss"] += 1
455-
elif cpu >= disk and cpu >= remote and cpu > total * 0.5:
469+
elif cpu > total * 0.5:
456470
self.interval_request_tier_served["cpu"] += 1
457-
elif disk >= cpu and disk >= remote and disk > total * 0.5:
471+
elif disk > total * 0.5:
458472
self.interval_request_tier_served["disk"] += 1
459-
elif remote >= cpu and remote >= disk and remote > total * 0.5:
473+
elif remote > total * 0.5:
460474
self.interval_request_tier_served["remote"] += 1
461475
else:
462476
self.interval_request_tier_served["mixed"] += 1
@@ -704,12 +718,17 @@ def _clear(self):
704718
self.interval_local_cpu_evict_count = 0
705719
self.interval_local_cpu_evict_keys_count = 0
706720
self.interval_local_cpu_evict_failed_count = 0
707-
self.interval_local_hit_tokens = 0
708721
self.interval_remote_hit_tokens = 0
709722
self.interval_cpu_hit_tokens = 0
710723
self.interval_disk_hit_tokens = 0
711724
self.per_tier_get_latencies = {"cpu": [], "disk": [], "remote": []}
712-
self.interval_request_tier_served = {"cpu": 0, "disk": 0, "remote": 0, "mixed": 0, "miss": 0}
725+
self.interval_request_tier_served = {
726+
"cpu": 0,
727+
"disk": 0,
728+
"remote": 0,
729+
"mixed": 0,
730+
"miss": 0,
731+
}
713732
self.per_tier_request_hit_tokens = {"cpu": [], "disk": [], "remote": []}
714733
self.interval_local_disk_read_bytes = 0
715734
self.interval_local_disk_write_bytes = 0
@@ -881,13 +900,16 @@ def filter_out_zeros(stats: Iterable[float]) -> List[float]:
881900
interval_local_cpu_evict_count=self.interval_local_cpu_evict_count,
882901
interval_local_cpu_evict_keys_count=self.interval_local_cpu_evict_keys_count,
883902
interval_local_cpu_evict_failed_count=self.interval_local_cpu_evict_failed_count,
884-
interval_local_hit_tokens=self.interval_local_hit_tokens,
885903
interval_remote_hit_tokens=self.interval_remote_hit_tokens,
886904
interval_cpu_hit_tokens=self.interval_cpu_hit_tokens,
887905
interval_disk_hit_tokens=self.interval_disk_hit_tokens,
888-
per_tier_get_latencies={k: list(v) for k, v in self.per_tier_get_latencies.items()},
906+
per_tier_get_latencies={
907+
k: list(v) for k, v in self.per_tier_get_latencies.items()
908+
},
889909
interval_request_tier_served=dict(self.interval_request_tier_served),
890-
per_tier_request_hit_tokens={k: list(v) for k, v in self.per_tier_request_hit_tokens.items()},
910+
per_tier_request_hit_tokens={
911+
k: list(v) for k, v in self.per_tier_request_hit_tokens.items()
912+
},
891913
interval_local_disk_read_bytes=self.interval_local_disk_read_bytes,
892914
interval_local_disk_write_bytes=self.interval_local_disk_write_bytes,
893915
local_disk_read_latencies=self.local_disk_read_latencies.copy(),
@@ -1051,7 +1073,7 @@ def __init__(
10511073
self.counter_num_hit_tokens = self._create_counter(
10521074
name="lmcache:num_hit_tokens",
10531075
documentation="Total number of tokens hit in lmcache",
1054-
labelnames=labelnames,
1076+
labelnames=labelnames + ["tier"],
10551077
)
10561078

10571079
self.counter_num_stored_tokens = self._create_counter(
@@ -1130,7 +1152,6 @@ def __init__(
11301152
labelnames=labelnames,
11311153
)
11321154

1133-
# Local disk I/O metrics (mirrors remote_* pattern)
11341155
self.counter_local_disk_read_bytes = self._create_counter(
11351156
name="lmcache:local_disk_read_bytes_total",
11361157
documentation="Total bytes read from local disk backend",
@@ -1142,21 +1163,32 @@ def __init__(
11421163
labelnames=labelnames,
11431164
)
11441165

1145-
disk_latency_buckets = [
1146-
0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1,
1147-
0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0,
1166+
self.disk_latency_buckets = [
1167+
0.001,
1168+
0.002,
1169+
0.005,
1170+
0.01,
1171+
0.02,
1172+
0.05,
1173+
0.1,
1174+
0.2,
1175+
0.5,
1176+
1.0,
1177+
2.0,
1178+
5.0,
1179+
10.0,
11481180
]
11491181
self.histogram_local_disk_read_latency = self._create_histogram(
11501182
name="lmcache:local_disk_read_latency",
11511183
documentation="Local disk read latency (seconds)",
11521184
labelnames=labelnames,
1153-
buckets=disk_latency_buckets,
1185+
buckets=self.disk_latency_buckets,
11541186
)
11551187
self.histogram_local_disk_write_latency = self._create_histogram(
11561188
name="lmcache:local_disk_write_latency",
11571189
documentation="Local disk write latency (seconds)",
11581190
labelnames=labelnames,
1159-
buckets=disk_latency_buckets,
1191+
buckets=self.disk_latency_buckets,
11601192
)
11611193

11621194
self.counter_local_disk_evict_count = self._create_counter(
@@ -1165,15 +1197,13 @@ def __init__(
11651197
labelnames=labelnames,
11661198
)
11671199

1168-
# Per-tier retrieve get latency histograms
11691200
self.histogram_tier_get_latency = self._create_histogram(
11701201
name="lmcache:tier_get_latency",
11711202
documentation="Per-tier batched_get latency (seconds)",
11721203
labelnames=labelnames + ["tier"],
1173-
buckets=disk_latency_buckets,
1204+
buckets=self.disk_latency_buckets,
11741205
)
11751206

1176-
# Per-request tier attribution
11771207
self.counter_request_tier_served = self._create_counter(
11781208
name="lmcache:request_tier_served",
11791209
documentation="Number of retrieve requests served by each tier",
@@ -1828,7 +1858,6 @@ def log_prometheus(self, stats: LMCacheStats):
18281858
self._log_counter(
18291859
self.counter_num_requested_tokens, stats.interval_requested_tokens
18301860
)
1831-
# Per-tier hit token logging
18321861
if stats.interval_cpu_hit_tokens > 0:
18331862
self.counter_num_hit_tokens.labels(**self.labels, tier="cpu").inc(
18341863
stats.interval_cpu_hit_tokens
@@ -1898,13 +1927,19 @@ def log_prometheus(self, stats: LMCacheStats):
18981927
)
18991928
for tier, latencies in stats.per_tier_get_latencies.items():
19001929
for latency in latencies:
1901-
self.histogram_tier_get_latency.labels(**self.labels, tier=tier).observe(latency)
1930+
self.histogram_tier_get_latency.labels(
1931+
**self.labels, tier=tier
1932+
).observe(latency)
19021933
for tier, count in stats.interval_request_tier_served.items():
19031934
if count > 0:
1904-
self.counter_request_tier_served.labels(**self.labels, tier=tier).inc(count)
1935+
self.counter_request_tier_served.labels(**self.labels, tier=tier).inc(
1936+
count
1937+
)
19051938
for tier, tokens_list in stats.per_tier_request_hit_tokens.items():
19061939
for tokens in tokens_list:
1907-
self.histogram_request_tier_hit_tokens.labels(**self.labels, tier=tier).observe(tokens)
1940+
self.histogram_request_tier_hit_tokens.labels(
1941+
**self.labels, tier=tier
1942+
).observe(tokens)
19081943
self._log_counter(
19091944
self.counter_forced_unpin_count,
19101945
stats.interval_forced_unpin_count,

lmcache/v1/cache_engine.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,7 +1020,6 @@ def retrieve_layer(
10201020
next(mem_obj_consumer)
10211021

10221022
retrieved_tokens = torch.sum(ret_mask)
1023-
# Set per-tier hit tokens based on which backend served this request
10241023
if location is not None:
10251024
total = int(retrieved_tokens)
10261025
if location == "LocalCPUBackend":
@@ -1291,17 +1290,17 @@ def cleanup_memory_objs(self, lookup_id: str) -> None:
12911290

12921291
# Get memory objects from the future result
12931292
memory_objs = future.result()
1294-
# Flatten nested lists (each backend returns (name, [(key, memobj)]))
1295-
memory_objs_flat = [mm for _name, m in memory_objs for mm in m]
1296-
12971293
# Release each memory object
1298-
for _key, memory_obj in memory_objs_flat:
1299-
try:
1300-
logger.debug("Releasing memory object for lookup_id=%s", lookup_id)
1301-
memory_obj.unpin()
1302-
memory_obj.ref_count_down()
1303-
except Exception as e:
1304-
logger.error(f"Error releasing memory object: {e}")
1294+
for tier_objs in memory_objs:
1295+
for _key, memory_obj in tier_objs:
1296+
try:
1297+
logger.debug(
1298+
"Releasing memory object for lookup_id=%s", lookup_id
1299+
)
1300+
memory_obj.unpin()
1301+
memory_obj.ref_count_down()
1302+
except Exception as e:
1303+
logger.error(f"Error releasing memory object: {e}")
13051304
except Exception as e:
13061305
logger.error(
13071306
f"Error during cleanup_memory_objs for lookup_id={lookup_id}: {e}"
@@ -1558,8 +1557,12 @@ def _async_process_tokens_internal(
15581557
logger.error(f"Error popping event for request {kwargs['req_id']}: {e}")
15591558
return [], 0
15601559

1561-
for backend_name, backend_results in keyed_memory_objs:
1562-
for key, memory_obj in backend_results:
1560+
for tier_idx, tier_results in enumerate(keyed_memory_objs):
1561+
# Assuming 0: CPU, 1: Disk, 2: Remote
1562+
backend_name = ["LocalCPUBackend", "LocalDiskBackend", "RemoteBackend"][
1563+
min(tier_idx, 2)
1564+
]
1565+
for key, memory_obj in tier_results:
15631566
memory_obj_map[key] = memory_obj
15641567
key_to_backend[key] = backend_name
15651568

@@ -1586,13 +1589,14 @@ def _async_process_tokens_internal(
15861589
if key not in used_keys:
15871590
mem_obj.ref_count_down()
15881591

1589-
# Propagate per-backend hit counts to retrieve stats
15901592
retrieve_stats = self.stats_monitor.get_current_retrieve_stats()
15911593
if retrieve_stats is not None:
15921594
per_backend_tokens: Dict[str, int] = {}
15931595
for key, _memory_obj, start, end in chunks:
15941596
backend = key_to_backend.get(key, "unknown")
1595-
per_backend_tokens[backend] = per_backend_tokens.get(backend, 0) + (end - start)
1597+
per_backend_tokens[backend] = per_backend_tokens.get(backend, 0) + (
1598+
end - start
1599+
)
15961600
for backend, count in per_backend_tokens.items():
15971601
if backend == "LocalCPUBackend":
15981602
retrieve_stats.cpu_hit_tokens = count
@@ -1685,7 +1689,6 @@ def _process_tokens_internal(
16851689
if end < last_failed_block_start
16861690
]
16871691

1688-
# Propagate per-backend hit counts to retrieve stats
16891692
retrieve_stats = self.stats_monitor.get_current_retrieve_stats()
16901693
if retrieve_stats is not None:
16911694
for loc, ranges in per_backend_ranges.items():
@@ -1698,7 +1701,9 @@ def _process_tokens_internal(
16981701
retrieve_stats.disk_hit_tokens = count
16991702
elif "Remote" in loc:
17001703
retrieve_stats.remote_hit_tokens = count
1701-
retrieve_stats.detailed_metrics["per_backend_get_time"] = per_backend_get_time
1704+
retrieve_stats.detailed_metrics["per_backend_get_time"] = (
1705+
per_backend_get_time
1706+
)
17021707

17031708
return reordered_chunks, tot_kv_size
17041709

lmcache/v1/distributed/l2_adapters/nixl_store_l2_adapter.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,19 @@ async def _execute_load_in_loop(
757757
with self._lock:
758758
self._completed_load_tasks[task_id] = bitmap
759759
self._signal_load_event()
760+
761+
762+
def report_status(self) -> dict:
    """Return a point-in-time status snapshot for this adapter.

    The snapshot is assembled while holding ``self._lock`` so the counts
    are read consistently with respect to concurrent store/load updates.

    Returns:
        dict: status fields —
            - ``is_healthy``: always ``True`` here (the adapter reports
              healthy whenever it can respond; no deeper probe is done).
            - ``type``: adapter class identifier string.
            - ``backend``: configured backend name from ``self._config``.
            - ``stored_object_count``: number of in-memory objects held.
            - ``completed_store_tasks`` / ``completed_lookup_tasks`` /
              ``completed_load_tasks``: counts of finished tasks tracked
              by the adapter.
    """
    # Hold the lock so all counters come from one consistent moment.
    with self._lock:
        return {
            "is_healthy": True,
            "type": "NixlStoreL2Adapter",
            "backend": self._config.backend,
            "stored_object_count": len(self._memory_objects),
            "completed_store_tasks": len(self._completed_store_tasks),
            "completed_lookup_tasks": len(self._completed_lookup_tasks),
            "completed_load_tasks": len(self._completed_load_tasks),
        }

lmcache/v1/storage_backend/cache_policy/lru.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class LRUCachePolicy(BaseCachePolicy[KeyType, OrderedDict[KeyType, Any]]):
1818
LRU cache policy.
1919
"""
2020

21-
def __init__(self):
21+
def __init__(self) -> None:
2222
logger.info("Initializing LRUCachePolicy")
2323
self.chunk_hash_to_init_timestamp: Dict[Any, float] = {}
2424
self.stats_monitor = LMCStatsMonitor.GetOrCreate()

0 commit comments

Comments (0)