From 21acd2e2964685568731a4ecd69a6081e535fb3d Mon Sep 17 00:00:00 2001 From: krataratha Date: Wed, 13 May 2026 18:18:43 +0530 Subject: [PATCH] Refactor health monitoring logic and snapshot collection --- src/health/monitor.ts | 61 +++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/src/health/monitor.ts b/src/health/monitor.ts index 953c94a0..170a2a2d 100644 --- a/src/health/monitor.ts +++ b/src/health/monitor.ts @@ -1,39 +1,38 @@ -import type { ISdk } from "iii-sdk"; -import type { HealthSnapshot } from "../types.js"; -import type { StateKV } from "../state/kv.js"; -import { KV } from "../state/schema.js"; -import { evaluateHealth } from "./thresholds.js"; + const endMark = performance.now(); + const eventLoopLag = endMark - startMark; -export function registerHealthMonitor( - sdk: ISdk, - kv: StateKV, -): { stop: () => void } { - let connectionState = "connected"; - let prevCpuUsage = process.cpuUsage(); - let prevCpuTime = Date.now(); + const snapshot: HealthSnapshot = { + cpuUsage: cpuPercent, + memoryRss: mem.rss / 1024 / 1024, + memoryHeapUsed: mem.heapUsed / 1024 / 1024, + eventLoopLag, + uptime, + connectionState, + timestamp: now, + }; - if (typeof sdk.on === "function") { - sdk.on("connection_state", (state?: unknown) => { - connectionState = state as string; - }); - } + const status = evaluateHealth(snapshot); + + // Feature: Persistence & State-Aware Alerting + const lastStatus = await kv.get(KV.LAST_HEALTH_STATUS); + if (status.isCritical && lastStatus !== "critical") { + sdk.emit?.("health_alert", { snapshot, status }); + } - async function collectHealth(): Promise { - const mem = process.memoryUsage(); - const currentCpu = process.cpuUsage(); - const now = Date.now(); - const uptime = process.uptime(); + await kv.set(KV.LATEST_HEALTH, snapshot); + await kv.set(KV.LAST_HEALTH_STATUS, status.level); - const elapsedMs = now - prevCpuTime; - const userDelta = currentCpu.user - prevCpuUsage.user; - const systemDelta = currentCpu.system - prevCpuUsage.system; - const cpuPercent = - elapsedMs > 0 ? ((userDelta + systemDelta) / 1000 / elapsedMs) * 100 : 0; - prevCpuUsage = currentCpu; - prevCpuTime = now; + return snapshot; + } - const startMark = performance.now(); - await new Promise((resolve) => setImmediate(resolve)); + const interval = setInterval(collectHealth, 5000); + return { + stop: () => { + clearInterval(interval); + sdk.emit?.("monitor_stopped", { at: Date.now() }); + } + }; +} const eventLoopLagMs = performance.now() - startMark; let workers: HealthSnapshot["workers"] = [];