@@ -6,6 +6,7 @@ package server_test
66import (
77 "context"
88 "encoding/json"
9+ "errors"
910 "net/http"
1011 "testing"
1112 "time"
@@ -19,6 +20,7 @@ import (
1920 "github.com/stacklok/toolhive/pkg/vmcp/aggregator"
2021 authtypes "github.com/stacklok/toolhive/pkg/vmcp/auth/types"
2122 discoveryMocks "github.com/stacklok/toolhive/pkg/vmcp/discovery/mocks"
23+ "github.com/stacklok/toolhive/pkg/vmcp/health"
2224 "github.com/stacklok/toolhive/pkg/vmcp/mocks"
2325 "github.com/stacklok/toolhive/pkg/vmcp/router"
2426 "github.com/stacklok/toolhive/pkg/vmcp/server"
@@ -40,66 +42,15 @@ type BackendStatus struct {
4042 AuthType string `json:"auth_type,omitempty"`
4143}
4244
43- // createTestServerWithBackends creates a test server instance with custom backends.
45+ // createTestServerWithBackends creates a test server instance with custom backends
46+ // and no health monitoring. It is a convenience wrapper around createTestServerWithHealthMonitor.
4447func createTestServerWithBackends (t * testing.T , backends []vmcp.Backend , groupRef string ) * server.Server {
4548 t .Helper ()
46-
47- ctrl := gomock .NewController (t )
48- t .Cleanup (ctrl .Finish )
49-
50- mockBackendClient := mocks .NewMockBackendClient (ctrl )
51- mockDiscoveryMgr := discoveryMocks .NewMockManager (ctrl )
52- rt := router .NewDefaultRouter ()
53-
54- port := networking .FindAvailable ()
55- require .NotZero (t , port , "Failed to find available port" )
56-
57- mockDiscoveryMgr .EXPECT ().
58- Discover (gomock .Any (), gomock .Any ()).
59- Return (& aggregator.AggregatedCapabilities {
60- Tools : []vmcp.Tool {},
61- Resources : []vmcp.Resource {},
62- Prompts : []vmcp.Prompt {},
63- RoutingTable : & vmcp.RoutingTable {
64- Tools : make (map [string ]* vmcp.BackendTarget ),
65- Resources : make (map [string ]* vmcp.BackendTarget ),
66- Prompts : make (map [string ]* vmcp.BackendTarget ),
67- },
68- Metadata : & aggregator.AggregationMetadata {},
69- }, nil ).
70- AnyTimes ()
71- mockDiscoveryMgr .EXPECT ().Stop ().AnyTimes ()
72-
73- ctx , cancel := context .WithCancel (t .Context ())
74-
75- srv , err := server .New (ctx , & server.Config {
76- Name : "test-vmcp" ,
77- Version : "1.0.0" ,
78- Host : "127.0.0.1" ,
79- Port : port ,
80- GroupRef : groupRef ,
81- SessionFactory : newNoopMockFactory (t ),
82- }, rt , mockBackendClient , mockDiscoveryMgr , vmcp .NewImmutableRegistry (backends ), nil )
83- require .NoError (t , err )
84-
85- t .Cleanup (cancel )
86- errCh := make (chan error , 1 )
87- go func () {
88- if err := srv .Start (ctx ); err != nil {
89- errCh <- err
90- }
91- }()
92-
93- select {
94- case <- srv .Ready ():
95- case err := <- errCh :
96- t .Fatalf ("Server failed to start: %v" , err )
97- case <- time .After (5 * time .Second ):
98- t .Fatalf ("Server did not become ready within 5s (address: %s)" , srv .Address ())
99- }
100-
101- time .Sleep (10 * time .Millisecond )
102- return srv
49+ return createTestServerWithHealthMonitor (t , backends ,
50+ health.MonitorConfig {}, // zero value → no health monitor started
51+ nil , // no mock expectations needed
52+ groupRef ,
53+ )
10354}
10455
10556func TestStatusEndpoint_HTTPBehavior (t * testing.T ) {
@@ -242,3 +193,221 @@ func TestStatusEndpoint_BackendFieldMapping(t *testing.T) {
242193 assert .Equal (t , "streamable-http" , b .Transport )
243194 assert .Equal (t , authtypes .StrategyTypeTokenExchange , b .AuthType )
244195}
196+
197+ // createTestServerWithHealthMonitor creates a test server with health monitoring enabled.
198+ // setupMock configures mock expectations on the backend client (e.g. ListCapabilities responses for health checks).
199+ // groupRef is set in the server config (empty string is fine for tests that don't need it).
200+ func createTestServerWithHealthMonitor (
201+ t * testing.T ,
202+ backends []vmcp.Backend ,
203+ monitorCfg health.MonitorConfig ,
204+ setupMock func (mockClient * mocks.MockBackendClient ),
205+ groupRef string ,
206+ ) * server.Server {
207+ t .Helper ()
208+
209+ // ctrl.Finish must run last so that all mock calls have already stopped.
210+ // t.Cleanup is LIFO, so register it first — it will execute third.
211+ ctrl := gomock .NewController (t )
212+ t .Cleanup (ctrl .Finish )
213+
214+ mockBackendClient := mocks .NewMockBackendClient (ctrl )
215+ mockDiscoveryMgr := discoveryMocks .NewMockManager (ctrl )
216+ rt := router .NewDefaultRouter ()
217+
218+ if setupMock != nil {
219+ setupMock (mockBackendClient )
220+ }
221+
222+ port := networking .FindAvailable ()
223+ require .NotZero (t , port , "Failed to find available port" )
224+
225+ mockDiscoveryMgr .EXPECT ().
226+ Discover (gomock .Any (), gomock .Any ()).
227+ Return (& aggregator.AggregatedCapabilities {
228+ Tools : []vmcp.Tool {},
229+ Resources : []vmcp.Resource {},
230+ Prompts : []vmcp.Prompt {},
231+ RoutingTable : & vmcp.RoutingTable {
232+ Tools : make (map [string ]* vmcp.BackendTarget ),
233+ Resources : make (map [string ]* vmcp.BackendTarget ),
234+ Prompts : make (map [string ]* vmcp.BackendTarget ),
235+ },
236+ Metadata : & aggregator.AggregationMetadata {},
237+ }, nil ).
238+ AnyTimes ()
239+ mockDiscoveryMgr .EXPECT ().Stop ().AnyTimes ()
240+
241+ ctx , cancel := context .WithCancel (t .Context ())
242+
243+ var healthMonCfg * health.MonitorConfig
244+ if (monitorCfg != health.MonitorConfig {}) {
245+ healthMonCfg = & monitorCfg
246+ }
247+ srv , err := server .New (ctx , & server.Config {
248+ Name : "test-vmcp" ,
249+ Version : "1.0.0" ,
250+ Host : "127.0.0.1" ,
251+ Port : port ,
252+ GroupRef : groupRef ,
253+ HealthMonitorConfig : healthMonCfg ,
254+ SessionFactory : newNoopMockFactory (t ),
255+ }, rt , mockBackendClient , mockDiscoveryMgr , vmcp .NewImmutableRegistry (backends ), nil )
256+ require .NoError (t , err )
257+
258+ type startResult struct {
259+ err error
260+ }
261+ done := make (chan startResult , 1 )
262+ go func () {
263+ done <- startResult {err : srv .Start (ctx )}
264+ }()
265+
266+ // Cleanup order (LIFO):
267+ // 1. cancel() — stops the server and health monitor goroutines
268+ // 2. <-done — waits for srv.Start (and all goroutines) to return
269+ // 3. ctrl.Finish — validates mock expectations after all calls have stopped
270+ t .Cleanup (func () {
271+ result := <- done
272+ if result .err != nil && ! errors .Is (result .err , context .Canceled ) {
273+ t .Errorf ("server exited with unexpected error: %v" , result .err )
274+ }
275+ })
276+ t .Cleanup (cancel )
277+
278+ select {
279+ case <- srv .Ready ():
280+ case result := <- done :
281+ t .Fatalf ("server exited before becoming ready: %v" , result .err )
282+ case <- time .After (5 * time .Second ):
283+ t .Fatalf ("Server did not become ready within 5s (address: %s)" , srv .Address ())
284+ }
285+
286+ return srv
287+ }
288+
289+ // queryStatus fetches and decodes /status from the given server.
290+ func queryStatus (t * testing.T , srv * server.Server ) StatusResponse {
291+ t .Helper ()
292+ resp , err := http .Get ("http://" + srv .Address () + "/status" )
293+ require .NoError (t , err )
294+ defer resp .Body .Close ()
295+ require .Equal (t , http .StatusOK , resp .StatusCode , "unexpected HTTP status from /status" )
296+ var status StatusResponse
297+ require .NoError (t , json .NewDecoder (resp .Body ).Decode (& status ))
298+ return status
299+ }
300+
301+ // TestStatusEndpoint_ReflectsLiveHealthMonitor_Unhealthy verifies the fix for
302+ // https://github.com/stacklok/toolhive/issues/4103: /status must report the
303+ // same health state as the live health monitor, not the stale registry value.
304+ //
305+ // Without the fix, a backend registered as "healthy" would always appear healthy
306+ // in /status even after the health monitor had marked it unhealthy.
307+ func TestStatusEndpoint_ReflectsLiveHealthMonitor_Unhealthy (t * testing.T ) {
308+ t .Parallel ()
309+
310+ // Backend starts as "healthy" in the registry – this is the stale value
311+ // that the old code would always return from /status.
312+ backends := []vmcp.Backend {{
313+ ID : "b1" ,
314+ Name : "test-backend" ,
315+ TransportType : "streamable-http" ,
316+ HealthStatus : vmcp .BackendHealthy ,
317+ }}
318+
319+ monitorCfg := health.MonitorConfig {
320+ CheckInterval : 5 * time .Millisecond ,
321+ UnhealthyThreshold : 1 , // one failure → unhealthy immediately
322+ Timeout : time .Second ,
323+ }
324+
325+ srv := createTestServerWithHealthMonitor (t , backends , monitorCfg , func (mockClient * mocks.MockBackendClient ) {
326+ // All health checks fail – the monitor should mark the backend unhealthy.
327+ mockClient .EXPECT ().
328+ ListCapabilities (gomock .Any (), gomock .Any ()).
329+ Return (nil , errors .New ("connection refused" )).
330+ AnyTimes ()
331+ }, "" )
332+
333+ // Poll /status until the live monitor state propagates. If the fix is
334+ // absent the backend stays "healthy" forever and the assertion times out.
335+ require .Eventually (t , func () bool {
336+ status := queryStatus (t , srv )
337+ if len (status .Backends ) == 0 {
338+ return false
339+ }
340+ return status .Backends [0 ].Health == string (vmcp .BackendUnhealthy )
341+ }, 5 * time .Second , 20 * time .Millisecond , "expected /status to report backend as unhealthy" )
342+
343+ // The overall server health flag must also be false when no backend is healthy.
344+ status := queryStatus (t , srv )
345+ assert .False (t , status .Healthy )
346+ require .Len (t , status .Backends , 1 )
347+ assert .Equal (t , string (vmcp .BackendUnhealthy ), status .Backends [0 ].Health )
348+ }
349+
350+ // TestStatusEndpoint_ReflectsLiveHealthMonitor_Healthy confirms that /status
351+ // correctly reports a backend as healthy when the health monitor records success.
352+ func TestStatusEndpoint_ReflectsLiveHealthMonitor_Healthy (t * testing.T ) {
353+ t .Parallel ()
354+
355+ backends := []vmcp.Backend {{
356+ ID : "b1" ,
357+ Name : "test-backend" ,
358+ TransportType : "streamable-http" ,
359+ HealthStatus : vmcp .BackendUnknown , // registry starts with unknown
360+ }}
361+
362+ monitorCfg := health.MonitorConfig {
363+ CheckInterval : 5 * time .Millisecond ,
364+ UnhealthyThreshold : 3 ,
365+ Timeout : time .Second ,
366+ }
367+
368+ srv := createTestServerWithHealthMonitor (t , backends , monitorCfg , func (mockClient * mocks.MockBackendClient ) {
369+ // Health checks succeed – the monitor should mark the backend healthy.
370+ mockClient .EXPECT ().
371+ ListCapabilities (gomock .Any (), gomock .Any ()).
372+ Return (& vmcp.CapabilityList {}, nil ).
373+ AnyTimes ()
374+ }, "" )
375+
376+ // Poll until the healthy state from the monitor appears in /status.
377+ require .Eventually (t , func () bool {
378+ status := queryStatus (t , srv )
379+ if len (status .Backends ) == 0 {
380+ return false
381+ }
382+ return status .Backends [0 ].Health == string (vmcp .BackendHealthy )
383+ }, 5 * time .Second , 20 * time .Millisecond , "expected /status to report backend as healthy" )
384+
385+ status := queryStatus (t , srv )
386+ assert .True (t , status .Healthy )
387+ require .Len (t , status .Backends , 1 )
388+ assert .Equal (t , string (vmcp .BackendHealthy ), status .Backends [0 ].Health )
389+ }
390+
391+ // TestStatusEndpoint_FallsBackToRegistry_WhenMonitorDisabled confirms the
392+ // no-monitor path is unchanged: health status comes from the registry.
393+ func TestStatusEndpoint_FallsBackToRegistry_WhenMonitorDisabled (t * testing.T ) {
394+ t .Parallel ()
395+
396+ backends := []vmcp.Backend {
397+ {ID : "b1" , Name : "healthy-backend" , HealthStatus : vmcp .BackendHealthy },
398+ {ID : "b2" , Name : "unhealthy-backend" , HealthStatus : vmcp .BackendUnhealthy },
399+ }
400+
401+ // createTestServerWithBackends does NOT configure a health monitor.
402+ srv := createTestServerWithBackends (t , backends , "" )
403+
404+ status := queryStatus (t , srv )
405+
406+ require .Len (t , status .Backends , 2 )
407+ healthByName := make (map [string ]string )
408+ for _ , b := range status .Backends {
409+ healthByName [b .Name ] = b .Health
410+ }
411+ assert .Equal (t , string (vmcp .BackendHealthy ), healthByName ["healthy-backend" ])
412+ assert .Equal (t , string (vmcp .BackendUnhealthy ), healthByName ["unhealthy-backend" ])
413+ }
0 commit comments