Skip to content

Commit 5f7a3ab

Browse files
authored
Merge pull request #1707 from karthikvetrivel/test/add-service-servicemonitor-tests
tests: Add unit tests for DCGM exporter Service and ServiceMonitor
2 parents 821f1de + d07dde4 commit 5f7a3ab

File tree

1 file changed

+309
-0
lines changed

1 file changed

+309
-0
lines changed

controllers/object_controls_test.go

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
"k8s.io/apimachinery/pkg/runtime/serializer/json"
4242
"k8s.io/apimachinery/pkg/types"
4343
"k8s.io/client-go/kubernetes/scheme"
44+
"k8s.io/utils/ptr"
4445
ctrl "sigs.k8s.io/controller-runtime"
4546
"sigs.k8s.io/controller-runtime/pkg/client"
4647
"sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -1083,3 +1084,311 @@ func TestGetSanitizedKernelVersion(t *testing.T) {
10831084
require.Equal(t, test.expected, result)
10841085
}
10851086
}
1087+
1088+
func TestServiceMonitor(t *testing.T) {
1089+
const (
1090+
testNamespace = "test-namespace"
1091+
testServiceMonitor = "test-service-monitor"
1092+
filledNamespace = "FILLED BY THE OPERATOR"
1093+
)
1094+
1095+
// Create scheme with required types
1096+
scheme := runtime.NewScheme()
1097+
require.NoError(t, promv1.AddToScheme(scheme))
1098+
require.NoError(t, apiextensionsv1.AddToScheme(scheme))
1099+
require.NoError(t, gpuv1.AddToScheme(scheme))
1100+
1101+
serviceMonitor := promv1.ServiceMonitor{
1102+
ObjectMeta: metav1.ObjectMeta{Name: testServiceMonitor, Labels: map[string]string{}},
1103+
Spec: promv1.ServiceMonitorSpec{
1104+
NamespaceSelector: promv1.NamespaceSelector{MatchNames: []string{filledNamespace}},
1105+
Endpoints: []promv1.Endpoint{{}},
1106+
},
1107+
}
1108+
1109+
// Create controller with given spec and state
1110+
newController := func(k8s client.Client, scheme *runtime.Scheme, spec gpuv1.ClusterPolicySpec, state string) ClusterPolicyController {
1111+
clusterPolicy := &gpuv1.ClusterPolicy{Spec: spec}
1112+
resources := []Resources{{ServiceMonitor: serviceMonitor}}
1113+
1114+
return ClusterPolicyController{
1115+
client: k8s,
1116+
ctx: context.Background(),
1117+
singleton: clusterPolicy,
1118+
scheme: scheme,
1119+
operatorNamespace: testNamespace,
1120+
resources: resources,
1121+
stateNames: []string{state},
1122+
idx: 0,
1123+
logger: ctrl.Log.WithName("test"),
1124+
}
1125+
}
1126+
1127+
// CRD object for tests that need ServiceMonitor CRD present
1128+
serviceMonitorCRD := &apiextensionsv1.CustomResourceDefinition{ObjectMeta: metav1.ObjectMeta{Name: ServiceMonitorCRDName}}
1129+
1130+
tests := []struct {
1131+
description string
1132+
stateName string
1133+
k8sObjects []client.Object
1134+
clusterPolicySpec gpuv1.ClusterPolicySpec
1135+
expectedState gpuv1.State
1136+
expectedServiceMonitor *promv1.ServiceMonitor
1137+
}{
1138+
{
1139+
description: "dcgm-exporter disabled, CRD missing -> Ready",
1140+
stateName: "state-dcgm-exporter",
1141+
k8sObjects: nil,
1142+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1143+
DCGMExporter: gpuv1.DCGMExporterSpec{Enabled: ptr.To(false)},
1144+
},
1145+
expectedState: gpuv1.Ready,
1146+
expectedServiceMonitor: nil,
1147+
},
1148+
{
1149+
description: "dcgm-exporter SM enabled, CRD missing -> NotReady",
1150+
stateName: "state-dcgm-exporter",
1151+
k8sObjects: nil,
1152+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1153+
DCGMExporter: gpuv1.DCGMExporterSpec{
1154+
Enabled: ptr.To(true),
1155+
ServiceMonitor: &gpuv1.DCGMExporterServiceMonitorConfig{Enabled: ptr.To(true)},
1156+
},
1157+
},
1158+
expectedState: gpuv1.NotReady,
1159+
expectedServiceMonitor: nil,
1160+
},
1161+
{
1162+
description: "dcgm-exporter SM disabled, CRD present -> Disabled (delete if exists)",
1163+
stateName: "state-dcgm-exporter",
1164+
k8sObjects: []client.Object{serviceMonitorCRD},
1165+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1166+
DCGMExporter: gpuv1.DCGMExporterSpec{
1167+
Enabled: ptr.To(true),
1168+
ServiceMonitor: &gpuv1.DCGMExporterServiceMonitorConfig{Enabled: ptr.To(false)},
1169+
},
1170+
},
1171+
expectedState: gpuv1.Disabled,
1172+
expectedServiceMonitor: nil,
1173+
},
1174+
{
1175+
description: "operator-metrics, CRD missing -> Ready (ignore create)",
1176+
stateName: "state-operator-metrics",
1177+
k8sObjects: nil,
1178+
clusterPolicySpec: gpuv1.ClusterPolicySpec{},
1179+
expectedState: gpuv1.Ready,
1180+
expectedServiceMonitor: nil,
1181+
},
1182+
{
1183+
description: "node-status-exporter disabled, CRD present -> Disabled",
1184+
stateName: "state-node-status-exporter",
1185+
k8sObjects: []client.Object{serviceMonitorCRD},
1186+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1187+
NodeStatusExporter: gpuv1.NodeStatusExporterSpec{Enabled: ptr.To(false)},
1188+
},
1189+
expectedState: gpuv1.Disabled,
1190+
expectedServiceMonitor: nil,
1191+
},
1192+
{
1193+
description: "dcgm-exporter SM enabled, CRD present -> Ready and applies edits",
1194+
stateName: "state-dcgm-exporter",
1195+
k8sObjects: []client.Object{serviceMonitorCRD},
1196+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1197+
DCGMExporter: gpuv1.DCGMExporterSpec{
1198+
Enabled: ptr.To(true),
1199+
ServiceMonitor: &gpuv1.DCGMExporterServiceMonitorConfig{
1200+
Enabled: ptr.To(true),
1201+
Interval: promv1.Duration("15s"),
1202+
HonorLabels: ptr.To(true),
1203+
AdditionalLabels: map[string]string{"a": "b"},
1204+
Relabelings: []*promv1.RelabelConfig{{Action: "keep"}},
1205+
},
1206+
},
1207+
},
1208+
expectedState: gpuv1.Ready,
1209+
expectedServiceMonitor: &promv1.ServiceMonitor{
1210+
ObjectMeta: metav1.ObjectMeta{
1211+
Name: "test-service-monitor",
1212+
Namespace: "test-namespace",
1213+
Labels: map[string]string{"a": "b"},
1214+
},
1215+
Spec: promv1.ServiceMonitorSpec{
1216+
NamespaceSelector: promv1.NamespaceSelector{MatchNames: []string{"test-namespace"}},
1217+
Endpoints: []promv1.Endpoint{{
1218+
Interval: promv1.Duration("15s"),
1219+
HonorLabels: true,
1220+
RelabelConfigs: []promv1.RelabelConfig{{
1221+
Action: "keep",
1222+
}},
1223+
}},
1224+
},
1225+
},
1226+
},
1227+
}
1228+
1229+
for _, tc := range tests {
1230+
t.Run(tc.description, func(t *testing.T) {
1231+
k8sClient := fake.NewClientBuilder().
1232+
WithScheme(scheme).
1233+
WithObjects(tc.k8sObjects...).
1234+
Build()
1235+
1236+
controller := newController(k8sClient, scheme, tc.clusterPolicySpec, tc.stateName)
1237+
1238+
// Calls the actual ServiceMonitor function under test and validates the state
1239+
state, err := ServiceMonitor(controller)
1240+
1241+
require.NoError(t, err)
1242+
require.Equal(t, tc.expectedState, state)
1243+
1244+
found := &promv1.ServiceMonitor{}
1245+
err = k8sClient.Get(context.Background(), client.ObjectKey{Namespace: testNamespace, Name: testServiceMonitor}, found)
1246+
if tc.expectedServiceMonitor == nil {
1247+
require.True(t, apierrors.IsNotFound(err))
1248+
return
1249+
}
1250+
require.NoError(t, err)
1251+
1252+
require.Equal(t, tc.expectedServiceMonitor.Name, found.Name)
1253+
require.Equal(t, tc.expectedServiceMonitor.Namespace, found.Namespace)
1254+
require.Equal(t, tc.expectedServiceMonitor.Labels, found.Labels)
1255+
require.Equal(t, tc.expectedServiceMonitor.Spec, found.Spec)
1256+
})
1257+
}
1258+
}
1259+
1260+
func TestService(t *testing.T) {
1261+
const (
1262+
testNamespace = "test-namespace"
1263+
testService = "nvidia-dcgm-exporter"
1264+
)
1265+
1266+
// Helper to create scheme with required types
1267+
scheme := runtime.NewScheme()
1268+
require.NoError(t, corev1.AddToScheme(scheme))
1269+
require.NoError(t, gpuv1.AddToScheme(scheme))
1270+
1271+
// Template Service
1272+
service := corev1.Service{
1273+
ObjectMeta: metav1.ObjectMeta{Name: testService},
1274+
Spec: corev1.ServiceSpec{},
1275+
}
1276+
1277+
// Helper to create controller with given spec
1278+
newController := func(k8s client.Client, scheme *runtime.Scheme, spec gpuv1.ClusterPolicySpec) ClusterPolicyController {
1279+
clusterPolicy := &gpuv1.ClusterPolicy{Spec: spec}
1280+
resources := []Resources{{Service: service}}
1281+
return ClusterPolicyController{
1282+
client: k8s,
1283+
ctx: context.Background(),
1284+
singleton: clusterPolicy,
1285+
scheme: scheme,
1286+
operatorNamespace: testNamespace,
1287+
resources: resources,
1288+
stateNames: []string{"state-dcgm-exporter"},
1289+
idx: 0,
1290+
logger: ctrl.Log.WithName("test"),
1291+
}
1292+
}
1293+
1294+
localPolicy := corev1.ServiceInternalTrafficPolicyLocal
1295+
1296+
tests := []struct {
1297+
description string
1298+
k8sObjects []client.Object
1299+
clusterPolicySpec gpuv1.ClusterPolicySpec
1300+
expectedState gpuv1.State
1301+
expectService bool
1302+
expectedType corev1.ServiceType
1303+
expectedPolicy *corev1.ServiceInternalTrafficPolicy
1304+
expectedIP string // For ClusterIP preservation test
1305+
}{
1306+
{
1307+
description: "create and preprocess",
1308+
k8sObjects: nil,
1309+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1310+
DCGMExporter: gpuv1.DCGMExporterSpec{
1311+
Enabled: ptr.To(true),
1312+
ServiceSpec: &gpuv1.DCGMExporterServiceConfig{
1313+
Type: corev1.ServiceTypeNodePort,
1314+
InternalTrafficPolicy: &localPolicy,
1315+
},
1316+
},
1317+
},
1318+
expectedState: gpuv1.Ready,
1319+
expectService: true,
1320+
expectedType: corev1.ServiceTypeNodePort,
1321+
expectedPolicy: &localPolicy,
1322+
},
1323+
{
1324+
description: "update preserves ClusterIP",
1325+
k8sObjects: []client.Object{&corev1.Service{
1326+
ObjectMeta: metav1.ObjectMeta{Name: testService, Namespace: testNamespace},
1327+
Spec: corev1.ServiceSpec{
1328+
Type: corev1.ServiceTypeClusterIP,
1329+
ClusterIP: "10.0.0.42",
1330+
},
1331+
}},
1332+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1333+
DCGMExporter: gpuv1.DCGMExporterSpec{
1334+
Enabled: ptr.To(true),
1335+
ServiceSpec: &gpuv1.DCGMExporterServiceConfig{
1336+
Type: corev1.ServiceTypeNodePort,
1337+
InternalTrafficPolicy: &localPolicy,
1338+
},
1339+
},
1340+
},
1341+
expectedState: gpuv1.Ready,
1342+
expectService: true,
1343+
expectedType: corev1.ServiceTypeNodePort,
1344+
expectedPolicy: &localPolicy,
1345+
expectedIP: "10.0.0.42",
1346+
},
1347+
{
1348+
description: "disabled deletes and returns Disabled",
1349+
k8sObjects: []client.Object{&corev1.Service{
1350+
ObjectMeta: metav1.ObjectMeta{Name: testService, Namespace: testNamespace},
1351+
Spec: corev1.ServiceSpec{
1352+
Type: corev1.ServiceTypeClusterIP,
1353+
ClusterIP: "10.0.0.42",
1354+
},
1355+
}},
1356+
clusterPolicySpec: gpuv1.ClusterPolicySpec{
1357+
DCGMExporter: gpuv1.DCGMExporterSpec{Enabled: ptr.To(false)},
1358+
},
1359+
expectedState: gpuv1.Disabled,
1360+
expectService: false,
1361+
},
1362+
}
1363+
1364+
for _, tc := range tests {
1365+
t.Run(tc.description, func(t *testing.T) {
1366+
k8sClient := fake.NewClientBuilder().
1367+
WithScheme(scheme).
1368+
WithObjects(tc.k8sObjects...).
1369+
Build()
1370+
1371+
controller := newController(k8sClient, scheme, tc.clusterPolicySpec)
1372+
1373+
state, err := Service(controller)
1374+
require.NoError(t, err)
1375+
require.Equal(t, tc.expectedState, state)
1376+
1377+
found := &corev1.Service{}
1378+
err = k8sClient.Get(context.Background(), client.ObjectKey{Namespace: testNamespace, Name: testService}, found)
1379+
if !tc.expectService {
1380+
require.True(t, apierrors.IsNotFound(err))
1381+
return
1382+
}
1383+
require.NoError(t, err)
1384+
require.Equal(t, tc.expectedType, found.Spec.Type)
1385+
if tc.expectedPolicy != nil {
1386+
require.NotNil(t, found.Spec.InternalTrafficPolicy)
1387+
require.Equal(t, *tc.expectedPolicy, *found.Spec.InternalTrafficPolicy)
1388+
}
1389+
if tc.expectedIP != "" {
1390+
require.Equal(t, tc.expectedIP, found.Spec.ClusterIP)
1391+
}
1392+
})
1393+
}
1394+
}

0 commit comments

Comments
 (0)