@@ -41,6 +41,7 @@ import (
4141 "k8s.io/apimachinery/pkg/runtime/serializer/json"
4242 "k8s.io/apimachinery/pkg/types"
4343 "k8s.io/client-go/kubernetes/scheme"
44+ "k8s.io/utils/ptr"
4445 ctrl "sigs.k8s.io/controller-runtime"
4546 "sigs.k8s.io/controller-runtime/pkg/client"
4647 "sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -1083,3 +1084,311 @@ func TestGetSanitizedKernelVersion(t *testing.T) {
10831084 require .Equal (t , test .expected , result )
10841085 }
10851086}
1087+
1088+ func TestServiceMonitor (t * testing.T ) {
1089+ const (
1090+ testNamespace = "test-namespace"
1091+ testServiceMonitor = "test-service-monitor"
1092+ filledNamespace = "FILLED BY THE OPERATOR"
1093+ )
1094+
1095+ // Create scheme with required types
1096+ scheme := runtime .NewScheme ()
1097+ require .NoError (t , promv1 .AddToScheme (scheme ))
1098+ require .NoError (t , apiextensionsv1 .AddToScheme (scheme ))
1099+ require .NoError (t , gpuv1 .AddToScheme (scheme ))
1100+
1101+ serviceMonitor := promv1.ServiceMonitor {
1102+ ObjectMeta : metav1.ObjectMeta {Name : testServiceMonitor , Labels : map [string ]string {}},
1103+ Spec : promv1.ServiceMonitorSpec {
1104+ NamespaceSelector : promv1.NamespaceSelector {MatchNames : []string {filledNamespace }},
1105+ Endpoints : []promv1.Endpoint {{}},
1106+ },
1107+ }
1108+
1109+ // Create controller with given spec and state
1110+ newController := func (k8s client.Client , scheme * runtime.Scheme , spec gpuv1.ClusterPolicySpec , state string ) ClusterPolicyController {
1111+ clusterPolicy := & gpuv1.ClusterPolicy {Spec : spec }
1112+ resources := []Resources {{ServiceMonitor : serviceMonitor }}
1113+
1114+ return ClusterPolicyController {
1115+ client : k8s ,
1116+ ctx : context .Background (),
1117+ singleton : clusterPolicy ,
1118+ scheme : scheme ,
1119+ operatorNamespace : testNamespace ,
1120+ resources : resources ,
1121+ stateNames : []string {state },
1122+ idx : 0 ,
1123+ logger : ctrl .Log .WithName ("test" ),
1124+ }
1125+ }
1126+
1127+ // CRD object for tests that need ServiceMonitor CRD present
1128+ serviceMonitorCRD := & apiextensionsv1.CustomResourceDefinition {ObjectMeta : metav1.ObjectMeta {Name : ServiceMonitorCRDName }}
1129+
1130+ tests := []struct {
1131+ description string
1132+ stateName string
1133+ k8sObjects []client.Object
1134+ clusterPolicySpec gpuv1.ClusterPolicySpec
1135+ expectedState gpuv1.State
1136+ expectedServiceMonitor * promv1.ServiceMonitor
1137+ }{
1138+ {
1139+ description : "dcgm-exporter disabled, CRD missing -> Ready" ,
1140+ stateName : "state-dcgm-exporter" ,
1141+ k8sObjects : nil ,
1142+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1143+ DCGMExporter : gpuv1.DCGMExporterSpec {Enabled : ptr .To (false )},
1144+ },
1145+ expectedState : gpuv1 .Ready ,
1146+ expectedServiceMonitor : nil ,
1147+ },
1148+ {
1149+ description : "dcgm-exporter SM enabled, CRD missing -> NotReady" ,
1150+ stateName : "state-dcgm-exporter" ,
1151+ k8sObjects : nil ,
1152+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1153+ DCGMExporter : gpuv1.DCGMExporterSpec {
1154+ Enabled : ptr .To (true ),
1155+ ServiceMonitor : & gpuv1.DCGMExporterServiceMonitorConfig {Enabled : ptr .To (true )},
1156+ },
1157+ },
1158+ expectedState : gpuv1 .NotReady ,
1159+ expectedServiceMonitor : nil ,
1160+ },
1161+ {
1162+ description : "dcgm-exporter SM disabled, CRD present -> Disabled (delete if exists)" ,
1163+ stateName : "state-dcgm-exporter" ,
1164+ k8sObjects : []client.Object {serviceMonitorCRD },
1165+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1166+ DCGMExporter : gpuv1.DCGMExporterSpec {
1167+ Enabled : ptr .To (true ),
1168+ ServiceMonitor : & gpuv1.DCGMExporterServiceMonitorConfig {Enabled : ptr .To (false )},
1169+ },
1170+ },
1171+ expectedState : gpuv1 .Disabled ,
1172+ expectedServiceMonitor : nil ,
1173+ },
1174+ {
1175+ description : "operator-metrics, CRD missing -> Ready (ignore create)" ,
1176+ stateName : "state-operator-metrics" ,
1177+ k8sObjects : nil ,
1178+ clusterPolicySpec : gpuv1.ClusterPolicySpec {},
1179+ expectedState : gpuv1 .Ready ,
1180+ expectedServiceMonitor : nil ,
1181+ },
1182+ {
1183+ description : "node-status-exporter disabled, CRD present -> Disabled" ,
1184+ stateName : "state-node-status-exporter" ,
1185+ k8sObjects : []client.Object {serviceMonitorCRD },
1186+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1187+ NodeStatusExporter : gpuv1.NodeStatusExporterSpec {Enabled : ptr .To (false )},
1188+ },
1189+ expectedState : gpuv1 .Disabled ,
1190+ expectedServiceMonitor : nil ,
1191+ },
1192+ {
1193+ description : "dcgm-exporter SM enabled, CRD present -> Ready and applies edits" ,
1194+ stateName : "state-dcgm-exporter" ,
1195+ k8sObjects : []client.Object {serviceMonitorCRD },
1196+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1197+ DCGMExporter : gpuv1.DCGMExporterSpec {
1198+ Enabled : ptr .To (true ),
1199+ ServiceMonitor : & gpuv1.DCGMExporterServiceMonitorConfig {
1200+ Enabled : ptr .To (true ),
1201+ Interval : promv1 .Duration ("15s" ),
1202+ HonorLabels : ptr .To (true ),
1203+ AdditionalLabels : map [string ]string {"a" : "b" },
1204+ Relabelings : []* promv1.RelabelConfig {{Action : "keep" }},
1205+ },
1206+ },
1207+ },
1208+ expectedState : gpuv1 .Ready ,
1209+ expectedServiceMonitor : & promv1.ServiceMonitor {
1210+ ObjectMeta : metav1.ObjectMeta {
1211+ Name : "test-service-monitor" ,
1212+ Namespace : "test-namespace" ,
1213+ Labels : map [string ]string {"a" : "b" },
1214+ },
1215+ Spec : promv1.ServiceMonitorSpec {
1216+ NamespaceSelector : promv1.NamespaceSelector {MatchNames : []string {"test-namespace" }},
1217+ Endpoints : []promv1.Endpoint {{
1218+ Interval : promv1 .Duration ("15s" ),
1219+ HonorLabels : true ,
1220+ RelabelConfigs : []promv1.RelabelConfig {{
1221+ Action : "keep" ,
1222+ }},
1223+ }},
1224+ },
1225+ },
1226+ },
1227+ }
1228+
1229+ for _ , tc := range tests {
1230+ t .Run (tc .description , func (t * testing.T ) {
1231+ k8sClient := fake .NewClientBuilder ().
1232+ WithScheme (scheme ).
1233+ WithObjects (tc .k8sObjects ... ).
1234+ Build ()
1235+
1236+ controller := newController (k8sClient , scheme , tc .clusterPolicySpec , tc .stateName )
1237+
1238+ // Calls the actual ServiceMonitor function under test and validates the state
1239+ state , err := ServiceMonitor (controller )
1240+
1241+ require .NoError (t , err )
1242+ require .Equal (t , tc .expectedState , state )
1243+
1244+ found := & promv1.ServiceMonitor {}
1245+ err = k8sClient .Get (context .Background (), client.ObjectKey {Namespace : testNamespace , Name : testServiceMonitor }, found )
1246+ if tc .expectedServiceMonitor == nil {
1247+ require .True (t , apierrors .IsNotFound (err ))
1248+ return
1249+ }
1250+ require .NoError (t , err )
1251+
1252+ require .Equal (t , tc .expectedServiceMonitor .Name , found .Name )
1253+ require .Equal (t , tc .expectedServiceMonitor .Namespace , found .Namespace )
1254+ require .Equal (t , tc .expectedServiceMonitor .Labels , found .Labels )
1255+ require .Equal (t , tc .expectedServiceMonitor .Spec , found .Spec )
1256+ })
1257+ }
1258+ }
1259+
1260+ func TestService (t * testing.T ) {
1261+ const (
1262+ testNamespace = "test-namespace"
1263+ testService = "nvidia-dcgm-exporter"
1264+ )
1265+
1266+ // Helper to create scheme with required types
1267+ scheme := runtime .NewScheme ()
1268+ require .NoError (t , corev1 .AddToScheme (scheme ))
1269+ require .NoError (t , gpuv1 .AddToScheme (scheme ))
1270+
1271+ // Template Service
1272+ service := corev1.Service {
1273+ ObjectMeta : metav1.ObjectMeta {Name : testService },
1274+ Spec : corev1.ServiceSpec {},
1275+ }
1276+
1277+ // Helper to create controller with given spec
1278+ newController := func (k8s client.Client , scheme * runtime.Scheme , spec gpuv1.ClusterPolicySpec ) ClusterPolicyController {
1279+ clusterPolicy := & gpuv1.ClusterPolicy {Spec : spec }
1280+ resources := []Resources {{Service : service }}
1281+ return ClusterPolicyController {
1282+ client : k8s ,
1283+ ctx : context .Background (),
1284+ singleton : clusterPolicy ,
1285+ scheme : scheme ,
1286+ operatorNamespace : testNamespace ,
1287+ resources : resources ,
1288+ stateNames : []string {"state-dcgm-exporter" },
1289+ idx : 0 ,
1290+ logger : ctrl .Log .WithName ("test" ),
1291+ }
1292+ }
1293+
1294+ localPolicy := corev1 .ServiceInternalTrafficPolicyLocal
1295+
1296+ tests := []struct {
1297+ description string
1298+ k8sObjects []client.Object
1299+ clusterPolicySpec gpuv1.ClusterPolicySpec
1300+ expectedState gpuv1.State
1301+ expectService bool
1302+ expectedType corev1.ServiceType
1303+ expectedPolicy * corev1.ServiceInternalTrafficPolicy
1304+ expectedIP string // For ClusterIP preservation test
1305+ }{
1306+ {
1307+ description : "create and preprocess" ,
1308+ k8sObjects : nil ,
1309+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1310+ DCGMExporter : gpuv1.DCGMExporterSpec {
1311+ Enabled : ptr .To (true ),
1312+ ServiceSpec : & gpuv1.DCGMExporterServiceConfig {
1313+ Type : corev1 .ServiceTypeNodePort ,
1314+ InternalTrafficPolicy : & localPolicy ,
1315+ },
1316+ },
1317+ },
1318+ expectedState : gpuv1 .Ready ,
1319+ expectService : true ,
1320+ expectedType : corev1 .ServiceTypeNodePort ,
1321+ expectedPolicy : & localPolicy ,
1322+ },
1323+ {
1324+ description : "update preserves ClusterIP" ,
1325+ k8sObjects : []client.Object {& corev1.Service {
1326+ ObjectMeta : metav1.ObjectMeta {Name : testService , Namespace : testNamespace },
1327+ Spec : corev1.ServiceSpec {
1328+ Type : corev1 .ServiceTypeClusterIP ,
1329+ ClusterIP : "10.0.0.42" ,
1330+ },
1331+ }},
1332+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1333+ DCGMExporter : gpuv1.DCGMExporterSpec {
1334+ Enabled : ptr .To (true ),
1335+ ServiceSpec : & gpuv1.DCGMExporterServiceConfig {
1336+ Type : corev1 .ServiceTypeNodePort ,
1337+ InternalTrafficPolicy : & localPolicy ,
1338+ },
1339+ },
1340+ },
1341+ expectedState : gpuv1 .Ready ,
1342+ expectService : true ,
1343+ expectedType : corev1 .ServiceTypeNodePort ,
1344+ expectedPolicy : & localPolicy ,
1345+ expectedIP : "10.0.0.42" ,
1346+ },
1347+ {
1348+ description : "disabled deletes and returns Disabled" ,
1349+ k8sObjects : []client.Object {& corev1.Service {
1350+ ObjectMeta : metav1.ObjectMeta {Name : testService , Namespace : testNamespace },
1351+ Spec : corev1.ServiceSpec {
1352+ Type : corev1 .ServiceTypeClusterIP ,
1353+ ClusterIP : "10.0.0.42" ,
1354+ },
1355+ }},
1356+ clusterPolicySpec : gpuv1.ClusterPolicySpec {
1357+ DCGMExporter : gpuv1.DCGMExporterSpec {Enabled : ptr .To (false )},
1358+ },
1359+ expectedState : gpuv1 .Disabled ,
1360+ expectService : false ,
1361+ },
1362+ }
1363+
1364+ for _ , tc := range tests {
1365+ t .Run (tc .description , func (t * testing.T ) {
1366+ k8sClient := fake .NewClientBuilder ().
1367+ WithScheme (scheme ).
1368+ WithObjects (tc .k8sObjects ... ).
1369+ Build ()
1370+
1371+ controller := newController (k8sClient , scheme , tc .clusterPolicySpec )
1372+
1373+ state , err := Service (controller )
1374+ require .NoError (t , err )
1375+ require .Equal (t , tc .expectedState , state )
1376+
1377+ found := & corev1.Service {}
1378+ err = k8sClient .Get (context .Background (), client.ObjectKey {Namespace : testNamespace , Name : testService }, found )
1379+ if ! tc .expectService {
1380+ require .True (t , apierrors .IsNotFound (err ))
1381+ return
1382+ }
1383+ require .NoError (t , err )
1384+ require .Equal (t , tc .expectedType , found .Spec .Type )
1385+ if tc .expectedPolicy != nil {
1386+ require .NotNil (t , found .Spec .InternalTrafficPolicy )
1387+ require .Equal (t , * tc .expectedPolicy , * found .Spec .InternalTrafficPolicy )
1388+ }
1389+ if tc .expectedIP != "" {
1390+ require .Equal (t , tc .expectedIP , found .Spec .ClusterIP )
1391+ }
1392+ })
1393+ }
1394+ }
0 commit comments