Skip to content

Commit ac8a330

Browse files
author
Israel Blancas
committed
Create ServiceMonitor for operator metrics programmatically
Signed-off-by: Israel Blancas <[email protected]>
1 parent 6a5f9ba commit ac8a330

17 files changed

+383
-34
lines changed

.chloggen/3370-create-dynamic-sm.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
2+
change_type: enhancement
3+
4+
# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
5+
component: operator
6+
7+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
8+
note: Programmatically create the `ServiceMonitor` for the operator metrics endpoint, ensuring correct namespace handling and dynamic configuration.
9+
10+
# One or more tracking issues related to the change
11+
issues: [3370]
12+
13+
# (Optional) One or more lines of additional information to render under the primary note.
14+
# These lines will be padded with 2 spaces and then inserted directly into the document.
15+
# Use pipe (|) for multiline entries.
16+
subtext: |
17+
Previously, the `ServiceMonitor` was created statically from a manifest file, causing failures when the
18+
operator was deployed in a non-default namespace. This enhancement ensures automatic adjustment of the
19+
`serverName` and seamless metrics scraping.

bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ metadata:
9999
categories: Logging & Tracing,Monitoring
100100
certified: "false"
101101
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
102-
createdAt: "2024-10-16T10:10:50Z"
102+
createdAt: "2024-10-28T12:33:36Z"
103103
description: Provides the OpenTelemetry components, including the Collector
104104
operators.operatorframework.io/builder: operator-sdk-v1.29.0
105105
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3

bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
apiVersion: v1
22
kind: Service
33
metadata:
4+
annotations:
5+
service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics
46
creationTimestamp: null
57
labels:
68
app.kubernetes.io/name: opentelemetry-operator
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
apiVersion: monitoring.coreos.com/v1
2+
kind: PrometheusRule
3+
metadata:
4+
labels:
5+
app.kubernetes.io/managed-by: operator-lifecycle-manager
6+
app.kubernetes.io/name: opentelemetry-operator
7+
app.kubernetes.io/part-of: opentelemetry-operator
8+
name: opentelemetry-operator-prometheus-rules
9+
spec:
10+
groups:
11+
- name: opentelemetry-operator-monitoring.rules
12+
rules:
13+
- expr: sum by (type) (opentelemetry_collector_receivers)
14+
record: type:opentelemetry_collector_receivers:sum
15+
- expr: sum by (type) (opentelemetry_collector_exporters)
16+
record: type:opentelemetry_collector_exporters:sum
17+
- expr: sum by (type) (opentelemetry_collector_processors)
18+
record: type:opentelemetry_collector_processors:sum
19+
- expr: sum by (type) (opentelemetry_collector_extensions)
20+
record: type:opentelemetry_collector_extensions:sum
21+
- expr: sum by (type) (opentelemetry_collector_connectors)
22+
record: type:opentelemetry_collector_connectors:sum
23+
- expr: sum by (type) (opentelemetry_collector_info)
24+
record: type:opentelemetry_collector_info:sum
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: Role
3+
metadata:
4+
name: opentelemetry-operator-prometheus
5+
rules:
6+
- apiGroups:
7+
- ""
8+
resources:
9+
- services
10+
- endpoints
11+
- pods
12+
verbs:
13+
- get
14+
- list
15+
- watch
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: RoleBinding
3+
metadata:
4+
name: opentelemetry-operator-prometheus
5+
roleRef:
6+
apiGroup: rbac.authorization.k8s.io
7+
kind: Role
8+
name: opentelemetry-operator-prometheus
9+
subjects:
10+
- kind: ServiceAccount
11+
name: prometheus-k8s
12+
namespace: openshift-monitoring

bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml

+13-2
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ metadata:
9999
categories: Logging & Tracing,Monitoring
100100
certified: "false"
101101
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
102-
createdAt: "2024-10-16T10:10:50Z"
102+
createdAt: "2024-10-28T12:33:40Z"
103103
description: Provides the OpenTelemetry components, including the Collector
104104
operators.operatorframework.io/builder: operator-sdk-v1.29.0
105105
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
@@ -477,9 +477,9 @@ spec:
477477
- --zap-time-encoding=rfc3339nano
478478
- --enable-nginx-instrumentation=true
479479
- --enable-go-instrumentation=true
480-
- --enable-multi-instrumentation=true
481480
- --openshift-create-dashboard=true
482481
- --feature-gates=+operator.observability.prometheus
482+
- --enable-cr-metrics=true
483483
env:
484484
- name: SERVICE_ACCOUNT_NAME
485485
valueFrom:
@@ -516,6 +516,10 @@ spec:
516516
- --upstream=http://127.0.0.1:8080/
517517
- --logtostderr=true
518518
- --v=0
519+
- --tls-cert-file=/var/run/tls/server/tls.crt
520+
- --tls-private-key-file=/var/run/tls/server/tls.key
521+
- --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256
522+
- --tls-min-version=VersionTLS12
519523
image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1
520524
name: kube-rbac-proxy
521525
ports:
@@ -529,9 +533,16 @@ spec:
529533
requests:
530534
cpu: 5m
531535
memory: 64Mi
536+
volumeMounts:
537+
- mountPath: /var/run/tls/server
538+
name: opentelemetry-operator-metrics-cert
532539
serviceAccountName: opentelemetry-operator-controller-manager
533540
terminationGracePeriodSeconds: 10
534541
volumes:
542+
- name: opentelemetry-operator-metrics-cert
543+
secret:
544+
defaultMode: 420
545+
secretName: opentelemetry-operator-metrics
535546
- name: cert
536547
secret:
537548
defaultMode: 420

config/default/kustomization.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ bases:
1818
- ../manager
1919
- ../webhook
2020
- ../certmanager
21-
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
22-
#- ../prometheus
2321

2422
patchesStrategicMerge:
2523
# Protect the /metrics endpoint by putting it behind auth.

config/overlays/openshift/kustomization.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,7 @@ patches:
88
kind: Deployment
99
name: controller-manager
1010
path: manager-patch.yaml
11+
12+
patchesStrategicMerge:
13+
- metrics_service_tls_patch.yaml
14+
- manager_auth_proxy_tls_patch.yaml

config/overlays/openshift/manager-patch.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
- --zap-time-encoding=rfc3339nano
88
- --enable-nginx-instrumentation=true
99
- '--enable-go-instrumentation=true'
10-
- '--enable-multi-instrumentation=true'
1110
- '--openshift-create-dashboard=true'
1211
- '--feature-gates=+operator.observability.prometheus'
12+
- '--enable-cr-metrics=true'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: controller-manager
5+
namespace: system
6+
spec:
7+
template:
8+
spec:
9+
containers:
10+
- name: manager # without this line, kustomize reorders the containers, making kube-rbac-proxy the default container
11+
- name: kube-rbac-proxy
12+
args:
13+
- "--secure-listen-address=0.0.0.0:8443"
14+
- "--upstream=http://127.0.0.1:8080/"
15+
- "--logtostderr=true"
16+
- "--v=0"
17+
- "--tls-cert-file=/var/run/tls/server/tls.crt"
18+
- "--tls-private-key-file=/var/run/tls/server/tls.key"
19+
- "--tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256"
20+
- "--tls-min-version=VersionTLS12"
21+
volumeMounts:
22+
- mountPath: /var/run/tls/server
23+
name: opentelemetry-operator-metrics-cert
24+
volumes:
25+
- name: opentelemetry-operator-metrics-cert
26+
secret:
27+
defaultMode: 420
28+
# secret generated by the 'service.beta.openshift.io/serving-cert-secret-name' annotation on the metrics-service
29+
secretName: opentelemetry-operator-metrics
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
annotations:
5+
service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics
6+
name: controller-manager-metrics-service
7+
namespace: system

config/prometheus/kustomization.yaml

-2
This file was deleted.

config/prometheus/monitor.yaml

-26
This file was deleted.

internal/operator-metrics/metrics.go

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Copyright The OpenTelemetry Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package operatormetrics
16+
17+
import (
	"context"
	"fmt"
	"os"
	"strings"
	"time"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/client-go/rest"
	"sigs.k8s.io/controller-runtime/pkg/client"
)
30+
31+
var (
	// Names of cluster objects.

	// caBundleConfigMap is the name of the config map that holds the CA bundle.
	caBundleConfigMap = "serving-certs-ca-bundle"

	// openshiftInClusterMonitoringNamespace is the namespace used by the
	// OpenShift in-cluster monitoring stack.
	openshiftInClusterMonitoringNamespace = "openshift-monitoring"

	// Filesystem paths.

	// namespaceFile is the service account file that contains the namespace
	// name; Start reads it to decide where the ServiceMonitor is created.
	namespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"

	// prometheusCAFile is the CA file path written into the ServiceMonitor TLS
	// configuration on OpenShift. It is derived from caBundleConfigMap.
	prometheusCAFile = fmt.Sprintf("/etc/prometheus/configmaps/%s/service-ca.crt", caBundleConfigMap)

	// nolint #nosec
	// bearerTokenFile is the bearer token file path written into the
	// ServiceMonitor endpoint. It is a path, not a credential.
	bearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
)
48+
49+
type OperatorMetrics struct {
50+
kubeClient client.Client
51+
}
52+
53+
func NewOperatorMetrics(config *rest.Config, scheme *runtime.Scheme) (OperatorMetrics, error) {
54+
kubeClient, err := client.New(config, client.Options{Scheme: scheme})
55+
if err != nil {
56+
return OperatorMetrics{}, err
57+
}
58+
59+
return OperatorMetrics{
60+
kubeClient: kubeClient,
61+
}, nil
62+
}
63+
64+
func (om OperatorMetrics) Start(ctx context.Context) error {
65+
rawNamespace, err := os.ReadFile(namespaceFile)
66+
if err != nil {
67+
return fmt.Errorf("error reading namespace file: %w", err)
68+
}
69+
namespace := string(rawNamespace)
70+
71+
var tlsConfig *monitoringv1.TLSConfig
72+
73+
if om.caConfigMapExists() {
74+
serviceName := fmt.Sprintf("opentelemetry-operator-controller-manager-metrics-service.%s.svc", namespace)
75+
76+
tlsConfig = &monitoringv1.TLSConfig{
77+
CAFile: prometheusCAFile,
78+
SafeTLSConfig: monitoringv1.SafeTLSConfig{
79+
ServerName: &serviceName,
80+
},
81+
}
82+
} else {
83+
t := true
84+
tlsConfig = &monitoringv1.TLSConfig{
85+
SafeTLSConfig: monitoringv1.SafeTLSConfig{
86+
// kube-rbac-proxy uses a self-signed cert by default
87+
InsecureSkipVerify: &t,
88+
},
89+
}
90+
}
91+
92+
sm := monitoringv1.ServiceMonitor{
93+
ObjectMeta: metav1.ObjectMeta{
94+
Name: "opentelemetry-operator-metrics-monitor",
95+
Namespace: namespace,
96+
Labels: map[string]string{
97+
"app.kubernetes.io/name": "opentelemetry-operator",
98+
"app.kubernetes.io/part-of": "opentelemetry-operator",
99+
"control-plane": "controller-manager",
100+
},
101+
},
102+
Spec: monitoringv1.ServiceMonitorSpec{
103+
Selector: metav1.LabelSelector{
104+
MatchLabels: map[string]string{
105+
"app.kubernetes.io/name": "opentelemetry-operator",
106+
},
107+
},
108+
Endpoints: []monitoringv1.Endpoint{
109+
{
110+
BearerTokenFile: bearerTokenFile,
111+
Interval: "30s",
112+
Path: "/metrics",
113+
Scheme: "https",
114+
ScrapeTimeout: "10s",
115+
TargetPort: &intstr.IntOrString{IntVal: 8443},
116+
TLSConfig: tlsConfig,
117+
},
118+
},
119+
},
120+
}
121+
122+
err = om.kubeClient.Create(ctx, &sm)
123+
if err != nil {
124+
return fmt.Errorf("error creating service monitor: %w", err)
125+
}
126+
127+
<-ctx.Done()
128+
129+
return om.kubeClient.Delete(ctx, &sm)
130+
}
131+
132+
func (om OperatorMetrics) NeedLeaderElection() bool {
133+
return true
134+
}
135+
136+
func (om OperatorMetrics) caConfigMapExists() bool {
137+
return om.kubeClient.Get(context.Background(), client.ObjectKey{
138+
Name: caBundleConfigMap,
139+
Namespace: openshiftInClusterMonitoringNamespace,
140+
}, &corev1.ConfigMap{},
141+
) == nil
142+
}

0 commit comments

Comments
 (0)