Skip to content

Commit 3f8cd03

Browse files
committed
feat: Expose ExternalDNS operand metrics via kube-rbac-proxy sidecar
ExternalDNS operand pods bind their metrics endpoint to 127.0.0.1, which makes it unreachable from outside the pod, so Prometheus cannot scrape it. This commit adds a kube-rbac-proxy sidecar to each operand deployment that proxies the localhost metrics over HTTPS, along with a Service (annotated to request an OpenShift serving certificate) and a ServiceMonitor for automatic Prometheus scraping. Resolves: OCPBUGS-58102. Assisted with Claude.
1 parent 847c425 commit 3f8cd03

15 files changed

Lines changed: 1158 additions & 29 deletions

bundle/manifests/external-dns-operator.clusterserviceversion.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ spec:
470470
- --operator-namespace=$(OPERATOR_NAMESPACE)
471471
- --operand-namespace=$(OPERATOR_NAMESPACE)
472472
- --externaldns-image=$(RELATED_IMAGE_EXTERNAL_DNS)
473+
- --kube-rbac-proxy-image=$(RELATED_IMAGE_KUBE_RBAC_PROXY)
473474
- --trusted-ca-configmap=$(TRUSTED_CA_CONFIGMAP_NAME)
474475
- --leader-elect
475476
- --webhook-disable-http2
@@ -480,6 +481,8 @@ spec:
480481
fieldPath: metadata.namespace
481482
- name: RELATED_IMAGE_EXTERNAL_DNS
482483
value: quay.io/external-dns-operator/external-dns:latest
484+
- name: RELATED_IMAGE_KUBE_RBAC_PROXY
485+
value: quay.io/openshift/origin-kube-rbac-proxy:latest
483486
- name: TRUSTED_CA_CONFIGMAP_NAME
484487
image: quay.io/openshift/origin-external-dns-operator:latest
485488
name: external-dns-operator

config/rbac/operand_role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,15 @@ rules:
3131
- get
3232
- watch
3333
- list
34+
- apiGroups:
35+
- authentication.k8s.io
36+
resources:
37+
- tokenreviews
38+
verbs:
39+
- create
40+
- apiGroups:
41+
- authorization.k8s.io
42+
resources:
43+
- subjectaccessreviews
44+
verbs:
45+
- create

main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ func main() {
4242
flag.StringVar(&opCfg.OperatorNamespace, "operator-namespace", operatorconfig.DefaultOperatorNamespace, "The namespace that the operator is running in.")
4343
flag.StringVar(&opCfg.OperandNamespace, "operand-namespace", operatorconfig.DefaultOperandNamespace, "The namespace that ExternalDNS containers should run in.")
4444
flag.StringVar(&opCfg.ExternalDNSImage, "externaldns-image", operatorconfig.DefaultExternalDNSImage, "The container image used for running ExternalDNS.")
45+
flag.StringVar(&opCfg.KubeRBACProxyImage, "kube-rbac-proxy-image", operatorconfig.DefaultKubeRBACProxyImage, "The container image used for the kube-rbac-proxy metrics sidecar.")
4546
flag.StringVar(&opCfg.CertDir, "cert-dir", operatorconfig.DefaultCertDir, "The directory for keys and certificates for serving the webhook.")
4647
flag.StringVar(&opCfg.TrustedCAConfigMapName, "trusted-ca-configmap", operatorconfig.DefaultTrustedCAConfigMapName, "The name of the config map containing TLS CA(s) which should be trusted by ExternalDNS containers. PEM encoded file under \"ca-bundle.crt\" key is expected.")
4748
flag.BoolVar(&opCfg.EnableWebhook, "enable-webhook", operatorconfig.DefaultEnableWebhook, "Enable the validating webhook server. Defaults to true.")

pkg/operator/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535

3636
const (
3737
DefaultExternalDNSImage = "quay.io/external-dns-operator/external-dns:latest"
38+
DefaultKubeRBACProxyImage = "quay.io/openshift/origin-kube-rbac-proxy:latest"
3839
DefaultMetricsAddr = "127.0.0.1:8080"
3940
DefaultOperatorNamespace = "external-dns-operator"
4041
DefaultOperandNamespace = "external-dns"
@@ -59,6 +60,10 @@ type Config struct {
5960
// by the operator.
6061
ExternalDNSImage string
6162

63+
// KubeRBACProxyImage is the kube-rbac-proxy image for the metrics sidecar container
64+
// in the ExternalDNS operand deployment.
65+
KubeRBACProxyImage string
66+
6267
// MetricsBindAddress is the TCP address that the operator should bind to for
6368
// serving prometheus metrics. It can be set to "0" to disable the metrics serving.
6469
MetricsBindAddress string

pkg/operator/controller/externaldns/controller.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ type Config struct {
5050
Namespace string
5151
// Image is the ExternalDNS image to use.
5252
Image string
53+
// KubeRBACProxyImage is the kube-rbac-proxy image for the metrics sidecar.
54+
KubeRBACProxyImage string
5355
// OperatorNamespace is the namespace in which this operator is deployed.
5456
OperatorNamespace string
5557
// IsOpenShift is the flag which instructs the operator that it runs in OpenShift.
@@ -102,6 +104,10 @@ func New(mgr manager.Manager, cfg Config) (controller.Controller, error) {
102104
return nil, err
103105
}
104106

107+
if err := c.Watch(source.Kind[client.Object](operatorCache, &corev1.Service{}, handler.EnqueueRequestForOwner(operatorScheme, operatorRESTMapper, &operatorv1beta1.ExternalDNS{}, handler.OnlyControllerOwner()))); err != nil {
108+
return nil, err
109+
}
110+
105111
// secret replicated by the credentials controller
106112
// needs to trigger the reconciliation of the corresponding ExternalDNS
107113
// because of the annotation with the secret's hash in the operand deployment
@@ -207,11 +213,33 @@ func (r *reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
207213
trustCAConfigMap = configMap
208214
}
209215

210-
_, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, sa, credSecret, trustCAConfigMap, externalDNS)
216+
_, currentDeployment, err := r.ensureExternalDNSDeployment(ctx, r.config.Namespace, r.config.Image, r.config.KubeRBACProxyImage, sa, credSecret, trustCAConfigMap, externalDNS)
211217
if err != nil {
212218
return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS deployment: %w", err)
213219
}
214220

221+
// Ensure metrics service and service monitor for Prometheus scraping.
222+
// If kube-rbac-proxy image is not configured, clean up any existing metrics resources.
223+
if r.config.KubeRBACProxyImage != "" {
224+
if err := r.ensureExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil {
225+
return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS metrics service: %w", err)
226+
}
227+
if r.config.IsOpenShift {
228+
if err := r.ensureExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil {
229+
return reconcile.Result{}, fmt.Errorf("failed to ensure externalDNS service monitor: %w", err)
230+
}
231+
}
232+
} else {
233+
if err := r.deleteExternalDNSMetricsService(ctx, r.config.Namespace, externalDNS); err != nil {
234+
return reconcile.Result{}, fmt.Errorf("failed to delete externalDNS metrics service: %w", err)
235+
}
236+
if r.config.IsOpenShift {
237+
if err := r.deleteExternalDNSServiceMonitor(ctx, r.config.Namespace, externalDNS); err != nil {
238+
return reconcile.Result{}, fmt.Errorf("failed to delete externalDNS service monitor: %w", err)
239+
}
240+
}
241+
}
242+
215243
if err := r.updateExternalDNSStatus(ctx, externalDNS, currentDeployment, true); err != nil {
216244
return reconcile.Result{}, fmt.Errorf("failed to update externalDNS custom resource %s: %w", externalDNS.Name, err)
217245
}

pkg/operator/controller/externaldns/deployment.go

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ var sourceStringTable = map[operatorv1beta1.ExternalDNSSourceType]string{
7777
type deploymentConfig struct {
7878
namespace string
7979
image string
80+
kubeRBACProxyImage string
8081
serviceAccount *corev1.ServiceAccount
8182
externalDNS *operatorv1beta1.ExternalDNS
8283
isOpenShift bool
@@ -89,7 +90,7 @@ type deploymentConfig struct {
8990

9091
// ensureExternalDNSDeployment ensures that the externalDNS deployment exists.
9192
// Returns a Boolean value indicating whether the deployment exists, a pointer to the deployment, and an error when relevant.
92-
func (r *reconciler) ensureExternalDNSDeployment(ctx context.Context, namespace, image string, serviceAccount *corev1.ServiceAccount, credSecret *corev1.Secret, trustCAConfigMap *corev1.ConfigMap, externalDNS *operatorv1beta1.ExternalDNS) (bool, *appsv1.Deployment, error) {
93+
func (r *reconciler) ensureExternalDNSDeployment(ctx context.Context, namespace, image, kubeRBACProxyImage string, serviceAccount *corev1.ServiceAccount, credSecret *corev1.Secret, trustCAConfigMap *corev1.ConfigMap, externalDNS *operatorv1beta1.ExternalDNS) (bool, *appsv1.Deployment, error) {
9394
nsName := types.NamespacedName{Namespace: namespace, Name: controller.ExternalDNSResourceName(externalDNS)}
9495

9596
// build credentials secret's hash
@@ -109,16 +110,17 @@ func (r *reconciler) ensureExternalDNSDeployment(ctx context.Context, namespace,
109110
}
110111

111112
desired, err := desiredExternalDNSDeployment(&deploymentConfig{
112-
namespace,
113-
image,
114-
serviceAccount,
115-
externalDNS,
116-
r.config.IsOpenShift,
117-
r.config.PlatformStatus,
118-
credSecret.Name,
119-
credSecretHash,
120-
trustCAConfigMapName,
121-
trustCAConfigMapHash,
113+
namespace: namespace,
114+
image: image,
115+
kubeRBACProxyImage: kubeRBACProxyImage,
116+
serviceAccount: serviceAccount,
117+
externalDNS: externalDNS,
118+
isOpenShift: r.config.IsOpenShift,
119+
platformStatus: r.config.PlatformStatus,
120+
secret: credSecret.Name,
121+
secretHash: credSecretHash,
122+
trustedCAConfigMapName: trustCAConfigMapName,
123+
trustedCAConfigMapHash: trustCAConfigMapHash,
122124
})
123125
if err != nil {
124126
return false, nil, fmt.Errorf("failed to build externalDNS deployment: %w", err)
@@ -296,6 +298,17 @@ func desiredExternalDNSDeployment(cfg *deploymentConfig) (*appsv1.Deployment, er
296298
depl.Spec.Template.Spec.Containers = append(depl.Spec.Template.Spec.Containers, *container)
297299
}
298300
}
301+
// Add kube-rbac-proxy sidecar(s) and metrics cert volume for secure metrics exposure.
302+
// One sidecar per zone container, each proxying the corresponding metrics port.
303+
if cfg.kubeRBACProxyImage != "" {
304+
for i := 0; i < cbld.counter; i++ {
305+
proxyContainer := kubeRBACProxyContainer(cfg.kubeRBACProxyImage, i)
306+
depl.Spec.Template.Spec.Containers = append(depl.Spec.Template.Spec.Containers, proxyContainer)
307+
}
308+
certVolume := metricsCertVolume(controller.ExternalDNSMetricsSecretName(cfg.externalDNS))
309+
depl.Spec.Template.Spec.Volumes = append(depl.Spec.Template.Spec.Volumes, certVolume)
310+
}
311+
299312
return depl, nil
300313
}
301314

@@ -418,6 +431,10 @@ func externalDNSContainersChanged(current, expected, updated *appsv1.Deployment)
418431
updated.Spec.Template.Spec.Containers[currCont.Index].SecurityContext = updatedContext
419432
changed = true
420433
}
434+
if !equalContainerPorts(currCont.Ports, expCont.Ports) {
435+
updated.Spec.Template.Spec.Containers[currCont.Index].Ports = expCont.Ports
436+
changed = true
437+
}
421438
} else {
422439
// expected container is not present - add it
423440
updated.Spec.Template.Spec.Containers = append(updated.Spec.Template.Spec.Containers, expCont.Container)
@@ -701,6 +718,27 @@ func securityContextChanged(current, updated, desired *corev1.SecurityContext) (
701718
return changed, updated
702719
}
703720

721+
// equalContainerPorts returns true if 2 container port slices have the same content.
722+
func equalContainerPorts(current, expected []corev1.ContainerPort) bool {
723+
if len(current) != len(expected) {
724+
return false
725+
}
726+
currentMap := map[string]corev1.ContainerPort{}
727+
for _, p := range current {
728+
currentMap[p.Name] = p
729+
}
730+
for _, ep := range expected {
731+
cp, found := currentMap[ep.Name]
732+
if !found {
733+
return false
734+
}
735+
if cp.ContainerPort != ep.ContainerPort || cp.Protocol != ep.Protocol {
736+
return false
737+
}
738+
}
739+
return true
740+
}
741+
704742
func equalBoolPtr(current, desired *bool) bool {
705743
if desired == nil {
706744
return true

pkg/operator/controller/externaldns/deployment_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3845,15 +3845,15 @@ func TestDesiredExternalDNSDeployment(t *testing.T) {
38453845
}
38463846
}()
38473847
depl, err := desiredExternalDNSDeployment(&deploymentConfig{
3848-
test.OperandNamespace,
3849-
test.OperandImage,
3850-
serviceAccount,
3851-
tc.inputExternalDNS,
3852-
tc.inputIsOpenShift,
3853-
tc.inputPlatformStatus,
3854-
tc.inputSecretName,
3855-
testSecretHash,
3856-
tc.inputTrustedCAConfigMapName, "",
3848+
namespace: test.OperandNamespace,
3849+
image: test.OperandImage,
3850+
serviceAccount: serviceAccount,
3851+
externalDNS: tc.inputExternalDNS,
3852+
isOpenShift: tc.inputIsOpenShift,
3853+
platformStatus: tc.inputPlatformStatus,
3854+
secret: tc.inputSecretName,
3855+
secretHash: testSecretHash,
3856+
trustedCAConfigMapName: tc.inputTrustedCAConfigMapName,
38573857
})
38583858
if err != nil {
38593859
t.Errorf("expected no error from calling desiredExternalDNSDeployment, but received %v", err)
@@ -5999,7 +5999,7 @@ func TestEnsureExternalDNSDeployment(t *testing.T) {
59995999
log: zap.New(zap.UseDevMode(true)),
60006000
}
60016001

6002-
gotExist, gotDepl, err := r.ensureExternalDNSDeployment(context.TODO(), test.OperandNamespace, test.OperandImage, serviceAccount, tc.credSecret, tc.trustCAConfigMap, &tc.extDNS)
6002+
gotExist, gotDepl, err := r.ensureExternalDNSDeployment(context.TODO(), test.OperandNamespace, test.OperandImage, "", serviceAccount, tc.credSecret, tc.trustCAConfigMap, &tc.extDNS)
60036003
if err != nil {
60046004
if !tc.errExpected {
60056005
t.Fatalf("unexpected error received: %v", err)

pkg/operator/controller/externaldns/pod.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"strings"
2525

2626
corev1 "k8s.io/api/core/v1"
27+
"k8s.io/apimachinery/pkg/api/resource"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2829
"k8s.io/utils/ptr"
2930

@@ -57,6 +58,16 @@ const (
5758
// all capabilities in the container security context
5859
allCapabilities = "ALL"
5960
//
61+
// kube-rbac-proxy metrics sidecar
62+
//
63+
kubeRBACProxyContainerName = "kube-rbac-proxy"
64+
kubeRBACProxySecurePort = 8443
65+
kubeRBACProxyPortName = "https"
66+
metricsCertVolumeName = "metrics-cert"
67+
metricsCertMountPath = "/var/run/secrets/serving-cert"
68+
metricsCertTLSCertFile = metricsCertMountPath + "/tls.crt"
69+
metricsCertTLSKeyFile = metricsCertMountPath + "/tls.key"
70+
//
6071
// AWS
6172
//
6273
awsCredentialEnvVarName = "AWS_SHARED_CREDENTIALS_FILE"
@@ -686,3 +697,94 @@ func (b *externalDNSVolumeBuilder) bluecatVolumes() []corev1.Volume {
686697
func addTXTPrefixFlag(args []string) []string {
687698
return append(args, fmt.Sprintf("--txt-prefix=%s", defaultTXTRecordPrefix))
688699
}
700+
701+
// numMetricsPorts returns the number of metrics ports needed for the given ExternalDNS instance.
702+
// This mirrors the zone container creation logic in desiredExternalDNSDeployment.
703+
func numMetricsPorts(externalDNS *operatorv1beta1.ExternalDNS) int {
704+
if len(externalDNS.Spec.Zones) == 0 {
705+
if externalDNS.Spec.Provider.Type == operatorv1beta1.ProviderTypeAzure {
706+
return 2
707+
}
708+
return 1
709+
}
710+
return len(externalDNS.Spec.Zones)
711+
}
712+
713+
// kubeRBACProxyPortNameForSeq returns the port name for the kube-rbac-proxy sidecar
714+
// at the given sequence index.
715+
func kubeRBACProxyPortNameForSeq(seq int) string {
716+
if seq == 0 {
717+
return kubeRBACProxyPortName
718+
}
719+
return fmt.Sprintf("%s-%d", kubeRBACProxyPortName, seq)
720+
}
721+
722+
// kubeRBACProxyContainer returns the kube-rbac-proxy sidecar container definition
723+
// that proxies metrics from the ExternalDNS container's localhost metrics port at the given sequence.
724+
func kubeRBACProxyContainer(image string, seq int) corev1.Container {
725+
securePort := kubeRBACProxySecurePort + seq
726+
upstreamPort := defaultMetricsStartPort + seq
727+
portName := kubeRBACProxyPortNameForSeq(seq)
728+
containerName := kubeRBACProxyContainerName
729+
if seq > 0 {
730+
containerName = fmt.Sprintf("%s-%d", kubeRBACProxyContainerName, seq)
731+
}
732+
return corev1.Container{
733+
Name: containerName,
734+
Image: image,
735+
Args: []string{
736+
fmt.Sprintf("--secure-listen-address=0.0.0.0:%d", securePort),
737+
fmt.Sprintf("--upstream=http://127.0.0.1:%d/", upstreamPort),
738+
"--logtostderr=true",
739+
"--v=10",
740+
fmt.Sprintf("--tls-cert-file=%s", metricsCertTLSCertFile),
741+
fmt.Sprintf("--tls-private-key-file=%s", metricsCertTLSKeyFile),
742+
"--http2-disable",
743+
},
744+
Ports: []corev1.ContainerPort{
745+
{
746+
Name: portName,
747+
ContainerPort: int32(securePort),
748+
Protocol: corev1.ProtocolTCP,
749+
},
750+
},
751+
Resources: corev1.ResourceRequirements{
752+
Requests: corev1.ResourceList{
753+
corev1.ResourceCPU: resource.MustParse("100m"),
754+
corev1.ResourceMemory: resource.MustParse("20Mi"),
755+
},
756+
},
757+
VolumeMounts: []corev1.VolumeMount{
758+
{
759+
Name: metricsCertVolumeName,
760+
MountPath: metricsCertMountPath,
761+
ReadOnly: true,
762+
},
763+
},
764+
TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
765+
SecurityContext: &corev1.SecurityContext{
766+
Capabilities: &corev1.Capabilities{
767+
Drop: []corev1.Capability{allCapabilities},
768+
},
769+
Privileged: ptr.To[bool](false),
770+
RunAsNonRoot: ptr.To[bool](true),
771+
AllowPrivilegeEscalation: ptr.To[bool](false),
772+
SeccompProfile: &corev1.SeccompProfile{
773+
Type: corev1.SeccompProfileTypeRuntimeDefault,
774+
},
775+
},
776+
}
777+
}
778+
779+
// metricsCertVolume returns the volume for the metrics serving certificate secret.
780+
func metricsCertVolume(secretName string) corev1.Volume {
781+
return corev1.Volume{
782+
Name: metricsCertVolumeName,
783+
VolumeSource: corev1.VolumeSource{
784+
Secret: &corev1.SecretVolumeSource{
785+
SecretName: secretName,
786+
},
787+
},
788+
}
789+
}
790+

0 commit comments

Comments
 (0)