
Commit 0ef71e3

Fixed OTel Controller - auxiliary resources deletion (open-telemetry#2575)
* Fixed HPA deletion
* Fix e2e tests
* Added reconciliation test
* Added owned objects function
* Fixed controller test
* Add PodDisruptionBudget
* Change vars setting
* Update e2e tests to chainsaw
* Added ingress e2e tests
* Added Volumes, changed function's place
* Fixed Suite Test
* Added RBAC for Volumes
* Add a sleep
* Fixed Chainsaw test

Signed-off-by: Yuri Sa <[email protected]>
Co-authored-by: Jacob Aronoff <[email protected]>
1 parent aa994e8 · commit 0ef71e3

25 files changed: +416 −41 lines

.chloggen/fix-hpa-delete.yaml (+16)

@@ -0,0 +1,16 @@
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: 'bug_fix'
+
+# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
+component: operator
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Fixed HPA deletion
+
+# One or more tracking issues related to the change
+issues: [2568, 2587, 2651]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:

Makefile (+2 −2)

@@ -60,7 +60,7 @@ endif

 START_KIND_CLUSTER ?= true

-KUBE_VERSION ?= 1.24
+KUBE_VERSION ?= 1.29
 KIND_CONFIG ?= kind-$(KUBE_VERSION).yaml
 KIND_CLUSTER_NAME ?= "otel-operator"

@@ -367,7 +367,7 @@ KUSTOMIZE_VERSION ?= v5.0.3
 CONTROLLER_TOOLS_VERSION ?= v0.12.0
 GOLANGCI_LINT_VERSION ?= v1.54.0
 KIND_VERSION ?= v0.20.0
-CHAINSAW_VERSION ?= v0.1.6
+CHAINSAW_VERSION ?= v0.1.7

 .PHONY: install-tools
 install-tools: kustomize golangci-lint kind controller-gen envtest crdoc kind operator-sdk chainsaw

bundle/manifests/opentelemetry-operator.clusterserviceversion.yaml (+3 −1)

@@ -65,7 +65,7 @@ metadata:
     categories: Logging & Tracing,Monitoring
     certified: "false"
     containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
-    createdAt: "2024-02-15T18:18:46Z"
+    createdAt: "2024-02-27T11:16:56Z"
     description: Provides the OpenTelemetry components, including the Collector
     operators.operatorframework.io/builder: operator-sdk-v1.29.0
     operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
@@ -175,6 +175,8 @@ spec:
         - ""
         resources:
         - configmaps
+        - persistentvolumeclaims
+        - persistentvolumes
        - pods
        - serviceaccounts
        - services

config/rbac/role.yaml (+2)

@@ -8,6 +8,8 @@ rules:
  - ""
  resources:
  - configmaps
+ - persistentvolumeclaims
+ - persistentvolumes
  - pods
  - serviceaccounts
  - services

controllers/builder_test.go (+3)

@@ -590,6 +590,9 @@ service:
     "app.kubernetes.io/instance": "test.test",
     "app.kubernetes.io/managed-by": "opentelemetry-operator",
     "app.kubernetes.io/name": "test-ingress",
+    "app.kubernetes.io/component": "opentelemetry-collector",
+    "app.kubernetes.io/part-of": "opentelemetry",
+    "app.kubernetes.io/version": "latest",
 },
 Annotations: map[string]string{
     "something": "true",

controllers/common.go (+22 −2)

@@ -23,6 +23,7 @@ import (
 rbacv1 "k8s.io/api/rbac/v1"
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 "k8s.io/apimachinery/pkg/runtime"
+"k8s.io/apimachinery/pkg/types"
 "k8s.io/client-go/util/retry"
 ctrl "sigs.k8s.io/controller-runtime"
 "sigs.k8s.io/controller-runtime/pkg/client"

@@ -77,7 +78,7 @@ func BuildOpAMPBridge(params manifests.Params) ([]client.Object, error) {
 }

 // reconcileDesiredObjects runs the reconcile process using the mutateFn over the given list of objects.
-func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) error {
+func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects []client.Object, ownedObjects map[types.UID]client.Object) error {
     var errs []error
     for _, desired := range desiredObjects {
         l := logger.WithValues(

@@ -91,7 +92,6 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
             continue
         }
     }
-
     // existing is an object the controller runtime will hydrate for us
     // we obtain the existing object by deep copying the desired object because it's the most convenient way
     existing := desired.DeepCopyObject().(client.Object)

@@ -116,9 +116,29 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
     }

     l.V(1).Info(fmt.Sprintf("desired has been %s", op))
+    // This object is still managed by the operator, remove it from the list of objects to prune
+    delete(ownedObjects, existing.GetUID())
 }
 if len(errs) > 0 {
     return fmt.Errorf("failed to create objects for %s: %w", owner.GetName(), errors.Join(errs...))
 }
+// Prune owned objects in the cluster which should not be present after the reconciliation.
+pruneErrs := []error{}
+for _, obj := range ownedObjects {
+    l := logger.WithValues(
+        "object_name", obj.GetName(),
+        "object_kind", obj.GetObjectKind().GroupVersionKind(),
+    )
+
+    l.Info("pruning unmanaged resource")
+    err := kubeClient.Delete(ctx, obj)
+    if err != nil {
+        l.Error(err, "failed to delete resource")
+        pruneErrs = append(pruneErrs, err)
+    }
+}
+if len(pruneErrs) > 0 {
+    return fmt.Errorf("failed to prune objects for %s: %w", owner.GetName(), errors.Join(pruneErrs...))
+}
 return nil
 }
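
The prune step above is the heart of the fix: after each desired object is created or updated, it is removed from the owned set, and whatever remains is deleted. A rough, self-contained sketch of the same pattern follows; the function name and inputs are illustrative only, and it assumes the desired objects already carry their UIDs (in the operator this happens inside CreateOrUpdate):

// prune_sketch.go - a minimal sketch of the "reconcile then prune" pattern used by
// reconcileDesiredObjects. Names and inputs are illustrative, not the operator's API.
package main

import (
    "context"
    "errors"
    "fmt"

    "k8s.io/apimachinery/pkg/types"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

// pruneStaleObjects deletes every owned object that is no longer desired.
// owned is keyed by UID, mirroring the map built by findOtelOwnedObjects.
func pruneStaleObjects(ctx context.Context, c client.Client, desired []client.Object, owned map[types.UID]client.Object) error {
    // Objects that are still desired are removed from the prune candidates.
    for _, d := range desired {
        delete(owned, d.GetUID())
    }
    // Whatever remains is owned by the operator but no longer wanted: delete it.
    var errs []error
    for _, obj := range owned {
        if err := c.Delete(ctx, obj); err != nil {
            errs = append(errs, fmt.Errorf("pruning %s: %w", obj.GetName(), err))
        }
    }
    return errors.Join(errs...)
}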

controllers/opampbridge_controller.go (+1 −1)

@@ -103,7 +103,7 @@ func (r *OpAMPBridgeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
     if buildErr != nil {
         return ctrl.Result{}, buildErr
     }
-    err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects...)
+    err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects, nil)
     return opampbridgeStatus.HandleReconcileStatus(ctx, log, params, err)
 }
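
The OpAMP bridge reconciler passes nil for ownedObjects, which leaves its behaviour unchanged: in Go, deleting from and ranging over a nil map are both safe no-ops, so the new prune loop simply never runs for it. A quick standalone illustration:

package main

import "fmt"

func main() {
    var owned map[string]string // nil map, like the nil ownedObjects passed above

    delete(owned, "anything") // deleting from a nil map does nothing
    for range owned {         // ranging over a nil map runs zero iterations
        fmt.Println("never printed")
    }
    fmt.Println("entries left to prune:", len(owned)) // 0
}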

controllers/opentelemetrycollector_controller.go (+87 −4)

@@ -17,24 +17,32 @@ package controllers

 import (
     "context"
+    "fmt"

     "github.com/go-logr/logr"
+    routev1 "github.com/openshift/api/route/v1"
     monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
     appsv1 "k8s.io/api/apps/v1"
     autoscalingv2 "k8s.io/api/autoscaling/v2"
     corev1 "k8s.io/api/core/v1"
+    networkingv1 "k8s.io/api/networking/v1"
     policyV1 "k8s.io/api/policy/v1"
     rbacv1 "k8s.io/api/rbac/v1"
     apierrors "k8s.io/apimachinery/pkg/api/errors"
+    "k8s.io/apimachinery/pkg/labels"
     "k8s.io/apimachinery/pkg/runtime"
+    "k8s.io/apimachinery/pkg/types"
     "k8s.io/client-go/tools/record"
     ctrl "sigs.k8s.io/controller-runtime"
     "sigs.k8s.io/controller-runtime/pkg/client"

     "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
     "github.com/open-telemetry/opentelemetry-operator/internal/api/convert"
+    "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift"
     "github.com/open-telemetry/opentelemetry-operator/internal/config"
     "github.com/open-telemetry/opentelemetry-operator/internal/manifests"
+    "github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector"
+    "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
     collectorStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/collector"
     "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate"
 )

@@ -57,6 +65,71 @@
     Config config.Config
 }

+func (r *OpenTelemetryCollectorReconciler) findOtelOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) {
+    ownedObjects := map[types.UID]client.Object{}
+
+    listOps := &client.ListOptions{
+        Namespace:     params.OtelCol.Namespace,
+        LabelSelector: labels.SelectorFromSet(manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, collector.ComponentOpenTelemetryCollector)),
+    }
+    hpaList := &autoscalingv2.HorizontalPodAutoscalerList{}
+    err := r.List(ctx, hpaList, listOps)
+    if err != nil {
+        return nil, fmt.Errorf("error listing HorizontalPodAutoscalers: %w", err)
+    }
+    for i := range hpaList.Items {
+        ownedObjects[hpaList.Items[i].GetUID()] = &hpaList.Items[i]
+    }
+    if featuregate.PrometheusOperatorIsAvailable.IsEnabled() {
+        servicemonitorList := &monitoringv1.ServiceMonitorList{}
+        err = r.List(ctx, servicemonitorList, listOps)
+        if err != nil {
+            return nil, fmt.Errorf("error listing ServiceMonitors: %w", err)
+        }
+        for i := range servicemonitorList.Items {
+            ownedObjects[servicemonitorList.Items[i].GetUID()] = servicemonitorList.Items[i]
+        }
+
+        podMonitorList := &monitoringv1.PodMonitorList{}
+        err = r.List(ctx, podMonitorList, listOps)
+        if err != nil {
+            return nil, fmt.Errorf("error listing PodMonitors: %w", err)
+        }
+        for i := range podMonitorList.Items {
+            ownedObjects[podMonitorList.Items[i].GetUID()] = podMonitorList.Items[i]
+        }
+    }
+    ingressList := &networkingv1.IngressList{}
+    err = r.List(ctx, ingressList, listOps)
+    if err != nil {
+        return nil, fmt.Errorf("error listing Ingresses: %w", err)
+    }
+    for i := range ingressList.Items {
+        ownedObjects[ingressList.Items[i].GetUID()] = &ingressList.Items[i]
+    }
+
+    if params.Config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
+        routesList := &routev1.RouteList{}
+        err = r.List(ctx, routesList, listOps)
+        if err != nil {
+            return nil, fmt.Errorf("error listing Routes: %w", err)
+        }
+        for i := range routesList.Items {
+            ownedObjects[routesList.Items[i].GetUID()] = &routesList.Items[i]
+        }
+    }
+    pdbList := &policyV1.PodDisruptionBudgetList{}
+    err = r.List(ctx, pdbList, listOps)
+    if err != nil {
+        return nil, fmt.Errorf("error listing PodDisruptionBudgets: %w", err)
+    }
+    for i := range pdbList.Items {
+        ownedObjects[pdbList.Items[i].GetUID()] = &pdbList.Items[i]
+    }
+
+    return ownedObjects, nil
+}
+
 func (r *OpenTelemetryCollectorReconciler) getParams(instance v1alpha1.OpenTelemetryCollector) (manifests.Params, error) {
     otelCol, err := convert.V1Alpha1to2(instance)
     if err != nil {

@@ -84,7 +157,7 @@ func NewReconciler(p Params) *OpenTelemetryCollectorReconciler {
     return r
 }

-// +kubebuilder:rbac:groups="",resources=pods;configmaps;services;serviceaccounts,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups="",resources=pods;configmaps;services;serviceaccounts;persistentvolumeclaims;persistentvolumes,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
 // +kubebuilder:rbac:groups=apps,resources=daemonsets;deployments;statefulsets,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete

@@ -134,9 +207,13 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct
     if buildErr != nil {
         return ctrl.Result{}, buildErr
     }
-    // TODO: https://github.com/open-telemetry/opentelemetry-operator/issues/2620
-    // TODO: Change &instance to use params.OtelCol
-    err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects...)
+
+    ownedObjects, err := r.findOtelOwnedObjects(ctx, params)
+    if err != nil {
+        return ctrl.Result{}, err
+    }
+
+    err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects, ownedObjects)
     return collectorStatus.HandleReconcileStatus(ctx, log, params, instance, err)
 }

@@ -150,6 +227,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
         Owns(&appsv1.Deployment{}).
         Owns(&appsv1.DaemonSet{}).
         Owns(&appsv1.StatefulSet{}).
+        Owns(&corev1.PersistentVolume{}).
+        Owns(&corev1.PersistentVolumeClaim{}).
+        Owns(&networkingv1.Ingress{}).
         Owns(&autoscalingv2.HorizontalPodAutoscaler{}).
         Owns(&policyV1.PodDisruptionBudget{})

@@ -162,6 +242,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
         builder.Owns(&monitoringv1.ServiceMonitor{})
         builder.Owns(&monitoringv1.PodMonitor{})
     }
+    if r.config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
+        builder.Owns(&routev1.Route{})
+    }

     return builder.Complete(r)
 }
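
findOtelOwnedObjects finds everything the operator created for a given collector instance by listing each auxiliary kind with the instance's selector labels. A hedged sketch of that lookup for a single kind; the selector values below are placeholders, the real ones come from manifestutils.SelectorLabels:

// ownedlist_sketch.go - illustrates the label-scoped List that findOtelOwnedObjects
// performs per kind. Label values are placeholders for this example.
package main

import (
    "context"
    "fmt"

    autoscalingv2 "k8s.io/api/autoscaling/v2"
    "k8s.io/apimachinery/pkg/labels"
    "k8s.io/apimachinery/pkg/types"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

func listOwnedHPAs(ctx context.Context, c client.Client, namespace string) (map[types.UID]client.Object, error) {
    owned := map[types.UID]client.Object{}
    listOps := &client.ListOptions{
        Namespace: namespace,
        LabelSelector: labels.SelectorFromSet(labels.Set{
            "app.kubernetes.io/managed-by": "opentelemetry-operator", // placeholder
            "app.kubernetes.io/instance":   "default.simplest",       // placeholder
        }),
    }
    hpas := &autoscalingv2.HorizontalPodAutoscalerList{}
    if err := c.List(ctx, hpas, listOps); err != nil {
        return nil, fmt.Errorf("error listing HorizontalPodAutoscalers: %w", err)
    }
    for i := range hpas.Items {
        owned[hpas.Items[i].GetUID()] = &hpas.Items[i] // address of the slice element, not the loop variable
    }
    return owned, nil
}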

controllers/suite_test.go (+11 −3)

@@ -26,8 +26,10 @@ import (
     "time"

     routev1 "github.com/openshift/api/route/v1"
+    monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
     "github.com/stretchr/testify/assert"
     v1 "k8s.io/api/core/v1"
+    networkingv1 "k8s.io/api/networking/v1"
     apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
     "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -104,7 +106,7 @@ func TestMain(m *testing.M) {

     testEnv = &envtest.Environment{
         CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")},
-        CRDs: []*apiextensionsv1.CustomResourceDefinition{testdata.OpenShiftRouteCRD},
+        CRDs: []*apiextensionsv1.CustomResourceDefinition{testdata.OpenShiftRouteCRD, testdata.ServiceMonitorCRD, testdata.PodMonitorCRD},
         WebhookInstallOptions: envtest.WebhookInstallOptions{
             Paths: []string{filepath.Join("..", "config", "webhook")},
         },

@@ -114,12 +116,18 @@ func TestMain(m *testing.M) {
         fmt.Printf("failed to start testEnv: %v", err)
         os.Exit(1)
     }
-
+    if err = monitoringv1.AddToScheme(testScheme); err != nil {
+        fmt.Printf("failed to register scheme: %v", err)
+        os.Exit(1)
+    }
+    if err = networkingv1.AddToScheme(testScheme); err != nil {
+        fmt.Printf("failed to register scheme: %v", err)
+        os.Exit(1)
+    }
     if err = routev1.AddToScheme(testScheme); err != nil {
         fmt.Printf("failed to register scheme: %v", err)
         os.Exit(1)
     }
-
     if err = v1alpha1.AddToScheme(testScheme); err != nil {
         fmt.Printf("failed to register scheme: %v", err)
         os.Exit(1)

internal/api/convert/v1alpha.go (+6)

@@ -51,6 +51,12 @@ func V1Alpha1to2(in v1alpha1.OpenTelemetryCollector) (v1beta1.OpenTelemetryColle
             Pods: m.Pods,
         }
     }
+    if copy.Spec.MaxReplicas != nil && copy.Spec.Autoscaler.MaxReplicas == nil {
+        copy.Spec.Autoscaler.MaxReplicas = copy.Spec.MaxReplicas
+    }
+    if copy.Spec.MinReplicas != nil && copy.Spec.Autoscaler.MinReplicas == nil {
+        copy.Spec.Autoscaler.MinReplicas = copy.Spec.MinReplicas
+    }
     out.Spec.OpenTelemetryCommonFields.Autoscaler = &v1beta1.AutoscalerSpec{
         MinReplicas: copy.Spec.Autoscaler.MinReplicas,
         MaxReplicas: copy.Spec.Autoscaler.MaxReplicas,
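
The two new guards keep the deprecated top-level minReplicas/maxReplicas fields working during conversion: they are copied into the autoscaler spec only when the autoscaler does not already set them. A standalone sketch of that precedence rule, using plain *int32 values rather than the real CRD types:

// replica_fallback_sketch.go - mirrors the nil-guarded fallback added to V1Alpha1to2,
// with simplified types.
package main

import "fmt"

// mergeReplicas prefers the nested autoscaler value and falls back to the
// deprecated top-level value only when the nested one is unset.
func mergeReplicas(deprecated, autoscaler *int32) *int32 {
    if deprecated != nil && autoscaler == nil {
        return deprecated
    }
    return autoscaler
}

func main() {
    three, five := int32(3), int32(5)

    fmt.Println(*mergeReplicas(&three, nil))   // 3: only the deprecated field is set
    fmt.Println(*mergeReplicas(&three, &five)) // 5: the autoscaler value wins
}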

internal/manifests/collector/horizontalpodautoscaler.go (+2 −3)

@@ -18,23 +18,22 @@ import (
     autoscalingv2 "k8s.io/api/autoscaling/v2"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-    "sigs.k8s.io/controller-runtime/pkg/client"

     "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
     "github.com/open-telemetry/opentelemetry-operator/internal/manifests"
     "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
     "github.com/open-telemetry/opentelemetry-operator/internal/naming"
 )

-func HorizontalPodAutoscaler(params manifests.Params) (client.Object, error) {
+func HorizontalPodAutoscaler(params manifests.Params) (*autoscalingv2.HorizontalPodAutoscaler, error) {
     name := naming.Collector(params.OtelCol.Name)
     labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, params.Config.LabelsFilter())
     annotations, err := Annotations(params.OtelCol)
     if err != nil {
         return nil, err
     }

-    var result client.Object
+    var result *autoscalingv2.HorizontalPodAutoscaler

     objectMeta := metav1.ObjectMeta{
         Name: naming.HorizontalPodAutoscaler(params.OtelCol.Name),

internal/manifests/collector/horizontalpodautoscaler_test.go (+1 −4)

@@ -19,7 +19,6 @@ import (

     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
-    autoscalingv2 "k8s.io/api/autoscaling/v2"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -95,11 +94,9 @@ func TestHPA(t *testing.T) {
         },
         Log: logger,
     }
-    raw, err := HorizontalPodAutoscaler(params)
+    hpa, err := HorizontalPodAutoscaler(params)
     require.NoError(t, err)

-    hpa := raw.(*autoscalingv2.HorizontalPodAutoscaler)
-
     // verify
     assert.Equal(t, "my-instance-collector", hpa.Name)
     assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"])
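
Returning the concrete *autoscalingv2.HorizontalPodAutoscaler instead of client.Object is what lets the test drop its type assertion, while callers that collect manifests generically keep working, since the pointer type still satisfies client.Object. A minimal compile-check sketch:

package main

import (
    "fmt"

    autoscalingv2 "k8s.io/api/autoscaling/v2"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

func main() {
    hpa := &autoscalingv2.HorizontalPodAutoscaler{}
    hpa.Name = "my-instance-collector"

    // The typed value can still be stored as a client.Object without any assertion.
    var obj client.Object = hpa
    fmt.Println(obj.GetName())
}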
