Fixed Otel Controller - Auxiliary resources deletion #2575

Merged
merged 31 commits on Feb 28, 2024
Changes from 16 commits
Commits (31)
5a31481
Fixed HPA deletion
yuriolisa Jan 29, 2024
bd9bd93
Fix e2e tests
yuriolisa Jan 29, 2024
10f82b0
Added reconciliation test
yuriolisa Feb 12, 2024
515a034
Changed from client.Object to HPA Obj
yuriolisa Feb 12, 2024
2ac2af7
Added owned objects function
yuriolisa Feb 12, 2024
a61de02
Fixed controller test
yuriolisa Feb 13, 2024
20c7bf6
Fixed controller test
yuriolisa Feb 13, 2024
29135cd
Fixed controller test
yuriolisa Feb 13, 2024
8f6159c
Add PodDisruptionBudget
yuriolisa Feb 14, 2024
dacc99a
Add PodDisruptionBudget
yuriolisa Feb 15, 2024
a0a30da
Change vars setting
yuriolisa Feb 16, 2024
1edd721
Change vars setting
yuriolisa Feb 16, 2024
29ad837
Change vars setting
yuriolisa Feb 19, 2024
2f1b576
Update e2e tests to chainsaw
yuriolisa Feb 20, 2024
9a669d0
Added ingress e2e tests
yuriolisa Feb 21, 2024
ebbb447
Added Volumes, changed function's place
yuriolisa Feb 26, 2024
55240ba
Added Volumes, changed function's place
yuriolisa Feb 26, 2024
4350a56
Added Volumes, changed function's place
yuriolisa Feb 27, 2024
1839504
Fixed Suite Test
yuriolisa Feb 27, 2024
88b30ef
Added RBAC for Volumes
yuriolisa Feb 27, 2024
7e1b010
Added RBAC for Volumes
yuriolisa Feb 27, 2024
18c8385
Fixed Suite Test
yuriolisa Feb 27, 2024
3e77e72
Fixed Suite Test
yuriolisa Feb 27, 2024
8bd21d5
Fixed Suite Test
yuriolisa Feb 27, 2024
2087366
Add a sleep
jaronoff97 Feb 27, 2024
d2a0269
Merge pull request #1 from jaronoff97/fix-hpa-delete
yuriolisa Feb 28, 2024
7d09dd0
Fixed Chainsaw test
yuriolisa Feb 28, 2024
42469b4
Fixed Chainsaw test
yuriolisa Feb 28, 2024
8af8038
Fixed Chainsaw test
yuriolisa Feb 28, 2024
1ffc7eb
Fixed Chainsaw test
yuriolisa Feb 28, 2024
ba7c3b2
Fixed Chainsaw test
yuriolisa Feb 28, 2024
16 changes: 16 additions & 0 deletions .chloggen/fix-hpa-delete.yaml
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
component: operator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed HPA deletion

# One or more tracking issues related to the change
issues: [2568, 2587, 2651]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
24 changes: 22 additions & 2 deletions controllers/common.go
@@ -23,6 +23,7 @@ import (
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -77,7 +78,7 @@ func BuildOpAMPBridge(params manifests.Params) ([]client.Object, error) {
}

// reconcileDesiredObjects runs the reconcile process using the mutateFn over the given list of objects.
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) error {
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects []client.Object, ownedObjects map[types.UID]client.Object) error {
var errs []error
for _, desired := range desiredObjects {
l := logger.WithValues(
@@ -91,7 +92,6 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
continue
}
}

// existing is an object the controller runtime will hydrate for us
// we obtain the existing object by deep copying the desired object because it's the most convenient way
existing := desired.DeepCopyObject().(client.Object)
@@ -116,9 +116,29 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
}

l.V(1).Info(fmt.Sprintf("desired has been %s", op))
// This object is still managed by the operator, remove it from the list of objects to prune
delete(ownedObjects, existing.GetUID())
}
if len(errs) > 0 {
return fmt.Errorf("failed to create objects for %s: %w", owner.GetName(), errors.Join(errs...))
}
// Prune owned objects in the cluster which should not be present after the reconciliation.
pruneErrs := []error{}
for _, obj := range ownedObjects {
l := logger.WithValues(
"object_name", obj.GetName(),
"object_kind", obj.GetObjectKind().GroupVersionKind(),
)

l.Info("pruning unmanaged resource")
err := kubeClient.Delete(ctx, obj)
if err != nil {
l.Error(err, "failed to delete resource")
pruneErrs = append(pruneErrs, err)
}
}
if len(pruneErrs) > 0 {
return fmt.Errorf("failed to prune objects for %s: %w", owner.GetName(), errors.Join(pruneErrs...))
}
return nil
}
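
The heart of the fix is an apply-then-prune pass: every object in the cluster that carries the collector's labels starts out in ownedObjects, each object that is still desired is dropped from that map after it has been created or updated, and whatever is left is deleted. Below is a minimal, runnable sketch of that pattern, with plain strings standing in for the real types.UID keys and client.Object values (the UIDs and names are hypothetical):

package main

import "fmt"

// Minimal sketch of the apply-then-prune pattern behind the new
// reconcileDesiredObjects signature. Plain strings stand in for the real
// types.UID keys and client.Object values; the UIDs and names are hypothetical.
func main() {
	// Everything in the cluster that carries the collector's labels.
	ownedObjects := map[string]string{
		"uid-hpa":     "simplest-collector (HorizontalPodAutoscaler)",
		"uid-ingress": "simplest-ingress (Ingress)",
	}

	// Objects built from the current CR spec. The HPA is missing because
	// spec.autoscaler was removed from the OpenTelemetryCollector.
	desired := []string{"uid-ingress"}

	// Phase 1: each desired object is created or updated, then dropped from
	// the prune set because it is still managed.
	for _, uid := range desired {
		delete(ownedObjects, uid)
	}

	// Phase 2: whatever is left is no longer desired and gets deleted.
	for uid, name := range ownedObjects {
		fmt.Printf("pruning %s (uid=%s)\n", name, uid)
	}
}

Callers with nothing to prune, such as the OpAMPBridge reconciler in the next file, can pass nil for ownedObjects: ranging over a nil map yields no entries, and delete on a nil map is a no-op.
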
2 changes: 1 addition & 1 deletion controllers/opampbridge_controller.go
@@ -103,7 +103,7 @@ func (r *OpAMPBridgeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
if buildErr != nil {
return ctrl.Result{}, buildErr
}
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects...)
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects, nil)
return opampbridgeStatus.HandleReconcileStatus(ctx, log, params, err)
}

92 changes: 89 additions & 3 deletions controllers/opentelemetrycollector_controller.go
@@ -17,24 +17,33 @@ package controllers

import (
"context"
"fmt"

"github.com/go-logr/logr"
routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
policyV1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/api/convert"
"github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift"
"github.com/open-telemetry/opentelemetry-operator/internal/config"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
"github.com/open-telemetry/opentelemetry-operator/internal/naming"
collectorStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/collector"
"github.com/open-telemetry/opentelemetry-operator/pkg/featuregate"
)
@@ -57,6 +66,73 @@ type Params struct {
Config config.Config
}

func (r *OpenTelemetryCollectorReconciler) findOtelOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) {
ownedObjects := map[types.UID]client.Object{}

listOps := &client.ListOptions{
Namespace: params.OtelCol.Namespace,
LabelSelector: labels.SelectorFromSet(manifestutils.Labels(params.OtelCol.ObjectMeta, naming.Collector(params.OtelCol.Name), params.OtelCol.Spec.Image, collector.ComponentOpenTelemetryCollector, params.Config.LabelsFilter())),
}
hpaList := &autoscalingv2.HorizontalPodAutoscalerList{}
err := r.List(ctx, hpaList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing HorizontalPodAutoscalers: %w", err)
}
for i := range hpaList.Items {
ownedObjects[hpaList.Items[i].GetUID()] = &hpaList.Items[i]
}

if featuregate.PrometheusOperatorIsAvailable.IsEnabled() {
servicemonitorList := &monitoringv1.ServiceMonitorList{}
err = r.List(ctx, servicemonitorList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing ServiceMonitors: %w", err)
}
for i := range servicemonitorList.Items {
ownedObjects[servicemonitorList.Items[i].GetUID()] = servicemonitorList.Items[i]
}

podMonitorList := &monitoringv1.PodMonitorList{}
err = r.List(ctx, podMonitorList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing PodMonitors: %w", err)
}
for i := range podMonitorList.Items {
ownedObjects[podMonitorList.Items[i].GetUID()] = podMonitorList.Items[i]
}
}
ingressList := &networkingv1.IngressList{}
err = r.List(ctx, ingressList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing Ingresses: %w", err)
}
for i := range ingressList.Items {
ownedObjects[ingressList.Items[i].GetUID()] = &ingressList.Items[i]
}

if params.Config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
routesList := &routev1.RouteList{}
err = r.List(ctx, routesList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing Routes: %w", err)
}
for i := range routesList.Items {
ownedObjects[routesList.Items[i].GetUID()] = &routesList.Items[i]
}
}

pdbList := &policyV1.PodDisruptionBudgetList{}
err = r.List(ctx, pdbList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing PodDisruptionBudgets: %w", err)
}
for i := range pdbList.Items {
ownedObjects[pdbList.Items[i].GetUID()] = &pdbList.Items[i]
}

return ownedObjects, nil
}

func (r *OpenTelemetryCollectorReconciler) getParams(instance v1alpha1.OpenTelemetryCollector) (manifests.Params, error) {
otelCol, err := convert.V1Alpha1to2(instance)
if err != nil {
@@ -134,9 +210,13 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct
if buildErr != nil {
return ctrl.Result{}, buildErr
}
// TODO: https://github.com/open-telemetry/opentelemetry-operator/issues/2620
// TODO: Change &instance to use params.OtelCol
err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects...)

ownedObjects, err := r.findOtelOwnedObjects(ctx, params)
if err != nil {
return ctrl.Result{}, err
}

err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects, ownedObjects)
return collectorStatus.HandleReconcileStatus(ctx, log, params, instance, err)
}

@@ -150,6 +230,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
Owns(&appsv1.Deployment{}).
Owns(&appsv1.DaemonSet{}).
Owns(&appsv1.StatefulSet{}).
Owns(&corev1.PersistentVolume{}).
Owns(&corev1.PersistentVolumeClaim{}).
Owns(&networkingv1.Ingress{}).
Owns(&autoscalingv2.HorizontalPodAutoscaler{}).
Owns(&policyV1.PodDisruptionBudget{})

@@ -162,6 +245,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
builder.Owns(&monitoringv1.ServiceMonitor{})
builder.Owns(&monitoringv1.PodMonitor{})
}
if r.config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
builder.Owns(&routev1.Route{})
}

return builder.Complete(r)
}
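
findOtelOwnedObjects only lists resources that carry the operator-generated collector labels, so unrelated resources in the same namespace never end up in the prune set. A small, self-contained sketch of that selector behaviour follows; the label values are illustrative stand-ins, not the exact set manifestutils.Labels produces:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

// Sketch of how the listOps selector narrows the owned-object search.
// The label values below are illustrative, not the exact set produced by
// manifestutils.Labels for a collector named "simplest" in "default".
func main() {
	selector := labels.SelectorFromSet(labels.Set{
		"app.kubernetes.io/managed-by": "opentelemetry-operator",
		"app.kubernetes.io/instance":   "default.simplest",
		"app.kubernetes.io/component":  "opentelemetry-collector",
	})

	// An HPA created by the operator carries the same labels and matches.
	operatorHPA := labels.Set{
		"app.kubernetes.io/managed-by": "opentelemetry-operator",
		"app.kubernetes.io/instance":   "default.simplest",
		"app.kubernetes.io/component":  "opentelemetry-collector",
		"app.kubernetes.io/name":       "simplest-collector",
	}
	fmt.Println(selector.Matches(operatorHPA)) // true

	// An unrelated HPA in the same namespace does not, so it is never pruned.
	fmt.Println(selector.Matches(labels.Set{"app": "some-other-workload"})) // false
}
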
6 changes: 6 additions & 0 deletions internal/api/convert/v1alpha.go
@@ -51,6 +51,12 @@ func V1Alpha1to2(in v1alpha1.OpenTelemetryCollector) (v1beta1.OpenTelemetryColle
Pods: m.Pods,
}
}
if copy.Spec.MaxReplicas != nil && copy.Spec.Autoscaler.MaxReplicas == nil {
copy.Spec.Autoscaler.MaxReplicas = copy.Spec.MaxReplicas
}
if copy.Spec.MinReplicas != nil && copy.Spec.Autoscaler.MinReplicas == nil {
copy.Spec.Autoscaler.MinReplicas = copy.Spec.MinReplicas
}
out.Spec.OpenTelemetryCommonFields.Autoscaler = &v1beta1.AutoscalerSpec{
MinReplicas: copy.Spec.Autoscaler.MinReplicas,
MaxReplicas: copy.Spec.Autoscaler.MaxReplicas,
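
The conversion change backfills the deprecated top-level minReplicas/maxReplicas into the Autoscaler spec only when the latter is still unset, so an explicitly configured autoscaler is never overridden. A stand-in sketch of that rule (the struct below is a simplified stand-in, not the real v1alpha1 type):

package main

import "fmt"

// collectorSpec is a simplified stand-in for the v1alpha1 spec, showing only
// the fields involved in the backfill rule added to V1Alpha1to2.
type collectorSpec struct {
	MaxReplicas           *int32 // deprecated top-level field
	AutoscalerMaxReplicas *int32
}

func backfill(s *collectorSpec) {
	// Only copy the deprecated field when the autoscaler field is unset.
	if s.MaxReplicas != nil && s.AutoscalerMaxReplicas == nil {
		s.AutoscalerMaxReplicas = s.MaxReplicas
	}
}

func main() {
	two := int32(2)
	s := collectorSpec{MaxReplicas: &two} // only the deprecated field is set
	backfill(&s)
	fmt.Println(*s.AutoscalerMaxReplicas) // 2 — carried over into the autoscaler spec
}

This is what lets the autoscale e2e tests below keep using the deprecated spec.minReplicas/maxReplicas fields while still producing an HPA.
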
5 changes: 2 additions & 3 deletions internal/manifests/collector/horizontalpodautoscaler.go
@@ -18,23 +18,22 @@ import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
"github.com/open-telemetry/opentelemetry-operator/internal/naming"
)

func HorizontalPodAutoscaler(params manifests.Params) (client.Object, error) {
func HorizontalPodAutoscaler(params manifests.Params) (*autoscalingv2.HorizontalPodAutoscaler, error) {
name := naming.Collector(params.OtelCol.Name)
labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, params.Config.LabelsFilter())
annotations, err := Annotations(params.OtelCol)
if err != nil {
return nil, err
}

var result client.Object
var result *autoscalingv2.HorizontalPodAutoscaler

objectMeta := metav1.ObjectMeta{
Name: naming.HorizontalPodAutoscaler(params.OtelCol.Name),
Original file line number Diff line number Diff line change
@@ -19,7 +19,6 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -95,11 +94,9 @@ func TestHPA(t *testing.T) {
},
Log: logger,
}
raw, err := HorizontalPodAutoscaler(params)
hpa, err := HorizontalPodAutoscaler(params)
require.NoError(t, err)

hpa := raw.(*autoscalingv2.HorizontalPodAutoscaler)

// verify
assert.Equal(t, "my-instance-collector", hpa.Name)
assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"])
28 changes: 28 additions & 0 deletions tests/e2e-autoscale/autoscale/04-error.yaml
@@ -0,0 +1,28 @@
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: simplest-collector
spec:
maxReplicas: 2
scaleTargetRef:
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
name: simplest
status:
currentMetrics: null
currentReplicas: 1
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: simplest-set-utilization-collector
spec:
maxReplicas: 2
scaleTargetRef:
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
name: simplest-set-utilization
status:
currentMetrics: null
currentReplicas: 1
74 changes: 74 additions & 0 deletions tests/e2e-autoscale/autoscale/04-install.yaml
@@ -0,0 +1,74 @@
# This creates two different deployments:
# * The first one is to ensure an autoscaler is created without
# setting any metrics in the Spec.
# * The second is to check that scaling works properly and that
# spec updates are working as expected.
#
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: simplest
spec:
# TODO: these tests use .Spec.MaxReplicas and .Spec.MinReplicas. These fields are
# deprecated and moved to .Spec.Autoscaler. It is fine to use them to test that the old
# CRD is still supported, but the tests should eventually be updated.
minReplicas: 1
maxReplicas: 2
resources:
limits:
cpu: 500m
memory: 128Mi
requests:
cpu: 5m
memory: 64Mi

config: |
receivers:
otlp:
protocols:
grpc:
http:
processors:

exporters:
debug:

service:
pipelines:
traces:
receivers: [otlp]
processors: []
exporters: [debug]
---
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: simplest-set-utilization
spec:
minReplicas: 1
maxReplicas: 2
resources:
limits:
cpu: 250m
memory: 128Mi
requests:
cpu: 5m
memory: 64Mi

config: |
receivers:
otlp:
protocols:
grpc:
http:
processors:

exporters:
debug:

service:
pipelines:
traces:
receivers: [otlp]
processors: []
exporters: [debug]
7 changes: 7 additions & 0 deletions tests/e2e-autoscale/autoscale/chainsaw-test.yaml
@@ -33,3 +33,10 @@ spec:
name: telemetrygen-set-utilization
- assert:
file: 03-assert.yaml
# Apply the same OpenTelemetryCollector from step 00, but this time without spec.autoscaler, which should cause the controller to delete the HPA resources. The error step below then checks that the HPAs no longer exist.
- name: step-04
try:
- apply:
file: 04-install.yaml
- error:
file: 04-error.yaml
35 changes: 35 additions & 0 deletions tests/e2e/ingress/01-error.yaml
@@ -0,0 +1,35 @@
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
something.com: "true"
labels:
app.kubernetes.io/managed-by: opentelemetry-operator
app.kubernetes.io/name: simplest-ingress
name: simplest-ingress
ownerReferences:
- apiVersion: opentelemetry.io/v1alpha1
blockOwnerDeletion: true
controller: true
kind: OpenTelemetryCollector
name: simplest
spec:
rules:
- host: example.com
http:
paths:
- backend:
service:
name: simplest-collector
port:
name: otlp-grpc
path: /otlp-grpc
pathType: Prefix
- backend:
service:
name: simplest-collector
port:
name: otlp-http
path: /otlp-http
pathType: Prefix
23 changes: 23 additions & 0 deletions tests/e2e/ingress/01-install.yaml
@@ -0,0 +1,23 @@
---
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
name: simplest
spec:
mode: "deployment"
config: |
receivers:
otlp:
protocols:
grpc:
http:
exporters:
debug:
service:
pipelines:
traces:
receivers: [otlp]
processors: []
exporters: [debug]
19 changes: 13 additions & 6 deletions tests/e2e/ingress/chainsaw-test.yaml
@@ -6,9 +6,16 @@ metadata:
name: ingress
spec:
steps:
- name: step-00
try:
- apply:
file: 00-install.yaml
- assert:
file: 00-assert.yaml
- name: step-00
try:
- apply:
file: 00-install.yaml
- assert:
file: 00-assert.yaml
# Apply the same OpenTelemetryCollector from the previous step, but this time without spec.ingress, which should cause the controller to delete the Ingress resource. The error step below then checks that the Ingress no longer exists.
- name: step-01
try:
- apply:
file: 01-install.yaml
- error:
file: 01-error.yaml
53 changes: 53 additions & 0 deletions tests/e2e/statefulset-features/02-assert.yaml
@@ -0,0 +1,53 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: stateful-collector
spec:
podManagementPolicy: Parallel
template:
spec:
containers:
- args:
- --config=/conf/collector.yaml
name: otc-container
volumeMounts:
- mountPath: /conf
name: otc-internal
- mountPath: /usr/share/testvolume
name: testvolume
volumes:
- configMap:
items:
- key: collector.yaml
path: collector.yaml
name: stateful-collector
name: otc-internal
- emptyDir: {}
name: testvolume
volumeClaimTemplates:
- apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: testvolume
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 2Gi
volumeMode: Filesystem
status:
replicas: 2
readyReplicas: 2
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: testvolume
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 2Gi
volumeMode: Filesystem
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ metadata:
name: stateful
spec:
mode: statefulset
replicas: 3
replicas: 2
volumes:
- name: testvolume
volumeMounts:
@@ -17,7 +17,7 @@ spec:
accessModes: [ "ReadWriteMany" ] # change accessMode to trigger statefulset recreation.
resources:
requests:
storage: 1Gi
storage: 2Gi
config: |
receivers:
jaeger:
30 changes: 18 additions & 12 deletions tests/e2e/statefulset-features/chainsaw-test.yaml
@@ -6,15 +6,21 @@ metadata:
name: statefulset-features
spec:
steps:
- name: step-00
try:
- apply:
file: 00-install.yaml
- assert:
file: 00-assert.yaml
- name: step-01
try:
- apply:
file: 01-update-volume-claim-templates.yaml
- assert:
file: 01-assert.yaml
- name: step-00
try:
- apply:
file: 00-install.yaml
- assert:
file: 00-assert.yaml
- name: step-01
try:
- apply:
file: 01-update-volume-claim-templates.yaml
- assert:
file: 01-assert.yaml
- name: step-02
try:
- apply:
file: 02-update-volume-claim-storage.yaml
- assert:
file: 02-assert.yaml