Skip to content

Commit 7a48b56

Browse files
authored
feat: Add support for PDBs on deployment and statefulset (#2141)
1 parent 3b10d4c commit 7a48b56

25 files changed

+789
-1
lines changed

.chloggen/add-pdb-support.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
2+
change_type: enhancement
3+
4+
# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
5+
component: operator
6+
7+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
8+
note: "Add PDB support for OpenTelemetryCollector"
9+
10+
# One or more tracking issues related to the change
11+
issues:
12+
- 2136
13+
14+
# (Optional) One or more lines of additional information to render under the primary note.
15+
# These lines will be padded with 2 spaces and then inserted directly into the document.
16+
# Use pipe (|) for multiline entries.
17+
subtext: |
18+
This PR adds support for PodDisruptionBudgets when OpenTelemetryCollector is deployed
19+
as `deployment` or `statefulset`.

.github/workflows/e2e.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ jobs:
3030
- e2e-prometheuscr
3131
- e2e-autoscale
3232
- e2e-multi-instrumentation
33+
- e2e-pdb
3334

3435
steps:
3536
- name: Set up Go

Makefile

+5
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ e2e-upgrade: undeploy
197197
e2e-autoscale:
198198
$(KUTTL) test --config kuttl-test-autoscale.yaml
199199

200+
# end-to-end-test for testing pdb support
201+
.PHONY: e2e-pdb
202+
e2e-pdb:
203+
$(KUTTL) test --config kuttl-test-pdb.yaml
204+
200205
# end-to-end-test for testing OpenShift cases
201206
.PHONY: e2e-openshift
202207
e2e-openshift:

apis/v1alpha1/opentelemetrycollector_types.go

+23
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
v1 "k8s.io/api/core/v1"
2020
networkingv1 "k8s.io/api/networking/v1"
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+
"k8s.io/apimachinery/pkg/util/intstr"
2223
)
2324

2425
// ManagementStateType defines the type for CR management states.
@@ -123,6 +124,11 @@ type OpenTelemetryCollectorSpec struct {
123124
//
124125
// +optional
125126
Autoscaler *AutoscalerSpec `json:"autoscaler,omitempty"`
127+
// PodDisruptionBudget specifies the pod disruption budget configuration to use
128+
// for the OpenTelemetryCollector workload.
129+
//
130+
// +optional
131+
PodDisruptionBudget *PodDisruptionBudgetSpec `json:"podDisruptionBudget,omitempty"`
126132
// SecurityContext configures the container security context for
127133
// the opentelemetry-collector container.
128134
//
@@ -448,6 +454,23 @@ type AutoscalerSpec struct {
448454
TargetMemoryUtilization *int32 `json:"targetMemoryUtilization,omitempty"`
449455
}
450456

457+
// PodDisruptionBudgetSpec defines the OpenTelemetryCollector's pod disruption budget specification.
458+
type PodDisruptionBudgetSpec struct {
459+
// An eviction is allowed if at least "minAvailable" pods selected by
460+
// "selector" will still be available after the eviction, i.e. even in the
461+
// absence of the evicted pod. So for example you can prevent all voluntary
462+
// evictions by specifying "100%".
463+
// +optional
464+
MinAvailable *intstr.IntOrString `json:"minAvailable,omitempty"`
465+
466+
// An eviction is allowed if at most "maxUnavailable" pods selected by
467+
// "selector" are unavailable after the eviction, i.e. even in absence of
468+
// the evicted pod. For example, one can prevent all voluntary evictions
469+
// by specifying 0. This is a mutually exclusive setting with "minAvailable".
470+
// +optional
471+
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
472+
}
473+
451474
// MetricsConfigSpec defines a metrics config.
452475
type MetricsConfigSpec struct {
453476
// EnableMetrics specifies if ServiceMonitor should be created for the OpenTelemetry Collector and Prometheus Exporters.

apis/v1alpha1/opentelemetrycollector_webhook.go

+23
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919

2020
autoscalingv2 "k8s.io/api/autoscaling/v2"
2121
"k8s.io/apimachinery/pkg/runtime"
22+
"k8s.io/apimachinery/pkg/util/intstr"
2223
"k8s.io/apimachinery/pkg/util/validation"
2324
ctrl "sigs.k8s.io/controller-runtime"
2425
logf "sigs.k8s.io/controller-runtime/pkg/log"
@@ -91,6 +92,20 @@ func (r *OpenTelemetryCollector) Default() {
9192
r.Spec.Autoscaler.TargetCPUUtilization = &defaultCPUTarget
9293
}
9394
}
95+
96+
// if a PDB isn't provided, we set MaxUnavailable to 1,
97+
// which will work even if there is just one replica,
98+
// not blocking node drains while, out of the box, limiting
99+
// the disruption they cause when replicas > 1
100+
if r.Spec.PodDisruptionBudget == nil {
101+
r.Spec.PodDisruptionBudget = &PodDisruptionBudgetSpec{
102+
MaxUnavailable: &intstr.IntOrString{
103+
Type: intstr.Int,
104+
IntVal: 1,
105+
},
106+
}
107+
}
108+
94109
if r.Spec.Ingress.Type == IngressTypeRoute && r.Spec.Ingress.Route.Termination == "" {
95110
r.Spec.Ingress.Route.Termination = TLSRouteTerminationTypeEdge
96111
}
@@ -231,6 +246,14 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {
231246
}
232247
}
233248

249+
// validate pod disruption budget
250+
251+
if r.Spec.PodDisruptionBudget != nil {
252+
if r.Spec.PodDisruptionBudget.MaxUnavailable != nil && r.Spec.PodDisruptionBudget.MinAvailable != nil {
253+
return fmt.Errorf("the OpenTelemetry Spec podDisruptionBudget configuration is incorrect, minAvailable and maxUnavailable are mutually exclusive")
254+
}
255+
}
256+
234257
if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar {
235258
return fmt.Errorf("the OpenTelemetry Spec Ingress configuiration is incorrect. Ingress can only be used in combination with the modes: %s, %s, %s",
236259
ModeDeployment, ModeDaemonSet, ModeStatefulSet,

apis/v1alpha1/opentelemetrycollector_webhook_test.go

+88
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
v1 "k8s.io/api/core/v1"
2424
"k8s.io/apimachinery/pkg/api/resource"
2525
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
"k8s.io/apimachinery/pkg/util/intstr"
2627
)
2728

2829
func TestOTELColDefaultingWebhook(t *testing.T) {
@@ -48,6 +49,12 @@ func TestOTELColDefaultingWebhook(t *testing.T) {
4849
Mode: ModeDeployment,
4950
Replicas: &one,
5051
UpgradeStrategy: UpgradeStrategyAutomatic,
52+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
53+
MaxUnavailable: &intstr.IntOrString{
54+
Type: intstr.Int,
55+
IntVal: 1,
56+
},
57+
},
5158
},
5259
},
5360
},
@@ -70,6 +77,12 @@ func TestOTELColDefaultingWebhook(t *testing.T) {
7077
Mode: ModeSidecar,
7178
Replicas: &five,
7279
UpgradeStrategy: "adhoc",
80+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
81+
MaxUnavailable: &intstr.IntOrString{
82+
Type: intstr.Int,
83+
IntVal: 1,
84+
},
85+
},
7386
},
7487
},
7588
},
@@ -98,6 +111,12 @@ func TestOTELColDefaultingWebhook(t *testing.T) {
98111
MaxReplicas: &five,
99112
MinReplicas: &one,
100113
},
114+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
115+
MaxUnavailable: &intstr.IntOrString{
116+
Type: intstr.Int,
117+
IntVal: 1,
118+
},
119+
},
101120
},
102121
},
103122
},
@@ -126,6 +145,12 @@ func TestOTELColDefaultingWebhook(t *testing.T) {
126145
MinReplicas: &one,
127146
},
128147
MaxReplicas: &five,
148+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
149+
MaxUnavailable: &intstr.IntOrString{
150+
Type: intstr.Int,
151+
IntVal: 1,
152+
},
153+
},
129154
},
130155
},
131156
},
@@ -155,6 +180,44 @@ func TestOTELColDefaultingWebhook(t *testing.T) {
155180
},
156181
Replicas: &one,
157182
UpgradeStrategy: UpgradeStrategyAutomatic,
183+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
184+
MaxUnavailable: &intstr.IntOrString{
185+
Type: intstr.Int,
186+
IntVal: 1,
187+
},
188+
},
189+
},
190+
},
191+
},
192+
{
193+
name: "Defined PDB",
194+
otelcol: OpenTelemetryCollector{
195+
Spec: OpenTelemetryCollectorSpec{
196+
Mode: ModeDeployment,
197+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
198+
MinAvailable: &intstr.IntOrString{
199+
Type: intstr.String,
200+
StrVal: "10%",
201+
},
202+
},
203+
},
204+
},
205+
expected: OpenTelemetryCollector{
206+
ObjectMeta: metav1.ObjectMeta{
207+
Labels: map[string]string{
208+
"app.kubernetes.io/managed-by": "opentelemetry-operator",
209+
},
210+
},
211+
Spec: OpenTelemetryCollectorSpec{
212+
Mode: ModeDeployment,
213+
Replicas: &one,
214+
UpgradeStrategy: UpgradeStrategyAutomatic,
215+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
216+
MinAvailable: &intstr.IntOrString{
217+
Type: intstr.String,
218+
StrVal: "10%",
219+
},
220+
},
158221
},
159222
},
160223
},
@@ -237,6 +300,12 @@ func TestOTELColValidatingWebhook(t *testing.T) {
237300
},
238301
TargetCPUUtilization: &five,
239302
},
303+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
304+
MinAvailable: &intstr.IntOrString{
305+
Type: intstr.Int,
306+
IntVal: 1,
307+
},
308+
},
240309
},
241310
},
242311
},
@@ -489,6 +558,25 @@ func TestOTELColValidatingWebhook(t *testing.T) {
489558
},
490559
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, invalid pods target type",
491560
},
561+
{
562+
name: "pdb minAvailable and maxUnavailable have been set together",
563+
otelcol: OpenTelemetryCollector{
564+
Spec: OpenTelemetryCollectorSpec{
565+
MaxReplicas: &three,
566+
PodDisruptionBudget: &PodDisruptionBudgetSpec{
567+
MinAvailable: &intstr.IntOrString{
568+
Type: intstr.Int,
569+
IntVal: 1,
570+
},
571+
MaxUnavailable: &intstr.IntOrString{
572+
Type: intstr.Int,
573+
IntVal: 1,
574+
},
575+
},
576+
},
577+
},
578+
expectedErr: "the OpenTelemetry Spec podDisruptionBudget configuration is incorrect, minAvailable and maxUnavailable are mutually exclusive",
579+
},
492580
{
493581
name: "invalid deployment mode incompabible with ingress settings",
494582
otelcol: OpenTelemetryCollector{

apis/v1alpha1/zz_generated.deepcopy.go

+31
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bundle/manifests/opentelemetry-operator.clusterserviceversion.yaml

+12
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,18 @@ spec:
237237
- get
238238
- patch
239239
- update
240+
- apiGroups:
241+
- policy
242+
resources:
243+
- poddisruptionbudgets
244+
verbs:
245+
- create
246+
- delete
247+
- get
248+
- list
249+
- patch
250+
- update
251+
- watch
240252
- apiGroups:
241253
- route.openshift.io
242254
resources:

bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml

+21
Original file line numberDiff line numberDiff line change
@@ -3719,6 +3719,27 @@ spec:
37193719
description: PodAnnotations is the set of annotations that will be
37203720
attached to Collector and Target Allocator pods.
37213721
type: object
3722+
podDisruptionBudget:
3723+
description: PodDisruptionBudget specifies the pod disruption budget
3724+
configuration to use for the OpenTelemetryCollector workload.
3725+
properties:
3726+
maxUnavailable:
3727+
anyOf:
3728+
- type: integer
3729+
- type: string
3730+
description: An eviction is allowed if at most "maxUnavailable"
3731+
pods selected by "selector" are unavailable after the eviction,
3732+
i.e. even in absence of the evicted pod.
3733+
x-kubernetes-int-or-string: true
3734+
minAvailable:
3735+
anyOf:
3736+
- type: integer
3737+
- type: string
3738+
description: An eviction is allowed if at least "minAvailable"
3739+
pods selected by "selector" will still be available after the
3740+
eviction, i.e. even in the absence of the evicted pod.
3741+
x-kubernetes-int-or-string: true
3742+
type: object
37223743
podSecurityContext:
37233744
description: PodSecurityContext configures the pod security context
37243745
for the opentelemetry-collector pod, when running as a deployment,

config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml

+21
Original file line numberDiff line numberDiff line change
@@ -3716,6 +3716,27 @@ spec:
37163716
description: PodAnnotations is the set of annotations that will be
37173717
attached to Collector and Target Allocator pods.
37183718
type: object
3719+
podDisruptionBudget:
3720+
description: PodDisruptionBudget specifies the pod disruption budget
3721+
configuration to use for the OpenTelemetryCollector workload.
3722+
properties:
3723+
maxUnavailable:
3724+
anyOf:
3725+
- type: integer
3726+
- type: string
3727+
description: An eviction is allowed if at most "maxUnavailable"
3728+
pods selected by "selector" are unavailable after the eviction,
3729+
i.e. even in absence of the evicted pod.
3730+
x-kubernetes-int-or-string: true
3731+
minAvailable:
3732+
anyOf:
3733+
- type: integer
3734+
- type: string
3735+
description: An eviction is allowed if at least "minAvailable"
3736+
pods selected by "selector" will still be available after the
3737+
eviction, i.e. even in the absence of the evicted pod.
3738+
x-kubernetes-int-or-string: true
3739+
type: object
37193740
podSecurityContext:
37203741
description: PodSecurityContext configures the pod security context
37213742
for the opentelemetry-collector pod, when running as a deployment,

0 commit comments

Comments
 (0)