From 72dd89d095e0c9a6d5e10dfb59c8cebc958b7426 Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Mon, 15 Jun 2026 11:28:33 +0100 Subject: [PATCH] Replace StatefulSets with a customized controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each etcd member maps 1-to-1 to a Pod owned directly by EtcdCluster. All startup configuration (initial cluster state, initial cluster peers, data dir) is embedded in the Pod spec's env vars, not in a shared ConfigMap. A Pod is immutable once created — if a member needs to be replaced, the old Pod (and its PVC) is deleted and a fresh Pod is created. Signed-off-by: Benjamin Wang --- api/v1alpha1/etcdcluster_types.go | 6 +- api/v1alpha1/zz_generated.deepcopy.go | 2 +- .../bases/operator.etcd.io_etcdclusters.yaml | 12 +- config/rbac/role.yaml | 21 +- internal/controller/etcdcluster_controller.go | 354 +++----- .../controller/etcdcluster_controller_test.go | 498 +++-------- internal/controller/utils.go | 833 ++++++++--------- internal/controller/utils_test.go | 844 +++++++----------- 8 files changed, 996 insertions(+), 1574 deletions(-) diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index f007ae04..9ea88538 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -143,13 +143,11 @@ type EtcdClusterStatus struct { // +optional ObservedGeneration int64 `json:"observedGeneration,omitempty"` - // CurrentReplicas is the number of etcd pods managed by the StatefulSet for this cluster. - // This reflects the .spec.replicas of the underlying StatefulSet. + // CurrentReplicas is the number of etcd member pods currently owned by this cluster. // +optional CurrentReplicas int32 `json:"currentReplicas,omitempty"` - // ReadyReplicas is the number of etcd pods managed by the StatefulSet that are currently ready. - // This reflects the .status.readyReplicas of the underlying StatefulSet. + // ReadyReplicas is the number of etcd member pods that are currently Ready. // +optional ReadyReplicas int32 `json:"readyReplicas,omitempty"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 9db6ca12..743093dc 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -23,7 +23,7 @@ package v1alpha1 import ( "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime" netx "net" ) diff --git a/config/crd/bases/operator.etcd.io_etcdclusters.yaml b/config/crd/bases/operator.etcd.io_etcdclusters.yaml index 0ae1e80e..5e1b97f7 100644 --- a/config/crd/bases/operator.etcd.io_etcdclusters.yaml +++ b/config/crd/bases/operator.etcd.io_etcdclusters.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.20.1 + controller-gen.kubebuilder.io/version: v0.21.0 name: etcdclusters.operator.etcd.io spec: group: operator.etcd.io @@ -1256,9 +1256,8 @@ spec: - type x-kubernetes-list-type: map currentReplicas: - description: |- - CurrentReplicas is the number of etcd pods managed by the StatefulSet for this cluster. - This reflects the .spec.replicas of the underlying StatefulSet. + description: CurrentReplicas is the number of etcd member pods currently + owned by this cluster. format: int32 type: integer currentVersion: @@ -1327,9 +1326,8 @@ spec: format: int64 type: integer readyReplicas: - description: |- - ReadyReplicas is the number of etcd pods managed by the StatefulSet that are currently ready. - This reflects the .status.readyReplicas of the underlying StatefulSet. + description: ReadyReplicas is the number of etcd member pods that + are currently Ready. format: int32 type: integer type: object diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 6c9ac65e..f790be21 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -4,20 +4,6 @@ kind: ClusterRole metadata: name: manager-role rules: -- apiGroups: - - "" - resources: - - configmaps - - secrets - - services - verbs: - - create - - delete - - get - - list - - patch - - update - - watch - apiGroups: - "" resources: @@ -29,9 +15,12 @@ rules: - patch - update - apiGroups: - - apps + - "" resources: - - statefulsets + - persistentvolumeclaims + - pods + - secrets + - services verbs: - create - delete diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index 867886a3..795e8d5c 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -23,7 +23,6 @@ import ( "time" certv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -53,11 +52,9 @@ type EtcdClusterReconciler struct { } // reconcileState holds all transient data for a single reconciliation loop. -// Every phase of Reconcile stores intermediate information here so that -// subsequent phases can operate without additional lookups. type reconcileState struct { - cluster *ecv1alpha1.EtcdCluster // cluster custom resource currently being reconciled - sts *appsv1.StatefulSet // associated StatefulSet for the cluster + cluster *ecv1alpha1.EtcdCluster // cluster CR being reconciled + pods []*corev1.Pod // member pods owned by this cluster, sorted by ordinal memberListResp *clientv3.MemberListResponse // member list fetched from the etcd cluster memberHealth []etcdutils.EpHealth // health information for each etcd member } @@ -65,32 +62,24 @@ type reconcileState struct { // +kubebuilder:rbac:groups=operator.etcd.io,resources=etcdclusters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=operator.etcd.io,resources=etcdclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=operator.etcd.io,resources=etcdclusters/finalizers,verbs=update -// +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch;get;list;update // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;patch;update;delete // +kubebuilder:rbac:groups="cert-manager.io",resources=certificates,verbs=get;list;watch;create;patch;update;delete // +kubebuilder:rbac:groups="cert-manager.io",resources=clusterissuers,verbs=get;list;watch // +kubebuilder:rbac:groups="cert-manager.io",resources=issuers,verbs=get;list;watch -// Reconcile orchestrates a single reconciliation cycle for an EtcdCluster. It -// sequentially fetches resources, ensures primitive objects exist, checks the -// health of the etcd cluster and then adjusts its state to match the desired -// specification. Each phase is handled by a dedicated helper method. -// -// For more details on the controller-runtime Reconcile contract see: -// https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/reconcile +// Reconcile orchestrates a single reconciliation cycle for an EtcdCluster. func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { var state *reconcileState var res ctrl.Result var err error - // Defer status update to ensure it's called regardless of return path defer func() { if state != nil { if statusErr := r.updateStatus(ctx, state); statusErr != nil { - // Log but don't override the main reconciliation error log.FromContext(ctx).Error(statusErr, "Failed to update status") } } @@ -101,7 +90,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return res, err } - if res, err = r.bootstrapStatefulSet(ctx, state); err != nil || !res.IsZero() { + if res, err = r.bootstrapCluster(ctx, state); err != nil || !res.IsZero() { return res, err } @@ -112,10 +101,9 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return r.reconcileClusterState(ctx, state) } -// fetchAndValidateState retrieves the EtcdCluster and its StatefulSet and ensures -// the StatefulSet, if present, is owned by the cluster. It returns a populated -// reconcileState for use in later phases. A non-empty ctrl.Result requests a -// requeue when transient issues occur. +// fetchAndValidateState retrieves the EtcdCluster and lists the Pods it owns. +// It also validates the upgrade path when the desired version differs from the +// version currently running in the first pod. func (r *EtcdClusterReconciler) fetchAndValidateState(ctx context.Context, req ctrl.Request) (*reconcileState, ctrl.Result, error) { logger := log.FromContext(ctx) @@ -128,18 +116,15 @@ func (r *EtcdClusterReconciler) fetchAndValidateState(ctx context.Context, req c return nil, ctrl.Result{}, err } - // Determine desired etcd image registry if ec.Spec.ImageRegistry == "" { ec.Spec.ImageRegistry = r.ImageRegistry } - // Ensure the operator has TLS credentials when the cluster requests TLS. if ec.Spec.TLS != nil { if err := createClientCertificate(ctx, ec, r.Client); err != nil { logger.Error(err, "Failed to create Client Certificate.") } } else { - // TODO: instead of logging error, set default autoConfig logger.Error(nil, fmt.Sprintf( "missing TLS config for %s,\n running etcd-cluster without TLS protection is NOT recommended for production.", ec.Name, @@ -148,55 +133,38 @@ func (r *EtcdClusterReconciler) fetchAndValidateState(ctx context.Context, req c logger.Info("Reconciling EtcdCluster", "spec", ec.Spec) - sts, err := getStatefulSet(ctx, r.Client, ec.Name, ec.Namespace) + pods, err := listOwnedPods(ctx, r.Client, ec) if err != nil { - if errors.IsNotFound(err) { - sts = nil - } else { - logger.Error(err, "Failed to get StatefulSet. Requesting requeue") - return nil, ctrl.Result{RequeueAfter: requeueDuration}, nil - } + logger.Error(err, "Failed to list pods. Requesting requeue") + return nil, ctrl.Result{RequeueAfter: requeueDuration}, nil } - if sts != nil { - if err := checkStatefulSetControlledByEtcdOperator(ec, sts); err != nil { - logger.Error(err, "StatefulSet is not controlled by this EtcdCluster resource") - return nil, ctrl.Result{}, err - } - - // If the version to be reconciled is unsupported, throw an error. - if len(sts.Spec.Template.Spec.Containers) == 0 { - logger.Error(err, "StatefulSet has no containers yet") - return &reconcileState{cluster: ec, sts: sts}, ctrl.Result{}, nil - } - stsImage := sts.Spec.Template.Spec.Containers[0].Image - // Note: createOrPatchStatefulSet() only supports the "registry-path:image" format. - // TODO: switch to using the version from ec.Status eventually. - // https://github.com/etcd-io/etcd-operator/pull/278/changes#r2764796805 - idx := strings.Index(stsImage, ":") - if idx == -1 { - logger.Error(err, "could not extract image version from StatefulSet image", - "image", stsImage) - return &reconcileState{cluster: ec, sts: sts}, ctrl.Result{}, nil - } - currentVersion := stsImage[idx+1:] - targetVersion := ec.Spec.Version - - // Only handle cases when there is a version change. - if currentVersion != targetVersion { - // TODO: consider adding an option in the CRD called allowCustomImageUpgrade to make - // the behavior here optional: - // https://github.com/etcd-io/etcd-operator/pull/278/changes#r2764717418 - canParse, err := validateEtcdUpgradePath(etcdversions.AllVersions, currentVersion, targetVersion) - if !canParse { - logger.Info("error when parsing reconcile versions; it is your responsibility "+ - "to validate if the upgrade path is supported", - "current", currentVersion, - "target", targetVersion, - "error", err, - ) - return &reconcileState{cluster: ec, sts: sts}, ctrl.Result{}, nil - } else { + // Validate the upgrade path using the image tag of the first pod. + if len(pods) > 0 { + for _, c := range pods[0].Spec.Containers { + if c.Name != "etcd" { + continue + } + idx := strings.Index(c.Image, ":") + if idx == -1 { + logger.Info("could not extract image version from pod image", + "image", c.Image) + return &reconcileState{cluster: ec, pods: pods}, ctrl.Result{}, nil + } + currentVersion := c.Image[idx+1:] + targetVersion := ec.Spec.Version + + if currentVersion != targetVersion { + canParse, err := validateEtcdUpgradePath(etcdversions.AllVersions, currentVersion, targetVersion) + if !canParse { + logger.Info("error when parsing reconcile versions; it is your responsibility "+ + "to validate if the upgrade path is supported", + "current", currentVersion, + "target", targetVersion, + "error", err, + ) + return &reconcileState{cluster: ec, pods: pods}, ctrl.Result{}, nil + } if err != nil { logger.Error(err, "unsupported upgrade path between current and target versions", "current", currentVersion, @@ -208,59 +176,42 @@ func (r *EtcdClusterReconciler) fetchAndValidateState(ctx context.Context, req c "current", currentVersion, "target", targetVersion) } + break } } - return &reconcileState{cluster: ec, sts: sts}, ctrl.Result{}, nil + return &reconcileState{cluster: ec, pods: pods}, ctrl.Result{}, nil } -// bootstrapStatefulSet ensures that the foundational Kubernetes objects for -// a cluster exist and are correctly initialized. It creates the StatefulSet (initially -// with 0 replicas) and the headless Service if necessary. When either resource -// is created or the StatefulSet is scaled from zero to one replica, the returned -// ctrl.Result requests a requeue so the next reconciliation loop can observe the -// new state. The reconcileState is updated with the current StatefulSet. -func (r *EtcdClusterReconciler) bootstrapStatefulSet(ctx context.Context, s *reconcileState) (ctrl.Result, error) { +// bootstrapCluster ensures the headless Service exists and, when no pods are +// present, creates the first member pod (ordinal 0) to bootstrap a new cluster. +// A non-zero ctrl.Result requests a requeue so the next loop observes the new pod. +func (r *EtcdClusterReconciler) bootstrapCluster(ctx context.Context, s *reconcileState) (ctrl.Result, error) { logger := log.FromContext(ctx) - requeue := false - var err error - - switch { - case s.sts == nil: - logger.Info("Creating StatefulSet with 0 replica", "expectedSize", s.cluster.Spec.Size) - s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, 0, r.Scheme) - if err != nil { - return ctrl.Result{}, err - } - requeue = true - case s.sts.Spec.Replicas != nil && *s.sts.Spec.Replicas == 0: - logger.Info("StatefulSet has 0 replicas. Trying to create a new cluster with 1 member") - s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, 1, r.Scheme) - if err != nil { - return ctrl.Result{}, err - } - requeue = true + // Service must exist before pods start so that headless DNS resolves. + if err := createHeadlessServiceIfNotExist(ctx, logger, r.Client, s.cluster, r.Scheme); err != nil { + return ctrl.Result{}, err } - if err = createHeadlessServiceIfNotExist(ctx, logger, r.Client, s.cluster, r.Scheme); err != nil { - return ctrl.Result{}, err + if len(s.pods) > 0 { + return ctrl.Result{}, nil } - if requeue { - return ctrl.Result{RequeueAfter: requeueDuration}, nil + logger.Info("No member pods found, creating first member pod", "expectedSize", s.cluster.Spec.Size) + if err := createMemberPod(ctx, logger, r.Client, s.cluster, 0, r.Scheme); err != nil { + return ctrl.Result{}, err } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: requeueDuration}, nil } // performHealthChecks obtains the member list and health status from the etcd -// cluster specified in the StatefulSet. Results are stored on the reconcileState -// for later reconciliation steps. +// cluster. Results are stored on reconcileState for later reconciliation steps. func (r *EtcdClusterReconciler) performHealthChecks(ctx context.Context, s *reconcileState) error { logger := log.FromContext(ctx) logger.Info("Now checking health of the cluster members") var err error - s.memberListResp, s.memberHealth, err = healthCheck(s.sts, logger) + s.memberListResp, s.memberHealth, err = healthCheck(s.cluster.Name, s.cluster.Namespace, s.pods, logger) if err != nil { return fmt.Errorf("health check failed: %w", err) } @@ -268,33 +219,34 @@ func (r *EtcdClusterReconciler) performHealthChecks(ctx context.Context, s *reco } // reconcileClusterState compares the desired cluster size with the observed -// etcd member list and StatefulSet replica count. It performs scaling actions -// and handles learner promotion when needed. A ctrl.Result with a requeue -// instructs the controller to retry after adjustments. +// etcd member list and pod count. It performs scaling and learner promotion. func (r *EtcdClusterReconciler) reconcileClusterState(ctx context.Context, s *reconcileState) (ctrl.Result, error) { logger := log.FromContext(ctx) + memberCnt := 0 if s.memberListResp != nil { memberCnt = len(s.memberListResp.Members) } - targetReplica := *s.sts.Spec.Replicas - var err error - - // The number of replicas in the StatefulSet doesn't match the number of etcd members in the cluster. - if int(targetReplica) != memberCnt { - logger.Info("The expected number of replicas doesn't match the number of etcd members in the cluster", "targetReplica", targetReplica, "memberCnt", memberCnt) - if int(targetReplica) < memberCnt { - logger.Info("An etcd member was added into the cluster, but the StatefulSet hasn't scaled out yet") - newReplicaCount := targetReplica + 1 - logger.Info("Increasing StatefulSet replicas to match the etcd cluster member count", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount) - if _, err := reconcileStatefulSet(ctx, logger, s.cluster, r.Client, newReplicaCount, r.Scheme); err != nil { + currentPodCount := int32(len(s.pods)) + + // Reconcile any discrepancy between pod count and etcd member count. + // This can occur when a previous reconcile was interrupted between the + // etcd member API call and the Pod create/delete. + if int(currentPodCount) != memberCnt { + logger.Info("Pod count and etcd member count differ", + "podCount", currentPodCount, "memberCnt", memberCnt) + if int(currentPodCount) < memberCnt { + // A member was added to etcd but the pod was not yet created. + logger.Info("Creating pod for already-registered etcd member") + nextOrdinal := int(currentPodCount) + if err := createMemberPod(ctx, logger, r.Client, s.cluster, nextOrdinal, r.Scheme); err != nil { return ctrl.Result{}, err } } else { - logger.Info("An etcd member was removed from the cluster, but the StatefulSet hasn't scaled in yet") - newReplicaCount := targetReplica - 1 - logger.Info("Decreasing StatefulSet replicas to remove the unneeded Pod.", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount) - if _, err := reconcileStatefulSet(ctx, logger, s.cluster, r.Client, newReplicaCount, r.Scheme); err != nil { + // A member was removed from etcd but the pod was not yet deleted. + logger.Info("Deleting pod for already-removed etcd member") + podToRemove := s.pods[len(s.pods)-1] + if err := r.Delete(ctx, podToRemove); err != nil { return ctrl.Result{}, err } } @@ -308,93 +260,75 @@ func (r *EtcdClusterReconciler) reconcileClusterState(ctx context.Context, s *re ) if memberCnt > 0 { - // Find the leader status _, leaderStatus = etcdutils.FindLeaderStatus(s.memberHealth, logger) if leaderStatus == nil { - // If the leader is not available, wait for the leader to be elected return ctrl.Result{}, fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) } learner, learnerStatus = etcdutils.FindLearnerStatus(s.memberHealth, logger) if learner > 0 { - // There is at least one learner. Try to promote it if it's ready; otherwise requeue and wait. - logger.Info("Learner found", "learnedID", learner, "learnerStatus", learnerStatus) + logger.Info("Learner found", "learnerID", learner, "learnerStatus", learnerStatus) if etcdutils.IsLearnerReady(leaderStatus, learnerStatus) { logger.Info("Learner is ready to be promoted to voting member", "learnerID", learner) - logger.Info("Promoting the learner member", "learnerID", learner) - eps := clientEndpointsFromStatefulsets(s.sts) + eps := clientEndpointsFromPods(s.cluster.Name, s.cluster.Namespace, s.pods) + // Exclude the learner (last ordinal) from the endpoint list used for promotion. eps = eps[:(len(eps) - 1)] if err := etcdutils.PromoteLearner(eps, learner); err != nil { - // The member is not promoted yet, so we error out and requeue via the caller. return ctrl.Result{}, err } } else { - // Learner is not yet ready. We can't add another learner or proceed further until this one is promoted. logger.Info("The learner member isn't ready to be promoted yet", "learnerID", learner) return ctrl.Result{RequeueAfter: requeueDuration}, nil } } } - if targetReplica == int32(s.cluster.Spec.Size) { + if currentPodCount == int32(s.cluster.Spec.Size) { logger.Info("EtcdCluster is already up-to-date") return ctrl.Result{}, nil } - eps := clientEndpointsFromStatefulsets(s.sts) + eps := clientEndpointsFromPods(s.cluster.Name, s.cluster.Namespace, s.pods) - // If there are no learners left, we can proceed to scale the cluster towards the desired size. - // When there are no members to add, the controller will requeue above and this block won't execute. - if targetReplica < int32(s.cluster.Spec.Size) { - // scale out - _, peerURL := peerEndpointForOrdinalIndex(s.cluster, int(targetReplica)) - targetReplica++ - logger.Info("[Scale out] adding a new learner member to etcd cluster", "peerURLs", peerURL) + if currentPodCount < int32(s.cluster.Spec.Size) { + // Scale out: add a new learner member to etcd, then create its pod. + nextOrdinal := int(currentPodCount) + _, peerURL := peerEndpointForOrdinalIndex(s.cluster, nextOrdinal) + logger.Info("[Scale out] adding a new learner member to etcd cluster", "peerURL", peerURL) if _, err := etcdutils.AddMember(eps, []string{peerURL}, true); err != nil { return ctrl.Result{}, err } - - logger.Info("Learner member added successfully", "peerURLs", peerURL) + logger.Info("Learner member added successfully", "peerURL", peerURL) // gofail: var exceptionAfterMemberAdd struct{} - if s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, targetReplica, r.Scheme); err != nil { + if err := createMemberPod(ctx, logger, r.Client, s.cluster, nextOrdinal, r.Scheme); err != nil { return ctrl.Result{}, err } - return ctrl.Result{RequeueAfter: requeueDuration}, nil } - if targetReplica > int32(s.cluster.Spec.Size) { - // scale in - targetReplica-- - logger = logger.WithValues("targetReplica", targetReplica, "expectedSize", s.cluster.Spec.Size) - + if currentPodCount > int32(s.cluster.Spec.Size) { + // Scale in: remove the last member from etcd, then delete its pod. + podToRemove := s.pods[len(s.pods)-1] memberID := s.memberHealth[memberCnt-1].Status.Header.MemberId + logger.Info("[Scale in] removing one member", "memberID", memberID, "pod", podToRemove.Name) - logger.Info("[Scale in] removing one member", "memberID", memberID) - eps = eps[:targetReplica] - if err := etcdutils.RemoveMember(eps, memberID); err != nil { + epsForRemoval := eps[:len(eps)-1] + if err := etcdutils.RemoveMember(epsForRemoval, memberID); err != nil { return ctrl.Result{}, err } // gofail: var exceptionAfterMemberDelete struct{} - if s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, targetReplica, r.Scheme); err != nil { + if err := r.Delete(ctx, podToRemove); err != nil { return ctrl.Result{}, err } - return ctrl.Result{RequeueAfter: requeueDuration}, nil } - // Ensure every etcd member reports itself healthy before declaring success. - allMembersHealthy, err := areAllMembersHealthy(s.sts, logger) - if err != nil { - return ctrl.Result{}, err - } - - if !allMembersHealthy { - // Requeue until the StatefulSet settles and all members are healthy. + // Ensure every member is healthy before declaring success. + if !areAllMembersHealthy(s.memberHealth) { return ctrl.Result{RequeueAfter: requeueDuration}, nil } @@ -402,35 +336,32 @@ func (r *EtcdClusterReconciler) reconcileClusterState(ctx context.Context, s *re return ctrl.Result{}, nil } -// updateStatus updates the EtcdCluster status based on observed state. -// It is called at the end of each reconciliation cycle. +// updateStatus reflects the current observed state onto EtcdCluster.Status. func (r *EtcdClusterReconciler) updateStatus(ctx context.Context, s *reconcileState) error { logger := log.FromContext(ctx) - // Update ObservedGeneration s.cluster.Status.ObservedGeneration = s.cluster.Generation - // Update replica counts from StatefulSet - if s.sts != nil { - if s.sts.Spec.Replicas != nil { - s.cluster.Status.CurrentReplicas = *s.sts.Spec.Replicas + // Pod counts. + s.cluster.Status.CurrentReplicas = int32(len(s.pods)) + readyCount := int32(0) + for _, pod := range s.pods { + if isPodReady(pod) { + readyCount++ } - s.cluster.Status.ReadyReplicas = s.sts.Status.ReadyReplicas } + s.cluster.Status.ReadyReplicas = readyCount - // Update member count from etcd cluster + // etcd membership. if s.memberListResp != nil { s.cluster.Status.MemberCount = int32(len(s.memberListResp.Members)) - // Update individual member statuses s.cluster.Status.Members = make([]ecv1alpha1.MemberStatus, 0, len(s.memberListResp.Members)) for i, member := range s.memberListResp.Members { memberStatus := ecv1alpha1.MemberStatus{ ID: fmt.Sprintf("%x", member.ID), Name: member.Name, } - - // Find health info for this member if i < len(s.memberHealth) { health := s.memberHealth[i] memberStatus.IsHealthy = health.Health @@ -439,18 +370,15 @@ func (r *EtcdClusterReconciler) updateStatus(ctx context.Context, s *reconcileSt memberStatus.IsLeader = health.Status.Header.MemberId == health.Status.Leader } } - memberStatus.IsLearner = member.IsLearner s.cluster.Status.Members = append(s.cluster.Status.Members, memberStatus) } - // Update leader ID _, leaderStatus := etcdutils.FindLeaderStatus(s.memberHealth, logger) if leaderStatus != nil { s.cluster.Status.LeaderID = fmt.Sprintf("%x", leaderStatus.Leader) } - // Update current version from leader or first healthy member if leaderStatus != nil { s.cluster.Status.CurrentVersion = leaderStatus.Version } else if len(s.memberHealth) > 0 && s.memberHealth[0].Status != nil { @@ -458,23 +386,19 @@ func (r *EtcdClusterReconciler) updateStatus(ctx context.Context, s *reconcileSt } } - // Update conditions r.updateConditions(s) - // Persist status update if err := r.Status().Update(ctx, s.cluster); err != nil { logger.Error(err, "Failed to update EtcdCluster status") return err } - return nil } -// updateConditions sets the standard Kubernetes conditions based on observed state +// updateConditions sets the standard Kubernetes conditions based on observed state. func (r *EtcdClusterReconciler) updateConditions(s *reconcileState) { now := metav1.Now() - // Determine if cluster is available (has quorum and healthy members) availableCondition := metav1.Condition{ Type: "Available", Status: metav1.ConditionFalse, @@ -491,18 +415,19 @@ func (r *EtcdClusterReconciler) updateConditions(s *reconcileState) { healthyCount++ } } - quorum := (len(s.memberListResp.Members) / 2) + 1 if healthyCount >= quorum { availableCondition.Status = metav1.ConditionTrue availableCondition.Reason = "ClusterAvailable" - availableCondition.Message = fmt.Sprintf("Etcd cluster has %d/%d healthy members with quorum", healthyCount, len(s.memberListResp.Members)) + availableCondition.Message = fmt.Sprintf("Etcd cluster has %d/%d healthy members with quorum", + healthyCount, len(s.memberListResp.Members)) } else { - availableCondition.Message = fmt.Sprintf("Etcd cluster has %d/%d healthy members, quorum requires %d", healthyCount, len(s.memberListResp.Members), quorum) + availableCondition.Message = fmt.Sprintf( + "Etcd cluster has %d/%d healthy members, quorum requires %d", + healthyCount, len(s.memberListResp.Members), quorum) } } - // Determine if cluster is progressing (scaling or upgrading) progressingCondition := metav1.Condition{ Type: "Progressing", Status: metav1.ConditionFalse, @@ -512,34 +437,31 @@ func (r *EtcdClusterReconciler) updateConditions(s *reconcileState) { Message: "Etcd cluster is stable", } - if s.sts != nil && s.sts.Spec.Replicas != nil { - currentReplicas := *s.sts.Spec.Replicas - desiredSize := int32(s.cluster.Spec.Size) - - if currentReplicas != desiredSize { - progressingCondition.Status = metav1.ConditionTrue - progressingCondition.Reason = "ScalingInProgress" - progressingCondition.Message = fmt.Sprintf("Scaling from %d to %d replicas", currentReplicas, desiredSize) - } else if s.memberListResp != nil && int32(len(s.memberListResp.Members)) != desiredSize { - progressingCondition.Status = metav1.ConditionTrue - progressingCondition.Reason = "MembershipChanging" - progressingCondition.Message = fmt.Sprintf("Etcd membership changing: %d members, target %d", len(s.memberListResp.Members), desiredSize) - } + currentPodCount := int32(len(s.pods)) + desiredSize := int32(s.cluster.Spec.Size) + + if currentPodCount != desiredSize { + progressingCondition.Status = metav1.ConditionTrue + progressingCondition.Reason = "ScalingInProgress" + progressingCondition.Message = fmt.Sprintf("Scaling from %d to %d pods", currentPodCount, desiredSize) + } else if s.memberListResp != nil && int32(len(s.memberListResp.Members)) != desiredSize { + progressingCondition.Status = metav1.ConditionTrue + progressingCondition.Reason = "MembershipChanging" + progressingCondition.Message = fmt.Sprintf("Etcd membership changing: %d members, target %d", + len(s.memberListResp.Members), desiredSize) + } - // Check for learners (indicates scaling in progress) - if s.memberListResp != nil { - for _, member := range s.memberListResp.Members { - if member.IsLearner { - progressingCondition.Status = metav1.ConditionTrue - progressingCondition.Reason = "LearnerPromotion" - progressingCondition.Message = "Waiting for learner member to be promoted" - break - } + if s.memberListResp != nil { + for _, member := range s.memberListResp.Members { + if member.IsLearner { + progressingCondition.Status = metav1.ConditionTrue + progressingCondition.Reason = "LearnerPromotion" + progressingCondition.Message = "Waiting for learner member to be promoted" + break } } } - // Determine if cluster is degraded degradedCondition := metav1.Condition{ Type: "Degraded", Status: metav1.ConditionFalse, @@ -550,13 +472,12 @@ func (r *EtcdClusterReconciler) updateConditions(s *reconcileState) { } if s.memberListResp != nil && len(s.memberHealth) > 0 { - unhealthyMembers := []string{} + var unhealthyMembers []string for _, health := range s.memberHealth { if !health.Health && health.Status != nil { unhealthyMembers = append(unhealthyMembers, fmt.Sprintf("%x", health.Status.Header.MemberId)) } } - if len(unhealthyMembers) > 0 { degradedCondition.Status = metav1.ConditionTrue degradedCondition.Reason = "UnhealthyMembers" @@ -564,13 +485,12 @@ func (r *EtcdClusterReconciler) updateConditions(s *reconcileState) { } } - // Update or append conditions meta.SetStatusCondition(&s.cluster.Status.Conditions, availableCondition) meta.SetStatusCondition(&s.cluster.Status.Conditions, progressingCondition) meta.SetStatusCondition(&s.cluster.Status.Conditions, degradedCondition) } -// isCertManagerCRDPresent checks if cert-manager CRDs are installed in the cluster +// isCertManagerCRDPresent checks if cert-manager CRDs are installed in the cluster. func isCertManagerCRDPresent(mgr ctrl.Manager) bool { gvk := certv1.SchemeGroupVersion.WithKind("Certificate") _, err := mgr.GetRESTMapper().RESTMapping(gvk.GroupKind(), gvk.Version) @@ -584,18 +504,14 @@ func (r *EtcdClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { builder := ctrl.NewControllerManagedBy(mgr). For(&ecv1alpha1.EtcdCluster{}). - Owns(&appsv1.StatefulSet{}). - Owns(&corev1.Service{}). - Owns(&corev1.ConfigMap{}) + Owns(&corev1.Pod{}). + Owns(&corev1.PersistentVolumeClaim{}). + Owns(&corev1.Service{}) - // Conditionally watch cert-manager Certificate resources if CRDs are installed - // This allows the controller to react to Certificate status changes when using cert-manager provider if isCertManagerCRDPresent(mgr) { - // cert-manager CRDs are installed, add Certificate watch builder = builder.Owns(&certv1.Certificate{}) setupLog.Info("cert-manager CRDs detected, enabling Certificate watches") } else { - // cert-manager CRDs not installed, skip Certificate watch setupLog.Info("cert-manager CRDs not detected, only auto provider will be available. Restart the controller after cert-manager CRDs are installed") } diff --git a/internal/controller/etcdcluster_controller_test.go b/internal/controller/etcdcluster_controller_test.go index 892e321a..a025b8a7 100644 --- a/internal/controller/etcdcluster_controller_test.go +++ b/internal/controller/etcdcluster_controller_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -33,186 +32,127 @@ import ( ecv1alpha1 "go.etcd.io/etcd-operator/api/v1alpha1" ) -// TestFetchAndValidateState describes the scenarios for the fetchAndValidateState -// helper. Each sub-test will set up a fake client with different existing -// resources and assert on the returned state, result and error. +// TestFetchAndValidateState verifies the fetchAndValidateState helper across +// a range of conditions (missing cluster, no pods, pods owned by this cluster, +// pods owned by a different cluster, and version-upgrade validation). func TestFetchAndValidateState(t *testing.T) { scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) - _ = appsv1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) + // helper to build a minimal owned pod with a specific etcd image tag. + ownedPod := func(clusterName, namespace, uid, imageTag string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName + "-0", + Namespace: namespace, + Labels: map[string]string{ + "app": clusterName, + "controller": clusterName, + }, + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: ecv1alpha1.GroupVersion.String(), + Kind: "EtcdCluster", + Name: clusterName, + UID: types.UID(uid), + Controller: pointerToBool(true), + }}, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "etcd", Image: "gcr.io/etcd-development/etcd:" + imageTag}, + }, + }, + } + } + cases := []struct { name string req ctrl.Request ec *ecv1alpha1.EtcdCluster - sts *appsv1.StatefulSet - assert func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) + pods []*corev1.Pod + assert func(t *testing.T, state *reconcileState, res ctrl.Result, err error) }{ { name: "EtcdCluster Not Found", req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, _ *ecv1alpha1.EtcdCluster, _ *appsv1.StatefulSet) { + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { assert.Nil(t, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) }, }, { - name: "StatefulSet Not Found", + name: "No Pods Found", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "1", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"}, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "1"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"}, }, req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, _ *appsv1.StatefulSet) { + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { require.NotNil(t, state) - assert.Equal(t, ec.Name, state.cluster.Name) - assert.Nil(t, state.sts) + assert.Equal(t, "etcd", state.cluster.Name) + assert.Empty(t, state.pods) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) }, }, { - name: "Resources Exist and Owned", + name: "Pod Exists and Owned", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"}, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"}, }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "3.5.17")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { require.NotNil(t, state) - assert.Equal(t, ec.Name, state.cluster.Name) - require.NotNil(t, state.sts) - assert.Equal(t, sts.Name, state.sts.Name) + assert.Equal(t, "etcd", state.cluster.Name) + assert.Len(t, state.pods, 1) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) }, }, { - name: "StatefulSet Not Owned", + name: "Pod Not Owned By This Cluster Is Ignored", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "3", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"}, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "3"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"}, }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - }, - }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, _ *ecv1alpha1.EtcdCluster, _ *appsv1.StatefulSet) { - assert.Nil(t, state) - assert.Error(t, err) - assert.Contains(t, err.Error(), "not controlled") + // Pod has different UID owner → filtered out by listOwnedPods. + pods: []*corev1.Pod{ownedPod("etcd", "default", "other-uid", "3.5.17")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { + require.NotNil(t, state) + assert.Empty(t, state.pods) + assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) }, }, { name: "Valid upgrade path", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.6.17"}, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.6.17"}, }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Image: "gcr.io/etcd-development/etcd:3.5.17"}, - }, - }, - }, - }, - }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "3.5.17")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { require.NotNil(t, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) }, }, { - name: "Cannot parse StatefulSet image tag", + name: "Cannot parse pod image tag", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.6.17"}, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Image: "gcr.io/etcd-development/etcd#notag"}, - }, - }, - }, - }, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.6.17"}, }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { + // Image has no ":" so version can't be extracted; state is returned but no error. require.NotNil(t, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) @@ -221,40 +161,13 @@ func TestFetchAndValidateState(t *testing.T) { { name: "Invalid upgrade path", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.7.1"}, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Image: "gcr.io/etcd-development/etcd:3.5.17"}, - }, - }, - }, - }, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.7.1"}, }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { - require.Nil(t, state) + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "3.5.17")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { + assert.Nil(t, state) assert.Error(t, err) assert.Equal(t, ctrl.Result{}, res) }, @@ -262,40 +175,13 @@ func TestFetchAndValidateState(t *testing.T) { { name: "Downgrades are unsupported", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.1"}, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Image: "gcr.io/etcd-development/etcd:3.6.10"}, - }, - }, - }, - }, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.1"}, }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { - require.Nil(t, state) + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "3.6.10")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { + assert.Nil(t, state) assert.Error(t, err) assert.Equal(t, ctrl.Result{}, res) }, @@ -303,39 +189,12 @@ func TestFetchAndValidateState(t *testing.T) { { name: "Upgrade with non-semver versions", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "foo"}, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Image: "gcr.io/etcd-development/etcd:bar"}, - }, - }, - }, - }, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "foo"}, }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "bar")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { require.NotNil(t, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) @@ -344,39 +203,12 @@ func TestFetchAndValidateState(t *testing.T) { { name: "Equal tags are a no-op even if they are not semver", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - UID: "2", - }, - Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "bar"}, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: ecv1alpha1.GroupVersion.String(), - Kind: "EtcdCluster", - Name: "etcd", - UID: "2", - Controller: pointerToBool(true), - }, - }, - }, - Spec: appsv1.StatefulSetSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - {Image: "gcr.io/etcd-development/etcd:bar"}, - }, - }, - }, - }, + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "default", UID: "2"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "bar"}, }, - req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, - assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) { + pods: []*corev1.Pod{ownedPod("etcd", "default", "2", "bar")}, + req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}}, + assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error) { require.NotNil(t, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) @@ -392,8 +224,8 @@ func TestFetchAndValidateState(t *testing.T) { if tc.ec != nil { objs = append(objs, tc.ec) } - if tc.sts != nil { - objs = append(objs, tc.sts) + for _, pod := range tc.pods { + objs = append(objs, pod) } builder := fake.NewClientBuilder().WithScheme(scheme) @@ -401,21 +233,20 @@ func TestFetchAndValidateState(t *testing.T) { builder.WithObjects(objs...) } fakeClient := builder.Build() - r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme} state, res, err := r.fetchAndValidateState(ctx, tc.req) - tc.assert(t, state, res, err, tc.ec, tc.sts) + tc.assert(t, state, res, err) }) } } -// TestBootstrapStatefulSet outlines tests for ensuring StatefulSet and Service -// creation and bootstrap logic. -func TestBootstrapStatefulSet(t *testing.T) { +// TestBootstrapCluster verifies the bootstrapCluster helper creates the first +// member pod and the headless Service when none exist, and is a no-op when the +// cluster is already bootstrapped. +func TestBootstrapCluster(t *testing.T) { scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) - _ = appsv1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) ec := &ecv1alpha1.EtcdCluster{ @@ -425,48 +256,52 @@ func TestBootstrapStatefulSet(t *testing.T) { UID: "1", }, Spec: ecv1alpha1.EtcdClusterSpec{ - Size: 1, + Size: 3, Version: "3.5.17", }, } - t.Run("Initial Creation", func(t *testing.T) { + t.Run("Initial Creation — no pods exist", func(t *testing.T) { ctx := t.Context() - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme} state := &reconcileState{cluster: ec} - res, err := r.bootstrapStatefulSet(ctx, state) + res, err := r.bootstrapCluster(ctx, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{RequeueAfter: requeueDuration}, res) - require.NotNil(t, state.sts) - assert.NotNil(t, state.sts.Spec.Replicas) - assert.Equal(t, int32(0), *state.sts.Spec.Replicas) - - sts := &appsv1.StatefulSet{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, sts) - assert.NoError(t, err) - assert.NotNil(t, sts.Spec.Replicas) - assert.Equal(t, int32(0), *sts.Spec.Replicas) + // Headless Service should be created. svc := &corev1.Service{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, svc) - assert.NoError(t, err) + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, svc)) assert.Equal(t, "None", svc.Spec.ClusterIP) - cm := &corev1.ConfigMap{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, cm) - assert.NoError(t, err) + // Pod-0 should be created. + pod := &corev1.Pod{} + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: "etcd-0", Namespace: ec.Namespace}, pod)) + + // Verify per-pod env vars contain the expected etcd bootstrap config. + envMap := make(map[string]string) + for _, e := range pod.Spec.Containers[0].Env { + envMap[e.Name] = e.Value + } + assert.Equal(t, string(etcdClusterStateNew), envMap["ETCD_INITIAL_CLUSTER_STATE"]) + assert.Contains(t, envMap["ETCD_INITIAL_CLUSTER"], "etcd-0=") + assert.Equal(t, etcdDataDir, envMap["ETCD_DATA_DIR"]) + + // Pod must be owned by the EtcdCluster. + require.Len(t, pod.OwnerReferences, 1) + assert.Equal(t, ec.Name, pod.OwnerReferences[0].Name) }) - t.Run("Bootstrap from Zero", func(t *testing.T) { + t.Run("Already Bootstrapped — pods exist", func(t *testing.T) { ctx := t.Context() - sts := &appsv1.StatefulSet{ + pod0 := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: ec.Name, + Name: "etcd-0", Namespace: ec.Namespace, + Labels: etcdClusterLabels(ec), OwnerReferences: []metav1.OwnerReference{{ APIVersion: ecv1alpha1.GroupVersion.String(), Kind: "EtcdCluster", @@ -475,52 +310,34 @@ func TestBootstrapStatefulSet(t *testing.T) { Controller: pointerToBool(true), }}, }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(0), - }, - Status: appsv1.StatefulSetStatus{ReadyReplicas: 1}, + } + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: ec.Name, Namespace: ec.Namespace}, + Spec: corev1.ServiceSpec{ClusterIP: "None"}, } - cm := newEtcdClusterState(ec, 0) - - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec, sts, cm).Build() + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec, pod0, svc).Build() r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme} - state := &reconcileState{cluster: ec, sts: sts} + state := &reconcileState{cluster: ec, pods: []*corev1.Pod{pod0}} - oldRV := sts.ResourceVersion - res, err := r.bootstrapStatefulSet(ctx, state) + res, err := r.bootstrapCluster(ctx, state) assert.NoError(t, err) - assert.Equal(t, ctrl.Result{RequeueAfter: requeueDuration}, res) + assert.Equal(t, ctrl.Result{}, res) // no requeue - updatedSTS := &appsv1.StatefulSet{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, updatedSTS) - assert.NoError(t, err) - assert.Equal(t, int32(1), *updatedSTS.Spec.Replicas) - assert.Equal(t, int32(1), updatedSTS.Status.ReadyReplicas) - assert.NotEqual(t, oldRV, updatedSTS.ResourceVersion) - - require.NotNil(t, state.sts) - assert.Equal(t, int32(1), *state.sts.Spec.Replicas) - - svc := &corev1.Service{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, svc) - assert.NoError(t, err) - assert.Equal(t, "None", svc.Spec.ClusterIP) - - cmUpdated := &corev1.ConfigMap{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, cmUpdated) - assert.NoError(t, err) - assert.Equal(t, "new", cmUpdated.Data["ETCD_INITIAL_CLUSTER_STATE"]) - assert.Contains(t, cmUpdated.Data["ETCD_INITIAL_CLUSTER"], "etcd-0=") + // No additional pods should have been created. + podList := &corev1.PodList{} + require.NoError(t, fakeClient.List(ctx, podList, client.InNamespace(ec.Namespace))) + assert.Len(t, podList.Items, 1) }) - t.Run("Resources Already Exist", func(t *testing.T) { + t.Run("Service created if missing even when pods exist", func(t *testing.T) { ctx := t.Context() - sts := &appsv1.StatefulSet{ + pod0 := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: ec.Name, + Name: "etcd-0", Namespace: ec.Namespace, + Labels: etcdClusterLabels(ec), OwnerReferences: []metav1.OwnerReference{{ APIVersion: ecv1alpha1.GroupVersion.String(), Kind: "EtcdCluster", @@ -529,45 +346,18 @@ func TestBootstrapStatefulSet(t *testing.T) { Controller: pointerToBool(true), }}, }, - Spec: appsv1.StatefulSetSpec{Replicas: pointerToInt32(1)}, - Status: appsv1.StatefulSetStatus{ReadyReplicas: 1}, } - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: ec.Name, - Namespace: ec.Namespace, - }, - Spec: corev1.ServiceSpec{ClusterIP: "None"}, - } - - cm := newEtcdClusterState(ec, 1) - - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec, sts.DeepCopy(), svc.DeepCopy(), cm.DeepCopy()).Build() + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec, pod0).Build() r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme} - state := &reconcileState{cluster: ec, sts: sts} + state := &reconcileState{cluster: ec, pods: []*corev1.Pod{pod0}} - // Capture current objects to verify no updates occur. - storedSTS := sts.DeepCopy() - storedSvc := svc.DeepCopy() - storedCM := cm.DeepCopy() - res, err := r.bootstrapStatefulSet(ctx, state) + res, err := r.bootstrapCluster(ctx, state) assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, res) - fetchedSTS := &appsv1.StatefulSet{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, fetchedSTS) - assert.NoError(t, err) - assert.Equal(t, storedSTS.Spec, fetchedSTS.Spec) - - fetchedSvc := &corev1.Service{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, fetchedSvc) - assert.NoError(t, err) - assert.Equal(t, storedSvc.Spec, fetchedSvc.Spec) - - fetchedCM := &corev1.ConfigMap{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, fetchedCM) - assert.NoError(t, err) - assert.Equal(t, storedCM.Data, fetchedCM.Data) + svc := &corev1.Service{} + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, svc)) + assert.Equal(t, "None", svc.Spec.ClusterIP) }) } diff --git a/internal/controller/utils.go b/internal/controller/utils.go index 3092c379..94d973b5 100644 --- a/internal/controller/utils.go +++ b/internal/controller/utils.go @@ -8,13 +8,13 @@ import ( "maps" "net" "slices" + "sort" "strconv" "strings" "time" "github.com/coreos/go-semver/semver" "github.com/go-logr/logr" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" @@ -46,258 +46,170 @@ const ( etcdClusterStateExisting etcdClusterState = "existing" ) -func reconcileStatefulSet(ctx context.Context, logger logr.Logger, ec *ecv1alpha1.EtcdCluster, c client.Client, replicas int32, scheme *runtime.Scheme) (*appsv1.StatefulSet, error) { +// memberPodName returns the deterministic name for an etcd member pod. +// The naming convention mirrors StatefulSet so that headless-service DNS is identical. +func memberPodName(clusterName string, ordinal int) string { + return fmt.Sprintf("%s-%d", clusterName, ordinal) +} - // prepare/update configmap for StatefulSet - err := applyEtcdClusterState(ctx, ec, int(replicas), c, scheme, logger) - if err != nil { - return nil, err - } +// pvcNameForMember returns the PVC name for a given pod, matching the naming +// convention that StatefulSet VolumeClaimTemplates would have produced. +func pvcNameForMember(podName string) string { + return fmt.Sprintf("%s-%s", volumeName, podName) +} - // Add server and peer certificate - err = applyEtcdMemberCerts(ctx, ec, c) +// podOrdinal extracts the numeric ordinal from a pod name of the form +// "{clusterName}-{ordinal}". Returns -1 on parse failure. +func podOrdinal(podName, clusterName string) int { + suffix := strings.TrimPrefix(podName, clusterName+"-") + ordinal, err := strconv.Atoi(suffix) if err != nil { - return nil, err + return -1 } + return ordinal +} - // Create Update StatefulSet - err = createOrPatchStatefulSet(ctx, logger, ec, c, replicas, scheme) - if err != nil { - return nil, err +// listOwnedPods returns all Pods that are owned (via OwnerReference) by ec, +// sorted in ascending ordinal order. +func listOwnedPods(ctx context.Context, c client.Client, ec *ecv1alpha1.EtcdCluster) ([]*corev1.Pod, error) { + podList := &corev1.PodList{} + if err := c.List(ctx, podList, + client.InNamespace(ec.Namespace), + client.MatchingLabels(etcdClusterLabels(ec)), + ); err != nil { + return nil, fmt.Errorf("failed to list pods for cluster %s: %w", ec.Name, err) } - // Wait for statefulset to be ready - err = waitForStatefulSetReady(ctx, logger, c, ec.Name, ec.Namespace) - if err != nil { - return nil, err + var owned []*corev1.Pod + for i := range podList.Items { + if metav1.IsControlledBy(&podList.Items[i], ec) { + owned = append(owned, &podList.Items[i]) + } } - // Return latest Stateful set. (This is to ensure that we return the latest statefulset for next operation to act on) - return getStatefulSet(ctx, c, ec.Name, ec.Namespace) + sort.Slice(owned, func(i, j int) bool { + return podOrdinal(owned[i].Name, ec.Name) < podOrdinal(owned[j].Name, ec.Name) + }) + return owned, nil } -func defaultArgs(name string) []string { - return []string{ - "--name=$(POD_NAME)", - "--listen-peer-urls=http://0.0.0.0:2380", // TODO: only listen on 127.0.0.1 and host IP - "--listen-client-urls=http://0.0.0.0:2379", // TODO: only listen on 127.0.0.1 and host IP - fmt.Sprintf("--initial-advertise-peer-urls=http://$(POD_NAME).%s.$(POD_NAMESPACE).svc.cluster.local:2380", name), - fmt.Sprintf("--advertise-client-urls=http://$(POD_NAME).%s.$(POD_NAMESPACE).svc.cluster.local:2379", name), +// etcdClusterLabels returns the label set applied to every member pod and used +// by the headless Service selector. +func etcdClusterLabels(ec *ecv1alpha1.EtcdCluster) map[string]string { + return map[string]string{ + "app": ec.Name, + "controller": ec.Name, } } -func RemoveStringFromSlice(s []string, str string) []string { - for i := range s { - defaultArg := getArgName(s[i]) - if defaultArg == str { - s = slices.Delete(s, i, i+1) - break - } - } - return s -} +// createMemberPod creates a single etcd member Pod (and, if needed, its PVC) +// for the given ordinal index. It does not wait for the pod to become ready; +// the caller is responsible for requeueing until the pod is healthy. +func createMemberPod(ctx context.Context, logger logr.Logger, c client.Client, ec *ecv1alpha1.EtcdCluster, ordinal int, scheme *runtime.Scheme) error { + podName := memberPodName(ec.Name, ordinal) -func getArgName(s string) string { - idx := strings.Index(s, "=") + // Ensure TLS certificates exist before the pod mounts them. + if err := applyEtcdMemberCerts(ctx, ec, c); err != nil { + return err + } - if idx != -1 { - return s[:idx] + // Create per-member PVC for ReadWriteOnce storage. + if ec.Spec.StorageSpec != nil && ec.Spec.StorageSpec.AccessModes != corev1.ReadWriteMany { + if err := createPVCForMember(ctx, c, ec, podName, scheme); err != nil { + return err + } } - idx = strings.Index(s, " ") - if idx != -1 { - return s[:idx] + state := etcdClusterStateExisting + if ordinal == 0 { + state = etcdClusterStateNew } - // Assume arg is bool switch if idx is still -1 - return strings.TrimSpace(s) -} + // Build the initial-cluster value: all peers from ordinal 0 to this one. + var clusterParts []string + for i := 0; i <= ordinal; i++ { + name, peerURL := peerEndpointForOrdinalIndex(ec, i) + clusterParts = append(clusterParts, fmt.Sprintf("%s=%s", name, peerURL)) + } -func createArgs(name string, etcdOptions []string) []string { - defaultArgs := defaultArgs(name) - if len(etcdOptions) > 0 { - var argName string - // Remove default arguments if conflicts with user supplied - for i := range etcdOptions { - argName = getArgName(etcdOptions[i]) - defaultArgs = RemoveStringFromSlice(defaultArgs, argName) - } + pod := buildMemberPod(ec, podName, state, strings.Join(clusterParts, ",")) + if err := controllerutil.SetControllerReference(ec, pod, scheme); err != nil { + return err } - defaultArgs = append(defaultArgs, etcdOptions...) - return defaultArgs + + logger.Info("Creating member pod", "name", podName, "ordinal", ordinal, "state", state) + return c.Create(ctx, pod) } -func createOrPatchStatefulSet(ctx context.Context, logger logr.Logger, ec *ecv1alpha1.EtcdCluster, c client.Client, replicas int32, scheme *runtime.Scheme) error { - sts := &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: ec.Name, - Namespace: ec.Namespace, - }, +// buildMemberPod constructs the Pod object for a single etcd member. +func buildMemberPod(ec *ecv1alpha1.EtcdCluster, podName string, state etcdClusterState, initialCluster string) *corev1.Pod { + // Start with custom labels then overwrite with the mandatory defaults so + // that the headless-service selector is always satisfied. + labels := make(map[string]string) + if ec.Spec.PodTemplate != nil && ec.Spec.PodTemplate.Metadata != nil { + maps.Copy(labels, ec.Spec.PodTemplate.Metadata.Labels) } + maps.Copy(labels, etcdClusterLabels(ec)) - labels := map[string]string{ - "app": ec.Name, - "controller": ec.Name, + // Annotations are purely from the PodTemplate; nil when not provided. + var annotations map[string]string + if ec.Spec.PodTemplate != nil && ec.Spec.PodTemplate.Metadata != nil && + len(ec.Spec.PodTemplate.Metadata.Annotations) > 0 { + annotations = ec.Spec.PodTemplate.Metadata.Annotations } - podSpec := corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "etcd", - Command: []string{"/usr/local/bin/etcd"}, - Args: createArgs(ec.Name, ec.Spec.EtcdOptions), - Image: fmt.Sprintf("%s:%s", ec.Spec.ImageRegistry, ec.Spec.Version), - Env: []corev1.EnvVar{ - { - Name: "POD_NAME", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "metadata.name", - }, - }, - }, - { - Name: "POD_NAMESPACE", - ValueFrom: &corev1.EnvVarSource{ - FieldRef: &corev1.ObjectFieldSelector{ - FieldPath: "metadata.namespace", - }, - }, - }, - }, - EnvFrom: []corev1.EnvFromSource{ - { - ConfigMapRef: &corev1.ConfigMapEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: configMapNameForEtcdCluster(ec), - }, - }, - }, - }, - Ports: []corev1.ContainerPort{ - { - Name: "client", - ContainerPort: 2379, - }, - { - Name: "peer", - ContainerPort: 2380, - }, - }, + envVars := []corev1.EnvVar{ + { + Name: "POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.name"}, }, }, - } - - // mount server and peer certificate secret to each pods of the statefulset via PodSpec - var certVolume []corev1.Volume - serverCertName := getServerCertName(ec.Name) - peerCertName := getPeerCertName(ec.Name) - if ec.Spec.TLS != nil { - serverCertVolume := corev1.Volume{ - Name: "server-secret", - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{SecretName: serverCertName}, - }, - } - peerCertVolume := corev1.Volume{ - Name: "peer-secret", - VolumeSource: corev1.VolumeSource{ - Secret: &corev1.SecretVolumeSource{SecretName: peerCertName}, + { + Name: "POD_NAMESPACE", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.namespace"}, }, - } - certVolume = append(certVolume, serverCertVolume, peerCertVolume) - } - if len(certVolume) != 0 { - podSpec.Volumes = certVolume + }, + {Name: "ETCD_INITIAL_CLUSTER_STATE", Value: string(state)}, + {Name: "ETCD_INITIAL_CLUSTER", Value: initialCluster}, + {Name: "ETCD_DATA_DIR", Value: etcdDataDir}, + } + + container := corev1.Container{ + Name: "etcd", + Image: fmt.Sprintf("%s:%s", ec.Spec.ImageRegistry, ec.Spec.Version), + Command: []string{"/usr/local/bin/etcd"}, + Args: createArgs(ec.Name, ec.Spec.EtcdOptions), + Env: envVars, + Ports: []corev1.ContainerPort{ + {Name: "client", ContainerPort: 2379}, + {Name: "peer", ContainerPort: 2380}, + }, } - // Prepare pod template metadata - podTemplateMetadata := metav1.ObjectMeta{ - Labels: make(map[string]string), - Annotations: make(map[string]string), + podSpec := corev1.PodSpec{ + Containers: []corev1.Container{container}, } - // Pod Scheduling specs + // Pod scheduling customisation. if ec.Spec.PodTemplate != nil && ec.Spec.PodTemplate.Spec != nil { podSpec.Affinity = ec.Spec.PodTemplate.Spec.Affinity podSpec.NodeSelector = ec.Spec.PodTemplate.Spec.NodeSelector podSpec.Tolerations = ec.Spec.PodTemplate.Spec.Tolerations } - // Apply custom metadata from PodTemplate if provided - if ec.Spec.PodTemplate != nil && ec.Spec.PodTemplate.Metadata != nil { - // Apply custom labels - if len(ec.Spec.PodTemplate.Metadata.Labels) > 0 { - maps.Copy(podTemplateMetadata.Labels, ec.Spec.PodTemplate.Metadata.Labels) - } - - // Apply annotations - if len(ec.Spec.PodTemplate.Metadata.Annotations) > 0 { - podTemplateMetadata.Annotations = ec.Spec.PodTemplate.Metadata.Annotations - } - } - - // Apply default labels - maps.Copy(podTemplateMetadata.Labels, labels) - - stsSpec := appsv1.StatefulSetSpec{ - Replicas: &replicas, - ServiceName: ec.Name, - Selector: &metav1.LabelSelector{ - MatchLabels: labels, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: podTemplateMetadata, - Spec: podSpec, - }, - } - + // Persistent storage volumes. if ec.Spec.StorageSpec != nil { - - stsSpec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{ - Name: volumeName, - MountPath: etcdDataDir, - SubPathExpr: "$(POD_NAME)", + podSpec.Containers[0].VolumeMounts = []corev1.VolumeMount{{ + Name: volumeName, + MountPath: etcdDataDir, }} - // Create a new volume claim template - if ec.Spec.StorageSpec.VolumeSizeRequest.Cmp(resource.MustParse("1Mi")) < 0 { - return fmt.Errorf("VolumeSizeRequest must be at least 1Mi") - } - - if ec.Spec.StorageSpec.VolumeSizeLimit.IsZero() { - logger.Info("VolumeSizeLimit is not set. Setting it to VolumeSizeRequest") - ec.Spec.StorageSpec.VolumeSizeLimit = ec.Spec.StorageSpec.VolumeSizeRequest - } - - pvcResources := corev1.VolumeResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceStorage: ec.Spec.StorageSpec.VolumeSizeRequest, - }, - Limits: corev1.ResourceList{ - corev1.ResourceStorage: ec.Spec.StorageSpec.VolumeSizeLimit, - }, - } switch ec.Spec.StorageSpec.AccessModes { - case corev1.ReadWriteOnce, "": - stsSpec.VolumeClaimTemplates = []corev1.PersistentVolumeClaim{ - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName}, - Spec: corev1.PersistentVolumeClaimSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, - Resources: pvcResources, - }, - }, - } - - if ec.Spec.StorageSpec.StorageClassName != "" { - stsSpec.VolumeClaimTemplates[0].Spec.StorageClassName = &ec.Spec.StorageSpec.StorageClassName - } case corev1.ReadWriteMany: - if ec.Spec.StorageSpec.PVCName == "" { - return fmt.Errorf("PVCName must be set when AccessModes is ReadWriteMany") - } - stsSpec.Template.Spec.Volumes = append(stsSpec.Template.Spec.Volumes, corev1.Volume{ + // All pods share a single pre-existing PVC. + podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{ Name: volumeName, VolumeSource: corev1.VolumeSource{ PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ @@ -305,37 +217,104 @@ func createOrPatchStatefulSet(ctx context.Context, logger logr.Logger, ec *ecv1a }, }, }) - default: - return fmt.Errorf("AccessMode %s is not supported", ec.Spec.StorageSpec.AccessModes) + default: // ReadWriteOnce + podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{ + Name: volumeName, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcNameForMember(podName), + }, + }, + }) } } - logger.Info("Now creating/updating statefulset", "name", ec.Name, "namespace", ec.Namespace, "replicas", replicas) - _, err := controllerutil.CreateOrPatch(ctx, c, sts, func() error { - // Define or update the desired spec - sts.ObjectMeta = metav1.ObjectMeta{ - Name: ec.Name, + // TLS certificate volumes (mounted as secrets). + if ec.Spec.TLS != nil { + podSpec.Volumes = append(podSpec.Volumes, + corev1.Volume{ + Name: "server-secret", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{SecretName: getServerCertName(ec.Name)}, + }, + }, + corev1.Volume{ + Name: "peer-secret", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{SecretName: getPeerCertName(ec.Name)}, + }, + }, + ) + } + + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: ec.Namespace, + Labels: labels, + Annotations: annotations, + }, + Spec: podSpec, + } +} + +// createPVCForMember creates a PVC for the given pod if one does not already +// exist. Naming mirrors StatefulSet VolumeClaimTemplates: "{volumeName}-{podName}". +func createPVCForMember(ctx context.Context, c client.Client, ec *ecv1alpha1.EtcdCluster, podName string, scheme *runtime.Scheme) error { + pvcName := pvcNameForMember(podName) + + existing := &corev1.PersistentVolumeClaim{} + err := c.Get(ctx, types.NamespacedName{Name: pvcName, Namespace: ec.Namespace}, existing) + if err == nil { + return nil // already exists + } + if !k8serrors.IsNotFound(err) { + return fmt.Errorf("failed to check PVC %s: %w", pvcName, err) + } + + if ec.Spec.StorageSpec.VolumeSizeRequest.Cmp(resource.MustParse("1Mi")) < 0 { + return fmt.Errorf("VolumeSizeRequest must be at least 1Mi") + } + + volumeSizeLimit := ec.Spec.StorageSpec.VolumeSizeLimit + if volumeSizeLimit.IsZero() { + volumeSizeLimit = ec.Spec.StorageSpec.VolumeSizeRequest + } + + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, Namespace: ec.Namespace, - } - sts.Spec = stsSpec + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: ec.Spec.StorageSpec.VolumeSizeRequest, + }, + Limits: corev1.ResourceList{ + corev1.ResourceStorage: volumeSizeLimit, + }, + }, + }, + } - // Set ower reference - if err := controllerutil.SetControllerReference(ec, sts, scheme); err != nil { - return err - } + if ec.Spec.StorageSpec.StorageClassName != "" { + pvc.Spec.StorageClassName = &ec.Spec.StorageSpec.StorageClassName + } - return nil - }) - if err != nil { + if err := controllerutil.SetControllerReference(ec, pvc, scheme); err != nil { return err } - logger.Info("Stateful set created/updated", "name", ec.Name, "namespace", ec.Namespace, "replicas", replicas) - return nil + return c.Create(ctx, pvc) } -func waitForStatefulSetReady(ctx context.Context, logger logr.Logger, r client.Client, name, namespace string) error { - logger.Info("Now checking the readiness of statefulset", "name", name, "namespace", namespace) +// waitForPodReady polls until the given Pod has its Ready condition set to True, +// using an exponential back-off. It is provided as a utility; the primary +// reconcile paths do not block on it, relying on natural requeueing instead. +func waitForPodReady(ctx context.Context, logger logr.Logger, c client.Client, podName, namespace string) error { + logger.Info("Waiting for pod to become ready", "name", podName, "namespace", namespace) backoff := wait.Backoff{ Duration: 3 * time.Second, @@ -344,241 +323,213 @@ func waitForStatefulSetReady(ctx context.Context, logger logr.Logger, r client.C } err := wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) { - // Fetch the StatefulSet - sts, err := getStatefulSet(ctx, r, name, namespace) - if err != nil { + pod := &corev1.Pod{} + if err := c.Get(ctx, types.NamespacedName{Name: podName, Namespace: namespace}, pod); err != nil { return false, err } - - // Check if the StatefulSet is ready - if sts.Status.ReadyReplicas == *sts.Spec.Replicas { - // StatefulSet is ready - logger.Info("StatefulSet is ready", "name", name, "namespace", namespace) + if isPodReady(pod) { + logger.Info("Pod is ready", "name", podName, "namespace", namespace) return true, nil } - - // Log the current status - logger.Info("StatefulSet is not ready", "ReadyReplicas", strconv.Itoa(int(sts.Status.ReadyReplicas)), "DesiredReplicas", strconv.Itoa(int(*sts.Spec.Replicas))) + logger.Info("Pod is not ready yet", "name", podName, "namespace", namespace) return false, nil }) if err != nil { - return fmt.Errorf("StatefulSet %s/%s did not become ready: %w", namespace, name, err) + return fmt.Errorf("pod %s/%s did not become ready: %w", namespace, podName, err) } - return nil } -func createHeadlessServiceIfNotExist(ctx context.Context, logger logr.Logger, c client.Client, ec *ecv1alpha1.EtcdCluster, scheme *runtime.Scheme) error { - service := &corev1.Service{} - err := c.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, service) - - if err != nil { - if k8serrors.IsNotFound(err) { - logger.Info("Headless service does not exist. Creating headless service") - - labels := map[string]string{ - "app": ec.Name, - "controller": ec.Name, - } - // Create the headless service - headlessSvc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: ec.Name, - Namespace: ec.Namespace, - Labels: labels, - }, - Spec: corev1.ServiceSpec{ - ClusterIP: "None", // Key for headless service - Selector: labels, - }, - } - - // Set owner reference - if err := controllerutil.SetControllerReference(ec, headlessSvc, scheme); err != nil { - return err - } - - if createErr := c.Create(ctx, headlessSvc); createErr != nil { - return fmt.Errorf("failed to create headless service: %w", createErr) - } - logger.Info("Headless service created successfully") - - return nil +// isPodReady returns true when the Pod's Ready condition is True. +func isPodReady(pod *corev1.Pod) bool { + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { + return true } - return fmt.Errorf("failed to get headless service: %w", err) } - return nil + return false } -func checkStatefulSetControlledByEtcdOperator(ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) error { - if !metav1.IsControlledBy(sts, ec) { - return fmt.Errorf("StatefulSet %s/%s is not controlled by EtcdCluster %s/%s", sts.Namespace, sts.Name, ec.Namespace, ec.Name) +// clientEndpointsFromPods builds the client endpoint URL for every pod in the +// slice, in the same order. The DNS form is: +// +// http://{podName}.{clusterName}.{namespace}.svc.cluster.local:2379 +func clientEndpointsFromPods(clusterName, namespace string, pods []*corev1.Pod) []string { + if len(pods) == 0 { + return nil } - return nil + eps := make([]string, 0, len(pods)) + for _, pod := range pods { + eps = append(eps, clientEndpointForOrdinal(clusterName, namespace, podOrdinal(pod.Name, clusterName))) + } + return eps } -func configMapNameForEtcdCluster(ec *ecv1alpha1.EtcdCluster) string { - return fmt.Sprintf("%s-state", ec.Name) +// clientEndpointForOrdinal returns the client endpoint URL for a member at the +// given ordinal index. +func clientEndpointForOrdinal(clusterName, namespace string, ordinal int) string { + return fmt.Sprintf("http://%s-%d.%s.%s.svc.cluster.local:2379", + clusterName, ordinal, clusterName, namespace) } -func peerEndpointForOrdinalIndex(ec *ecv1alpha1.EtcdCluster, index int) (string, string) { - name := fmt.Sprintf("%s-%d", ec.Name, index) - return name, fmt.Sprintf("http://%s-%d.%s.%s.svc.cluster.local:2380", - ec.Name, index, ec.Name, ec.Namespace) +// areAllMembersHealthy returns true when every entry in the supplied health +// slice reports healthy. It uses already-fetched health data and does not make +// additional network calls. +func areAllMembersHealthy(memberHealth []etcdutils.EpHealth) bool { + for _, h := range memberHealth { + if !h.Health { + return false + } + } + return true } -func newEtcdClusterState(ec *ecv1alpha1.EtcdCluster, replica int) *corev1.ConfigMap { - // We always add members one by one, so the state is always - // "existing" if replica > 1. - - state := etcdClusterStateNew - if replica > 1 { - state = etcdClusterStateExisting +// healthCheck returns a MemberListResponse and per-endpoint health information +// for the etcd cluster reachable through the given pods. +func healthCheck(clusterName, namespace string, pods []*corev1.Pod, lg klog.Logger) (*clientv3.MemberListResponse, []etcdutils.EpHealth, error) { + if len(pods) == 0 { + return nil, nil, nil } - var initialCluster []string - for i := 0; i < replica; i++ { - name, peerURL := peerEndpointForOrdinalIndex(ec, i) - initialCluster = append(initialCluster, fmt.Sprintf("%s=%s", name, peerURL)) - } + endpoints := clientEndpointsFromPods(clusterName, namespace, pods) - return &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: configMapNameForEtcdCluster(ec), - Namespace: ec.Namespace, - }, - Data: map[string]string{ - "ETCD_INITIAL_CLUSTER_STATE": string(state), - "ETCD_INITIAL_CLUSTER": strings.Join(initialCluster, ","), - "ETCD_DATA_DIR": etcdDataDir, - }, + memberlistResp, err := etcdutils.MemberList(endpoints) + if err != nil { + return nil, nil, err } -} + memberCnt := len(memberlistResp.Members) -func applyEtcdClusterState(ctx context.Context, ec *ecv1alpha1.EtcdCluster, replica int, c client.Client, scheme *runtime.Scheme, logger logr.Logger) error { - cm := newEtcdClusterState(ec, replica) + // Use the smaller of the two counts: pods that are starting up may not yet + // appear in the member list and already-removed members may have no pod. + cnt := min(len(pods), memberCnt) + lg.Info("health checking", "podCount", len(pods), "len(members)", memberCnt) + endpoints = endpoints[:cnt] - // Set owner reference - if err := controllerutil.SetControllerReference(ec, cm, scheme); err != nil { - return err + healthInfos, err := etcdutils.ClusterHealth(endpoints) + if err != nil { + return memberlistResp, nil, err } - logger.Info("Now updating configmap", "name", configMapNameForEtcdCluster(ec), "namespace", ec.Namespace) - err := c.Get(ctx, types.NamespacedName{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, &corev1.ConfigMap{}) - if err != nil { - if k8serrors.IsNotFound(err) { - createErr := c.Create(ctx, cm) - return createErr + var memberErrors []error + for _, healthInfo := range healthInfos { + if !healthInfo.Health { + memberErrors = append(memberErrors, errors.New(healthInfo.String())) } - return err + lg.Info(healthInfo.String()) } - updateErr := c.Update(ctx, cm) - return updateErr + return memberlistResp, healthInfos, utilerrors.NewAggregate(memberErrors) } -func clientEndpointForOrdinalIndex(sts *appsv1.StatefulSet, index int) string { - return fmt.Sprintf("http://%s-%d.%s.%s.svc.cluster.local:2379", - sts.Name, index, sts.Name, sts.Namespace) -} +// --------------------------------------------------------------------------- +// etcd argument helpers +// --------------------------------------------------------------------------- -func getStatefulSet(ctx context.Context, c client.Client, name, namespace string) (*appsv1.StatefulSet, error) { - sts := &appsv1.StatefulSet{} - err := c.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, sts) - if err != nil { - return nil, err +func defaultArgs(name string) []string { + return []string{ + "--name=$(POD_NAME)", + "--listen-peer-urls=http://0.0.0.0:2380", + "--listen-client-urls=http://0.0.0.0:2379", + fmt.Sprintf("--initial-advertise-peer-urls=http://$(POD_NAME).%s.$(POD_NAMESPACE).svc.cluster.local:2380", name), + fmt.Sprintf("--advertise-client-urls=http://$(POD_NAME).%s.$(POD_NAMESPACE).svc.cluster.local:2379", name), } - return sts, nil } -func clientEndpointsFromStatefulsets(sts *appsv1.StatefulSet) []string { - var endpoints []string - replica := int(*sts.Spec.Replicas) - if replica > 0 { - for i := 0; i < replica; i++ { - endpoints = append(endpoints, clientEndpointForOrdinalIndex(sts, i)) +func RemoveStringFromSlice(s []string, str string) []string { + for i := range s { + defaultArg := getArgName(s[i]) + if defaultArg == str { + s = slices.Delete(s, i, i+1) + break } } - return endpoints + return s } -func areAllMembersHealthy(sts *appsv1.StatefulSet, logger logr.Logger) (bool, error) { - _, health, err := healthCheck(sts, logger) - if err != nil { - return false, err +func getArgName(s string) string { + idx := strings.Index(s, "=") + if idx != -1 { + return s[:idx] } - - for _, h := range health { - if !h.Health { - return false, nil - } + idx = strings.Index(s, " ") + if idx != -1 { + return s[:idx] } - return true, nil + return strings.TrimSpace(s) } -// healthCheck returns a memberList and an error. -// If any member (excluding not yet started or already removed member) -// is unhealthy, the error won't be nil. -func healthCheck(sts *appsv1.StatefulSet, lg klog.Logger) (*clientv3.MemberListResponse, []etcdutils.EpHealth, error) { - replica := int(*sts.Spec.Replicas) - if replica == 0 { - return nil, nil, nil - } - - endpoints := clientEndpointsFromStatefulsets(sts) - - memberlistResp, err := etcdutils.MemberList(endpoints) - if err != nil { - return nil, nil, err +func createArgs(name string, etcdOptions []string) []string { + defaultArgs := defaultArgs(name) + if len(etcdOptions) > 0 { + for i := range etcdOptions { + argName := getArgName(etcdOptions[i]) + defaultArgs = RemoveStringFromSlice(defaultArgs, argName) + } } - memberCnt := len(memberlistResp.Members) - - // Usually replica should be equal to memberCnt. If it isn't, then - // it means previous reconcile loop somehow interrupted right after - // adding (replica < memberCnt) or removing (replica > memberCnt) - // a member from the cluster. In that case, we shouldn't run health - // check on the not yet started or already removed member. - cnt := min(replica, memberCnt) + defaultArgs = append(defaultArgs, etcdOptions...) + return defaultArgs +} - lg.Info("health checking", "replica", replica, "len(members)", memberCnt) - endpoints = endpoints[:cnt] +// --------------------------------------------------------------------------- +// Kubernetes resource helpers +// --------------------------------------------------------------------------- - healthInfos, err := etcdutils.ClusterHealth(endpoints) +func createHeadlessServiceIfNotExist(ctx context.Context, logger logr.Logger, c client.Client, ec *ecv1alpha1.EtcdCluster, scheme *runtime.Scheme) error { + service := &corev1.Service{} + err := c.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, service) if err != nil { - return memberlistResp, nil, err - } - - var memberErrors []error - for _, healthInfo := range healthInfos { - if !healthInfo.Health { - // TODO: also update metrics? - memberErrors = append(memberErrors, errors.New(healthInfo.String())) + if k8serrors.IsNotFound(err) { + logger.Info("Headless service does not exist. Creating headless service") + headlessSvc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: ec.Name, + Namespace: ec.Namespace, + Labels: etcdClusterLabels(ec), + }, + Spec: corev1.ServiceSpec{ + ClusterIP: "None", + Selector: etcdClusterLabels(ec), + }, + } + if err := controllerutil.SetControllerReference(ec, headlessSvc, scheme); err != nil { + return err + } + if createErr := c.Create(ctx, headlessSvc); createErr != nil { + return fmt.Errorf("failed to create headless service: %w", createErr) + } + logger.Info("Headless service created successfully") + return nil } - lg.Info(healthInfo.String()) + return fmt.Errorf("failed to get headless service: %w", err) } + return nil +} - return memberlistResp, healthInfos, utilerrors.NewAggregate(memberErrors) +// peerEndpointForOrdinalIndex returns the member name and peer URL for a given +// ordinal, used both to build ETCD_INITIAL_CLUSTER and to call AddMember. +func peerEndpointForOrdinalIndex(ec *ecv1alpha1.EtcdCluster, index int) (string, string) { + name := fmt.Sprintf("%s-%d", ec.Name, index) + return name, fmt.Sprintf("http://%s-%d.%s.%s.svc.cluster.local:2380", + ec.Name, index, ec.Name, ec.Namespace) } +// --------------------------------------------------------------------------- +// Certificate helpers (unchanged from original implementation) +// --------------------------------------------------------------------------- + func getClientCertName(etcdClusterName string) string { - clientCertName := fmt.Sprintf("%s-%s-tls", etcdClusterName, "client") - return clientCertName + return fmt.Sprintf("%s-%s-tls", etcdClusterName, "client") } func getServerCertName(etcdClusterName string) string { - serverCertName := fmt.Sprintf("%s-%s-tls", etcdClusterName, "server") - return serverCertName + return fmt.Sprintf("%s-%s-tls", etcdClusterName, "server") } func getPeerCertName(etcdClusterName string) string { - peerCertName := fmt.Sprintf("%s-%s-tls", etcdClusterName, "peer") - return peerCertName + return fmt.Sprintf("%s-%s-tls", etcdClusterName, "peer") } -// parseValidityDuration parses a duration string and returns the parsed duration. -// If the customizedDuration is empty, it returns the defaultDuration. -// Returns an error if the duration string cannot be parsed. func parseValidityDuration(customizedDuration string, defaultDuration time.Duration) (time.Duration, error) { if customizedDuration == "" { return defaultDuration, nil @@ -596,7 +547,6 @@ func createCMCertificateConfig(ec *ecv1alpha1.EtcdCluster) (*certInterface.Confi return nil, fmt.Errorf("cert-manager configuration is not present") } - // Set default duration to 90 days for cert-manager if not provided duration, err := parseValidityDuration(cmConfig.ValidityDuration, certInterface.DefaultCertManagerValidity) if err != nil { return nil, err @@ -609,18 +559,14 @@ func createCMCertificateConfig(ec *ecv1alpha1.EtcdCluster) (*certInterface.Confi IPs: make([]net.IP, len(cmConfig.AltNames.DNSNames)), } } else { - // Use wildcard DNS for the cluster's headless service to cover all pods - // This allows the certificate to work for pod-0, pod-1, etc. defaultDNSNames := []string{ fmt.Sprintf("*.%s.%s.%s", ec.Name, ec.Namespace, certInterface.DefaultDomainName), fmt.Sprintf("%s.%s.%s", ec.Name, ec.Namespace, certInterface.DefaultDomainName), } - getAltNames = certInterface.AltNames{ - DNSNames: defaultDNSNames, - } + getAltNames = certInterface.AltNames{DNSNames: defaultDNSNames} } - config := &certInterface.Config{ + return &certInterface.Config{ CommonName: cmConfig.CommonName, Organization: cmConfig.Organization, ValidityDuration: duration, @@ -629,13 +575,11 @@ func createCMCertificateConfig(ec *ecv1alpha1.EtcdCluster) (*certInterface.Confi "issuerName": cmConfig.IssuerName, "issuerKind": cmConfig.IssuerKind, }, - } - return config, nil + }, nil } func createAutoCertificateConfig(ec *ecv1alpha1.EtcdCluster) (*certInterface.Config, error) { autoConfig := ec.Spec.TLS.ProviderCfg.AutoCfg - // Set default values for auto configuration if not present if autoConfig == nil { autoConfig = &ecv1alpha1.ProviderAutoConfig{ CommonConfig: ecv1alpha1.CommonConfig{ @@ -645,7 +589,6 @@ func createAutoCertificateConfig(ec *ecv1alpha1.EtcdCluster) (*certInterface.Con } } - // Set default duration to 365 days for auto provider if not provided duration, err := parseValidityDuration(autoConfig.ValidityDuration, certInterface.DefaultAutoValidity) if err != nil { return nil, err @@ -658,28 +601,22 @@ func createAutoCertificateConfig(ec *ecv1alpha1.EtcdCluster) (*certInterface.Con IPs: make([]net.IP, len(autoConfig.AltNames.DNSNames)), } } else { - // Use wildcard DNS for the cluster's headless service to cover all pods - // This allows the certificate to work for pod-0, pod-1, etc. defaultDNSNames := []string{ fmt.Sprintf("*.%s.%s.%s", ec.Name, ec.Namespace, certInterface.DefaultDomainName), fmt.Sprintf("%s.%s.%s", ec.Name, ec.Namespace, certInterface.DefaultDomainName), } - altNames = certInterface.AltNames{ - DNSNames: defaultDNSNames, - } + altNames = certInterface.AltNames{DNSNames: defaultDNSNames} } - config := &certInterface.Config{ + return &certInterface.Config{ CommonName: autoConfig.CommonName, Organization: autoConfig.Organization, ValidityDuration: duration, AltNames: altNames, - } - return config, nil + }, nil } func createCertificate(ec *ecv1alpha1.EtcdCluster, ctx context.Context, c client.Client, certName string) error { - // The TLS field is present but spec is empty providerName := ec.Spec.TLS.Provider if providerName == "" { providerName = string(certificate.Auto) @@ -687,7 +624,6 @@ func createCertificate(ec *ecv1alpha1.EtcdCluster, ctx context.Context, c client cert, certErr := certificate.NewProvider(certificate.ProviderType(providerName), c) if certErr != nil { - // TODO: instead of error, set default autoConfig return certErr } _, getCertError := cert.GetCertificateConfig(ctx, client.ObjectKey{Name: certName, Namespace: ec.Namespace}) @@ -702,8 +638,7 @@ func createCertificate(ec *ecv1alpha1.EtcdCluster, ctx context.Context, c client if err != nil { return fmt.Errorf("error creating auto certificate config: %w", err) } - createCertErr := cert.EnsureCertificateSecret(ctx, secretKey, autoConfig) - if createCertErr != nil { + if createCertErr := cert.EnsureCertificateSecret(ctx, secretKey, autoConfig); createCertErr != nil { return fmt.Errorf("error creating auto certificate: %w", createCertErr) } return nil @@ -712,73 +647,50 @@ func createCertificate(ec *ecv1alpha1.EtcdCluster, ctx context.Context, c client if err != nil { return fmt.Errorf("error creating cert-manager certificate config: %w", err) } - createCertErr := cert.EnsureCertificateSecret(ctx, secretKey, cmConfig) - if createCertErr != nil { + if createCertErr := cert.EnsureCertificateSecret(ctx, secretKey, cmConfig); createCertErr != nil { return fmt.Errorf("error creating cert-manager certificate: %w", createCertErr) } return nil - default: // This should never happen - // TODO: Use AuthProvider, since both AutoCfg and CertManagerCfg is not present + default: log.Printf("Error creating certificate, valid certificate provider not defined.") return nil } - } else { - return fmt.Errorf("%s:Error getting certificate", getCertError) } + return fmt.Errorf("%s:Error getting certificate", getCertError) } - return nil } func createClientCertificate(ctx context.Context, ec *ecv1alpha1.EtcdCluster, c client.Client) error { certName := getClientCertName(ec.Name) - err := createCertificate(ec, ctx, c, certName) - if err != nil { + if err := createCertificate(ec, ctx, c, certName); err != nil { return err } - err = patchCertificateSecret(ctx, ec, c, certName) - if err != nil { - return fmt.Errorf("patching certificate secret: %s with ownerReference failed: %w", certName, err) - } - return err + return patchCertificateSecret(ctx, ec, c, certName) } func createServerCertificate(ctx context.Context, ec *ecv1alpha1.EtcdCluster, c client.Client) error { serverCertName := getServerCertName(ec.Name) - err := createCertificate(ec, ctx, c, serverCertName) - if err != nil { + if err := createCertificate(ec, ctx, c, serverCertName); err != nil { return err } - err = patchCertificateSecret(ctx, ec, c, serverCertName) - if err != nil { - return fmt.Errorf("patching certificate secret: %s with ownerReference failed: %w", serverCertName, err) - } - return nil + return patchCertificateSecret(ctx, ec, c, serverCertName) } func createPeerCertificate(ctx context.Context, ec *ecv1alpha1.EtcdCluster, c client.Client) error { peerCertName := getPeerCertName(ec.Name) - err := createCertificate(ec, ctx, c, peerCertName) - if err != nil { + if err := createCertificate(ec, ctx, c, peerCertName); err != nil { return err } - err = patchCertificateSecret(ctx, ec, c, peerCertName) - if err != nil { - return fmt.Errorf("patching certificate secret: %s with ownerReference failed: %w", peerCertName, err) - } - return nil + return patchCertificateSecret(ctx, ec, c, peerCertName) } func applyEtcdMemberCerts(ctx context.Context, ec *ecv1alpha1.EtcdCluster, c client.Client) error { if ec.Spec.TLS != nil { - err := createServerCertificate(ctx, ec, c) - if err != nil { - return err - } - err = createPeerCertificate(ctx, ec, c) - if err != nil { + if err := createServerCertificate(ctx, ec, c); err != nil { return err } + return createPeerCertificate(ctx, ec, c) } return nil } @@ -796,14 +708,16 @@ func patchCertificateSecret(ctx context.Context, ec *ecv1alpha1.EtcdCluster, c c if err := c.Update(ctx, getCertSecret); err != nil { return fmt.Errorf("failed to update certificate secret with ownerReference: %w", err) } - return nil } -// validateEtcdUpgradePat can be used to check if the current and target versions align -// with the official upgrade paths for etcd. If one of the versions cannot be parsed -// canParse is false. If the versions are equal the function will not return an error -// but that should be handled on the call site. +// --------------------------------------------------------------------------- +// Version validation +// --------------------------------------------------------------------------- + +// validateEtcdUpgradePath checks whether upgrading from current to target is +// permitted by the official etcd upgrade policy. If canParse is false, one of +// the version strings could not be parsed as semver. func validateEtcdUpgradePath(etcdVersions []semver.Version, current, target string) (canParse bool, err error) { var ( currentVer *semver.Version @@ -835,14 +749,11 @@ func validateEtcdUpgradePath(etcdVersions []semver.Version, current, target stri switch { case currentIdx == -1: return true, fmt.Errorf("unknown current version %s", currentVer) - case targetIdx == -1: return true, fmt.Errorf("unknown target version %s", targetVer) - case currentIdx > targetIdx || (currentIdx == targetIdx && currentVer.Patch > targetVer.Patch): return true, fmt.Errorf("downgrading from version %s to version %s is not allowed", currentVer, targetVer) - case targetIdx > currentIdx+1: return true, fmt.Errorf("upgrading from version %s to version %s is not allowed", currentVer, targetVer) diff --git a/internal/controller/utils_test.go b/internal/controller/utils_test.go index 67592316..b7a1f880 100644 --- a/internal/controller/utils_test.go +++ b/internal/controller/utils_test.go @@ -8,13 +8,12 @@ import ( "time" "github.com/coreos/go-semver/semver" - "github.com/go-logr/logr" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" @@ -27,110 +26,191 @@ import ( clientv3 "go.etcd.io/etcd/client/v3" ) -func pointerToInt32(value int32) *int32 { +func pointerToBool(value bool) *bool { return &value } -func TestReconcileStatefulSet(t *testing.T) { +// --------------------------------------------------------------------------- +// listOwnedPods +// --------------------------------------------------------------------------- + +func TestListOwnedPods(t *testing.T) { scheme := runtime.NewScheme() + _ = corev1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) - fakeClient := fake.NewClientBuilder().Build() - logger := log.FromContext(t.Context()) - ec := &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-etcd", - Namespace: "default", - }, - Spec: ecv1alpha1.EtcdClusterSpec{ - Size: 3, - Version: "3.5.17", - }, + ObjectMeta: metav1.ObjectMeta{Name: "my-cluster", Namespace: "default", UID: "abc"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 3, Version: "3.5.17"}, + } + + makePod := func(name, uid string, owned bool) *corev1.Pod { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + Labels: etcdClusterLabels(ec), + }, + } + if owned { + pod.OwnerReferences = []metav1.OwnerReference{{ + APIVersion: ecv1alpha1.GroupVersion.String(), + Kind: "EtcdCluster", + Name: ec.Name, + UID: types.UID(uid), + Controller: pointerToBool(true), + }} + } + return pod } - _, _ = reconcileStatefulSet(t.Context(), logger, ec, fakeClient, 3, scheme) + t.Run("returns only owned pods sorted by ordinal", func(t *testing.T) { + ctx := t.Context() + pod0 := makePod("my-cluster-0", "abc", true) + pod2 := makePod("my-cluster-2", "abc", true) + pod1 := makePod("my-cluster-1", "abc", true) + foreign := makePod("my-cluster-3", "different-uid", false) + + fakeClient := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(ec, pod0, pod1, pod2, foreign).Build() + + pods, err := listOwnedPods(ctx, fakeClient, ec) + require.NoError(t, err) + require.Len(t, pods, 3) + assert.Equal(t, "my-cluster-0", pods[0].Name) + assert.Equal(t, "my-cluster-1", pods[1].Name) + assert.Equal(t, "my-cluster-2", pods[2].Name) + }) + + t.Run("returns empty slice when no pods exist", func(t *testing.T) { + ctx := t.Context() + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() + + pods, err := listOwnedPods(ctx, fakeClient, ec) + require.NoError(t, err) + assert.Empty(t, pods) + }) +} + +// --------------------------------------------------------------------------- +// createMemberPod +// --------------------------------------------------------------------------- - sts := &appsv1.StatefulSet{} - err := fakeClient.Get(t.Context(), client.ObjectKey{Name: "test-etcd", Namespace: "default"}, sts) - if err != nil { - t.Fatalf("expected no error, got %v", err) +func TestCreateMemberPod(t *testing.T) { + scheme := runtime.NewScheme() + _ = corev1.AddToScheme(scheme) + _ = ecv1alpha1.AddToScheme(scheme) + ctx := t.Context() + logger := log.FromContext(ctx) + + ec := &ecv1alpha1.EtcdCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "test-etcd", Namespace: "default", UID: "1"}, + Spec: ecv1alpha1.EtcdClusterSpec{Size: 3, Version: "3.5.17"}, } - if *sts.Spec.Replicas != 3 { - t.Fatalf("expected 3 replicas, got %d", *sts.Spec.Replicas) + t.Run("creates pod-0 with state=new", func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() + err := createMemberPod(ctx, logger, fakeClient, ec, 0, scheme) + require.NoError(t, err) + + pod := &corev1.Pod{} + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: "test-etcd-0", Namespace: "default"}, pod)) + assert.Equal(t, "test-etcd-0", pod.Name) + + envMap := envVarsToMap(pod) + assert.Equal(t, string(etcdClusterStateNew), envMap["ETCD_INITIAL_CLUSTER_STATE"]) + assert.Contains(t, envMap["ETCD_INITIAL_CLUSTER"], "test-etcd-0=") + assert.NotContains(t, envMap["ETCD_INITIAL_CLUSTER"], "test-etcd-1=") + assert.Equal(t, etcdDataDir, envMap["ETCD_DATA_DIR"]) + + require.Len(t, pod.OwnerReferences, 1) + assert.Equal(t, ec.Name, pod.OwnerReferences[0].Name) + }) + + t.Run("creates pod-2 with state=existing and full initial cluster", func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() + err := createMemberPod(ctx, logger, fakeClient, ec, 2, scheme) + require.NoError(t, err) + + pod := &corev1.Pod{} + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: "test-etcd-2", Namespace: "default"}, pod)) + + envMap := envVarsToMap(pod) + assert.Equal(t, string(etcdClusterStateExisting), envMap["ETCD_INITIAL_CLUSTER_STATE"]) + assert.Contains(t, envMap["ETCD_INITIAL_CLUSTER"], "test-etcd-0=") + assert.Contains(t, envMap["ETCD_INITIAL_CLUSTER"], "test-etcd-1=") + assert.Contains(t, envMap["ETCD_INITIAL_CLUSTER"], "test-etcd-2=") + }) +} + +// envVarsToMap converts a container's env slice into a name→value map. +func envVarsToMap(pod *corev1.Pod) map[string]string { + m := make(map[string]string) + for _, e := range pod.Spec.Containers[0].Env { + m[e.Name] = e.Value } + return m } -func TestWaitForStatefulSetReady(t *testing.T) { - // Create a scheme and register the necessary types +// --------------------------------------------------------------------------- +// waitForPodReady +// --------------------------------------------------------------------------- + +func TestWaitForPodReady(t *testing.T) { scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) - _ = appsv1.AddToScheme(scheme) + + readyPod := func(name, namespace string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{{ + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }}, + }, + } + } + notReadyPod := func(name, namespace string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + } + } tests := []struct { - name string - statefulSet *appsv1.StatefulSet - expectedResult bool - expectedError error + name string + pod *corev1.Pod + expectedError error }{ { - name: "StatefulSet is ready", - statefulSet: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(3), - }, - Status: appsv1.StatefulSetStatus{ - ReadyReplicas: 3, - }, - }, - expectedResult: true, - expectedError: nil, + name: "Pod is ready", + pod: readyPod("test-pod", "default"), + expectedError: nil, }, { - name: "StatefulSet is not ready", - statefulSet: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(3), - }, - Status: appsv1.StatefulSetStatus{ - ReadyReplicas: 2, - }, - }, - expectedResult: false, - expectedError: errors.New("StatefulSet default/test-sts did not become ready: timed out waiting for the condition"), + name: "Pod is not ready", + pod: notReadyPod("test-pod", "default"), + expectedError: errors.New("pod default/test-pod did not become ready"), }, { - name: "StatefulSet does not exist", - statefulSet: nil, - expectedResult: false, - expectedError: errors.New("statefulsets.apps \"test-sts\" not found"), + name: "Pod does not exist", + pod: nil, + expectedError: errors.New("not found"), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - var clientBuilder *fake.ClientBuilder - if tt.statefulSet != nil { - clientBuilder = fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.statefulSet) - } else { - clientBuilder = fake.NewClientBuilder().WithScheme(scheme) + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.pod != nil { + builder = builder.WithObjects(tt.pod) } - fakeClient := clientBuilder.Build() + fakeClient := builder.Build() ctx := t.Context() logger := log.FromContext(ctx) - - err := waitForStatefulSetReady(ctx, logger, fakeClient, "test-sts", "default") + err := waitForPodReady(ctx, logger, fakeClient, "test-pod", "default") if tt.expectedError != nil { assert.Error(t, err) assert.Contains(t, err.Error(), tt.expectedError.Error()) @@ -141,31 +221,28 @@ func TestWaitForStatefulSetReady(t *testing.T) { } } +// --------------------------------------------------------------------------- +// createHeadlessServiceIfNotExist +// --------------------------------------------------------------------------- + func TestCreateHeadlessServiceIfNotExist(t *testing.T) { ctx := t.Context() logger := log.FromContext(ctx) - // Create a scheme and register the necessary types scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) - // Create a fake client fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - // Create an EtcdCluster instance ec := &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-etcd", - Namespace: "default", - }, + ObjectMeta: metav1.ObjectMeta{Name: "test-etcd", Namespace: "default"}, } t.Run("creates headless service if it does not exist", func(t *testing.T) { err := createHeadlessServiceIfNotExist(ctx, logger, fakeClient, ec, scheme) assert.NoError(t, err) - // Verify that the service was created service := &corev1.Service{} err = fakeClient.Get(ctx, client.ObjectKey{Name: "test-etcd", Namespace: "default"}, service) assert.NoError(t, err) @@ -174,347 +251,186 @@ func TestCreateHeadlessServiceIfNotExist(t *testing.T) { "app": "test-etcd", "controller": "test-etcd", }, service.Spec.Selector) - // Verify service is controlled by EtcdCluster require.Len(t, service.OwnerReferences, 1) - require.Equal(t, service.OwnerReferences[0].Name, ec.Name) + assert.Equal(t, ec.Name, service.OwnerReferences[0].Name) }) t.Run("does not create service if it already exists", func(t *testing.T) { - // Service was already created in previous test. Call the function again to ensure no error err := createHeadlessServiceIfNotExist(ctx, logger, fakeClient, ec, scheme) assert.NoError(t, err) }) } -func TestClientEndpointForOrdinalIndex(t *testing.T) { - sts := &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - } +// --------------------------------------------------------------------------- +// clientEndpointForOrdinal / clientEndpointsFromPods +// --------------------------------------------------------------------------- +func TestClientEndpointForOrdinal(t *testing.T) { tests := []struct { - index int - expectedResult string + ordinal int + expected string }{ - {index: 0, expectedResult: "http://test-sts-0.test-sts.default.svc.cluster.local:2379"}, - {index: 1, expectedResult: "http://test-sts-1.test-sts.default.svc.cluster.local:2379"}, - {index: 2, expectedResult: "http://test-sts-2.test-sts.default.svc.cluster.local:2379"}, + {0, "http://test-cluster-0.test-cluster.default.svc.cluster.local:2379"}, + {1, "http://test-cluster-1.test-cluster.default.svc.cluster.local:2379"}, + {2, "http://test-cluster-2.test-cluster.default.svc.cluster.local:2379"}, } for _, tt := range tests { - t.Run(fmt.Sprintf("index %d", tt.index), func(t *testing.T) { - result := clientEndpointForOrdinalIndex(sts, tt.index) - assert.Equal(t, tt.expectedResult, result) + t.Run(fmt.Sprintf("ordinal %d", tt.ordinal), func(t *testing.T) { + result := clientEndpointForOrdinal("test-cluster", "default", tt.ordinal) + assert.Equal(t, tt.expected, result) }) } } -func TestIsLearnerReady(t *testing.T) { - tests := []struct { - name string - leaderStatus *clientv3.StatusResponse - learnerStatus *clientv3.StatusResponse - expectedResult bool - }{ - { - name: "Learner is ready", - leaderStatus: &clientv3.StatusResponse{ - Header: &etcdserverpb.ResponseHeader{Revision: 100}, +func TestClientEndpointsFromPods(t *testing.T) { + makePod := func(clusterName, namespace string, ordinal int) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%d", clusterName, ordinal), + Namespace: namespace, }, - learnerStatus: &clientv3.StatusResponse{ - Header: &etcdserverpb.ResponseHeader{Revision: 95}, - }, - expectedResult: true, - }, - { - name: "Learner is not ready", - leaderStatus: &clientv3.StatusResponse{ - Header: &etcdserverpb.ResponseHeader{Revision: 100}, - }, - learnerStatus: &clientv3.StatusResponse{ - Header: &etcdserverpb.ResponseHeader{Revision: 80}, - }, - expectedResult: false, - }, + } } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := etcdutils.IsLearnerReady(tt.leaderStatus, tt.learnerStatus) - assert.Equal(t, tt.expectedResult, result) - }) - } -} - -func TestCheckStatefulSetControlledByEtcdOperator(t *testing.T) { tests := []struct { - name string - ec *ecv1alpha1.EtcdCluster - sts *appsv1.StatefulSet - expectedError error + name string + pods []*corev1.Pod + expected []string }{ { - name: "StatefulSet controlled by EtcdCluster", - ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd-cluster", - Namespace: "default", - UID: "1234", - }, + name: "3 pods", + pods: []*corev1.Pod{ + makePod("test-sts", "default", 0), + makePod("test-sts", "default", 1), + makePod("test-sts", "default", 2), }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "ecv1alpha1/v1alpha1", - Kind: "EtcdCluster", - Name: "etcd-cluster", - UID: "1234", - Controller: pointerToBool(true), - }, - }, - }, + expected: []string{ + "http://test-sts-0.test-sts.default.svc.cluster.local:2379", + "http://test-sts-1.test-sts.default.svc.cluster.local:2379", + "http://test-sts-2.test-sts.default.svc.cluster.local:2379", }, - expectedError: nil, }, { - name: "StatefulSet not controlled by EtcdCluster", - ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd-cluster", - Namespace: "default", - UID: "1234", - }, - }, - sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd-sts", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "ecv1alpha1/v1alpha1", - Kind: "EtcdCluster", - Name: "other-etcd-cluster", - UID: "5678", - Controller: pointerToBool(true), - }, - }, - }, - }, - expectedError: fmt.Errorf("StatefulSet default/etcd-sts is not controlled by EtcdCluster default/etcd-cluster"), + name: "no pods", + pods: nil, + expected: []string(nil), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := checkStatefulSetControlledByEtcdOperator(tt.ec, tt.sts) - - if (err != nil) != (tt.expectedError != nil) { - t.Errorf("expected error: %v, got: %v", tt.expectedError, err) - return - } - - if err != nil && err.Error() != tt.expectedError.Error() { - t.Errorf("unexpected error: got %v, want %v", err, tt.expectedError) - } + result := clientEndpointsFromPods("test-sts", "default", tt.pods) + assert.Equal(t, tt.expected, result) }) } } -func pointerToBool(value bool) *bool { - return &value -} +// --------------------------------------------------------------------------- +// areAllMembersHealthy +// --------------------------------------------------------------------------- -func TestClientEndpointsFromStatefulsets(t *testing.T) { +func TestAreAllMembersHealthy(t *testing.T) { tests := []struct { - name string - statefulSet *appsv1.StatefulSet - expectedResult []string + name string + health []etcdutils.EpHealth + expected bool }{ { - name: "StatefulSet with 3 replicas", - statefulSet: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(3), - }, - }, - expectedResult: []string{ - "http://test-sts-0.test-sts.default.svc.cluster.local:2379", - "http://test-sts-1.test-sts.default.svc.cluster.local:2379", - "http://test-sts-2.test-sts.default.svc.cluster.local:2379", - }, + name: "empty slice — no unhealthy members", + health: nil, + expected: true, }, { - name: "StatefulSet with 1 replica", - statefulSet: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(1), - }, - }, - expectedResult: []string{ - "http://test-sts-0.test-sts.default.svc.cluster.local:2379", + name: "all healthy", + health: []etcdutils.EpHealth{ + {Health: true}, + {Health: true}, + {Health: true}, }, + expected: true, }, { - name: "StatefulSet with 0 replicas", - statefulSet: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(0), - }, + name: "one unhealthy", + health: []etcdutils.EpHealth{ + {Health: true}, + {Health: false}, + {Health: true}, }, - expectedResult: []string(nil), + expected: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := clientEndpointsFromStatefulsets(tt.statefulSet) - assert.Equal(t, tt.expectedResult, result) + assert.Equal(t, tt.expected, areAllMembersHealthy(tt.health)) }) } } -func TestAreAllMembersHealthy(t *testing.T) { +// --------------------------------------------------------------------------- +// IsLearnerReady (delegates to etcdutils) +// --------------------------------------------------------------------------- + +func TestIsLearnerReady(t *testing.T) { tests := []struct { name string - statefulSet *appsv1.StatefulSet - healthInfos []etcdutils.EpHealth + leaderStatus *clientv3.StatusResponse + learnerStatus *clientv3.StatusResponse expectedResult bool - expectedError error }{ - // TODO: Add test cases for healthy members and non healthy members { - name: "Error during health check", - statefulSet: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sts", - Namespace: "default", - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: pointerToInt32(3), - }, - Status: appsv1.StatefulSetStatus{ - ReadyReplicas: 3, - }, + name: "Learner is ready", + leaderStatus: &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{Revision: 100}, + }, + learnerStatus: &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{Revision: 95}, + }, + expectedResult: true, + }, + { + name: "Learner is not ready", + leaderStatus: &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{Revision: 100}, + }, + learnerStatus: &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{Revision: 80}, }, - healthInfos: nil, expectedResult: false, - expectedError: errors.New("context deadline exceeded"), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - logger := logr.Discard() // Use a no-op logger for testing - - result, err := areAllMembersHealthy(tt.statefulSet, logger) + result := etcdutils.IsLearnerReady(tt.leaderStatus, tt.learnerStatus) assert.Equal(t, tt.expectedResult, result) - if tt.expectedError != nil { - assert.Error(t, err) - assert.Contains(t, err.Error(), tt.expectedError.Error()) - } else { - assert.NoError(t, err) - } }) } } -func TestApplyEtcdClusterState(t *testing.T) { - ctx := t.Context() - logger := log.FromContext(ctx) - - // Create a scheme and register the necessary types - scheme := runtime.NewScheme() - _ = corev1.AddToScheme(scheme) - _ = ecv1alpha1.AddToScheme(scheme) +// --------------------------------------------------------------------------- +// createMemberPod — pod annotations and labels +// --------------------------------------------------------------------------- - // Create a fake client - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - - // Create an EtcdCluster instance - ec := &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-etcd", - Namespace: "default", - }, - } - - t.Run("creates configmap if it does not exist", func(t *testing.T) { - err := applyEtcdClusterState(ctx, ec, 3, fakeClient, scheme, logger) - assert.NoError(t, err) - // Verify that the configmap was created - configMap := &corev1.ConfigMap{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: "default"}, configMap) - t.Cleanup(func() { - err = fakeClient.Delete(ctx, configMap) // Delete the configmap to avoid conflicts in future tests - assert.NoError(t, err) - }) - assert.NoError(t, err) - assert.Equal(t, "existing", configMap.Data["ETCD_INITIAL_CLUSTER_STATE"]) - assert.Contains(t, configMap.Data["ETCD_INITIAL_CLUSTER"], "test-etcd-0=http://test-etcd-0.test-etcd.default.svc.cluster.local:2380") - // Verify configmap is controlled by EtcdCluster - require.Len(t, configMap.OwnerReferences, 1) - require.Equal(t, configMap.OwnerReferences[0].Name, ec.Name) - }) - - t.Run("updates configmap if it already exists", func(t *testing.T) { - // Create the configmap first - configMap := newEtcdClusterState(ec, 3) - err := fakeClient.Create(ctx, configMap) - assert.NoError(t, err) - - // Call the function again to ensure it updates the configmap - err = applyEtcdClusterState(ctx, ec, 3, fakeClient, scheme, logger) - assert.NoError(t, err) - - // Verify that the configmap was updated - updatedConfigMap := &corev1.ConfigMap{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: "default"}, updatedConfigMap) - assert.NoError(t, err) - assert.Equal(t, "existing", updatedConfigMap.Data["ETCD_INITIAL_CLUSTER_STATE"]) - assert.Contains(t, updatedConfigMap.Data["ETCD_INITIAL_CLUSTER"], "test-etcd-0=http://test-etcd-0.test-etcd.default.svc.cluster.local:2380") - // Verify configmap is controlled by EtcdCluster - require.Len(t, updatedConfigMap.OwnerReferences, 1) - require.Equal(t, updatedConfigMap.OwnerReferences[0].Name, ec.Name) - }) -} - -func TestCreateOrPatchStatefulSetWithPodAnnotations(t *testing.T) { +func TestCreateMemberPodWithAnnotations(t *testing.T) { ctx := t.Context() logger := log.FromContext(ctx) - // Create a scheme and register the necessary types scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) - _ = appsv1.AddToScheme(scheme) tests := []struct { name string - etcdClusterName string + clusterName string podTemplate *ecv1alpha1.PodTemplate expectedAnnotations map[string]string expectNil bool }{ { - name: "creates statefulset with pod annotations", - etcdClusterName: "test-etcd", + name: "creates pod with custom annotations", + clusterName: "test-etcd", podTemplate: &ecv1alpha1.PodTemplate{ Metadata: &ecv1alpha1.PodMetadata{ Annotations: map[string]string{ @@ -530,82 +446,69 @@ func TestCreateOrPatchStatefulSetWithPodAnnotations(t *testing.T) { expectNil: false, }, { - name: "creates statefulset without pod annotations when PodTemplate is nil", - etcdClusterName: "test-etcd-no-podtemplate", - podTemplate: nil, - expectedAnnotations: nil, - expectNil: true, + name: "creates pod without annotations when PodTemplate is nil", + clusterName: "test-etcd-no-podtemplate", + podTemplate: nil, + expectNil: true, }, { - name: "creates statefulset without pod annotations when annotations are empty", - etcdClusterName: "test-etcd-empty-annotations", + name: "creates pod without annotations when annotations map is empty", + clusterName: "test-etcd-empty-annotations", podTemplate: &ecv1alpha1.PodTemplate{ Metadata: &ecv1alpha1.PodMetadata{ Annotations: map[string]string{}, }, }, - expectedAnnotations: nil, - expectNil: true, + expectNil: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Create a fake client for each test case to avoid interference - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - - // Create an EtcdCluster instance ec := &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: tt.etcdClusterName, - Namespace: "default", - }, + ObjectMeta: metav1.ObjectMeta{Name: tt.clusterName, Namespace: "default", UID: "1"}, Spec: ecv1alpha1.EtcdClusterSpec{ Size: 3, Version: "3.5.17", PodTemplate: tt.podTemplate, }, } + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() + + err := createMemberPod(ctx, logger, fakeClient, ec, 0, scheme) + require.NoError(t, err) - err := createOrPatchStatefulSet(ctx, logger, ec, fakeClient, 3, scheme) - assert.NoError(t, err) + pod := &corev1.Pod{} + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: tt.clusterName + "-0", Namespace: "default"}, pod)) - // Verify that the StatefulSet was created - sts := &appsv1.StatefulSet{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: tt.etcdClusterName, Namespace: "default"}, sts) - assert.NoError(t, err) - // Check annotations if tt.expectNil { - assert.Nil(t, sts.Spec.Template.Annotations) + assert.Nil(t, pod.Annotations) } else { - assert.Equal(t, tt.expectedAnnotations, sts.Spec.Template.Annotations) + assert.Equal(t, tt.expectedAnnotations, pod.Annotations) } - // Verify statefulset is controlled by EtcdCluster - require.Len(t, sts.OwnerReferences, 1) - require.Equal(t, sts.OwnerReferences[0].Name, ec.Name) + require.Len(t, pod.OwnerReferences, 1) + assert.Equal(t, ec.Name, pod.OwnerReferences[0].Name) }) } } -func TestCreateOrPatchStatefulSetWithPodLabels(t *testing.T) { +func TestCreateMemberPodWithLabels(t *testing.T) { ctx := t.Context() logger := log.FromContext(ctx) - // Create a scheme and register the necessary types scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) _ = ecv1alpha1.AddToScheme(scheme) - _ = appsv1.AddToScheme(scheme) tests := []struct { - name string - etcdClusterName string - podTemplate *ecv1alpha1.PodTemplate - expectedLabels map[string]string + name string + clusterName string + podTemplate *ecv1alpha1.PodTemplate + expectedLabels map[string]string }{ { - name: "creates statefulset with pod labels merged with default labels", - etcdClusterName: "test-etcd", + name: "custom labels merged with default labels", + clusterName: "test-etcd", podTemplate: &ecv1alpha1.PodTemplate{ Metadata: &ecv1alpha1.PodMetadata{ Labels: map[string]string{ @@ -616,31 +519,27 @@ func TestCreateOrPatchStatefulSetWithPodLabels(t *testing.T) { }, }, expectedLabels: map[string]string{ - // Default labels that should always be present - "app": "test-etcd", - "controller": "test-etcd", - // Custom labels from PodTemplate + "app": "test-etcd", + "controller": "test-etcd", "environment": "production", "version": "v1.0.0", "team": "platform", }, }, { - name: "creates statefulset with default labels when PodTemplate is nil", - etcdClusterName: "test-etcd-no-podtemplate", - podTemplate: nil, + name: "only default labels when PodTemplate is nil", + clusterName: "test-etcd-no-podtemplate", + podTemplate: nil, expectedLabels: map[string]string{ "app": "test-etcd-no-podtemplate", "controller": "test-etcd-no-podtemplate", }, }, { - name: "creates statefulset with default labels when labels are empty", - etcdClusterName: "test-etcd-empty-labels", + name: "only default labels when labels map is empty", + clusterName: "test-etcd-empty-labels", podTemplate: &ecv1alpha1.PodTemplate{ - Metadata: &ecv1alpha1.PodMetadata{ - Labels: map[string]string{}, - }, + Metadata: &ecv1alpha1.PodMetadata{Labels: map[string]string{}}, }, expectedLabels: map[string]string{ "app": "test-etcd-empty-labels", @@ -648,20 +547,20 @@ func TestCreateOrPatchStatefulSetWithPodLabels(t *testing.T) { }, }, { - name: "default labels override custom labels when same key exists", - etcdClusterName: "test-etcd-override", + name: "default labels override conflicting custom labels", + clusterName: "test-etcd-override", podTemplate: &ecv1alpha1.PodTemplate{ Metadata: &ecv1alpha1.PodMetadata{ Labels: map[string]string{ - "app": "custom-app-name", // Override default app label - "controller": "custom-controller", // Override default controller label + "app": "custom-app", + "controller": "custom-controller", "environment": "staging", }, }, }, expectedLabels: map[string]string{ - "app": "test-etcd-override", // Default labels are applied last, so they override custom ones - "controller": "test-etcd-override", // Default labels are applied last, so they override custom ones + "app": "test-etcd-override", + "controller": "test-etcd-override", "environment": "staging", }, }, @@ -669,38 +568,32 @@ func TestCreateOrPatchStatefulSetWithPodLabels(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Create a fake client for each test case to avoid interference - fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() - - // Create an EtcdCluster instance ec := &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: tt.etcdClusterName, - Namespace: "default", - }, + ObjectMeta: metav1.ObjectMeta{Name: tt.clusterName, Namespace: "default", UID: "1"}, Spec: ecv1alpha1.EtcdClusterSpec{ Size: 3, Version: "3.5.17", PodTemplate: tt.podTemplate, }, } + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() - err := createOrPatchStatefulSet(ctx, logger, ec, fakeClient, 3, scheme) - assert.NoError(t, err) - - // Verify that the StatefulSet was created with correct labels - sts := &appsv1.StatefulSet{} - err = fakeClient.Get(ctx, client.ObjectKey{Name: tt.etcdClusterName, Namespace: "default"}, sts) - assert.NoError(t, err) - // Check that pod template has the expected labels - assert.Equal(t, tt.expectedLabels, sts.Spec.Template.Labels) - // Verify statefulset is controlled by EtcdCluster - require.Len(t, sts.OwnerReferences, 1) - require.Equal(t, sts.OwnerReferences[0].Name, ec.Name) + err := createMemberPod(ctx, logger, fakeClient, ec, 0, scheme) + require.NoError(t, err) + + pod := &corev1.Pod{} + require.NoError(t, fakeClient.Get(ctx, client.ObjectKey{Name: tt.clusterName + "-0", Namespace: "default"}, pod)) + assert.Equal(t, tt.expectedLabels, pod.Labels) + require.Len(t, pod.OwnerReferences, 1) + assert.Equal(t, ec.Name, pod.OwnerReferences[0].Name) }) } } +// --------------------------------------------------------------------------- +// createArgs +// --------------------------------------------------------------------------- + func TestCreatingArgs(t *testing.T) { tests := []struct { testName string @@ -786,15 +679,19 @@ func TestCreatingArgs(t *testing.T) { }, }, } + for _, tt := range tests { t.Run(tt.testName, func(t *testing.T) { result := createArgs(tt.clusterName, tt.etcdOptions) assert.Equal(t, tt.expectedResult, result) }) } - } +// --------------------------------------------------------------------------- +// validateEtcdUpgradePath +// --------------------------------------------------------------------------- + func TestValidateEtcdUpgradePath(t *testing.T) { etcdVersions := []semver.Version{ {Major: 3, Minor: 0}, @@ -815,90 +712,27 @@ func TestValidateEtcdUpgradePath(t *testing.T) { canParse bool expectErr bool }{ - { - name: "equal versions", - current: "3.2.0", - target: "3.2.0", - canParse: true, - expectErr: false, - }, - { - name: "valid minor level upgrade", - current: "3.4.0", - target: "3.5.0", - canParse: true, - expectErr: false, - }, - { - name: "valid patch level upgrade", - current: "3.4.0", - target: "3.4.1", - canParse: true, - expectErr: false, - }, - { - name: "invalid current version", - current: "invalid", - target: "3.1.0", - canParse: false, - expectErr: true, - }, - { - name: "invalid target version", - current: "3.1.0", - target: "invalid", - canParse: false, - expectErr: true, - }, - { - name: "minor downgrade not allowed", - current: "3.2.0", - target: "3.1.0", - canParse: true, - expectErr: true, - }, - { - name: "patch downgrade not allowed", - current: "3.5.1", - target: "3.5.0", - canParse: true, - expectErr: true, - }, - { - name: "unknown current version", - current: "3.9.0", - target: "4.0.0", - canParse: true, - expectErr: true, - }, - { - name: "unknown target version", - current: "4.0.0", - target: "4.1.0", - canParse: true, - expectErr: true, - }, - { - name: "invalid upgrade skipping minor", - current: "3.4.0", - target: "3.6.0", - canParse: true, - expectErr: true, - }, + {name: "equal versions", current: "3.2.0", target: "3.2.0", canParse: true, expectErr: false}, + {name: "valid minor level upgrade", current: "3.4.0", target: "3.5.0", canParse: true, expectErr: false}, + {name: "valid patch level upgrade", current: "3.4.0", target: "3.4.1", canParse: true, expectErr: false}, + {name: "invalid current version", current: "invalid", target: "3.1.0", canParse: false, expectErr: true}, + {name: "invalid target version", current: "3.1.0", target: "invalid", canParse: false, expectErr: true}, + {name: "minor downgrade not allowed", current: "3.2.0", target: "3.1.0", canParse: true, expectErr: true}, + {name: "patch downgrade not allowed", current: "3.5.1", target: "3.5.0", canParse: true, expectErr: true}, + {name: "unknown current version", current: "3.9.0", target: "4.0.0", canParse: true, expectErr: true}, + {name: "unknown target version", current: "4.0.0", target: "4.1.0", canParse: true, expectErr: true}, + {name: "invalid upgrade skipping minor", current: "3.4.0", target: "3.6.0", canParse: true, expectErr: true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { canParse, err := validateEtcdUpgradePath(etcdVersions, tt.current, tt.target) - if canParse != tt.canParse { t.Fatalf("expected canParse=%v, got %v", tt.canParse, canParse) } - if tt.expectErr && err == nil { t.Fatalf("expected error, got nil") } - if !tt.expectErr && err != nil { t.Fatalf("did not expect error, got %v", err) } @@ -906,6 +740,10 @@ func TestValidateEtcdUpgradePath(t *testing.T) { } } +// --------------------------------------------------------------------------- +// Certificate config helpers +// --------------------------------------------------------------------------- + func TestCreateAutoCertificateConfig(t *testing.T) { tests := []struct { name string @@ -916,10 +754,7 @@ func TestCreateAutoCertificateConfig(t *testing.T) { { name: "auto config with all fields set", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-namespace", - }, + ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}, Spec: ecv1alpha1.EtcdClusterSpec{ TLS: &ecv1alpha1.TLSCertificate{ Provider: string(certificate.Auto), @@ -928,7 +763,7 @@ func TestCreateAutoCertificateConfig(t *testing.T) { CommonConfig: ecv1alpha1.CommonConfig{ CommonName: "custom.example.com", Organization: []string{"Test Org"}, - ValidityDuration: "720h", // 30 days + ValidityDuration: "720h", AltNames: ecv1alpha1.AltNames{ DNSNames: []string{"custom1.example.com", "custom2.example.com"}, }, @@ -941,7 +776,7 @@ func TestCreateAutoCertificateConfig(t *testing.T) { expected: &certInterface.Config{ CommonName: "custom.example.com", Organization: []string{"Test Org"}, - ValidityDuration: 720 * time.Hour, // 30 days + ValidityDuration: 720 * time.Hour, AltNames: certInterface.AltNames{ DNSNames: []string{"custom1.example.com", "custom2.example.com"}, IPs: make([]net.IP, 2), @@ -950,18 +785,13 @@ func TestCreateAutoCertificateConfig(t *testing.T) { wantErr: false, }, { - name: "auto config with nil AutoCfg - should use defaults", + name: "auto config with nil AutoCfg — uses defaults", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-namespace", - }, + ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}, Spec: ecv1alpha1.EtcdClusterSpec{ TLS: &ecv1alpha1.TLSCertificate{ - Provider: string(certificate.Auto), - ProviderCfg: ecv1alpha1.ProviderConfig{ - AutoCfg: nil, - }, + Provider: string(certificate.Auto), + ProviderCfg: ecv1alpha1.ProviderConfig{AutoCfg: nil}, }, }, }, @@ -983,7 +813,6 @@ func TestCreateAutoCertificateConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result, err := createAutoCertificateConfig(tt.ec) - if tt.wantErr { require.Error(t, err) assert.Nil(t, result) @@ -1010,10 +839,7 @@ func TestCreateCMCertificateConfig(t *testing.T) { { name: "cert-manager config with all fields set", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-namespace", - }, + ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}, Spec: ecv1alpha1.EtcdClusterSpec{ TLS: &ecv1alpha1.TLSCertificate{ Provider: string(certificate.CertManager), @@ -1022,7 +848,7 @@ func TestCreateCMCertificateConfig(t *testing.T) { CommonConfig: ecv1alpha1.CommonConfig{ CommonName: "cm.example.com", Organization: []string{"CM Org"}, - ValidityDuration: "1440h", // 60 days + ValidityDuration: "1440h", AltNames: ecv1alpha1.AltNames{ DNSNames: []string{"cm1.example.com", "cm2.example.com"}, }, @@ -1037,7 +863,7 @@ func TestCreateCMCertificateConfig(t *testing.T) { expected: &certInterface.Config{ CommonName: "cm.example.com", Organization: []string{"CM Org"}, - ValidityDuration: 1440 * time.Hour, // 60 days + ValidityDuration: 1440 * time.Hour, AltNames: certInterface.AltNames{ DNSNames: []string{"cm1.example.com", "cm2.example.com"}, IPs: make([]net.IP, 2), @@ -1052,16 +878,11 @@ func TestCreateCMCertificateConfig(t *testing.T) { { name: "cert-manager config with nil CertManagerCfg", ec: &ecv1alpha1.EtcdCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-namespace", - }, + ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}, Spec: ecv1alpha1.EtcdClusterSpec{ TLS: &ecv1alpha1.TLSCertificate{ - Provider: string(certificate.CertManager), - ProviderCfg: ecv1alpha1.ProviderConfig{ - CertManagerCfg: nil, - }, + Provider: string(certificate.CertManager), + ProviderCfg: ecv1alpha1.ProviderConfig{CertManagerCfg: nil}, }, }, }, @@ -1073,7 +894,6 @@ func TestCreateCMCertificateConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result, err := createCMCertificateConfig(tt.ec) - if tt.wantErr { require.Error(t, err) assert.Nil(t, result)