diff --git a/controllers/add_process_groups.go b/controllers/add_process_groups.go index aa8f60f08..9feeffaf7 100644 --- a/controllers/add_process_groups.go +++ b/controllers/add_process_groups.go @@ -35,7 +35,7 @@ type addProcessGroups struct{} // reconcile runs the reconciler's work. func (a addProcessGroups) reconcile( - ctx context.Context, + _ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, @@ -64,7 +64,6 @@ func (a addProcessGroups) reconcile( logger.Error(err, "Error getting exclusion list") } - hasNewProcessGroups := false for _, processClass := range fdbv1beta2.ProcessClasses { desiredCount := desiredCounts[processClass] if desiredCount < 0 { @@ -80,7 +79,6 @@ func (a addProcessGroups) reconcile( processGroupIDs[processClass] = map[int]bool{} } - hasNewProcessGroups = true logger.Info( "Adding new Process Groups", "processClass", @@ -120,13 +118,6 @@ func (a addProcessGroups) reconcile( } } - if hasNewProcessGroups { - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err} - } - } - if getLocalitiesErr != nil { return &requeue{curError: getLocalitiesErr, delayedRequeue: true} } diff --git a/controllers/add_process_groups_test.go b/controllers/add_process_groups_test.go index e6b1285c4..41c312c5a 100644 --- a/controllers/add_process_groups_test.go +++ b/controllers/add_process_groups_test.go @@ -71,8 +71,6 @@ var _ = Describe("add_process_groups", func() { Expect(requeue.curError).NotTo(HaveOccurred()) } - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) newProcessCounts = fdbv1beta2.CreateProcessCountsFromProcessGroupStatus( cluster.Status.ProcessGroups, true, diff --git a/controllers/backup_controller.go b/controllers/backup_controller.go index 3a7afb869..d33e78172 100644 --- a/controllers/backup_controller.go +++ b/controllers/backup_controller.go @@ -111,7 +111,7 @@ func (r *FoundationDBBackupReconciler) Reconcile( continue } - return processRequeue(req, subReconciler, backup, r.Recorder, backupLog) + return processResult(processRequeue(req, subReconciler, backup, r.Recorder, backupLog)) } if backup.Status.Generations.Reconciled < originalGeneration { diff --git a/controllers/change_coordinators.go b/controllers/change_coordinators.go index 51aa50333..69031e206 100644 --- a/controllers/change_coordinators.go +++ b/controllers/change_coordinators.go @@ -39,7 +39,7 @@ type changeCoordinators struct{} // reconcile runs the reconciler's work. func (c changeCoordinators) reconcile( - ctx context.Context, + _ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, @@ -130,10 +130,5 @@ func (c changeCoordinators) reconcile( // Reset the SecondsSinceLastRecovered sine the operator just changed the coordinators, which will cause a recovery. status.Cluster.RecoveryState.SecondsSinceLastRecovered = 0.0 - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err, delayedRequeue: true} - } - return nil } diff --git a/controllers/choose_removals.go b/controllers/choose_removals.go index c2d1b0c83..a25eb863f 100644 --- a/controllers/choose_removals.go +++ b/controllers/choose_removals.go @@ -38,7 +38,7 @@ type chooseRemovals struct{} // reconcile runs the reconciler's work. func (c chooseRemovals) reconcile( - ctx context.Context, + _ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, @@ -151,10 +151,6 @@ func (c chooseRemovals) reconcile( processGroup.MarkForRemoval() } } - err := r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err, delayedRequeue: true} - } } return nil diff --git a/controllers/choose_removals_test.go b/controllers/choose_removals_test.go index c7011bb9b..84b31a80d 100644 --- a/controllers/choose_removals_test.go +++ b/controllers/choose_removals_test.go @@ -35,7 +35,6 @@ import ( var _ = Describe("choose_removals", func() { var cluster *fdbv1beta2.FoundationDBCluster var adminClient *mock.AdminClient - var err error var requeue *requeue var removals []fdbv1beta2.ProcessGroupID @@ -63,9 +62,6 @@ var _ = Describe("choose_removals", func() { nil, globalControllerLogger, ) - Expect(err).NotTo(HaveOccurred()) - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) removals = nil for _, processGroup := range cluster.Status.ProcessGroups { diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go index 83f7023c9..46d6016c4 100644 --- a/controllers/cluster_controller.go +++ b/controllers/cluster_controller.go @@ -30,6 +30,7 @@ import ( "syscall" "time" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/utils/ptr" "github.com/FoundationDB/fdb-kubernetes-operator/v2/pkg/fdbadminclient" @@ -180,15 +181,23 @@ func (r *FoundationDBClusterReconciler) Reconcile( ctx context.Context, request ctrl.Request, ) (ctrl.Result, error) { + return processResult(r.doReconcile(ctx, request)) +} + +// doReconcile will run the reconcile logic for the FoundationDBClusterReconciler. +func (r *FoundationDBClusterReconciler) doReconcile( + ctx context.Context, + request ctrl.Request, +) (*ctrl.Result, error) { cluster := &fdbv1beta2.FoundationDBCluster{} err := r.Get(ctx, request.NamespacedName, cluster) if err != nil { if k8serrors.IsNotFound(err) { - return ctrl.Result{}, nil + return nil, nil } // Error reading the object - requeue the request. - return ctrl.Result{}, err + return nil, err } clusterLog := globalControllerLogger.WithValues( @@ -202,9 +211,28 @@ func (r *FoundationDBClusterReconciler) Reconcile( cacheStatus := cluster.CacheDatabaseStatusForReconciliation( r.CacheDatabaseStatusForReconciliationDefault, ) - // Printout the duration of the reconciliation, independent if the reconciliation was successful or had an error. + + originalStatus := cluster.Status.DeepCopy() startTime := time.Now() + result := &ctrl.Result{} defer func() { + // If the cluster.Status has changed compared to the original Status, we have to update the status. + // See: https://github.com/kubernetes-sigs/kubebuilder/issues/592 + // If we use the default reflect.DeepEqual method it will be recreating the clusterStatus multiple times + // because the pointers are different. + if !equality.Semantic.DeepEqual(cluster.Status, *originalStatus) { + clusterLog.Info("cluster status was changed, will be updating the cluster status") + err = r.updateOrApply(ctx, cluster) + if err != nil { + clusterLog.Error(err, "Error updating cluster clusterStatus") + // If no requeue is planned, we should target a requeue to ensure that the status will be updated. + if result.IsZero() || result.RequeueAfter == 0 { + result.RequeueAfter = 2 * time.Second + } + } + } + + // Printout the duration of the reconciliation, independent if the reconciliation was successful or had an error. clusterLog.Info( "Reconciliation run finished", "duration_seconds", @@ -217,33 +245,33 @@ func (r *FoundationDBClusterReconciler) Reconcile( if cluster.Spec.Skip { clusterLog.Info("Skipping cluster with skip value true", "skip", cluster.Spec.Skip) // Don't requeue - return ctrl.Result{}, nil + return nil, nil } err = internal.NormalizeClusterSpec(cluster, r.DeprecationOptions) if err != nil { - return ctrl.Result{}, err + return nil, nil } err = cluster.Validate() if err != nil { r.Recorder.Event(cluster, corev1.EventTypeWarning, "ClusterSpec not valid", err.Error()) - return ctrl.Result{}, fmt.Errorf("ClusterSpec is not valid: %w", err) + return nil, fmt.Errorf("ClusterSpec is not valid: %w", err) } adminClient, err := r.getAdminClient(clusterLog, cluster) if err != nil { - return ctrl.Result{}, err + return nil, err } defer func() { _ = adminClient.Close() }() supportedVersion, err := adminClient.VersionSupported(cluster.Spec.Version) if err != nil { - return ctrl.Result{}, err + return nil, err } if !supportedVersion { - return ctrl.Result{}, fmt.Errorf("version %s is not supported", cluster.Spec.Version) + return nil, fmt.Errorf("version %s is not supported", cluster.Spec.Version) } // When using DNS entries in the cluster file, we want to make sure to create pods if required before doing any @@ -321,13 +349,13 @@ func (r *FoundationDBClusterReconciler) Reconcile( process, err := os.FindProcess(os.Getpid()) if err != nil { fmt.Printf("Error finding process: %v\n", err) - return ctrl.Result{RequeueAfter: 5 * time.Second}, err + return &ctrl.Result{RequeueAfter: 5 * time.Second}, err } err = process.Signal(syscall.SIGTERM) if err != nil { fmt.Printf("Error sending signal: %v\n", err) - return ctrl.Result{RequeueAfter: 5 * time.Second}, err + return &ctrl.Result{RequeueAfter: 5 * time.Second}, err } } } @@ -384,7 +412,7 @@ func (r *FoundationDBClusterReconciler) Reconcile( delayedRequeueDuration = 2 * time.Second } - return ctrl.Result{RequeueAfter: delayedRequeueDuration}, nil + return &ctrl.Result{RequeueAfter: delayedRequeueDuration}, nil } clusterLog.Info("Reconciliation complete", "generation", cluster.Status.Generations.Reconciled) @@ -395,7 +423,7 @@ func (r *FoundationDBClusterReconciler) Reconcile( fmt.Sprintf("Reconciled generation %d", cluster.Status.Generations.Reconciled), ) - return ctrl.Result{}, nil + return result, nil } // runClusterSubReconciler will start the subReconciler and will log the duration of the subReconciler. diff --git a/controllers/controllers.go b/controllers/controllers.go index a6ac2b79b..dfb834902 100644 --- a/controllers/controllers.go +++ b/controllers/controllers.go @@ -64,7 +64,7 @@ func processRequeue( object runtime.Object, recorder record.EventRecorder, logger logr.Logger, -) (ctrl.Result, error) { +) (*ctrl.Result, error) { curLog := logger.WithValues( "reconciler", fmt.Sprintf("%T", subReconciler), @@ -90,9 +90,23 @@ func processRequeue( recorder.Event(object, corev1.EventTypeNormal, "ReconciliationTerminatedEarly", requeue.message) if err != nil { curLog.Error(err, "Error in reconciliation") - return ctrl.Result{}, err + return nil, err } curLog.Info("Reconciliation terminated early", "message", requeue.message) - return ctrl.Result{RequeueAfter: requeue.delay}, nil + return &ctrl.Result{RequeueAfter: requeue.delay}, nil +} + +// processResult will return a ctrl.Result and error based on the input, e.g. if the result is nil, it will ensure +// that an empty ctrl.Result is returned. +func processResult(result *ctrl.Result, err error) (ctrl.Result, error) { + if err != nil { + return ctrl.Result{}, err + } + + if result.IsZero() { + return ctrl.Result{}, nil + } + + return *result, nil } diff --git a/controllers/exclude_processes.go b/controllers/exclude_processes.go index 2dc9e9b67..6feb732b3 100644 --- a/controllers/exclude_processes.go +++ b/controllers/exclude_processes.go @@ -47,7 +47,7 @@ type excludeEntry struct { // reconcile runs the reconciler's work. func (e excludeProcesses) reconcile( - ctx context.Context, + _ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, @@ -336,11 +336,6 @@ func (e excludeProcesses) reconcile( if coordinatorErr != nil { return &requeue{curError: coordinatorErr, delayedRequeue: true} } - - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err, delayedRequeue: true} - } } // If not all processes are excluded, ensure we requeue after 5 minutes. diff --git a/controllers/exclude_processes_test.go b/controllers/exclude_processes_test.go index 173d634a9..ae7d83ebb 100644 --- a/controllers/exclude_processes_test.go +++ b/controllers/exclude_processes_test.go @@ -1730,8 +1730,6 @@ var _ = Describe("exclude_processes", func() { Expect(req).To(BeNil()) Expect(adminClient.ExcludedAddresses).To(HaveLen(1)) - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) Expect(initialConnectionString).NotTo(Equal(cluster.Status.ConnectionString)) }) @@ -1749,8 +1747,6 @@ var _ = Describe("exclude_processes", func() { Expect(req).To(BeNil()) Expect(adminClient.ExcludedAddresses).To(HaveLen(1)) - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) Expect( initialConnectionString, ).NotTo(Equal(cluster.Status.ConnectionString)) @@ -1918,9 +1914,6 @@ var _ = Describe("exclude_processes", func() { Expect(req).To(BeNil()) Expect(adminClient.ExcludedAddresses).To(HaveLen(1)) - - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) Expect(initialConnectionString).NotTo(Equal(cluster.Status.ConnectionString)) }) @@ -1937,9 +1930,6 @@ var _ = Describe("exclude_processes", func() { Expect(req).To(BeNil()) Expect(adminClient.ExcludedAddresses).To(HaveLen(1)) - - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) Expect( initialConnectionString, ).NotTo(Equal(cluster.Status.ConnectionString)) diff --git a/controllers/generate_initial_cluster_file.go b/controllers/generate_initial_cluster_file.go index 31556b191..be7107076 100644 --- a/controllers/generate_initial_cluster_file.go +++ b/controllers/generate_initial_cluster_file.go @@ -206,10 +206,6 @@ func (g generateInitialClusterFile) reconcile( } cluster.Status.ConnectionString = connectionString.String() - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err} - } return nil } diff --git a/controllers/remove_process_groups.go b/controllers/remove_process_groups.go index 93574d5af..2adab940f 100644 --- a/controllers/remove_process_groups.go +++ b/controllers/remove_process_groups.go @@ -81,7 +81,7 @@ func (u removeProcessGroups) reconcile( } coordinators := fdbstatus.GetCoordinatorsFromStatus(status) - allExcluded, newExclusions, processGroupsToRemove := r.getProcessGroupsToRemove( + allExcluded, _, processGroupsToRemove := r.getProcessGroupsToRemove( logger, cluster, remainingMap, @@ -99,14 +99,6 @@ func (u removeProcessGroups) reconcile( return nil } - // Update the cluster to reflect the new exclusions in our status - if newExclusions { - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err} - } - } - // Ensure we only remove process groups that are not blocked to be removed by the buggify config. processGroupsToRemove = buggify.FilterBlockedRemovals(cluster, processGroupsToRemove) // If all of the process groups are filtered out we can stop doing the next steps. @@ -352,7 +344,7 @@ func confirmRemoval( } func includeProcessGroup( - ctx context.Context, + _ context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, status *fdbv1beta2.FoundationDBStatus, @@ -366,11 +358,9 @@ func includeProcessGroup( return err } - // Update here for ready inclusion --> Check here var readyForInclusion map[fdbv1beta2.ProcessGroupID]time.Time readyForInclusionUpdates := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.UpdateAction{} if cluster.GetSynchronizationMode() == fdbv1beta2.SynchronizationModeGlobal { - var err error readyForInclusion, err = adminClient.GetReadyForInclusion(cluster.Spec.ProcessGroupIDPrefix) if err != nil { return err @@ -398,7 +388,6 @@ func includeProcessGroup( // We can update the process groups at this stage, as no other processes must be included. if len(cluster.Status.ProcessGroups) != len(newProcessGroups) && len(newProcessGroups) > 0 { cluster.Status.ProcessGroups = newProcessGroups - return r.updateOrApply(ctx, cluster) } return nil @@ -467,7 +456,7 @@ func includeProcessGroup( // Update the process group list and remove all removed and included process groups. cluster.Status.ProcessGroups = newProcessGroups - return r.updateOrApply(ctx, cluster) + return nil } // filterAddressesToInclude will remove all addresses that are part of the fdbProcessesToInclude slice but are not excluded in FDB itself. diff --git a/controllers/replace_failed_process_groups.go b/controllers/replace_failed_process_groups.go index a28a7ec74..42d36ec4c 100644 --- a/controllers/replace_failed_process_groups.go +++ b/controllers/replace_failed_process_groups.go @@ -37,7 +37,7 @@ type replaceFailedProcessGroups struct{} // return non-nil requeue if a process has been replaced func (c replaceFailedProcessGroups) reconcile( - ctx context.Context, + _ context.Context, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, status *fdbv1beta2.FoundationDBStatus, @@ -93,11 +93,6 @@ func (c replaceFailedProcessGroups) reconcile( ) // If the reconciler replaced at least one process group we want to update the status and requeue. if hasReplacement { - err := r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err} - } - return &requeue{message: "Removals have been updated in the cluster status"} } diff --git a/controllers/replace_misconfigured_process_groups.go b/controllers/replace_misconfigured_process_groups.go index afc54aa71..99420c06e 100644 --- a/controllers/replace_misconfigured_process_groups.go +++ b/controllers/replace_misconfigured_process_groups.go @@ -70,7 +70,7 @@ func (c replaceMisconfiguredProcessGroups) reconcile( } } - hasReplacements, err := replacements.ReplaceMisconfiguredProcessGroups( + _, err := replacements.ReplaceMisconfiguredProcessGroups( ctx, r.PodLifecycleManager, r, @@ -82,14 +82,5 @@ func (c replaceMisconfiguredProcessGroups) reconcile( return &requeue{curError: err} } - if hasReplacements { - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err} - } - - logger.Info("Replacements have been updated in the cluster status") - } - return nil } diff --git a/controllers/restore_controller.go b/controllers/restore_controller.go index 6ef949a4b..388e3a6df 100644 --- a/controllers/restore_controller.go +++ b/controllers/restore_controller.go @@ -89,7 +89,8 @@ func (r *FoundationDBRestoreReconciler) Reconcile( if req == nil { continue } - return processRequeue(req, subReconciler, restore, r.Recorder, restoreLog) + + return processResult(processRequeue(req, subReconciler, restore, r.Recorder, restoreLog)) } if restore.Status.State != fdbv1beta2.CompletedFoundationDBRestoreState { diff --git a/controllers/update_pod_config.go b/controllers/update_pod_config.go index 9db67dd73..dff40cffb 100644 --- a/controllers/update_pod_config.go +++ b/controllers/update_pod_config.go @@ -29,8 +29,6 @@ import ( "github.com/go-logr/logr" - "k8s.io/apimachinery/pkg/api/equality" - "github.com/FoundationDB/fdb-kubernetes-operator/v2/pkg/podmanager" "github.com/FoundationDB/fdb-kubernetes-operator/v2/internal" @@ -55,7 +53,6 @@ func (updatePodConfig) reconcile( return &requeue{curError: err} } - originalStatus := cluster.Status.DeepCopy() allSynced := true delayedRequeue := true var errs []error @@ -165,13 +162,6 @@ func (updatePodConfig) reconcile( processGroup.UpdateCondition(fdbv1beta2.SidecarUnreachable, false) } - if !equality.Semantic.DeepEqual(cluster.Status, *originalStatus) { - err = r.updateOrApply(ctx, cluster) - if err != nil { - return &requeue{curError: err} - } - } - // If any error has happened requeue. // We don't provide an error here since we log all errors above. if len(errs) > 0 { diff --git a/controllers/update_status.go b/controllers/update_status.go index 21bf06552..214d3e8cd 100644 --- a/controllers/update_status.go +++ b/controllers/update_status.go @@ -56,7 +56,6 @@ func (c updateStatus) reconcile( databaseStatus *fdbv1beta2.FoundationDBStatus, logger logr.Logger, ) *requeue { - originalStatus := cluster.Status.DeepCopy() clusterStatus := fdbv1beta2.FoundationDBClusterStatus{} clusterStatus.Generations.Reconciled = cluster.Status.Generations.Reconciled clusterStatus.ProcessGroups = cluster.Status.ProcessGroups @@ -71,6 +70,12 @@ func (c updateStatus) reconcile( var err error databaseStatus, err = r.getStatusFromClusterOrDummyStatus(logger, cluster) if err != nil { + // If the machine-readable status cannot be fetched we have to update it here. + clusterStatus.Health.Available = false + clusterStatus.Health.Healthy = false + clusterStatus.Health.FullReplication = false + clusterStatus.Health.DataMovementPriority = 0 + return &requeue{ curError: fmt.Errorf("update_status error fetching status: %w", err), delayedRequeue: true, @@ -330,17 +335,6 @@ func (c updateStatus) reconcile( } } - // See: https://github.com/kubernetes-sigs/kubebuilder/issues/592 - // If we use the default reflect.DeepEqual method it will be recreating the - // clusterStatus multiple times because the pointers are different. - if !equality.Semantic.DeepEqual(cluster.Status, *originalStatus) { - err = r.updateOrApply(ctx, cluster) - if err != nil { - logger.Error(err, "Error updating cluster clusterStatus") - return &requeue{curError: err} - } - } - // If the cluster is not reconciled, make sure we trigger a new reconciliation loop. if !reconciled { return &requeue{ diff --git a/controllers/update_status_test.go b/controllers/update_status_test.go index ab4f7466a..331136214 100644 --- a/controllers/update_status_test.go +++ b/controllers/update_status_test.go @@ -1374,7 +1374,6 @@ var _ = Describe("update_status", func() { Describe("Reconcile", func() { var cluster *fdbv1beta2.FoundationDBCluster - var err error var requeue *requeue BeforeEach(func() { @@ -1404,8 +1403,6 @@ var _ = Describe("update_status", func() { if requeue != nil { Expect(requeue.curError).NotTo(HaveOccurred()) } - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) }) It("should mark the cluster as reconciled", func() { @@ -1494,9 +1491,6 @@ var _ = Describe("update_status", func() { Expect(adminClient.KillProcesses(addresses)).NotTo(HaveOccurred()) adminClient.VersionProcessGroups = versions - - _, err = reloadCluster(cluster) - Expect(err).NotTo(HaveOccurred()) }) It("should update the incorrect sidecar image condition", func() {