diff --git a/api/v4/zz_generated.deepcopy.go b/api/v4/zz_generated.deepcopy.go
index c698411c7..722cbf214 100644
--- a/api/v4/zz_generated.deepcopy.go
+++ b/api/v4/zz_generated.deepcopy.go
@@ -1588,11 +1588,6 @@ func (in *PostgresDatabaseStatus) DeepCopyInto(out *PostgresDatabaseStatus) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
-	if in.ObservedGeneration != nil {
-		in, out := &in.ObservedGeneration, &out.ObservedGeneration
-		*out = new(int64)
-		**out = **in
-	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresDatabaseStatus.
diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go
index 8a480abc2..de3bf8646 100644
--- a/internal/controller/postgresdatabase_controller.go
+++ b/internal/controller/postgresdatabase_controller.go
@@ -18,7 +18,6 @@ package controller
 
 import (
 	"context"
-	"reflect"
 
 	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
 	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
@@ -29,6 +28,7 @@ import (
 	sharedreconcile "github.com/splunk/splunk-operator/pkg/postgresql/shared/reconcile"
 
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/equality"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
@@ -105,31 +105,45 @@ func (r *PostgresDatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		return err
 	}
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates(
-			predicate.Or(
-				predicate.GenerationChangedPredicate{},
-				predicate.Funcs{
-					UpdateFunc: func(e event.UpdateEvent) bool {
-						return !reflect.DeepEqual(
-							e.ObjectOld.GetFinalizers(),
-							e.ObjectNew.GetFinalizers(),
-						)
-					},
-				},
-			),
-		)).
-		Owns(&cnpgv1.Database{}, builder.WithPredicates(predicate.Funcs{
-			CreateFunc: func(event.CreateEvent) bool { return false },
-		})).
-		Owns(&corev1.Secret{}, builder.WithPredicates(predicate.Funcs{
-			CreateFunc: func(event.CreateEvent) bool { return false },
-		})).
-		Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.Funcs{
-			CreateFunc: func(event.CreateEvent) bool { return false },
-		})).
+		WithEventFilter(predicate.Funcs{GenericFunc: func(event.GenericEvent) bool { return false }}).
+		For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates(postgresDatabasePredicator())).
+		Owns(&cnpgv1.Database{}, builder.WithPredicates(postgresDatabaseCNPGDatabasePredicator())).
+		Owns(&corev1.Secret{}, builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})).
+		Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})).
 		Named("postgresdatabase").
 		WithOptions(controller.Options{
 			MaxConcurrentReconciles: DatabaseTotalWorker,
 		}).
 		Complete(r)
 }
+
+func postgresDatabasePredicator() predicate.Predicate {
+	return predicate.Or(
+		predicate.GenerationChangedPredicate{},
+		predicate.Funcs{
+			UpdateFunc: func(e event.UpdateEvent) bool {
+				if !equality.Semantic.DeepEqual(e.ObjectOld.GetDeletionTimestamp(), e.ObjectNew.GetDeletionTimestamp()) {
+					return true
+				}
+				return !equality.Semantic.DeepEqual(e.ObjectOld.GetFinalizers(), e.ObjectNew.GetFinalizers())
+			},
+		},
+	)
+}
+
+func postgresDatabaseCNPGDatabasePredicator() predicate.Predicate {
+	return predicate.Or(
+		predicate.GenerationChangedPredicate{},
+		predicate.Funcs{
+			UpdateFunc: func(e event.UpdateEvent) bool {
+				oldObj, okOld := e.ObjectOld.(*cnpgv1.Database)
+				newObj, okNew := e.ObjectNew.(*cnpgv1.Database)
+				if !okOld || !okNew {
+					return true
+				}
+				return !equality.Semantic.DeepEqual(oldObj.Status.Applied, newObj.Status.Applied) ||
+					ownerReferencesChanged(oldObj, newObj)
+			},
+		},
+	)
+}
diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go
index 44143919f..012f02fad 100644
--- a/internal/controller/postgresdatabase_controller_test.go
+++ b/internal/controller/postgresdatabase_controller_test.go
@@ -36,6 +36,8 @@ import (
 	"k8s.io/client-go/tools/record"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 )
 
@@ -317,7 +319,9 @@ func seedConflictScenario(ctx context.Context, namespace, resourceName, clusterN
 }
 
 func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, clusterName string, postgresDB *enterprisev4.PostgresDatabase, dbNames ...string) {
+	ownerReferences := ownedByPostgresDatabase(postgresDB)
+
 	for _, dbName := range dbNames {
 		Expect(k8sClient.Create(ctx, &corev1.Secret{
 			ObjectMeta: metav1.ObjectMeta{
@@ -325,6 +329,10 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl
 				Namespace:       namespace,
 				OwnerReferences: ownerReferences,
 			},
+			Data: map[string][]byte{
+				"username": []byte(adminRoleNameForTest(dbName)),
+				"password": []byte("test-password"),
+			},
 		})).To(Succeed())
 
 		Expect(k8sClient.Create(ctx, &corev1.Secret{
@@ -333,6 +341,10 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl
 				Namespace:       namespace,
 				OwnerReferences: ownerReferences,
 			},
+			Data: map[string][]byte{
+				"username": []byte(rwRoleNameForTest(dbName)),
+				"password": []byte("test-password"),
+			},
 		})).To(Succeed())
 
 		Expect(k8sClient.Create(ctx, &corev1.ConfigMap{
@@ -392,14 +404,41 @@ func expectStatusCondition(current *enterprisev4.PostgresDatabase, conditionType
 
 func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) {
 	expectStatusPhase(current, phaseReady)
-	Expect(current.Status.ObservedGeneration).NotTo(BeNil())
-	Expect(*current.Status.ObservedGeneration).To(Equal(generation))
 	Expect(current.Status.Databases).To(HaveLen(1))
 	Expect(current.Status.Databases[0].Name).To(Equal(expectedDatabase.Name))
 	Expect(current.Status.Databases[0].Ready).To(Equal(expectedDatabase.Ready))
 	Expect(current.Status.Databases[0].AdminUserSecretRef).NotTo(BeNil())
 	Expect(current.Status.Databases[0].RWUserSecretRef).NotTo(BeNil())
 	Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil())
+	Expect(current.Status.ObservedGeneration).NotTo(BeNil())
+	Expect(*current.Status.ObservedGeneration).To(Equal(generation))
+}
+
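Note: postgresDatabaseCNPGDatabasePredicator above calls an ownerReferencesChanged helper that this diff does not include. If that helper is not already defined elsewhere in the controller package, a minimal sketch consistent with how the predicate uses it could look like the following (the name, signature, and placement are assumptions, not part of this change; metav1 and equality are already imported in postgresdatabase_controller.go per the import hunk above):

    // ownerReferencesChanged reports whether the owner references of an object differ
    // between two revisions, e.g. when a controller reference was stripped from an
    // owned CNPG Database and the PostgresDatabase should reconcile to re-adopt it.
    // Sketch only; assumes it lives next to the predicate constructors above.
    func ownerReferencesChanged(oldObj, newObj metav1.Object) bool {
    	return !equality.Semantic.DeepEqual(oldObj.GetOwnerReferences(), newObj.GetOwnerReferences())
    }

Like the other predicate branches, this compares with equality.Semantic.DeepEqual rather than reflect.DeepEqual, which this change removes from the controller's imports.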
+func reconcilePostgresDatabaseToReady(ctx context.Context, scenario readyClusterScenario, poolerEnabled bool) *enterprisev4.PostgresDatabase { + seedReadyClusterScenario(ctx, scenario, poolerEnabled) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + cnpgDatabase := expectCNPGDatabaseCreated(ctx, scenario, current) + markCNPGDatabaseApplied(ctx, cnpgDatabase) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + return current } var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { @@ -504,6 +543,164 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { }) }) + When("owned resource drift occurs after the PostgresDatabase is ready", func() { + It("repairs configmap content drift", func() { + scenario := newReadyClusterScenario(namespace, "configmap-drift", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + configMap.Data["rw-host"] = "unexpected.example" + Expect(k8sClient.Update(ctx, configMap)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMap.Name, Namespace: configMap.Namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + Expect(metav1.IsControlledBy(configMap, owner)).To(BeTrue()) + }) + + It("recreates a deleted configmap", func() { + scenario := newReadyClusterScenario(namespace, "configmap-delete", "tenant-cluster", "tenant-cnpg", "appdb") + reconcilePostgresDatabaseToReady(ctx, scenario, false) + + configMapName := fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: configMapName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + }) + + It("does not recreate a deleted managed user secret", func() { + scenario := 
newReadyClusterScenario(namespace, "secret-delete", "tenant-cluster", "tenant-cnpg", "appdb") + reconcilePostgresDatabaseToReady(ctx, scenario, false) + + secretName := fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectStatusPhase(current, "Provisioning") + expectStatusCondition(current, "SecretsReady", metav1.ConditionFalse, "SecretsDriftDetected") + + missing := &corev1.Secret{} + err = k8sClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: scenario.namespace}, missing) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + It("re-attaches ownership when a managed user secret loses its owner reference", func() { + scenario := newReadyClusterScenario(namespace, "secret-adopt", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, secret)).To(Succeed()) + secret.OwnerReferences = nil + Expect(k8sClient.Update(ctx, secret)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secret.Name, Namespace: secret.Namespace}, secret)).To(Succeed()) + Expect(metav1.IsControlledBy(secret, owner)).To(BeTrue()) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + }) + + It("creates secrets and configmaps for a newly added database while preserving existing ones", func() { + scenario := newReadyClusterScenario(namespace, "new-database", "tenant-cluster", "tenant-cnpg", "appdb") + current := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + current.Spec.Databases = append(current.Spec.Databases, enterprisev4.DatabaseDefinition{Name: "analytics"}) + Expect(k8sClient.Update(ctx, current)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + for _, secretName := range []string{ + fmt.Sprintf("%s-analytics-admin", scenario.resourceName), + fmt.Sprintf("%s-analytics-rw", scenario.resourceName), + } { + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: scenario.namespace}, secret)).To(Succeed()) + } + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-analytics-config", scenario.resourceName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("dbname", "analytics")) + + existingSecret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, existingSecret)).To(Succeed()) + }) + }) + + When("postgresdatabase secondary-resource predicates run", func() { + It("treats cnpg database applied-state, create, and delete changes as drift triggers", func() { + pred := 
postgresDatabaseCNPGDatabasePredicator() + + oldApplied := true + newApplied := false + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &cnpgv1.Database{Status: cnpgv1.DatabaseStatus{Applied: &oldApplied}}, + ObjectNew: &cnpgv1.Database{Status: cnpgv1.DatabaseStatus{Applied: &newApplied}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + + It("ignores cnpg database updates that do not change readiness or ownership", func() { + pred := postgresDatabaseCNPGDatabasePredicator() + + applied := true + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "test"}, + Status: cnpgv1.DatabaseStatus{Applied: &applied}, + }, + ObjectNew: &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "test"}, + Status: cnpgv1.DatabaseStatus{Applied: &applied}, + }, + })).To(BeFalse()) + }) + + It("treats secret create, update, and delete events as drift triggers", func() { + pred := predicate.ResourceVersionChangedPredicate{} + + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret", Namespace: "test", ResourceVersion: "1"}}, + ObjectNew: &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret", Namespace: "test", ResourceVersion: "2"}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + + It("treats configmap create, update, and delete events as drift triggers", func() { + pred := predicate.ResourceVersionChangedPredicate{} + + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "config", Namespace: "test", ResourceVersion: "1"}}, + ObjectNew: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "config", Namespace: "test", ResourceVersion: "2"}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + }) + When("role ownership conflicts exist", func() { It("marks the resource failed and stops provisioning dependent resources", func() { resourceName := "conflict-cluster" diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index ba9030f6f..5f42910da 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -43,7 +43,6 @@ import ( func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.Request) (ctrl.Result, error) { c := rc.Client logger := log.FromContext(ctx) - logger.Info("Reconciling PostgresCluster") var cnpgCluster *cnpgv1.Cluster var poolerEnabled bool @@ -66,6 +65,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. logger = logger.WithValues("postgresCluster", postgresCluster.Name) ctx = log.IntoContext(ctx, logger) + logger.Info("Reconciling PostgresCluster") updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { return setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase) @@ -84,7 +84,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
return ctrl.Result{}, errors.Join(err, statusErr) } if postgresCluster.GetDeletionTimestamp() != nil { - logger.Info("Deletion cleanup complete, finalizer removed") + logger.Info("PostgresCluster cleanup completed") return ctrl.Result{}, nil } @@ -92,10 +92,10 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if !controllerutil.ContainsFinalizer(postgresCluster, PostgresClusterFinalizerName) { controllerutil.AddFinalizer(postgresCluster, PostgresClusterFinalizerName) if err := c.Update(ctx, postgresCluster); err != nil { - logger.Error(err, "Failed to add finalizer to PostgresCluster") + logger.Error(err, "Failed to add finalizer") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } - logger.Info("Finalizer added") + logger.Info("Finalizer added successfully") return ctrl.Result{}, nil } @@ -130,7 +130,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) if secretErr != nil { - logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) + logger.Error(secretErr, "Failed to check superuser secret existence", "name", postgresSecretName) rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase) @@ -139,40 +139,40 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if !secretExists { logger.Info("Superuser secret creation started", "name", postgresSecretName) if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { - logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) + logger.Error(err, "Failed to create superuser secret", "name", postgresSecretName) rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase) return ctrl.Result{}, errors.Join(err, statusErr) } if err := c.Status().Update(ctx, postgresCluster); err != nil { - logger.Error(err, "Failed to update status after secret creation") + logger.Error(err, "Failed to persist superuser secret status") return ctrl.Result{}, err } rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName)) - logger.Info("Superuser secret ref persisted to status") + logger.Info("Superuser secret status persisted") } // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). 
hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, rc.Scheme) if ownerRefErr != nil { - logger.Error(ownerRefErr, "Failed to check owner reference on Secret") + logger.Error(ownerRefErr, "Failed to check Secret owner reference") return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) } if secretExists && !hasOwnerRef { - logger.Info("Existing secret linked to PostgresCluster", "name", postgresSecretName) rc.emitNormal(postgresCluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", postgresSecretName)) originalSecret := secret.DeepCopy() if err := ctrl.SetControllerReference(postgresCluster, secret, rc.Scheme); err != nil { return ctrl.Result{}, fmt.Errorf("failed to set controller reference on existing secret: %w", err) } if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { - logger.Error(err, "Failed to patch existing secret with controller reference") + logger.Error(err, "Failed to patch Secret") rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase) return ctrl.Result{}, errors.Join(err, statusErr) } + logger.Info("Secret adopted", "name", postgresSecretName) } if postgresCluster.Status.Resources.SuperUserSecretRef == nil { @@ -208,10 +208,10 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. "CNPG Cluster created", pendingClusterPhase); statusErr != nil { return ctrl.Result{}, statusErr } - logger.Info("CNPG Cluster created, requeueing for status update", "name", postgresCluster.Name) + logger.Info("CNPG Cluster created", "name", postgresCluster.Name) return ctrl.Result{RequeueAfter: retryDelay}, nil case err != nil: - logger.Error(err, "Failed to get CNPG Cluster") + logger.Error(err, "Failed to fetch CNPG Cluster") statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase) return ctrl.Result{}, errors.Join(err, statusErr) @@ -223,7 +223,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { - logger.Info("CNPG Cluster spec drift detected, patch started", "name", cnpgCluster.Name) + logger.Info("CNPG Cluster patch started", "name", cnpgCluster.Name) originalCluster := cnpgCluster.DeepCopy() cnpgCluster.Spec = desiredSpec @@ -240,7 +240,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, statusErr } rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") - logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name) + logger.Info("CNPG Cluster patched", "name", cnpgCluster.Name) return ctrl.Result{RequeueAfter: retryDelay}, nil } } @@ -259,14 +259,14 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) if err != nil { - logger.Error(err, "Failed to check RW pooler existence") + logger.Error(err, "Failed to check CNPG Pooler existence", "type", readWriteEndpoint) statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) return ctrl.Result{}, errors.Join(err, statusErr) } roPoolerExists, err := poolerExists(ctx, c, postgresCluster, readOnlyEndpoint) if err != nil { - logger.Error(err, "Failed to check RO pooler existence") + logger.Error(err, "Failed to check CNPG Pooler existence", "type", readOnlyEndpoint) statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) return ctrl.Result{}, errors.Join(err, statusErr) @@ -286,7 +286,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. case !rwPoolerExists || !roPoolerExists: if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", - "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) + "class", postgresCluster.Spec.Class) statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q", postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase) @@ -299,14 +299,14 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{RequeueAfter: retryDelay}, statusErr } if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { - logger.Error(err, "Failed to reconcile connection pooler") + logger.Error(err, "Failed to reconcile connection poolers") rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase) return ctrl.Result{}, errors.Join(err, statusErr) } rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") - logger.Info("Connection pooler creation started, requeueing") + logger.Info("Connection pooler creation started") if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { return ctrl.Result{}, statusErr @@ -326,7 +326,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. }, roPooler) return rwErr != nil || roErr != nil || !arePoolersReady(rwPooler, roPooler) }(): - logger.Info("Connection Poolers are not ready yet, requeueing") + logger.Info("Connection poolers are not ready yet") statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, "Connection poolers are being provisioned", pendingClusterPhase) return ctrl.Result{RequeueAfter: retryDelay}, statusErr @@ -335,7 +335,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) copy(oldConditions, postgresCluster.Status.Conditions) if err := syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster); err != nil { - logger.Error(err, "Failed to sync pooler status") + logger.Error(err, "Failed to sync connection pooler status") rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase) @@ -395,7 +395,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. oldPhase = *postgresCluster.Status.Phase } if err := syncStatus(ctx, c, rc.Metrics, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to sync status") + logger.Error(err, "Failed to sync PostgresCluster status") return ctrl.Result{}, err } var newPhase string @@ -415,14 +415,14 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. Namespace: postgresCluster.Namespace, }, roPooler) if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) { - logger.Info("Poolers ready, syncing status") + logger.Info("Connection poolers ready, syncing status") poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) copy(poolerOldConditions, postgresCluster.Status.Conditions) _ = syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster) rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions) } } - logger.Info("Reconciliation complete") + logger.Info("PostgresCluster reconciliation completed") return ctrl.Result{}, nil } @@ -569,7 +569,7 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp return nil } - logger.Info("CNPG Cluster roles drift detected, update started", + logger.Info("Managed roles patch started", "currentCount", len(currentRoles), "desiredCount", len(desiredRoles)) originalCluster := cnpgCluster.DeepCopy() @@ -581,7 +581,7 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp if err := c.Patch(ctx, cnpgCluster, client.MergeFrom(originalCluster)); err != nil { return fmt.Errorf("patching CNPG Cluster managed roles: %w", err) } - logger.Info("CNPG Cluster managed roles updated", "roleCount", len(desiredRoles)) + logger.Info("Managed roles patched", "roleCount", len(desiredRoles)) return nil } @@ -806,7 +806,9 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } - metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + if metrics != nil { + metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + } if err := c.Status().Update(ctx, cluster); err != nil { return fmt.Errorf("failed to update PostgresCluster status: %w", err) @@ -918,11 +920,11 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr scheme := rc.Scheme logger := log.FromContext(ctx) if cluster.GetDeletionTimestamp() == nil { - logger.Info("PostgresCluster not marked for deletion, skipping finalizer logic") + logger.Info("PostgresCluster not marked for deletion, skipping finalizer cleanup") return nil } if !controllerutil.ContainsFinalizer(cluster, PostgresClusterFinalizerName) { - logger.Info("Finalizer not present on PostgresCluster, skipping finalizer logic") + logger.Info("Finalizer not present on 
PostgresCluster, skipping finalizer cleanup") return nil } @@ -931,7 +933,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr if err != nil { if apierrors.IsNotFound(err) { cnpgCluster = nil - logger.Info("CNPG cluster not found during cleanup") + logger.Info("CNPG Cluster not found during cleanup") } else { return fmt.Errorf("fetching CNPG cluster: %w", err) } @@ -949,7 +951,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr switch policy { case clusterDeletionPolicyDelete: - logger.Info("ClusterDeletionPolicy 'Delete', CNPG Cluster deletion started") + logger.Info("Cluster deletion policy is Delete") if cnpgCluster != nil { if err := deleteCNPGCluster(ctx, c, cnpgCluster); err != nil { return fmt.Errorf("deleting CNPG Cluster: %w", err) @@ -959,7 +961,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr } case clusterDeletionPolicyRetain: - logger.Info("ClusterDeletionPolicy 'Retain', orphaning CNPG Cluster") + logger.Info("Cluster deletion policy is Retain") if cnpgCluster != nil { originalCNPG := cnpgCluster.DeepCopy() refRemoved, err := removeOwnerRef(scheme, cluster, cnpgCluster) @@ -972,7 +974,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr if err := patchObject(ctx, c, originalCNPG, cnpgCluster, "CNPGCluster"); err != nil { return fmt.Errorf("patching CNPG cluster after removing owner reference: %w", err) } - logger.Info("Removed owner reference from CNPG Cluster") + logger.Info("CNPG Cluster owner reference removed") } // Remove owner reference from the superuser Secret to prevent cascading deletion. @@ -982,7 +984,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr if !apierrors.IsNotFound(err) { return fmt.Errorf("fetching secret during cleanup: %w", err) } - logger.Info("Secret not found, skipping owner reference removal", "secret", secretName) + logger.Info("Secret not found, skipping owner reference removal", "name", secretName) } else { originalSecret := secret.DeepCopy() refRemoved, err := removeOwnerRef(scheme, cluster, secret) @@ -994,12 +996,12 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr return fmt.Errorf("patching Secret after removing owner reference: %w", err) } } - logger.Info("Removed owner reference from Secret") + logger.Info("Secret owner reference removed") } } default: - logger.Info("Unknown ClusterDeletionPolicy", "policy", policy) + logger.Info("Unknown cluster deletion policy", "policy", policy) } controllerutil.RemoveFinalizer(cluster, PostgresClusterFinalizerName) @@ -1011,7 +1013,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr return fmt.Errorf("removing finalizer: %w", err) } rc.emitNormal(cluster, EventCleanupComplete, fmt.Sprintf("Cleanup complete (policy: %s)", policy)) - logger.Info("Finalizer removed, cleanup complete") + logger.Info("Finalizer removed successfully") return nil } diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 362b2939c..f6791ddac 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -30,6 +30,36 @@ import ( // Injected by the controller so the core never imports the pgx adapter directly. 
type NewDBRepoFunc func(ctx context.Context, host, dbName, password string) (DBRepo, error) +type secretReconcileError struct { + message string + reason conditionReasons +} + +type secretMissingPolicy int + +const ( + createSecretIfMissing secretMissingPolicy = iota + reportSecretDriftIfMissing +) + +func (e *secretReconcileError) Error() string { + return e.message +} + +func requeueOnConflict(ctx context.Context, err error, category reconcileConflictCategory, action string) (ctrl.Result, error, bool) { + if !errors.IsConflict(err) { + return ctrl.Result{}, err, false + } + + // Keep the category stable so future metrics or events can aggregate conflict sources. + log.FromContext(ctx).Info( + "Conflict during PostgresDatabase reconciliation, will requeue", + "category", category, + "action", action, + ) + return ctrl.Result{Requeue: true}, nil, true +} + // PostgresDatabaseService is the application service entry point called by the primary adapter (reconciler). // newDBRepo is injected to keep the core free of pgx imports. func PostgresDatabaseService( @@ -42,6 +72,8 @@ func PostgresDatabaseService( logger := log.FromContext(ctx).WithValues("postgresDatabase", postgresDB.Name) ctx = log.IntoContext(ctx, logger) logger.Info("Reconciling PostgresDatabase") + wasReady := postgresDB.Status.Phase != nil && *postgresDB.Status.Phase == string(readyDBPhase) + previouslyProvisionedDatabases := existingDatabaseStatus(postgresDB) updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { return persistStatus(ctx, c, rc.Metrics, postgresDB, conditionType, conditionStatus, reason, message, phase) @@ -50,6 +82,9 @@ func PostgresDatabaseService( // Finalizer: cleanup on deletion, register on creation. if postgresDB.GetDeletionTimestamp() != nil { if err := handleDeletion(ctx, rc, postgresDB); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictDeletion, "handling deletion"); ok { + return result, conflictErr + } logger.Error(err, "Failed to clean up PostgresDatabase") rc.emitWarning(postgresDB, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) return ctrl.Result{}, err @@ -60,16 +95,13 @@ func PostgresDatabaseService( if !controllerutil.ContainsFinalizer(postgresDB, postgresDatabaseFinalizerName) { controllerutil.AddFinalizer(postgresDB, postgresDatabaseFinalizerName) if err := c.Update(ctx, postgresDB); err != nil { - logger.Error(err, "Failed to add finalizer to PostgresDatabase") + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictFinalizer, "adding finalizer"); ok { + return result, conflictErr + } + logger.Error(err, "Failed to add finalizer") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } - logger.Info("Finalizer added") - return ctrl.Result{}, nil - } - - // ObservedGeneration equality means all phases completed on the current spec — nothing to do. 
- if postgresDB.Status.ObservedGeneration != nil && *postgresDB.Status.ObservedGeneration == postgresDB.Generation { - logger.Info("Spec unchanged and all phases complete, skipping") + logger.Info("Finalizer added successfully") return ctrl.Result{}, nil } @@ -79,23 +111,32 @@ func PostgresDatabaseService( if errors.IsNotFound(err) { rc.emitWarning(postgresDB, EventClusterNotFound, fmt.Sprintf("PostgresCluster %s not found", postgresDB.Spec.ClusterRef.Name)) if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictClusterStatus, "persisting cluster not found status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: clusterNotFoundRetryDelay}, nil } if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterInfoFetchFailed, "Can't reach Cluster CR due to transient errors", pendingDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictClusterStatus, "persisting cluster fetch failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist cluster status") } return ctrl.Result{}, err } clusterStatus := getClusterReadyStatus(cluster) - logger.Info("Cluster validation complete", "clusterName", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) + logger.Info("Cluster validation completed", "clusterRef", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) switch clusterStatus { case ClusterNotReady, ClusterNoProvisionerRef: rc.emitWarning(postgresDB, EventClusterNotReady, "Referenced PostgresCluster is not ready yet") if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictClusterStatus, "persisting cluster provisioning status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -103,6 +144,9 @@ func PostgresDatabaseService( case ClusterReady: rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, clusterReady, EventClusterValidated, "Referenced PostgresCluster is ready") if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonClusterAvailable, "Cluster is operational", provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictClusterStatus, "persisting cluster ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } } @@ -114,11 +158,14 @@ func PostgresDatabaseService( "If you deleted a previous PostgresDatabase, recreate it with the original name to re-adopt the orphaned resources.", strings.Join(roleConflicts, ", ")) conflictErr := fmt.Errorf("role conflict detected: %s", strings.Join(roleConflicts, ", ")) - logger.Error(conflictErr, conflictMsg) + logger.Error(conflictErr, "Failed to validate managed role ownership", "conflicts", roleConflicts) rc.emitWarning(postgresDB, EventRoleConflict, conflictMsg) errs := []error{conflictErr} if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictRoleConflictStatus, 
"persisting role conflict status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist role conflict status") errs = append(errs, fmt.Errorf("failed to update status: %w", statusErr)) } return ctrl.Result{}, stderrors.Join(errs...) @@ -131,23 +178,47 @@ func PostgresDatabaseService( Name: cluster.Status.ProvisionerRef.Name, Namespace: cluster.Status.ProvisionerRef.Namespace, }, cnpgCluster); err != nil { - logger.Error(err, "Failed to fetch CNPG Cluster") + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictCNPGClusterFetch, "fetching CNPG cluster"); ok { + return result, conflictErr + } + logger.Error(err, "Failed to fetch CNPG Cluster", "cluster", cluster.Status.ProvisionerRef.Name) return ctrl.Result{}, err } // Phase: CredentialProvisioning — secrets must exist before roles are patched. // CNPG rejects a PasswordSecretRef pointing at a missing secret. - if err := reconcileUserSecrets(ctx, c, rc.Scheme, postgresDB); err != nil { - rc.emitWarning(postgresDB, EventUserSecretsFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err)) + if err := reconcileRoleSecrets(ctx, c, rc.Scheme, postgresDB, previouslyProvisionedDatabases); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictSecretsReconcile, "reconciling user secrets"); ok { + return result, conflictErr + } + var secretErr *secretReconcileError + if stderrors.As(err, &secretErr) { + rc.emitWarning(postgresDB, EventRolesSecretsDriftDetected, secretErr.message) + if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, secretErr.reason, + secretErr.message, provisioningDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictSecretsStatus, "persisting secret drift status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist secret drift status") + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + rc.emitWarning(postgresDB, EventRoleSecretsFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err)) if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, reasonSecretsCreationFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err), provisioningDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictSecretsStatus, "persisting secret failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist secrets status") } return ctrl.Result{}, err } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, secretsReady, EventSecretsReady, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(secretsReady, metav1.ConditionTrue, reasonSecretsCreated, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictSecretsStatus, "persisting secrets ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -155,16 +226,25 @@ func PostgresDatabaseService( // as databases are ready, so they are created alongside secrets. 
endpoints := resolveClusterEndpoints(cluster, cnpgCluster, postgresDB.Namespace) if err := reconcileRoleConfigMaps(ctx, c, rc.Scheme, postgresDB, endpoints); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictConfigMapsReconcile, "reconciling configmaps"); ok { + return result, conflictErr + } rc.emitWarning(postgresDB, EventAccessConfigFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err)) if statusErr := updateStatus(configMapsReady, metav1.ConditionFalse, reasonConfigMapsCreationFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err), provisioningDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictConfigMapsStatus, "persisting configmaps failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist configmaps status") } return ctrl.Result{}, err } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, configMapsReady, EventConfigMapsReady, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(configMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictConfigMapsStatus, "persisting configmaps ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -176,55 +256,74 @@ func PostgresDatabaseService( allRoles := append(desired, rolesToRemove...) if len(rolesToAdd) > 0 || len(rolesToRemove) > 0 { - logger.Info("CNPG Cluster patch started, role drift detected", "toAdd", len(rolesToAdd), "toRemove", len(rolesToRemove)) + logger.Info("Managed roles patch started", "addCount", len(rolesToAdd), "removeCount", len(rolesToRemove)) if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { - logger.Error(err, "Failed to patch users in CNPG Cluster") + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictManagedRolesPatch, "patching managed roles"); ok { + return result, conflictErr + } + logger.Error(err, "Failed to patch managed roles", "roleCount", len(allRoles)) rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) - if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRolesCreationFailed, fmt.Sprintf("Failed to patch managed roles: %v", err), failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + logger.Error(statusErr, "Failed to persist roles status") } return ctrl.Result{}, err } + logger.Info("Managed roles patched", "roleCount", len(allRoles)) rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove))) if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for roles to be reconciled: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictRolesStatus, "persisting roles waiting status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil } - roleNames := 
getDesiredUsers(postgresDB) + roleNames := getDesiredRoles(postgresDB) notReadyRoles, err := verifyRolesReady(ctx, roleNames, cnpgCluster) if err != nil { rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err)) - if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRolesCreationFailed, fmt.Sprintf("Role creation failed: %v", err), failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictRolesStatus, "persisting role failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist roles status") } return ctrl.Result{}, err } if len(notReadyRoles) > 0 { if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for roles to be reconciled: %v", notReadyRoles), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictRolesStatus, "persisting roles pending status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("Roles reconciled: %d active, %d removed", len(rolesToAdd), len(rolesToRemove))) - if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable, + if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonRolesAvailable, fmt.Sprintf("Roles reconciled: %d active, %d removed", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictRolesStatus, "persisting roles ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } // Phase: DatabaseProvisioning adopted, err := reconcileCNPGDatabases(ctx, c, rc.Scheme, postgresDB, cluster) if err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictCNPGDatabasesReconcile, "reconciling CNPG databases"); ok { + return result, conflictErr + } logger.Error(err, "Failed to reconcile CNPG Databases") rc.emitWarning(postgresDB, EventDatabasesReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err)) if statusErr := updateStatus(databasesReady, metav1.ConditionFalse, reasonDatabaseReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err), failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + logger.Error(statusErr, "Failed to persist databases status") } return ctrl.Result{}, err } @@ -234,13 +333,16 @@ func PostgresDatabaseService( notReadyDBs, err := verifyDatabasesReady(ctx, c, postgresDB) if err != nil { - logger.Error(err, "Failed to verify database status") + logger.Error(err, "Failed to verify database readiness") return ctrl.Result{}, err } if len(notReadyDBs) > 0 { rc.emitOnceBeforeWait(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabaseReconciliationStarted, fmt.Sprintf("Reconciling %d databases, waiting for readiness", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for databases to be ready: %v", notReadyDBs), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictDatabasesStatus, "persisting databases pending status"); ok { + 
return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -248,6 +350,9 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabasesReady, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionTrue, reasonDatabasesAvailable, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictDatabasesStatus, "persisting databases ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -284,26 +389,37 @@ func PostgresDatabaseService( rc.emitWarning(postgresDB, EventPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err)) if statusErr := updateStatus(privilegesReady, metav1.ConditionFalse, reasonPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err), provisioningDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictPrivilegesStatus, "persisting privileges failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist privileges status") } return ctrl.Result{}, err } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, privilegesReady, EventPrivilegesReady, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(privilegesReady, metav1.ConditionTrue, reasonPrivilegesGranted, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictPrivilegesStatus, "persisting privileges ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } } - rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) + if !wasReady { + rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) + } postgresDB.Status.Databases = populateDatabaseStatus(postgresDB) postgresDB.Status.ObservedGeneration = &postgresDB.Generation if err := c.Status().Update(ctx, postgresDB); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictFinalStatus, "persisting final status"); ok { + return result, conflictErr + } return ctrl.Result{}, fmt.Errorf("failed to persist final status: %w", err) } - logger.Info("All phases complete") + logger.Info("PostgresDatabase reconciliation completed") return ctrl.Result{}, nil } @@ -350,7 +466,7 @@ func getClusterReadyStatus(cluster *enterprisev4.PostgresCluster) clusterReadySt return ClusterReady } -func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string { +func getDesiredRoles(postgresDB *enterprisev4.PostgresDatabase) []string { users := make([]string, 0, len(postgresDB.Spec.Databases)*2) for _, dbSpec := range postgresDB.Spec.Databases { users = append(users, adminRoleName(dbSpec.Name), rwRoleName(dbSpec.Name)) @@ -358,7 +474,18 @@ func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string { return users } -func getUsersInClusterSpec(cluster *enterprisev4.PostgresCluster) []string { +func existingDatabaseStatus(postgresDB *enterprisev4.PostgresDatabase) map[string]struct{} { + if 
postgresDB.Status.Phase == nil || *postgresDB.Status.Phase != string(readyDBPhase) { + return map[string]struct{}{} + } + existing := make(map[string]struct{}, len(postgresDB.Status.Databases)) + for _, database := range postgresDB.Status.Databases { + existing[database.Name] = struct{}{} + } + return existing +} + +func getRolesInClusterSpec(cluster *enterprisev4.PostgresCluster) []string { users := make([]string, 0, len(cluster.Spec.ManagedRoles)) for _, role := range cluster.Spec.ManagedRoles { users = append(users, role.Name) @@ -417,7 +544,6 @@ func parseRoleNames(raw []byte) []string { } func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string, cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) error { - logger := log.FromContext(ctx) rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) if err != nil { return fmt.Errorf("building managed roles patch: %w", err) @@ -425,14 +551,12 @@ func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { return fmt.Errorf("patching managed roles: %w", err) } - logger.Info("Managed roles patched", "count", len(roles)) return nil } -func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster *cnpgv1.Cluster) ([]string, error) { - logger := log.FromContext(ctx) +func verifyRolesReady(_ context.Context, expectedRoles []string, cnpgCluster *cnpgv1.Cluster) ([]string, error) { if cnpgCluster.Status.ManagedRolesStatus.CannotReconcile != nil { - for _, userName := range expectedUsers { + for _, userName := range expectedRoles { if errs, exists := cnpgCluster.Status.ManagedRolesStatus.CannotReconcile[userName]; exists { return nil, fmt.Errorf("reconciling user %s: %v", userName, errs) } @@ -440,14 +564,11 @@ func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster * } reconciled := cnpgCluster.Status.ManagedRolesStatus.ByStatus[cnpgv1.RoleStatusReconciled] var notReady []string - for _, userName := range expectedUsers { + for _, userName := range expectedRoles { if !slices.Contains(reconciled, userName) { notReady = append(notReady, userName) } } - if len(notReady) > 0 { - logger.Info("Users not reconciled yet", "pending", notReady) - } return notReady, nil } @@ -456,18 +577,18 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim var adopted []string for _, dbSpec := range postgresDB.Spec.Databases { cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) + reAdopted := false cnpgDB := &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{Name: cnpgDBName, Namespace: postgresDB.Namespace}, } _, err := controllerutil.CreateOrUpdate(ctx, c, cnpgDB, func() error { cnpgDB.Spec = buildCNPGDatabaseSpec(cluster.Status.ProvisionerRef.Name, dbSpec) - reAdopting := cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name - if reAdopting { - logger.Info("Orphaned CNPG Database re-adopted", "name", cnpgDBName) + reAdopted = cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name + if reAdopted { delete(cnpgDB.Annotations, annotationRetainedFrom) adopted = append(adopted, dbSpec.Name) } - if cnpgDB.CreationTimestamp.IsZero() || reAdopting { + if cnpgDB.CreationTimestamp.IsZero() || reAdopted { return controllerutil.SetControllerReference(postgresDB, cnpgDB, scheme) } return nil @@ -475,6 +596,9 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim if err != nil { return adopted, 
fmt.Errorf("reconciling CNPG Database %s: %w", cnpgDBName, err) } + if reAdopted { + logger.Info("CNPG Database re-adopted", "name", cnpgDBName) + } } return adopted, nil } @@ -485,6 +609,10 @@ func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *ente cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) cnpgDB := &cnpgv1.Database{} if err := c.Get(ctx, types.NamespacedName{Name: cnpgDBName, Namespace: postgresDB.Namespace}, cnpgDB); err != nil { + if errors.IsNotFound(err) { + notReady = append(notReady, dbSpec.Name) + continue + } return nil, fmt.Errorf("getting CNPG Database %s: %w", cnpgDBName, err) } if cnpgDB.Status.Applied == nil || !*cnpgDB.Status.Applied { @@ -496,7 +624,9 @@ func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *ente func persistStatus(ctx context.Context, c client.Client, metrics ports.Recorder, db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { applyStatus(db, conditionType, conditionStatus, reason, message, phase) - metrics.IncStatusTransition(ports.ControllerDatabase, string(conditionType), string(conditionStatus), string(reason)) + if metrics != nil { + metrics.IncStatusTransition(ports.ControllerDatabase, string(conditionType), string(conditionStatus), string(reason)) + } return c.Status().Update(ctx, db) } @@ -510,6 +640,7 @@ func applyStatus(db *enterprisev4.PostgresDatabase, conditionType conditionTypes }) p := string(phase) db.Status.Phase = &p + db.Status.ObservedGeneration = &db.Generation } func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan { @@ -545,7 +676,7 @@ func handleDeletion(ctx context.Context, rc *ReconcileContext, postgresDB *enter return fmt.Errorf("removing finalizer: %w", err) } rc.emitNormal(postgresDB, EventCleanupComplete, fmt.Sprintf("Cleanup complete (%d retained, %d deleted)", len(plan.retained), len(plan.deleted))) - logger.Info("Cleanup complete", "retained", len(plan.retained), "deleted", len(plan.deleted)) + logger.Info("Cleanup completed", "retained", len(plan.retained), "deleted", len(plan.deleted)) return nil } @@ -579,7 +710,7 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter if !errors.IsNotFound(err) { return fmt.Errorf("getting PostgresCluster for role cleanup: %w", err) } - logger.Info("PostgresCluster already deleted, skipping role cleanup") + logger.Info("PostgresCluster already deleted, skipping managed roles cleanup") return nil } fieldManager := fieldManagerName(postgresDB.Name) @@ -589,7 +720,6 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { return err } - logger.Info("Managed roles patched on deletion", "retained", len(retainedRoles), "removed", len(rolesToRemove)) return nil } @@ -615,7 +745,7 @@ func orphanCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enter if err := c.Update(ctx, db); err != nil { return fmt.Errorf("orphaning CNPG Database %s: %w", name, err) } - logger.Info("Orphaned CNPG Database CR", "name", name) + logger.Info("CNPG Database orphaned", "name", name) } return nil } @@ -642,7 +772,7 @@ func orphanConfigMaps(ctx context.Context, c client.Client, postgresDB *enterpri if err := c.Update(ctx, cm); err != nil { return fmt.Errorf("orphaning ConfigMap %s: %w", name, err) } - logger.Info("Orphaned ConfigMap", "name", name) + 
logger.Info("ConfigMap orphaned", "name", name) } return nil } @@ -670,7 +800,7 @@ func orphanSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev if err := c.Update(ctx, secret); err != nil { return fmt.Errorf("orphaning Secret %s: %w", name, err) } - logger.Info("Orphaned Secret", "name", name) + logger.Info("Secret orphaned", "name", name) } } return nil @@ -687,7 +817,7 @@ func deleteCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enter } return fmt.Errorf("deleting CNPG Database %s: %w", name, err) } - logger.Info("Deleted CNPG Database CR", "name", name) + logger.Info("CNPG Database deleted", "name", name) } return nil } @@ -703,7 +833,7 @@ func deleteConfigMaps(ctx context.Context, c client.Client, postgresDB *enterpri } return fmt.Errorf("deleting ConfigMap %s: %w", name, err) } - logger.Info("Deleted ConfigMap", "name", name) + logger.Info("ConfigMap deleted", "name", name) } return nil } @@ -720,7 +850,7 @@ func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev } return fmt.Errorf("deleting Secret %s: %w", name, err) } - logger.Info("Deleted Secret", "name", name) + logger.Info("Secret deleted", "name", name) } } return nil @@ -843,33 +973,82 @@ func adoptResource(ctx context.Context, c client.Client, scheme *runtime.Scheme, return c.Update(ctx, obj) } -func reconcileUserSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase) error { +func secretMissingPolicyForDB(dbName string, existingDBs map[string]struct{}) secretMissingPolicy { + if _, exists := existingDBs[dbName]; exists { + return reportSecretDriftIfMissing + } + return createSecretIfMissing +} + +func reconcileRoleSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, existingDatabases map[string]struct{}) error { for _, dbSpec := range postgresDB.Spec.Databases { - if err := ensureSecret(ctx, c, scheme, postgresDB, adminRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)); err != nil { + missingPolicy := secretMissingPolicyForDB(dbSpec.Name, existingDatabases) + if err := reconcileRoleSecret(ctx, c, scheme, postgresDB, adminRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin), missingPolicy); err != nil { return err } - if err := ensureSecret(ctx, c, scheme, postgresDB, rwRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)); err != nil { + if err := reconcileRoleSecret(ctx, c, scheme, postgresDB, rwRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW), missingPolicy); err != nil { return err } } return nil } +func reconcileRoleSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string, missingPolicy secretMissingPolicy) error { + if missingPolicy == reportSecretDriftIfMissing { + return ensureProvisionedSecret(ctx, c, scheme, postgresDB, roleName, secretName) + } + return ensureSecret(ctx, c, scheme, postgresDB, roleName, secretName) +} + func ensureSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) if err != nil { return err } + if secret == nil { + return createRoleSecret(ctx, c, scheme, postgresDB, roleName, secretName) + } + return reconcileExistingSecret(ctx, c, scheme, postgresDB, 
secretName, secret) +} + +func ensureProvisionedSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { + secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) + if err != nil { + return err + } + if secret == nil { + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is missing for previously provisioned role %s", secretName, roleName), + reason: reasonSecretsDriftDetected, + } + } + return reconcileExistingSecret(ctx, c, scheme, postgresDB, secretName, secret) +} + +func reconcileExistingSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, secretName string, secret *corev1.Secret) error { logger := log.FromContext(ctx) switch { - case secret == nil: - logger.Info("User secret creation started", "name", secretName) - return createUserSecret(ctx, c, scheme, postgresDB, roleName, secretName) case secret.Annotations[annotationRetainedFrom] == postgresDB.Name: - logger.Info("Orphaned secret re-adopted", "name", secretName) - return adoptResource(ctx, c, scheme, postgresDB, secret) + if err := adoptResource(ctx, c, scheme, postgresDB, secret); err != nil { + return err + } + logger.Info("Secret re-adopted", "name", secretName) + return nil + case metav1.IsControlledBy(secret, postgresDB): + return nil + case metav1.GetControllerOf(secret) == nil: + if err := adoptResource(ctx, c, scheme, postgresDB, secret); err != nil { + return err + } + logger.Info("Secret adopted", "name", secretName) + return nil + default: + owner := metav1.GetControllerOf(secret) + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is controlled by %s %s", secretName, owner.Kind, owner.Name), + reason: reasonSecretsDriftDetected, + } } - return nil } func getSecret(ctx context.Context, c client.Client, namespace, name string) (*corev1.Secret, error) { @@ -884,7 +1063,7 @@ func getSecret(ctx context.Context, c client.Client, namespace, name string) (*c return secret, nil } -func createUserSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { +func createRoleSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { pw, err := generatePassword() if err != nil { return err @@ -899,6 +1078,7 @@ func createUserSecret(ctx context.Context, c client.Client, scheme *runtime.Sche } return err } + log.FromContext(ctx).Info("Role secret created", "name", secretName) return nil } @@ -930,6 +1110,7 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti logger := log.FromContext(ctx) for _, dbSpec := range postgresDB.Spec.Databases { cmName := configMapName(postgresDB.Name, dbSpec.Name) + reAdopted := false cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: cmName, @@ -939,12 +1120,11 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti } _, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { cm.Data = buildDatabaseConfigMapBody(dbSpec.Name, endpoints) - reAdopting := cm.Annotations[annotationRetainedFrom] == postgresDB.Name - if reAdopting { - logger.Info("Orphaned ConfigMap re-adopted", "name", cmName) + reAdopted = cm.Annotations[annotationRetainedFrom] == postgresDB.Name + if reAdopted { delete(cm.Annotations, annotationRetainedFrom) } - if 
cm.CreationTimestamp.IsZero() || reAdopting { + if !metav1.IsControlledBy(cm, postgresDB) { return controllerutil.SetControllerReference(postgresDB, cm, scheme) } return nil @@ -952,6 +1132,9 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti if err != nil { return fmt.Errorf("reconciling ConfigMap %s: %w", cmName, err) } + if reAdopted { + logger.Info("ConfigMap re-adopted", "name", cmName) + } } return nil } diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index c41d2dd59..2e2f4d276 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -3,7 +3,6 @@ package core // The following functions are intentionally not tested directly here. // Their business logic is covered by narrower helper tests where practical, // and the remaining behavior is mostly controller-runtime orchestration: -// - PostgresDatabaseService // - patchManagedRoles // - reconcileCNPGDatabases // - handleDeletion @@ -27,9 +26,12 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" @@ -124,7 +126,160 @@ func testClient(t *testing.T, scheme *runtime.Scheme, objs ...client.Object) cli return builder.Build() } -func TestGetDesiredUsers(t *testing.T) { +func postgresDatabaseConflict(name string) error { + return apierrors.NewConflict( + schema.GroupResource{ + Group: enterprisev4.GroupVersion.Group, + Resource: "postgresdatabases", + }, + name, + errors.New("resource version conflict"), + ) +} + +func TestPostgresDatabaseServiceRequeuesOnConflict(t *testing.T) { + scheme := testScheme(t) + tests := []struct { + name string + existing *enterprisev4.PostgresDatabase + build func(*enterprisev4.PostgresDatabase) client.Client + }{ + { + name: "when adding the finalizer", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + Update: func(_ context.Context, _ client.WithWatch, obj client.Object, _ ...client.UpdateOption) error { + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + { + name: "when persisting status", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + Finalizers: []string{postgresDatabaseFinalizerName}, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "missing-cluster"}, + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). 
+ WithInterceptorFuncs(interceptor.Funcs{ + SubResourceUpdate: func(_ context.Context, _ client.Client, subResourceName string, obj client.Object, _ ...client.SubResourceUpdateOption) error { + if subResourceName != "status" { + return nil + } + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + { + name: "when status update conflicts while handling another error", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + Finalizers: []string{postgresDatabaseFinalizerName}, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "primary"}, + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*enterprisev4.PostgresCluster); ok { + return errors.New("temporary get failure") + } + return client.Get(ctx, key, obj, opts...) + }, + SubResourceUpdate: func(_ context.Context, _ client.Client, subResourceName string, obj client.Object, _ ...client.SubResourceUpdateOption) error { + if subResourceName != "status" { + return nil + } + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + c := tst.build(tst.existing) + + postgresDB := &enterprisev4.PostgresDatabase{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: tst.existing.Name, Namespace: tst.existing.Namespace}, postgresDB)) + + result, err := PostgresDatabaseService( + context.Background(), + &ReconcileContext{Client: c, Scheme: scheme, Recorder: record.NewFakeRecorder(10)}, + postgresDB, + nil, + ) + + require.NoError(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, result) + }) + } +} + +func TestSecretMissingPolicyForDB(t *testing.T) { + tests := []struct { + name string + dbName string + existingDBs map[string]struct{} + want secretMissingPolicy + }{ + { + name: "creates secrets for new databases", + dbName: "payments", + existingDBs: map[string]struct{}{}, + want: createSecretIfMissing, + }, + { + name: "reports drift for previously provisioned databases", + dbName: "payments", + existingDBs: map[string]struct{}{ + "payments": {}, + }, + want: reportSecretDriftIfMissing, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + assert.Equal(t, tst.want, secretMissingPolicyForDB(tst.dbName, tst.existingDBs)) + }) + } +} + +func TestGetDesiredRoles(t *testing.T) { postgresDB := &enterprisev4.PostgresDatabase{ Spec: enterprisev4.PostgresDatabaseSpec{ Databases: []enterprisev4.DatabaseDefinition{ @@ -140,12 +295,12 @@ func TestGetDesiredUsers(t *testing.T) { "secondary_db_rw", } - got := getDesiredUsers(postgresDB) + got := getDesiredRoles(postgresDB) assert.Equal(t, want, got) } -func TestGetUsersInClusterSpec(t *testing.T) { +func TestGetRolesInClusterSpec(t *testing.T) { cluster := &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ ManagedRoles: []enterprisev4.ManagedRole{ @@ -156,7 +311,7 @@ func TestGetUsersInClusterSpec(t *testing.T) { } want := []string{"main_db_admin", "main_db_rw"} - got := getUsersInClusterSpec(cluster) + got := getRolesInClusterSpec(cluster) assert.Equal(t, 
want, got) } @@ -290,14 +445,14 @@ func TestGetRoleConflicts(t *testing.T) { func TestVerifyRolesReady(t *testing.T) { tests := []struct { name string - expectedUsers []string + expectedRoles []string cluster *cnpgv1.Cluster wantNotReady []string wantErr string }{ { name: "returns error when a role cannot reconcile", - expectedUsers: []string{"main_db_admin", "main_db_rw"}, + expectedRoles: []string{"main_db_admin", "main_db_rw"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -311,7 +466,7 @@ func TestVerifyRolesReady(t *testing.T) { }, { name: "returns missing roles that are not reconciled yet", - expectedUsers: []string{"main_db_admin", "main_db_rw", "analytics_admin"}, + expectedRoles: []string{"main_db_admin", "main_db_rw", "analytics_admin"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -325,7 +480,7 @@ func TestVerifyRolesReady(t *testing.T) { }, { name: "returns pending reconciliation roles as not ready", - expectedUsers: []string{"main_db_admin", "main_db_rw"}, + expectedRoles: []string{"main_db_admin", "main_db_rw"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -340,7 +495,7 @@ func TestVerifyRolesReady(t *testing.T) { }, { name: "returns empty when all roles are reconciled", - expectedUsers: []string{"main_db_admin"}, + expectedRoles: []string{"main_db_admin"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -357,7 +512,7 @@ func TestVerifyRolesReady(t *testing.T) { for _, tst := range tests { t.Run(tst.name, func(t *testing.T) { - gotNotReady, err := verifyRolesReady(context.Background(), tst.expectedUsers, tst.cluster) + gotNotReady, err := verifyRolesReady(context.Background(), tst.expectedRoles, tst.cluster) if tst.wantErr != "" { require.Error(t, err) assert.Equal(t, tst.wantErr, err.Error()) @@ -666,14 +821,14 @@ func TestVerifyDatabasesReady(t *testing.T) { wantNotReady: []string{"payments", "analytics"}, }, { - name: "returns error when a database is missing", + name: "returns not ready when a database is missing", objects: []client.Object{ &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}, Status: cnpgv1.DatabaseStatus{Applied: boolPtr(true)}, }, }, - wantErr: "getting CNPG Database primary-analytics", + wantNotReady: []string{"analytics"}, }, } @@ -858,7 +1013,7 @@ func TestGeneratePassword(t *testing.T) { } // Uses a fake client because the helper creates Secret objects and persists owner references through the Kubernetes API. 
-func TestCreateUserSecret(t *testing.T) { +func TestCreateRoleSecret(t *testing.T) { scheme := testScheme(t) postgresDB := &enterprisev4.PostgresDatabase{ TypeMeta: metav1.TypeMeta{ @@ -877,13 +1032,13 @@ func TestCreateUserSecret(t *testing.T) { secretName := "primary-payments-admin" wantManagedBy := "splunk-operator" wantReload := "true" - wantUsername := roleName + wantRolename := roleName wantOwnerUID := postgresDB.UID wantPasswordLength := passwordLength wantPasswordDigits := passwordDigits c := testClient(t, scheme) - err := createUserSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + err := createRoleSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) require.NoError(t, err) @@ -893,7 +1048,7 @@ func TestCreateUserSecret(t *testing.T) { assert.Equal(t, postgresDB.Namespace, got.Namespace) assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), wantPasswordLength, wantPasswordDigits) require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) @@ -902,18 +1057,18 @@ func TestCreateUserSecret(t *testing.T) { t.Run("returns nil when secret already exists", func(t *testing.T) { roleName := "payments_admin" secretName := "primary-payments-admin" - wantUsername := roleName + wantRolename := roleName wantPassword := "existing-password" - existing := buildPasswordSecret(postgresDB, secretName, wantUsername, wantPassword) + existing := buildPasswordSecret(postgresDB, secretName, wantRolename, wantPassword) c := testClient(t, scheme, existing) - err := createUserSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + err := createRoleSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) require.NoError(t, err) got := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) assert.Empty(t, got.OwnerReferences) }) @@ -939,7 +1094,7 @@ func TestEnsureSecret(t *testing.T) { secretName := "primary-payments-admin" wantManagedBy := "splunk-operator" wantReload := "true" - wantUsername := roleName + wantRolename := roleName wantOwnerUID := postgresDB.UID wantPasswordLength := passwordLength wantPasswordDigits := passwordDigits @@ -953,7 +1108,7 @@ func TestEnsureSecret(t *testing.T) { require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), wantPasswordLength, wantPasswordDigits) require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) @@ -962,7 +1117,7 @@ func TestEnsureSecret(t *testing.T) { t.Run("re-adopts retained secret", func(t *testing.T) { roleName := "payments_admin" secretName := "primary-payments-admin" - wantUsername := roleName + wantRolename := 
roleName wantPassword := "existing-password" wantOwnerUID := postgresDB.UID wantKeep := "true" @@ -979,7 +1134,7 @@ func TestEnsureSecret(t *testing.T) { }, }, Data: map[string][]byte{ - "username": []byte(wantUsername), + "username": []byte(wantRolename), secretKeyPassword: []byte(wantPassword), }, } @@ -994,7 +1149,7 @@ func TestEnsureSecret(t *testing.T) { assert.Equal(t, wantKeep, got.Annotations["keep"]) _, hasRetainedAnnotation := got.Annotations[annotationRetainedFrom] assert.False(t, hasRetainedAnnotation) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) assert.Contains(t, got.OwnerReferences, metav1.OwnerReference{ APIVersion: enterprisev4.GroupVersion.String(), @@ -1009,7 +1164,7 @@ func TestEnsureSecret(t *testing.T) { t.Run("does nothing for existing managed secret", func(t *testing.T) { roleName := "payments_admin" secretName := "primary-payments-admin" - wantUsername := roleName + wantRolename := roleName wantPassword := "existing-password" wantKeep := "true" wantOwnerUID := postgresDB.UID @@ -1021,11 +1176,18 @@ func TestEnsureSecret(t *testing.T) { "keep": wantKeep, }, OwnerReferences: []metav1.OwnerReference{ - {UID: wantOwnerUID, Name: postgresDB.Name}, + { + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + Name: postgresDB.Name, + UID: wantOwnerUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }, }, }, Data: map[string][]byte{ - "username": []byte(wantUsername), + "username": []byte(wantRolename), secretKeyPassword: []byte(wantPassword), }, } @@ -1038,15 +1200,89 @@ func TestEnsureSecret(t *testing.T) { got := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) assert.Equal(t, wantKeep, got.Annotations["keep"]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) }) + + t.Run("returns drift error when a previously provisioned secret is missing", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + c := testClient(t, scheme) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + assert.ErrorContains(t, err, secretName) + }) + + t.Run("re-attaches owner reference when ownership was manually stripped", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{ + labelManagedBy: "splunk-operator", + labelCNPGReload: "true", + }, + Annotations: map[string]string{"keep": "true"}, + }, + Data: map[string][]byte{ + "username": []byte(roleName), + secretKeyPassword: []byte("existing-password"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, 
c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, "true", got.Annotations["keep"]) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + }) + + t.Run("accepts an existing secret with mutated data without rewriting it", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantUsername := "wrong_user" + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + OwnerReferences: []metav1.OwnerReference{ + {UID: postgresDB.UID, Name: postgresDB.Name}, + }, + }, + Data: map[string][]byte{ + "username": []byte(wantUsername), + secretKeyPassword: []byte("existing-password"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, "existing-password", string(got.Data[secretKeyPassword])) + }) } // Uses a fake client because the helper reconciles multiple Secret objects through the Kubernetes API. -func TestReconcileUserSecrets(t *testing.T) { +func TestReconcileRoleSecrets(t *testing.T) { scheme := testScheme(t) postgresDB := &enterprisev4.PostgresDatabase{ TypeMeta: metav1.TypeMeta{ @@ -1078,7 +1314,7 @@ func TestReconcileUserSecrets(t *testing.T) { {name: "primary-analytics-rw", username: "analytics_rw"}, } - err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + err := reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) require.NoError(t, err) for _, want := range wantSecrets { @@ -1094,13 +1330,13 @@ func TestReconcileUserSecrets(t *testing.T) { t.Run("is idempotent when secrets already exist", func(t *testing.T) { c := testClient(t, scheme) - require.NoError(t, reconcileUserSecrets(context.Background(), c, scheme, postgresDB)) + require.NoError(t, reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB))) before := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: "primary-payments-admin", Namespace: postgresDB.Namespace}, before)) beforePassword := append([]byte(nil), before.Data[secretKeyPassword]...) 
- err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + err := reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) require.NoError(t, err) @@ -1110,6 +1346,19 @@ func TestReconcileUserSecrets(t *testing.T) { require.Len(t, after.OwnerReferences, 1) assert.Equal(t, postgresDB.UID, after.OwnerReferences[0].UID) }) + + t.Run("does not recreate missing secrets for previously provisioned databases", func(t *testing.T) { + postgresDB.Status.Phase = strPtr(string(readyDBPhase)) + postgresDB.Status.Databases = []enterprisev4.DatabaseInfo{{Name: "payments"}} + c := testClient(t, scheme) + + err := reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + }) } // Uses a fake client because the helper reconciles ConfigMaps through CreateOrUpdate and persists re-adoption metadata. @@ -1228,6 +1477,47 @@ func TestReconcileRoleConfigMaps(t *testing.T) { BlockOwnerDeletion: boolPtr(true), }) }) + + t.Run("re-attaches owner reference when configmap ownership was manually stripped", func(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + }, + }, + } + cmName := "primary-payments-config" + existing := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{labelManagedBy: "splunk-operator"}, + Annotations: map[string]string{"keep": "true"}, + }, + Data: map[string]string{"dbname": "payments"}, + } + c := testClient(t, scheme, existing) + + err := reconcileRoleConfigMaps(context.Background(), c, scheme, postgresDB, endpoints) + + require.NoError(t, err) + + got := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cmName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, "true", got.Annotations["keep"]) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + assert.Equal(t, buildDatabaseConfigMapBody("payments", endpoints), got.Data) + }) } func TestBuildDeletionPlan(t *testing.T) { @@ -1485,16 +1775,16 @@ func TestBuildPasswordSecret(t *testing.T) { wantNamespace := "dbs" wantManagedBy := "splunk-operator" wantReload := "true" - wantUsername := "payments_admin" + wantRolename := "payments_admin" wantPassword := "topsecret" - got := buildPasswordSecret(postgresDB, wantName, wantUsername, wantPassword) + got := buildPasswordSecret(postgresDB, wantName, wantRolename, wantPassword) assert.Equal(t, wantName, got.Name) assert.Equal(t, wantNamespace, got.Namespace) assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) } diff --git a/pkg/postgresql/database/core/events.go b/pkg/postgresql/database/core/events.go index 987b8bbfb..eb5e08d81 100644 --- 
a/pkg/postgresql/database/core/events.go +++ b/pkg/postgresql/database/core/events.go @@ -22,7 +22,8 @@ const ( EventClusterNotFound = "ClusterNotFound" EventClusterNotReady = "ClusterNotReady" EventRoleConflict = "RoleConflict" - EventUserSecretsFailed = "UserSecretsFailed" + EventRoleSecretsFailed = "RoleSecretsFailed" + EventRolesSecretsDriftDetected = "RolesSecretsDriftDetected" EventAccessConfigFailed = "AccessConfigFailed" EventManagedRolesPatchFailed = "ManagedRolesPatchFailed" EventRoleFailed = "RoleFailed" diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index 6511b502f..88642437a 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -22,6 +22,7 @@ type reconcileDBPhases string type conditionTypes string type conditionReasons string type clusterReadyStatus string +type reconcileConflictCategory string const ( retryDelay = time.Second * 15 @@ -33,7 +34,6 @@ const ( readWriteEndpoint string = "rw" deletionPolicyRetain string = "Retain" - deletionPolicyDelete string = "Delete" postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" annotationRetainedFrom string = "enterprise.splunk.com/retained-from" @@ -74,9 +74,10 @@ const ( reasonDatabasesAvailable conditionReasons = "DatabasesAvailable" reasonSecretsCreated conditionReasons = "SecretsCreated" reasonSecretsCreationFailed conditionReasons = "SecretsCreationFailed" + reasonSecretsDriftDetected conditionReasons = "SecretsDriftDetected" reasonWaitingForCNPG conditionReasons = "WaitingForCNPG" - reasonUsersCreationFailed conditionReasons = "UsersCreationFailed" - reasonUsersAvailable conditionReasons = "UsersAvailable" + reasonRolesCreationFailed conditionReasons = "RolesCreationFailed" + reasonRolesAvailable conditionReasons = "RolesAvailable" reasonRoleConflict conditionReasons = "RoleConflict" reasonConfigMapsCreationFailed conditionReasons = "ConfigMapsCreationFailed" reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" @@ -90,6 +91,22 @@ const ( ClusterNotReady clusterReadyStatus = "NotReady" ClusterNoProvisionerRef clusterReadyStatus = "NoProvisionerRef" ClusterReady clusterReadyStatus = "Ready" + + conflictDeletion reconcileConflictCategory = "deletion" + conflictFinalizer reconcileConflictCategory = "finalizer" + conflictClusterStatus reconcileConflictCategory = "cluster_status" + conflictRoleConflictStatus reconcileConflictCategory = "role_conflict_status" + conflictCNPGClusterFetch reconcileConflictCategory = "cnpg_cluster_fetch" + conflictSecretsReconcile reconcileConflictCategory = "secrets_reconcile" + conflictSecretsStatus reconcileConflictCategory = "secrets_status" + conflictConfigMapsReconcile reconcileConflictCategory = "configmaps_reconcile" + conflictConfigMapsStatus reconcileConflictCategory = "configmaps_status" + conflictManagedRolesPatch reconcileConflictCategory = "managed_roles_patch" + conflictRolesStatus reconcileConflictCategory = "roles_status" + conflictCNPGDatabasesReconcile reconcileConflictCategory = "cnpg_databases_reconcile" + conflictDatabasesStatus reconcileConflictCategory = "databases_status" + conflictPrivilegesStatus reconcileConflictCategory = "privileges_status" + conflictFinalStatus reconcileConflictCategory = "final_status" ) // clusterEndpoints holds fully-resolved connection hostnames for a cluster.
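
Note (reviewer sketch, not part of the diff): the hunks above repeatedly call requeueOnConflict(...), branch on the secretMissingPolicy values createSecretIfMissing / reportSecretDriftIfMissing, and construct secretReconcileError values, but those definitions fall outside the hunks shown here. The snippet below is a minimal sketch of one plausible shape for them in package core, inferred only from the call sites and from TestPostgresDatabaseServiceRequeuesOnConflict (conflict swallowed, ctrl.Result{Requeue: true} returned). Names, field names, and the logging are assumptions; the real implementation may differ, e.g. it may also record a conflict metric keyed by reconcileConflictCategory.

package core

import (
	"context"
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log"
)

// requeueOnConflict converts an optimistic-concurrency conflict into an
// immediate requeue instead of surfacing it as a reconcile error. The third
// return value tells the caller whether the error was in fact a conflict.
// (Sketch: metric recording by reconcileConflictCategory is omitted here.)
func requeueOnConflict(ctx context.Context, err error, category reconcileConflictCategory, action string) (ctrl.Result, error, bool) {
	if !apierrors.IsConflict(err) {
		return ctrl.Result{}, nil, false
	}
	log.FromContext(ctx).V(1).Info("Requeueing on conflict",
		"category", string(category), "action", action)
	return ctrl.Result{Requeue: true}, nil, true
}

// secretMissingPolicy selects how a missing role Secret is handled:
// create it for databases not yet provisioned, or report drift for
// databases the status already lists as provisioned.
type secretMissingPolicy string

const (
	createSecretIfMissing      secretMissingPolicy = "CreateIfMissing"
	reportSecretDriftIfMissing secretMissingPolicy = "ReportDriftIfMissing"
)

// secretReconcileError carries a condition reason alongside the message so
// callers can surface drift (reasonSecretsDriftDetected) in status and events.
type secretReconcileError struct {
	message string
	reason  conditionReasons
}

func (e *secretReconcileError) Error() string {
	return fmt.Sprintf("%s (reason: %s)", e.message, e.reason)
}

Returning a nil error on conflict keeps routine optimistic-concurrency races out of the controller's error path, which matches the new tests asserting require.NoError together with a Requeue: true result.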