1- /*
2- Copyright 2021 The Kubernetes Authors.
3-
4- Licensed under the Apache License, Version 2.0 (the "License");
5- you may not use this file except in compliance with the License.
6- You may obtain a copy of the License at
7-
8- http://www.apache.org/licenses/LICENSE-2.0
9-
10- Unless required by applicable law or agreed to in writing, software
11- distributed under the License is distributed on an "AS IS" BASIS,
12- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13- See the License for the specific language governing permissions and
14- limitations under the License.
15- */
16-
171/*
182Copyright 2025 The KubeFleet Authors.
193
@@ -34,6 +18,7 @@ package workapplier
3418
3519import (
3620 "context"
21+ "fmt"
3722 "time"
3823
3924 "go.uber.org/atomic"
@@ -212,6 +197,7 @@ type Reconciler struct {
212197
213198 availabilityCheckRequeueAfter time.Duration
214199 driftCheckRequeueAfter time.Duration
200+ deletionWaitTime time.Duration
215201}
216202
217203func NewReconciler (
@@ -222,6 +208,7 @@ func NewReconciler(
222208 workerCount int ,
223209 availabilityCheckRequestAfter time.Duration ,
224210 driftCheckRequestAfter time.Duration ,
211+ deletionWaitTime time.Duration ,
225212 watchWorkWithPriorityQueue bool ,
226213 watchWorkReconcileAgeMinutes int ,
227214) * Reconciler {
@@ -251,6 +238,7 @@ func NewReconciler(
251238 joined : atomic .NewBool (false ),
252239 availabilityCheckRequeueAfter : acRequestAfter ,
253240 driftCheckRequeueAfter : dcRequestAfter ,
241+ deletionWaitTime : deletionWaitTime ,
254242 }
255243}
256244
@@ -417,7 +405,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
417405 Kind : fleetv1beta1 .AppliedWorkKind ,
418406 Name : appliedWork .GetName (),
419407 UID : appliedWork .GetUID (),
420- BlockOwnerDeletion : ptr .To (false ),
408+ BlockOwnerDeletion : ptr .To (true ),
421409 }
422410
423411 // Set the default values for the Work object to avoid additional validation logic in the
@@ -485,29 +473,112 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
485473 return ctrl.Result {RequeueAfter : r .driftCheckRequeueAfter }, nil
486474}
487475
488- // garbageCollectAppliedWork deletes the appliedWork and all the manifests associated with it from the cluster.
489476func (r * Reconciler ) garbageCollectAppliedWork (ctx context.Context , work * fleetv1beta1.Work ) (ctrl.Result , error ) {
490- deletePolicy := metav1 .DeletePropagationBackground
477+ deletePolicy := metav1 .DeletePropagationForeground
491478 if ! controllerutil .ContainsFinalizer (work , fleetv1beta1 .WorkFinalizer ) {
492479 return ctrl.Result {}, nil
493480 }
494- // delete the appliedWork which will remove all the manifests associated with it
495- // TODO: allow orphaned manifest
496- appliedWork := fleetv1beta1.AppliedWork {
481+ appliedWork := & fleetv1beta1.AppliedWork {
497482 ObjectMeta : metav1.ObjectMeta {Name : work .Name },
498483 }
499- err := r .spokeClient .Delete (ctx , & appliedWork , & client.DeleteOptions {PropagationPolicy : & deletePolicy })
500- switch {
501- case apierrors .IsNotFound (err ):
502- klog .V (2 ).InfoS ("The appliedWork is already deleted" , "appliedWork" , work .Name )
503- case err != nil :
504- klog .ErrorS (err , "Failed to delete the appliedWork" , "appliedWork" , work .Name )
484+ // Get the AppliedWork object
485+ if err := r .spokeClient .Get (ctx , types.NamespacedName {Name : work .Name }, appliedWork ); err != nil {
486+ if apierrors .IsNotFound (err ) {
487+ klog .V (2 ).InfoS ("The appliedWork is already deleted, removing the finalizer from the work" , "appliedWork" , work .Name )
488+ return r .removeWorkFinalizer (ctx , work )
489+ }
490+ klog .ErrorS (err , "Failed to get AppliedWork" , "appliedWork" , work .Name )
505491 return ctrl.Result {}, controller .NewAPIServerError (false , err )
506- default :
507- klog .InfoS ("Successfully deleted the appliedWork" , "appliedWork" , work .Name )
508492 }
509- controllerutil .RemoveFinalizer (work , fleetv1beta1 .WorkFinalizer )
510493
494+ // Handle stuck deletion after 5 minutes where the other owner references might not exist or are invalid.
495+ if ! appliedWork .DeletionTimestamp .IsZero () && time .Since (appliedWork .DeletionTimestamp .Time ) >= r .deletionWaitTime {
496+ klog .V (2 ).InfoS ("AppliedWork deletion appears stuck; attempting to patch owner references" , "appliedWork" , work .Name )
497+ if err := r .updateOwnerReference (ctx , work , appliedWork ); err != nil {
498+ klog .ErrorS (err , "Failed to update owner references for AppliedWork" , "appliedWork" , work .Name )
499+ return ctrl.Result {}, controller .NewAPIServerError (false , err )
500+ }
501+ return ctrl.Result {}, fmt .Errorf ("AppliedWork %s is being deleted, waiting for the deletion to complete" , work .Name )
502+ }
503+
504+ if err := r .spokeClient .Delete (ctx , appliedWork , & client.DeleteOptions {PropagationPolicy : & deletePolicy }); err != nil {
505+ if apierrors .IsNotFound (err ) {
506+ klog .V (2 ).InfoS ("AppliedWork already deleted" , "appliedWork" , work .Name )
507+ return r .removeWorkFinalizer (ctx , work )
508+ }
509+ klog .V (2 ).ErrorS (err , "Failed to delete the appliedWork" , "appliedWork" , work .Name )
510+ return ctrl.Result {}, controller .NewAPIServerError (false , err )
511+ }
512+
513+ klog .V (2 ).InfoS ("AppliedWork deletion in progress" , "appliedWork" , work .Name )
514+ return ctrl.Result {}, fmt .Errorf ("AppliedWork %s is being deleted, waiting for the deletion to complete" , work .Name )
515+ }
516+
517+ // updateOwnerReference updates the AppliedWork owner reference in the manifest objects.
518+ // It changes the blockOwnerDeletion field to false, so that the AppliedWork can be deleted in cases where
519+ // the other owner references do not exist or are invalid.
520+ // https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/#owner-references-in-object-specifications
521+ func (r * Reconciler ) updateOwnerReference (ctx context.Context , work * fleetv1beta1.Work , appliedWork * fleetv1beta1.AppliedWork ) error {
522+ appliedWorkOwnerRef := & metav1.OwnerReference {
523+ APIVersion : fleetv1beta1 .GroupVersion .String (),
524+ Kind : "AppliedWork" ,
525+ Name : appliedWork .Name ,
526+ UID : appliedWork .UID ,
527+ }
528+
529+ if err := r .hubClient .Get (ctx , types.NamespacedName {Name : work .Name , Namespace : work .Namespace }, work ); err != nil {
530+ if apierrors .IsNotFound (err ) {
531+ klog .V (2 ).InfoS ("Work object not found, skipping owner reference update" , "work" , work .Name , "namespace" , work .Namespace )
532+ return nil
533+ }
534+ klog .ErrorS (err , "Failed to get Work object for owner reference update" , "work" , work .Name , "namespace" , work .Namespace )
535+ return controller .NewAPIServerError (false , err )
536+ }
537+
538+ for _ , cond := range work .Status .ManifestConditions {
539+ res := cond .Identifier
540+ gvr := schema.GroupVersionResource {
541+ Group : res .Group ,
542+ Version : res .Version ,
543+ Resource : res .Resource ,
544+ }
545+
546+ var obj * unstructured.Unstructured
547+ var err error
548+ if obj , err = r .spokeDynamicClient .Resource (gvr ).Namespace (res .Namespace ).Get (ctx , res .Name , metav1.GetOptions {}); err != nil {
549+ if apierrors .IsNotFound (err ) {
550+ continue
551+ }
552+ klog .ErrorS (err , "Failed to get manifest" , "gvr" , gvr , "name" , res .Name , "namespace" , res .Namespace )
553+ return err
554+ }
555+ // Check if there is more than one owner reference. If there is only one owner reference, it is the appliedWork itself.
556+ // Otherwise, at least one other owner reference exists, and we need to leave resource alone.
557+ if len (obj .GetOwnerReferences ()) > 1 {
558+ ownerRefs := obj .GetOwnerReferences ()
559+ updated := false
560+ for idx := range ownerRefs {
561+ if areOwnerRefsEqual (& ownerRefs [idx ], appliedWorkOwnerRef ) {
562+ ownerRefs [idx ].BlockOwnerDeletion = ptr .To (false )
563+ updated = true
564+ }
565+ }
566+ if updated {
567+ obj .SetOwnerReferences (ownerRefs )
568+ if _ , err = r .spokeDynamicClient .Resource (gvr ).Namespace (obj .GetNamespace ()).Update (ctx , obj , metav1.UpdateOptions {}); err != nil {
569+ klog .ErrorS (err , "Failed to update manifest owner references" , "gvr" , gvr , "name" , res .Name , "namespace" , res .Namespace )
570+ return err
571+ }
572+ klog .V (4 ).InfoS ("Patched manifest owner references" , "gvr" , gvr , "name" , res .Name , "namespace" , res .Namespace )
573+ }
574+ }
575+ }
576+ return nil
577+ }
578+
579+ // removeWorkFinalizer removes the finalizer from the work and updates it in the hub.
580+ func (r * Reconciler ) removeWorkFinalizer (ctx context.Context , work * fleetv1beta1.Work ) (ctrl.Result , error ) {
581+ controllerutil .RemoveFinalizer (work , fleetv1beta1 .WorkFinalizer )
511582 if err := r .hubClient .Update (ctx , work , & client.UpdateOptions {}); err != nil {
512583 klog .ErrorS (err , "Failed to remove the finalizer from the work" , "work" , klog .KObj (work ))
513584 return ctrl.Result {}, controller .NewAPIServerError (false , err )
0 commit comments