Skip to content

Commit 2347b2f

Browse files
authored
Timeout (#136)
* add processing timeout * tweak backoff * cleanup tests * add docs
1 parent 5cf9db1 commit 2347b2f

File tree

11 files changed

+304
-166
lines changed

11 files changed

+304
-166
lines changed

internal/backoff/backoff.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,16 @@ type Backoff struct {
2222
func NewBackoff(maxDelay time.Duration) *Backoff {
2323
return &Backoff{
2424
activities: make(map[any]any),
25-
// resulting per-item backoff is the maximum of a 300-times-20ms-then-maxDelay per-item limiter,
26-
// and an overall 10-per-second-burst-20 bucket limiter;
27-
// as a consequence, we have up to 20 almost immediate retries, then a phase of 10 retries per seconnd
28-
// for approximately 30s, and then slow retries at the rate given by maxDelay
25+
// resulting per-item backoff is the maximum of a 200-times-50ms-then-maxDelay per-item limiter,
26+
// and an overall 5-per-second-burst-20 bucket limiter;
27+
// as a consequence, we have up to
28+
// - up to 20 almost immediate retries
29+
// - then then a phase of 5 guaranteed retries per seconnd (could be more if burst capacity is refilled
30+
// because of the duration of the reconcile logic execution itself)
31+
// - finally (after 200 iterations) slow retries at the rate given by maxDelay
2932
limiter: workqueue.NewMaxOfRateLimiter(
30-
workqueue.NewItemFastSlowRateLimiter(20*time.Millisecond, maxDelay, 300),
31-
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(10), 20)},
33+
workqueue.NewItemFastSlowRateLimiter(50*time.Millisecond, maxDelay, 200),
34+
&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(5), 20)},
3235
),
3336
}
3437
}

pkg/component/component.go

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ import (
1111
"reflect"
1212
"time"
1313

14-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15-
1614
"github.com/sap/component-operator-runtime/internal/walk"
1715
)
1816

@@ -88,7 +86,18 @@ func assertRetryConfiguration[T Component](component T) (RetryConfiguration, boo
8886
return nil, false
8987
}
9088

91-
// Calculate digest of given component, honoring annotations, spec, and references
89+
// Check if given component or its spec implements TimeoutConfiguration (and return it).
90+
func assertTimeoutConfiguration[T Component](component T) (TimeoutConfiguration, bool) {
91+
if timeoutConfiguration, ok := Component(component).(TimeoutConfiguration); ok {
92+
return timeoutConfiguration, true
93+
}
94+
if timeoutConfiguration, ok := getSpec(component).(TimeoutConfiguration); ok {
95+
return timeoutConfiguration, true
96+
}
97+
return nil, false
98+
}
99+
100+
// Calculate digest of given component, honoring annotations, spec, and references.
92101
func calculateComponentDigest[T Component](component T) string {
93102
digestData := make(map[string]any)
94103
spec := getSpec(component)
@@ -120,8 +129,7 @@ func calculateComponentDigest[T Component](component T) string {
120129
// note: this panic is ok because walk.Walk() only produces errors if the given walker function raises any (which ours here does not do)
121130
panic("this cannot happen")
122131
}
123-
// note: this must() is ok because digestData should contain only serializable stuff
124-
return sha256hex(must(json.Marshal(digestData)))
132+
return calculateDigest(digestData)
125133
}
126134

127135
// Implement the PlacementConfiguration interface.
@@ -178,46 +186,62 @@ func (s *Status) IsReady() bool {
178186
return s.State == StateReady
179187
}
180188

181-
// Get state (and related details).
182-
func (s *Status) GetState() (State, string, string) {
183-
var cond *Condition
189+
// Implement the TimeoutConfiguration interface.
190+
func (s *TimeoutSpec) GetTimeout() time.Duration {
191+
if s.Timeout != nil {
192+
return s.Timeout.Duration
193+
}
194+
return time.Duration(0)
195+
}
196+
197+
// Get condition (and return nil if not existing).
198+
// Caveat: the returned pointer might become invalid if further appends happen to the Conditions slice in the status object.
199+
func (s *Status) getCondition(condType ConditionType) *Condition {
184200
for i := 0; i < len(s.Conditions); i++ {
185-
if s.Conditions[i].Type == ConditionTypeReady {
186-
cond = &s.Conditions[i]
187-
break
201+
if s.Conditions[i].Type == condType {
202+
return &s.Conditions[i]
188203
}
189204
}
190-
if cond == nil {
191-
return s.State, "", ""
192-
}
193-
return s.State, cond.Reason, cond.Message
205+
return nil
194206
}
195207

196-
// Set state and ready condition in status (according to the state value provided),
197-
func (s *Status) SetState(state State, reason string, message string) {
208+
// Get condition (adding it with initial values if not existing).
209+
// Caveat: the returned pointer might become invalid if further appends happen to the Conditions slice in the status object.
210+
func (s *Status) getOrAddCondition(condType ConditionType) *Condition {
198211
var cond *Condition
199212
for i := 0; i < len(s.Conditions); i++ {
200-
if s.Conditions[i].Type == ConditionTypeReady {
213+
if s.Conditions[i].Type == condType {
201214
cond = &s.Conditions[i]
202215
break
203216
}
204217
}
205218
if cond == nil {
206-
s.Conditions = append(s.Conditions, Condition{Type: ConditionTypeReady})
219+
s.Conditions = append(s.Conditions, Condition{Type: condType, Status: ConditionUnknown})
207220
cond = &s.Conditions[len(s.Conditions)-1]
208221
}
209-
var status ConditionStatus
222+
return cond
223+
}
224+
225+
// Get state (and related details).
226+
func (s *Status) GetState() (State, string, string) {
227+
cond := s.getCondition(ConditionTypeReady)
228+
if cond == nil {
229+
return s.State, "", ""
230+
}
231+
return s.State, cond.Reason, cond.Message
232+
}
233+
234+
// Set state and ready condition in status (according to the state value provided).
235+
// Note: this method does not touch the condition's LastTransitionTime.
236+
func (s *Status) SetState(state State, reason string, message string) {
237+
cond := s.getOrAddCondition(ConditionTypeReady)
210238
switch state {
211239
case StateReady:
212-
status = ConditionTrue
240+
cond.Status = ConditionTrue
213241
case StateError:
214-
status = ConditionFalse
242+
cond.Status = ConditionFalse
215243
default:
216-
status = ConditionUnknown
217-
}
218-
if status != cond.Status {
219-
cond.Status = status
220-
cond.LastTransitionTime = ref(metav1.Now())
244+
cond.Status = ConditionUnknown
221245
}
222246
cond.Reason = reason
223247
cond.Message = message

pkg/component/reconciler.go

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ const (
6161
readyConditionReasonProcessing = "Processing"
6262
readyConditionReasonReady = "Ready"
6363
readyConditionReasonError = "Error"
64+
readyConditionReasonTimeout = "Timeout"
6465
readyConditionReasonDeletionPending = "DeletionPending"
6566
readyConditionReasonDeletionBlocked = "DeletionBlocked"
6667
readyConditionReasonDeletionProcessing = "DeletionProcessing"
@@ -169,11 +170,7 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
169170
}
170171
component.GetObjectKind().SetGroupVersionKind(r.groupVersionKind)
171172

172-
// convenience accessors
173-
status := component.GetStatus()
174-
savedStatus := status.DeepCopy()
175-
176-
// requeue/retry interval
173+
// fetch requeue interval, retry interval and timeout
177174
requeueInterval := time.Duration(0)
178175
if requeueConfiguration, ok := assertRequeueConfiguration(component); ok {
179176
requeueInterval = requeueConfiguration.GetRequeueInterval()
@@ -188,6 +185,17 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
188185
if retryInterval == 0 {
189186
retryInterval = requeueInterval
190187
}
188+
timeout := time.Duration(0)
189+
if timeoutConfiguration, ok := assertTimeoutConfiguration(component); ok {
190+
timeout = timeoutConfiguration.GetTimeout()
191+
}
192+
if timeout == 0 {
193+
timeout = requeueInterval
194+
}
195+
196+
// convenience accessors
197+
status := component.GetStatus()
198+
savedStatus := status.DeepCopy()
191199

192200
// always attempt to update the status
193201
skipStatusUpdate := false
@@ -197,11 +205,27 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
197205
// re-panic in order skip the remaining steps
198206
panic(r)
199207
}
208+
209+
status.ObservedGeneration = component.GetGeneration()
210+
200211
if status.State == StateReady || err != nil {
212+
// clear backoff if state is ready (obviously) or if there is an error;
213+
// even is the error is a RetriableError which will be turned into a non-error;
214+
// this is correct, because in that case, the RequeueAfter will be determined through the RetriableError
201215
r.backoff.Forget(req)
202216
}
203-
status.ObservedGeneration = component.GetGeneration()
217+
if status.State != StateProcessing || err != nil {
218+
// clear ProcessingDigest and ProcessingSince in all non-error cases where state is StateProcessing
219+
status.ProcessingDigest = ""
220+
status.ProcessingSince = nil
221+
}
222+
if status.State == StateProcessing && now.Sub(status.ProcessingSince.Time) >= timeout {
223+
// TODO: maybe it would be better to have a dedicated StateTimeout?
224+
status.SetState(StateError, readyConditionReasonTimeout, "Reconcilation of dependent resources timed out")
225+
}
226+
204227
if err != nil {
228+
// convert retriable errors into non-errors (Pending or DeletionPending state), and return specified or default backoff
205229
retriableError := &types.RetriableError{}
206230
if errors.As(err, retriableError) {
207231
retryAfter := retriableError.RetryAfter()
@@ -220,10 +244,12 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
220244
status.SetState(StateError, readyConditionReasonError, err.Error())
221245
}
222246
}
247+
223248
if result.RequeueAfter > 0 {
224249
// add jitter of 1-5 percent to RequeueAfter
225250
addJitter(&result.RequeueAfter, 1, 5)
226251
}
252+
227253
log.V(1).Info("reconcile done", "withError", err != nil, "requeue", result.Requeue || result.RequeueAfter > 0, "requeueAfter", result.RequeueAfter.String())
228254
if err != nil {
229255
if status, ok := err.(apierrors.APIStatus); ok || errors.As(err, &status) {
@@ -232,22 +258,34 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
232258
metrics.ReconcileErrors.WithLabelValues(r.controllerName, "other").Inc()
233259
}
234260
}
235-
// TODO: should we move this behind the DeepEqual check below?
236-
// note: it seems that no events will be written if the component's namespace is in deletion
261+
262+
// TODO: should we move this behind the DeepEqual check below to avoid noise?
263+
// also note: it seems that no events will be written if the component's namespace is in deletion
237264
state, reason, message := status.GetState()
238265
if state == StateError {
239266
r.client.EventRecorder().Event(component, corev1.EventTypeWarning, reason, message)
240267
} else {
241268
r.client.EventRecorder().Event(component, corev1.EventTypeNormal, reason, message)
242269
}
270+
243271
if skipStatusUpdate {
244272
return
245273
}
246274
if reflect.DeepEqual(status, savedStatus) {
247275
return
248276
}
249-
// note: it's crucial to set the following timestamp late (otherwise the DeepEqual() check before would always be false)
277+
278+
// note: it's crucial to set the following timestamps late (otherwise the DeepEqual() check above would always be false)
279+
// on the other hand it's a bit weird, because LastObservedAt will not be updated if no other changes have happened to the status;
280+
// and same for the conditions' LastTransitionTime timestamps;
281+
// maybe we should remove this optimization, and always do the Update() call
250282
status.LastObservedAt = &now
283+
for i := 0; i < len(status.Conditions); i++ {
284+
cond := &status.Conditions[i]
285+
if savedCond := savedStatus.getCondition(cond.Type); savedCond == nil || cond.Status != savedCond.Status {
286+
cond.LastTransitionTime = &now
287+
}
288+
}
251289
if updateErr := r.client.Status().Update(ctx, component, client.FieldOwner(r.name)); updateErr != nil {
252290
err = utilerrors.NewAggregate([]error{err, updateErr})
253291
result = ctrl.Result{}
@@ -256,7 +294,7 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
256294

257295
// set a first status (and requeue, because the status update itself will not trigger another reconciliation because of the event filter set)
258296
if status.ObservedGeneration <= 0 {
259-
status.SetState(StateProcessing, readyConditionReasonNew, "First seen")
297+
status.SetState(StatePending, readyConditionReasonNew, "First seen")
260298
return ctrl.Result{Requeue: true}, nil
261299
}
262300

@@ -301,7 +339,8 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
301339
return ctrl.Result{}, errors.Wrap(err, "error adding finalizer")
302340
}
303341
// trigger another round trip
304-
// this is necessary because the update call invalidates potential changes done by the post-read hook above
342+
// this is necessary because the update call invalidates potential changes done to the component by the post-read
343+
// hook above; this means, not to the object itself, but for example to loaded secrets or config maps;
305344
// in the following round trip, the finalizer will already be there, and the update will not happen again
306345
return ctrl.Result{Requeue: true}, nil
307346
}
@@ -312,7 +351,7 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
312351
return ctrl.Result{}, errors.Wrapf(err, "error running pre-reconcile hook (%d)", hookOrder)
313352
}
314353
}
315-
ok, err := target.Apply(ctx, component)
354+
ok, digest, err := target.Apply(ctx, component)
316355
if err != nil {
317356
log.V(1).Info("error while reconciling dependent resources")
318357
return ctrl.Result{}, errors.Wrap(err, "error reconciling dependent resources")
@@ -324,16 +363,21 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
324363
}
325364
}
326365
log.V(1).Info("all dependent resources successfully reconciled")
327-
status.SetState(StateReady, readyConditionReasonReady, "Dependent resources successfully reconciled")
328366
status.AppliedGeneration = component.GetGeneration()
329367
status.LastAppliedAt = &now
368+
status.SetState(StateReady, readyConditionReasonReady, "Dependent resources successfully reconciled")
330369
return ctrl.Result{RequeueAfter: requeueInterval}, nil
331370
} else {
332371
log.V(1).Info("not all dependent resources successfully reconciled")
333-
status.SetState(StateProcessing, readyConditionReasonProcessing, "Reconcilation of dependent resources triggered; waiting until all dependent resources are ready")
372+
if digest != status.ProcessingDigest {
373+
status.ProcessingDigest = digest
374+
status.ProcessingSince = &now
375+
r.backoff.Forget(req)
376+
}
334377
if !reflect.DeepEqual(status.Inventory, savedStatus.Inventory) {
335378
r.backoff.Forget(req)
336379
}
380+
status.SetState(StateProcessing, readyConditionReasonProcessing, "Reconcilation of dependent resources triggered; waiting until all dependent resources are ready")
337381
return ctrl.Result{RequeueAfter: r.backoff.Next(req, readyConditionReasonProcessing)}, nil
338382
}
339383
} else {
@@ -352,16 +396,16 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
352396
log.V(1).Info("deletion not allowed")
353397
// TODO: have an additional StateDeletionBlocked?
354398
// TODO: eliminate this msg logic
355-
status.SetState(StateDeleting, readyConditionReasonDeletionBlocked, "Deletion blocked: "+msg)
356399
r.client.EventRecorder().Event(component, corev1.EventTypeNormal, readyConditionReasonDeletionBlocked, "Deletion blocked: "+msg)
400+
status.SetState(StateDeleting, readyConditionReasonDeletionBlocked, "Deletion blocked: "+msg)
357401
return ctrl.Result{RequeueAfter: 1*time.Second + r.backoff.Next(req, readyConditionReasonDeletionBlocked)}, nil
358402
}
359403
if len(slices.Remove(component.GetFinalizers(), r.name)) > 0 {
360404
// deletion is blocked because of foreign finalizers
361405
log.V(1).Info("deleted blocked due to existence of foreign finalizers")
362406
// TODO: have an additional StateDeletionBlocked?
363-
status.SetState(StateDeleting, readyConditionReasonDeletionBlocked, "Deletion blocked due to existing foreign finalizers")
364407
r.client.EventRecorder().Event(component, corev1.EventTypeNormal, readyConditionReasonDeletionBlocked, "Deletion blocked due to existing foreign finalizers")
408+
status.SetState(StateDeleting, readyConditionReasonDeletionBlocked, "Deletion blocked due to existing foreign finalizers")
365409
return ctrl.Result{RequeueAfter: 1*time.Second + r.backoff.Next(req, readyConditionReasonDeletionBlocked)}, nil
366410
}
367411
// deletion case
@@ -392,10 +436,10 @@ func (r *Reconciler[T]) Reconcile(ctx context.Context, req ctrl.Request) (result
392436
} else {
393437
// deletion triggered for dependent resources, but some are not yet gone
394438
log.V(1).Info("not all dependent resources are successfully deleted")
395-
status.SetState(StateDeleting, readyConditionReasonDeletionProcessing, "Deletion of dependent resources triggered; waiting until dependent resources are deleted")
396439
if !reflect.DeepEqual(status.Inventory, savedStatus.Inventory) {
397440
r.backoff.Forget(req)
398441
}
442+
status.SetState(StateDeleting, readyConditionReasonDeletionProcessing, "Deletion of dependent resources triggered; waiting until dependent resources are deleted")
399443
return ctrl.Result{RequeueAfter: r.backoff.Next(req, readyConditionReasonDeletionProcessing)}, nil
400444
}
401445
}

pkg/component/reference.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package component
77

88
import (
99
"context"
10-
"encoding/json"
1110
"fmt"
1211
"reflect"
1312
"strings"
@@ -69,8 +68,7 @@ func (r *ConfigMapReference) digest() string {
6968
if !r.loaded {
7069
return ""
7170
}
72-
// note: this must() is ok because marshalling map[string]string should always work
73-
return sha256hex(must(json.Marshal(r.data)))
71+
return calculateDigest(r.data)
7472
}
7573

7674
// Return the previously loaded configmap data.
@@ -176,8 +174,7 @@ func (r *SecretReference) digest() string {
176174
if !r.loaded {
177175
return ""
178176
}
179-
// note: this must() is ok because marshalling map[string][]byte should always work
180-
return sha256hex(must(json.Marshal(r.data)))
177+
return calculateDigest(r.data)
181178
}
182179

183180
// Return the previously loaded secret data.

0 commit comments

Comments
 (0)