@@ -117,18 +117,20 @@ type AtomicRelease struct {
117117 strategy releaseStrategy
118118 fieldManager string
119119 disallowedFieldManagers []string
120+ defaultToRetryOnFailure bool
120121}
121122
122123// NewAtomicRelease returns a new AtomicRelease reconciler configured with the
123124// provided values.
124- func NewAtomicRelease (patchHelper * patch.SerialPatcher , cfg * action.ConfigFactory , recorder record.EventRecorder , fieldManager string , disallowedFieldManagers []string ) * AtomicRelease {
125+ func NewAtomicRelease (patchHelper * patch.SerialPatcher , cfg * action.ConfigFactory , recorder record.EventRecorder , fieldManager string , disallowedFieldManagers []string , defaultToRetryOnFailure bool ) * AtomicRelease {
125126 return & AtomicRelease {
126127 patchHelper : patchHelper ,
127128 eventRecorder : recorder ,
128129 configFactory : cfg ,
129130 strategy : & cleanReleaseStrategy {},
130131 fieldManager : fieldManager ,
131132 disallowedFieldManagers : disallowedFieldManagers ,
133+ defaultToRetryOnFailure : defaultToRetryOnFailure ,
132134 }
133135}
134136
@@ -230,7 +232,7 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
230232 fmt .Sprintf ("instructed to stop before running %s action reconciler %s" , next .Type (), next .Name ()),
231233 )
232234
233- if retry := req .Object .GetActiveRetry (); retry != nil {
235+ if retry := req .Object .GetActiveRetry (r . defaultToRetryOnFailure ); retry != nil {
234236 conditions .MarkReconciling (req .Object , meta .ProgressingWithRetryReason , "retrying after %s" , retry .GetRetryInterval ().String ())
235237 return ErrRetryAfterInterval
236238 }
@@ -270,7 +272,7 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
270272 fmt .Sprintf ("action reconciler %s of type %s returned error: %s" , next .Name (), next .Type (), err ),
271273 )
272274
273- if retry := req .Object .GetActiveRetry (); retry != nil {
275+ if retry := req .Object .GetActiveRetry (r . defaultToRetryOnFailure ); retry != nil {
274276 log .Error (err , fmt .Sprintf ("failed to run '%s' action" , next .Name ()))
275277 conditions .MarkReconciling (req .Object , meta .ProgressingWithRetryReason , "retrying after %s" , retry .GetRetryInterval ().String ())
276278 return ErrRetryAfterInterval
@@ -288,7 +290,7 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
288290 "instructed to stop after running %s action reconciler %s" , next .Type (), next .Name ()),
289291 )
290292
291- if retry := req .Object .GetActiveRetry (); retry != nil {
293+ if retry := req .Object .GetActiveRetry (r . defaultToRetryOnFailure ); retry != nil {
292294 conditions .MarkReconciling (req .Object , meta .ProgressingWithRetryReason , "retrying after %s" , retry .GetRetryInterval ().String ())
293295 return ErrRetryAfterInterval
294296 }
@@ -331,7 +333,7 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
331333 case ReleaseStatusInSync :
332334 log .Info ("release in-sync with desired state" )
333335
334- if retry := req .Object .GetActiveRetry (); retry != nil {
336+ if retry := req .Object .GetActiveRetry (r . defaultToRetryOnFailure ); retry != nil {
335337 req .Object .Status .History .TruncateIgnoringPreviousSnapshots ()
336338 } else {
337339 // Remove all history up to the previous release action.
@@ -347,7 +349,7 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
347349
348350 if forceRequested {
349351 log .Info (msgWithReason ("forcing upgrade for in-sync release" , "force requested through annotation" ))
350- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
352+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
351353 }
352354
353355 // Since the release is in-sync, remove any remediated condition if
@@ -389,33 +391,33 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
389391 if req .Object .GetInstall ().GetRemediation ().RetriesExhausted (req .Object ) {
390392 if forceRequested {
391393 log .Info (msgWithReason ("forcing install while out of retries" , "force requested through annotation" ))
392- return NewInstall (r .configFactory , r .eventRecorder ), nil
394+ return NewInstall (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
393395 }
394396
395397 return nil , fmt .Errorf ("%w: cannot install release" , ErrExceededMaxRetries )
396398 }
397399
398- return NewInstall (r .configFactory , r .eventRecorder ), nil
400+ return NewInstall (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
399401 case ReleaseStatusUnmanaged :
400402 log .Info (msgWithReason ("release not managed by controller" , state .Reason ))
401403
402404 // Clear the history as we can no longer rely on it.
403405 req .Object .Status .ClearHistory ()
404406
405- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
407+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
406408 case ReleaseStatusOutOfSync :
407409 log .Info (msgWithReason ("release out-of-sync with desired state" , state .Reason ))
408410
409411 if req .Object .GetUpgrade ().GetRemediation ().RetriesExhausted (req .Object ) {
410412 if forceRequested {
411413 log .Info (msgWithReason ("forcing upgrade while out of retries" , "force requested through annotation" ))
412- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
414+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
413415 }
414416
415417 return nil , fmt .Errorf ("%w: cannot upgrade release" , ErrExceededMaxRetries )
416418 }
417419
418- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
420+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
419421 case ReleaseStatusDrifted :
420422 log .Info (msgWithReason ("detected changes in cluster state" , diff .SummarizeDiffSetBrief (state .Diff )))
421423 for _ , change := range state .Diff {
@@ -467,11 +469,11 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
467469
468470 // If the action strategy is to retry (and not remediate), we behave just like
469471 // "flux reconcile hr --force" and .spec.<action>.remediation.retries set to 0.
470- if req .Object .GetActiveRetry () != nil {
472+ if req .Object .GetActiveRetry (r . defaultToRetryOnFailure ) != nil {
471473 req .Object .Status .History .TruncateIgnoringPreviousSnapshots ()
472474
473475 log .V (logger .DebugLevel ).Info ("retrying upgrade for failed release" )
474- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
476+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
475477 }
476478
477479 remediation := req .Object .GetActiveRemediation ()
@@ -480,7 +482,7 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
480482 // upgrade the release to see if that fixes the problem.
481483 if remediation == nil {
482484 log .V (logger .DebugLevel ).Info ("no active remediation strategy" )
483- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
485+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
484486 }
485487
486488 // If there is no failure count, the conditions under which the failure
@@ -490,14 +492,14 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
490492 // attempted again.
491493 if remediation .GetFailureCount (req .Object ) <= 0 {
492494 log .Info ("release conditions have changed since last failure" )
493- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
495+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
494496 }
495497
496498 // If the force annotation is set, we can attempt to upgrade the release
497499 // without any further checks.
498500 if forceRequested {
499501 log .Info (msgWithReason ("forcing upgrade for failed release" , "force requested through annotation" ))
500- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
502+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
501503 }
502504
503505 // We have exhausted the number of retries for the remediation
@@ -526,7 +528,7 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
526528 // If the rollback target is in any way corrupt,
527529 // the most correct remediation is to reattempt the upgrade.
528530 log .Info (msgWithReason ("unable to verify previous release in storage to roll back to" , err .Error ()))
529- return NewUpgrade (r .configFactory , r .eventRecorder ), nil
531+ return NewUpgrade (r .configFactory , r .eventRecorder , r . defaultToRetryOnFailure ), nil
530532 }
531533
532534 // This may be a temporary error, return it to retry.
0 commit comments