Skip to content

Commit 0bcd7a6

Browse files
authored
feat: Add per-workspace backup status tracking with annotations (#1550)
Signed-off-by: Anatolii Bazko <abazko@redhat.com>
1 parent 8941606 commit 0bcd7a6

File tree

10 files changed

+694
-20
lines changed

10 files changed

+694
-20
lines changed

controllers/backupcronjob/backupcronjob_controller.go

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controllers
1818
import (
1919
"context"
2020
"reflect"
21+
"time"
2122

2223
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2324
"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -40,6 +41,7 @@ import (
4041
"k8s.io/apimachinery/pkg/runtime"
4142
"k8s.io/utils/ptr"
4243
ctrl "sigs.k8s.io/controller-runtime"
44+
"sigs.k8s.io/controller-runtime/pkg/builder"
4345
"sigs.k8s.io/controller-runtime/pkg/client"
4446
"sigs.k8s.io/controller-runtime/pkg/event"
4547
"sigs.k8s.io/controller-runtime/pkg/handler"
@@ -93,7 +95,8 @@ func (r *BackupCronJobReconciler) SetupWithManager(mgr ctrl.Manager) error {
9395

9496
return ctrl.NewControllerManagedBy(mgr).
9597
Named("BackupCronJob").
96-
Watches(&controllerv1alpha1.DevWorkspaceOperatorConfig{},
98+
Watches(
99+
&controllerv1alpha1.DevWorkspaceOperatorConfig{},
97100
handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request {
98101
operatorNamespace, err := infrastructure.GetNamespace()
99102
// Ignore events from other namespaces
@@ -111,17 +114,22 @@ func (r *BackupCronJobReconciler) SetupWithManager(mgr ctrl.Manager) error {
111114
},
112115
}
113116
}),
117+
builder.WithPredicates(configPredicate),
118+
).
119+
Watches(
120+
&batchv1.Job{},
121+
r.getBackupJobEventHandler(),
122+
builder.WithPredicates(r.getBackupJobPredicate()),
114123
).
115-
WithEventFilter(configPredicate).
116124
Complete(r)
117125
}
118126

119127
// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list
120128
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;create;update;patch;delete
121129
// +kubebuilder:rbac:groups="",resources=serviceaccounts;,verbs=get;list;create;update;patch;delete
122-
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;create;update;patch;delete
130+
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;create;update;patch;delete;watch
123131
// +kubebuilder:rbac:groups=controller.devfile.io,resources=devworkspaceoperatorconfigs,verbs=get;list;update;patch;watch
124-
// +kubebuilder:rbac:groups=workspace.devfile.io,resources=devworkspaces,verbs=get;list
132+
// +kubebuilder:rbac:groups=workspace.devfile.io,resources=devworkspaces,verbs=get;list;update;patch
125133
// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;create;update;patch;delete
126134
// +kubebuilder:rbac:groups="",resources=builds,verbs=get
127135
// +kubebuilder:rbac:groups="",resources=builds/details,verbs=update
@@ -215,7 +223,7 @@ func (r *BackupCronJobReconciler) stopCron(log logr.Logger) {
215223
}
216224

217225
// executeBackupSync executes the backup job for all DevWorkspaces in the cluster that
218-
// have been stopped in the last N minutes.
226+
// have been stopped since their last backup.
219227
func (r *BackupCronJobReconciler) executeBackupSync(ctx context.Context, dwOperatorConfig *controllerv1alpha1.DevWorkspaceOperatorConfig, log logr.Logger) error {
220228
log.Info("Executing backup sync for all DevWorkspaces")
221229

@@ -264,31 +272,67 @@ func (r *BackupCronJobReconciler) executeBackupSync(ctx context.Context, dwOpera
264272
return nil
265273
}
266274

267-
// wasStoppedSinceLastBackup checks if the DevWorkspace was stopped since the last backup time.
268-
func (r *BackupCronJobReconciler) wasStoppedSinceLastBackup(workspace *dw.DevWorkspace, lastBackupTime *metav1.Time, log logr.Logger) bool {
269-
if workspace.Status.Phase != dw.DevWorkspaceStatusStopped {
275+
// wasStoppedSinceLastBackup checks if the DevWorkspace was stopped since its last backup.
276+
// It reads the last backup time from the DevWorkspace annotation, or falls back to the
277+
// provided globalLastBackupTime if the annotation is missing or cannot be parsed.
278+
func (r *BackupCronJobReconciler) wasStoppedSinceLastBackup(
279+
devWorkspace *dw.DevWorkspace,
280+
globalLastBackupTime *metav1.Time,
281+
log logr.Logger,
282+
) bool {
283+
if devWorkspace.Status.Phase != dw.DevWorkspaceStatusStopped {
270284
return false
271285
}
272-
log.Info("DevWorkspace is currently stopped, checking if it was stopped since last backup", "namespace", workspace.Namespace, "name", workspace.Name)
273-
// Check if the workspace was stopped in the last N minutes
274-
if workspace.Status.Conditions != nil {
286+
log.Info("DevWorkspace is currently stopped, checking if it was stopped since last backup", "namespace", devWorkspace.Namespace, "name", devWorkspace.Name)
287+
288+
var lastBackupFinishedAt *metav1.Time
289+
var lastBackupSuccessful bool
290+
291+
// Get the last backup time and success status from the workspace annotations
292+
if devWorkspace.Annotations != nil {
293+
if lastBackupFinishedAtStr, ok := devWorkspace.Annotations[constants.DevWorkspaceLastBackupFinishedAtAnnotation]; ok {
294+
parsedTime, err := time.Parse(time.RFC3339Nano, lastBackupFinishedAtStr)
295+
if err != nil {
296+
log.Error(err, "Failed to parse last backup time annotation, treating as no previous backup", "value", lastBackupFinishedAtStr)
297+
} else {
298+
lastBackupFinishedAt = &metav1.Time{Time: parsedTime}
299+
}
300+
}
301+
302+
lastBackupSuccessful = devWorkspace.Annotations[constants.DevWorkspaceLastBackupSuccessfulAnnotation] == "true"
303+
}
304+
305+
if lastBackupFinishedAt == nil {
306+
// Fall back to globalLastBackupTime if the annotation is missing or could not be parsed
307+
lastBackupFinishedAt = globalLastBackupTime
308+
lastBackupSuccessful = true
309+
}
310+
311+
if lastBackupFinishedAt == nil {
312+
return true
313+
}
314+
315+
if !lastBackupSuccessful {
316+
return true
317+
}
318+
319+
// Check if the workspace was stopped since the last successful backup
320+
if devWorkspace.Status.Conditions != nil {
275321
lastTimeStopped := metav1.Time{}
276-
for _, condition := range workspace.Status.Conditions {
322+
for _, condition := range devWorkspace.Status.Conditions {
277323
if condition.Type == conditions.Started && condition.Status == corev1.ConditionFalse {
278324
lastTimeStopped = condition.LastTransitionTime
279325
}
280326
}
327+
281328
if !lastTimeStopped.IsZero() {
282-
if lastBackupTime == nil {
283-
// No previous backup, so consider it stopped since last backup
284-
return true
285-
}
286-
if lastTimeStopped.Time.After(lastBackupTime.Time) {
287-
log.Info("DevWorkspace was stopped since last backup", "namespace", workspace.Namespace, "name", workspace.Name)
329+
if lastTimeStopped.Time.After(lastBackupFinishedAt.Time) {
330+
log.Info("DevWorkspace was stopped since last successful backup", "namespace", devWorkspace.Namespace, "name", devWorkspace.Name)
288331
return true
289332
}
290333
}
291334
}
335+
292336
return false
293337
}
294338

@@ -336,6 +380,7 @@ func (r *BackupCronJobReconciler) createBackupJob(
336380
Namespace: workspace.Namespace,
337381
Labels: map[string]string{
338382
constants.DevWorkspaceIDLabel: dwID,
383+
constants.DevWorkspaceNameLabel: workspace.Name,
339384
constants.DevWorkspaceBackupJobLabel: "true",
340385
},
341386
},
@@ -532,7 +577,7 @@ func (r *BackupCronJobReconciler) copySecret(ctx context.Context, workspace *dw.
532577
}
533578
err = r.Create(ctx, namespaceSecret)
534579
if err == nil {
535-
log.Info("Sucesfully created secret", "name", namespaceSecret.Name, "namespace", workspace.Namespace)
580+
log.Info("Successfully created secret", "name", namespaceSecret.Name, "namespace", workspace.Namespace)
536581
}
537582
return namespaceSecret, err
538583
}

0 commit comments

Comments
 (0)