Skip to content

Commit a00f81c

Browse files
committed
feat: Allow device reboot/reset/reprovisioning through annotation
1 parent 191a009 commit a00f81c

File tree

8 files changed

+292
-68
lines changed

8 files changed

+292
-68
lines changed

api/core/v1alpha1/groupversion_info.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,23 @@ const L2VNILabel = "networking.metal.ironcore.dev/evi-name"
6363
// the name of the VRF they belong to.
6464
const VRFLabel = "networking.metal.ironcore.dev/vrf-name"
6565

66+
// DeviceMaintenanceAnnotation is an annotation that can be applied to Device objects
67+
// to trigger certain disruptive operations, such as reboots, factory resets, or reprovisioning.
68+
const DeviceMaintenanceAnnotation = "networking.metal.ironcore.dev/maintenance"
69+
70+
// Device maintenance actions that can be requested via the DeviceMaintenanceAnnotation.
71+
const (
72+
// DeviceMaintenanceReboot requests a device reboot.
73+
DeviceMaintenanceReboot = "reboot"
74+
// DeviceMaintenanceFactoryReset requests a factory reset of the device. A factory reset
75+
// will erase all configuration and return the device to its original state.
76+
DeviceMaintenanceFactoryReset = "factory-reset"
77+
// DeviceMaintenanceReprovision requests reprovisioning of the device, without completely resetting it.
78+
DeviceMaintenanceReprovision = "reprovision"
79+
// DeviceMaintenanceResetPhaseToProvisioning requests resetting the device's phase so that it goes through provisioning again, without rebooting or otherwise preparing the device.
80+
DeviceMaintenanceResetPhaseToProvisioning = "reset-phase-to-provisioning"
81+
)
82+
6683
// Condition types that are used across different objects.
6784
const (
6885
// ReadyCondition is the top-level status condition that reports if an object is ready.

config/default/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ resources:
2424
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
2525
- ../certmanager
2626
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
27-
#- ../prometheus
27+
- ../prometheus
2828
# [METRICS] Expose the controller manager metrics service.
2929
- metrics_service.yaml
3030
# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.

internal/controller/core/device_controller.go

Lines changed: 113 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,33 @@ func (r *DeviceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ c
8686
return ctrl.Result{}, err
8787
}
8888

89+
prov, ok := r.Provider().(provider.DeviceProvider)
90+
if !ok {
91+
err := errors.New("provider does not implement DeviceProvider interface")
92+
log.Error(err, "failed to reconcile resource")
93+
return ctrl.Result{}, err
94+
}
95+
conn, err := deviceutil.GetDeviceConnection(ctx, r, obj)
96+
if err != nil {
97+
return ctrl.Result{}, fmt.Errorf("failed to obtain device connection: %w", err)
98+
}
99+
89100
orig := obj.DeepCopy()
101+
90102
if conditions.InitializeConditions(obj, v1alpha1.ReadyCondition) {
91103
log.Info("Initializing status conditions")
92104
return ctrl.Result{}, r.Status().Update(ctx, obj)
93105
}
94106

95-
// Always attempt to update the status after reconciliation
107+
// Always attempt to update the metadata/status after reconciliation
96108
defer func() {
109+
if !equality.Semantic.DeepEqual(orig.ObjectMeta, obj.ObjectMeta) {
110+
// pass obj.DeepCopy() to avoid Patch() modifying obj and interfering with status update below
111+
if err := r.Patch(ctx, obj.DeepCopy(), client.MergeFrom(orig)); err != nil {
112+
log.Error(err, "Failed to update resource metadata")
113+
reterr = kerrors.NewAggregate([]error{reterr, err})
114+
}
115+
}
97116
if !equality.Semantic.DeepEqual(orig.Status, obj.Status) {
98117
if err := r.Status().Patch(ctx, obj, client.MergeFrom(orig)); err != nil {
99118
log.Error(err, "Failed to update status")
@@ -130,6 +149,9 @@ func (r *DeviceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ c
130149
return ctrl.Result{}, nil
131150

132151
case v1alpha1.DevicePhaseProvisioning:
152+
annotations := obj.GetAnnotations()
153+
delete(annotations, v1alpha1.DeviceMaintenanceAnnotation)
154+
obj.SetAnnotations(annotations)
133155
activeProv := obj.GetActiveProvisioning()
134156
if activeProv == nil {
135157
log.Info("Device has not made a provisioning request yet")
@@ -157,10 +179,6 @@ func (r *DeviceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ c
157179
}
158180
log.Info("Device provisioning completed, running post provisioning checks")
159181
prov, _ := r.Provider().(provider.ProvisioningProvider)
160-
conn, err := deviceutil.GetDeviceConnection(ctx, r, obj)
161-
if err != nil {
162-
return ctrl.Result{}, fmt.Errorf("failed to obtain device connection: %w", err)
163-
}
164182
if ok := prov.VerifyProvisioned(ctx, conn, obj); !ok {
165183
return ctrl.Result{RequeueAfter: r.RequeueInterval}, nil
166184
}
@@ -169,7 +187,7 @@ func (r *DeviceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ c
169187
obj.Status.Phase = v1alpha1.DevicePhaseRunning
170188

171189
case v1alpha1.DevicePhaseRunning:
172-
if err := r.reconcile(ctx, obj); err != nil {
190+
if err := r.reconcile(ctx, obj, prov, conn); err != nil {
173191
log.Error(err, "Failed to reconcile resource")
174192
return ctrl.Result{}, err
175193
}
@@ -187,6 +205,10 @@ func (r *DeviceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ c
187205
obj.Status.Phase = v1alpha1.DevicePhaseRunning
188206
}
189207

208+
if err := r.reconcileMaintenance(ctx, obj, prov, conn); err != nil {
209+
return ctrl.Result{}, err
210+
}
211+
190212
return ctrl.Result{}, nil
191213
}
192214

@@ -231,73 +253,67 @@ func (r *DeviceReconciler) SetupWithManager(mgr ctrl.Manager) error {
231253
Complete(r)
232254
}
233255

234-
func (r *DeviceReconciler) reconcile(ctx context.Context, device *v1alpha1.Device) (reterr error) {
235-
if prov, ok := r.Provider().(provider.DeviceProvider); ok {
236-
conn, err := deviceutil.GetDeviceConnection(ctx, r, device)
237-
if err != nil {
238-
return err
256+
func (r *DeviceReconciler) reconcile(ctx context.Context, device *v1alpha1.Device, prov provider.DeviceProvider, conn *deviceutil.Connection) (reterr error) {
257+
if err := prov.Connect(ctx, conn); err != nil {
258+
conditions.Set(device, metav1.Condition{
259+
Type: v1alpha1.ReadyCondition,
260+
Status: metav1.ConditionFalse,
261+
Reason: v1alpha1.UnreachableReason,
262+
Message: fmt.Sprintf("Failed to connect to provider: %v", err),
263+
})
264+
return fmt.Errorf("failed to connect to provider: %w", err)
265+
}
266+
defer func() {
267+
if err := prov.Disconnect(ctx, conn); err != nil {
268+
reterr = kerrors.NewAggregate([]error{reterr, err})
239269
}
270+
}()
240271

241-
if err := prov.Connect(ctx, conn); err != nil {
242-
conditions.Set(device, metav1.Condition{
243-
Type: v1alpha1.ReadyCondition,
244-
Status: metav1.ConditionFalse,
245-
Reason: v1alpha1.UnreachableReason,
246-
Message: fmt.Sprintf("Failed to connect to provider: %v", err),
247-
})
248-
return fmt.Errorf("failed to connect to provider: %w", err)
249-
}
250-
defer func() {
251-
if err := prov.Disconnect(ctx, conn); err != nil {
252-
reterr = kerrors.NewAggregate([]error{reterr, err})
253-
}
254-
}()
272+
ports, err := prov.ListPorts(ctx)
273+
if err != nil {
274+
return fmt.Errorf("failed to list device ports: %w", err)
275+
}
255276

256-
ports, err := prov.ListPorts(ctx)
257-
if err != nil {
258-
return fmt.Errorf("failed to list device ports: %w", err)
259-
}
277+
interfaces := new(v1alpha1.InterfaceList)
278+
if err := r.List(ctx, interfaces, client.InNamespace(device.Namespace), client.MatchingLabels{v1alpha1.DeviceLabel: device.Name}); err != nil {
279+
return fmt.Errorf("failed to list interface resources for device: %w", err)
280+
}
260281

261-
interfaces := new(v1alpha1.InterfaceList)
262-
if err := r.List(ctx, interfaces, client.InNamespace(device.Namespace), client.MatchingLabels{v1alpha1.DeviceLabel: device.Name}); err != nil {
263-
return fmt.Errorf("failed to list interface resources for device: %w", err)
264-
}
282+
m := make(map[string]string) // ID => Resource Name
283+
for _, intf := range interfaces.Items {
284+
m[intf.Spec.Name] = intf.Name
285+
}
265286

266-
m := make(map[string]string) // ID => Resource Name
267-
for _, intf := range interfaces.Items {
268-
m[intf.Spec.Name] = intf.Name
287+
device.Status.Ports = make([]v1alpha1.DevicePort, len(ports))
288+
n := int32(0)
289+
for i, p := range ports {
290+
var ref *v1alpha1.LocalObjectReference
291+
if name, ok := m[p.ID]; ok {
292+
ref = &v1alpha1.LocalObjectReference{Name: name}
293+
n++
269294
}
270-
271-
device.Status.Ports = make([]v1alpha1.DevicePort, len(ports))
272-
n := int32(0)
273-
for i, p := range ports {
274-
var ref *v1alpha1.LocalObjectReference
275-
if name, ok := m[p.ID]; ok {
276-
ref = &v1alpha1.LocalObjectReference{Name: name}
277-
n++
278-
}
279-
device.Status.Ports[i] = v1alpha1.DevicePort{
280-
Name: p.ID,
281-
Type: p.Type,
282-
SupportedSpeedsGbps: p.SupportedSpeedsGbps,
283-
Transceiver: p.Transceiver,
284-
InterfaceRef: ref,
285-
}
286-
slices.Sort(device.Status.Ports[i].SupportedSpeedsGbps)
295+
device.Status.Ports[i] = v1alpha1.DevicePort{
296+
Name: p.ID,
297+
Type: p.Type,
298+
SupportedSpeedsGbps: p.SupportedSpeedsGbps,
299+
Transceiver: p.Transceiver,
300+
InterfaceRef: ref,
287301
}
302+
slices.Sort(device.Status.Ports[i].SupportedSpeedsGbps)
303+
}
288304

289-
device.Status.PostSummary = PortSummary(device.Status.Ports)
290-
291-
info, err := prov.GetDeviceInfo(ctx)
292-
if err != nil {
293-
return fmt.Errorf("failed to get device details: %w", err)
294-
}
305+
device.Status.PostSummary = PortSummary(device.Status.Ports)
295306

296-
device.Status.Manufacturer = info.Manufacturer
297-
device.Status.Model = info.Model
298-
device.Status.SerialNumber = info.SerialNumber
299-
device.Status.FirmwareVersion = info.FirmwareVersion
307+
info, err := prov.GetDeviceInfo(ctx)
308+
if err != nil {
309+
return fmt.Errorf("failed to get device details: %w", err)
300310
}
311+
312+
device.Status.Manufacturer = info.Manufacturer
313+
device.Status.Model = info.Model
314+
device.Status.SerialNumber = info.SerialNumber
315+
device.Status.FirmwareVersion = info.FirmwareVersion
316+
301317
conditions.Set(device, metav1.Condition{
302318
Type: v1alpha1.ReadyCondition,
303319
Status: metav1.ConditionTrue,
@@ -308,6 +324,40 @@ func (r *DeviceReconciler) reconcile(ctx context.Context, device *v1alpha1.Devic
308324
return nil
309325
}
310326

327+
func (r *DeviceReconciler) reconcileMaintenance(ctx context.Context, obj *v1alpha1.Device, prov provider.DeviceProvider, conn *deviceutil.Connection) error {
328+
action, ok := obj.Annotations[v1alpha1.DeviceMaintenanceAnnotation]
329+
if !ok {
330+
return nil
331+
}
332+
delete(obj.Annotations, v1alpha1.DeviceMaintenanceAnnotation)
333+
switch action {
334+
case v1alpha1.DeviceMaintenanceReboot:
335+
r.Recorder.Event(obj, "Normal", "RebootRequested", "Device reboot has been requested")
336+
if err := prov.Reboot(ctx, conn); err != nil {
337+
return fmt.Errorf("failed to reboot device: %w", err)
338+
}
339+
340+
case v1alpha1.DeviceMaintenanceFactoryReset:
341+
r.Recorder.Event(obj, "Normal", "FactoryResetRequested", "Device factory reset has been requested")
342+
if err := prov.FactoryReset(ctx, conn); err != nil {
343+
return fmt.Errorf("failed to reset device to factory defaults: %w", err)
344+
}
345+
346+
case v1alpha1.DeviceMaintenanceReprovision:
347+
r.Recorder.Event(obj, "Normal", "ReprovisioningRequested", "Device reprovisioning has been requested. Preparing the device.")
348+
if err := prov.Reprovision(ctx, conn); err != nil {
349+
return fmt.Errorf("failed to reset device to factory defaults: %w", err)
350+
}
351+
obj.Status.Phase = v1alpha1.DevicePhasePending
352+
case v1alpha1.DeviceMaintenanceResetPhaseToProvisioning:
353+
r.Recorder.Event(obj, "Normal", "ResetPhaseToProvisioningRequested", "Device phase reset to Pending has been requested.")
354+
obj.Status.Phase = v1alpha1.DevicePhasePending
355+
default:
356+
return fmt.Errorf("unknown device action: %s", action)
357+
}
358+
return nil
359+
}
360+
311361
// secretToDevices is a [handler.MapFunc] to be used to enqueue requests for reconciliation
312362
// for a Device to update when one of its referenced Secrets gets updated.
313363
func (r *DeviceReconciler) secretToDevices(ctx context.Context, obj client.Object) []ctrl.Request {

internal/controller/core/device_controller_test.go

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,8 @@ var _ = Describe("Device Controller", func() {
203203
resource.Status.Phase = v1alpha1.DevicePhaseProvisioned
204204
resource.Status.Provisioning = []v1alpha1.ProvisioningInfo{
205205
{
206-
Token: "test-token",
207-
StartTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)),
208-
RebootTime: metav1.NewTime(time.Now().Add(-30 * time.Second)),
206+
Token: "test-token",
207+
StartTime: metav1.NewTime(time.Now().Add(-3 * time.Minute)),
209208
},
210209
}
211210
g.Expect(k8sClient.Status().Patch(ctx, resource, client.MergeFrom(device))).To(Succeed())
@@ -230,5 +229,68 @@ var _ = Describe("Device Controller", func() {
230229
g.Expect(resource.Status.Conditions[0].Reason).To(Equal(v1alpha1.ReadyReason))
231230
}).Should(Succeed())
232231
})
232+
233+
It("Should transition from Active to Provisioning once the reset-phase-to-provisioning annotation is set", func() {
234+
By("Creating a Device")
235+
device := &v1alpha1.Device{
236+
ObjectMeta: metav1.ObjectMeta{
237+
Name: key.Name,
238+
Namespace: key.Namespace,
239+
},
240+
Spec: v1alpha1.DeviceSpec{
241+
Endpoint: v1alpha1.Endpoint{
242+
Address: "192.168.10.5:9339",
243+
SecretRef: &v1alpha1.SecretReference{
244+
Name: name,
245+
},
246+
},
247+
Provisioning: &v1alpha1.Provisioning{
248+
BootScript: v1alpha1.TemplateSource{
249+
Inline: ptr.To("boot nxos.bin"),
250+
},
251+
Image: v1alpha1.Image{
252+
URL: "https://best-vendor-images.to/windows98",
253+
Checksum: "d41d8cd98f00b204e9800998ecf8427e",
254+
ChecksumType: v1alpha1.ChecksumTypeMD5,
255+
},
256+
},
257+
},
258+
}
259+
Expect(k8sClient.Create(ctx, device)).To(Succeed())
260+
261+
By("Setting the device to Running phase")
262+
orig := device.DeepCopy()
263+
device.Status.Phase = v1alpha1.DevicePhaseRunning
264+
Expect(k8sClient.Status().Patch(ctx, device, client.MergeFrom(orig))).To(Succeed())
265+
266+
By("Verifying the device transitions to Running phase")
267+
Eventually(func(g Gomega) {
268+
resource := &v1alpha1.Device{}
269+
g.Expect(k8sClient.Get(ctx, key, resource)).To(Succeed())
270+
g.Expect(resource.Status.Phase).To(Equal(v1alpha1.DevicePhaseRunning))
271+
g.Expect(resource.Status.Conditions).To(HaveLen(1))
272+
g.Expect(resource.Status.Conditions[0].Type).To(Equal(v1alpha1.ReadyCondition))
273+
}).Should(Succeed())
274+
275+
By("Adding the reset-phase-to-provisioning annotation to the device")
276+
Eventually(func(g Gomega) {
277+
resource := &v1alpha1.Device{}
278+
g.Expect(k8sClient.Get(ctx, key, resource)).To(Succeed())
279+
patch := resource.DeepCopy()
280+
annotations := make(map[string]string)
281+
annotations[v1alpha1.DeviceMaintenanceAnnotation] = v1alpha1.DeviceMaintenanceResetPhaseToProvisioning
282+
patch.SetAnnotations(annotations)
283+
g.Expect(k8sClient.Patch(ctx, patch, client.MergeFrom(resource))).To(Succeed())
284+
}).Should(Succeed())
285+
286+
By("Verifying the device transitions to Provisioning phase and the annotation is removed")
287+
Eventually(func(g Gomega) {
288+
resource := &v1alpha1.Device{}
289+
g.Expect(k8sClient.Get(ctx, key, resource)).To(Succeed())
290+
g.Expect(resource.Status.Phase).To(Equal(v1alpha1.DevicePhaseProvisioning))
291+
_, exists := resource.Annotations[v1alpha1.DeviceMaintenanceAnnotation]
292+
g.Expect(exists).To(BeFalse(), "Maintenance annotation should be removed after processing")
293+
}).WithTimeout(time.Second * 10).Should(Succeed())
294+
})
233295
})
234296
})

internal/controller/core/suite_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,18 @@ func (p *Provider) VerifyProvisioned(context.Context, *deviceutil.Connection, *v
466466
return true
467467
}
468468

469+
// Reboot is the test provider's stub for the reboot maintenance action.
// It ignores its arguments and always returns nil.
func (p *Provider) Reboot(ctx context.Context, conn *deviceutil.Connection) error {
470+
return nil
471+
}
472+
473+
// FactoryReset is the test provider's stub for the factory-reset maintenance action.
// It ignores its arguments and always returns nil.
func (p *Provider) FactoryReset(ctx context.Context, conn *deviceutil.Connection) error {
474+
return nil
475+
}
476+
477+
// Reprovision is the test provider's stub for the reprovision maintenance action.
// It ignores its arguments and always returns nil; the named result reterr is
// never assigned.
func (p *Provider) Reprovision(ctx context.Context, conn *deviceutil.Connection) (reterr error) {
478+
return nil
479+
}
480+
469481
func (p *Provider) EnsureInterface(ctx context.Context, req *provider.EnsureInterfaceRequest) error {
470482
p.Lock()
471483
defer p.Unlock()

0 commit comments

Comments
 (0)