Skip to content

Commit 81af3e7

Browse files
committed
Add remote alert
1 parent bf68115 commit 81af3e7

File tree

7 files changed

+25
-8
lines changed

7 files changed

+25
-8
lines changed

charts/controlplane-operations/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
apiVersion: v2
22
name: controlplane-operations
3-
version: 1.0.1
3+
version: 1.0.2
44
description: A set of Plutono dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Controlplane clusters.
55
maintainers:
66
- name: Vladimir Videlov (d051408)

charts/controlplane-operations/alerts/controlplane-bond.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,5 @@ groups:
2424
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
2525
annotations:
2626
description: Interface `{{`{{ $labels.device }}`}}` on `{{`{{ $labels.node }}`}}` is down. Tenant network outage for this node.
27-
summary: Interface `{{`{{ $labels.device }}`}}` is down. Node network connectivity is degraded.
27+
summary: Interface `{{`{{ $labels.device }}`}}` is down. Node network connectivity is degraded.
2828
{{- end }}

charts/controlplane-operations/alerts/controlplane-node.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ groups:
88
labels:
99
severity: {{ dig "KubernetesNodeBridgeFilterVLANTagged" "severity" "info" .Values.prometheusRules }}
1010
playbook: https://github.com/cobaltcore-dev/controlplane-operations/playbooks/KubernetesNodeBridgeFilterVLANTagged.md
11-
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
11+
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
1212
annotations:
1313
description: VLAN-tagged ARP/IP traffic is filtered by ARPtables/IPtables on `{{`{{ $labels.node }}`}}`. Network datapath threatened!
1414
summary: Bridged VLAN-tagged traffic is filtered by IPtables.

charts/controlplane-operations/alerts/controlplane-pvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ groups:
22
- name: controlplane-pvc
33
rules:
44
- record: kubelet_volume_stats_available_percent
5-
expr: (min by (persistentvolumeclaim, namespace) (100 * kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes))
5+
expr: (min by (persistentvolumeclaim, namespace) (100 * kubelet_volume_stats_available_bytes / kubelet_volume_stats_capacity_bytes))
66

77
- name: controlplane.pvc.alerts
88
rules:
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
groups:
2+
- name: controlplane-remote
3+
rules:
4+
{{- if not (.Values.prometheusRules.disabled.ArgoraUpdateInError | default false) }}
5+
- alert: ArgoraUpdateInError
6+
expr: >
7+
kube_customresource_status_state{customresource_kind="Update",customresource_group="argora.cloud.sap",state=~"Error"}
8+
== 1
9+
for: {{ dig "ArgoraUpdateInError" "for" "1m" .Values.prometheusRules }}
10+
labels:
11+
severity: {{ dig "ArgoraUpdateInError" "severity" "warning" .Values.prometheusRules }}
12+
playbook: https://github.com/cobaltcore-dev/controlplane-operations/playbooks/ArgoraUpdateInError.md
13+
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
14+
annotations:
15+
description: Argora Update CR status is in Error state for more than 1 minute.
16+
summary: Update CR in Error state.
17+
{{- end }}

charts/controlplane-operations/plugindefinition.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ kind: PluginDefinition
33
metadata:
44
name: controlplane-operations
55
spec:
6-
version: 1.0.1
6+
version: 1.0.2
77
displayName: Controlplane operations bundle
88
description: Operations bundle for Controlplane clusters
99
docMarkDownUrl: https://raw.githubusercontent.com/cloudoperators/controlplane-operations/main/README.md
1010
icon: https://raw.githubusercontent.com/cloudoperators/controlplane-operations/main/charts/controlplane-operations/kubernetes-logo.png
1111
helmChart:
1212
name: controlplane-operations
1313
repository: oci://ghcr.io/cloudoperators/controlplane-operations/charts
14-
version: 1.0.1
14+
version: 1.0.2
1515
options:
1616
- name: prometheusRules.create
1717
description: Create Prometheus rules

charts/controlplane-operations/templates/dashboards.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ kind: ConfigMap
77
metadata:
88
name: {{ printf "%s-%s" $root.Release.Name ($path | replace ".json" "" | replace "/" "-" | trunc 63) }}
99
labels:
10-
{{ include "controlplane-operations.persesDashboardSelectorLabels" (list $path $root) | indent 4 }}
10+
{{ include "controlplane-operations.persesDashboardSelectorLabels" (list $path $root) | indent 4 }}
1111
{{ include "controlplane-operations.labels" (list $path $root) | indent 4 }}
12-
data:
12+
data:
1313
{{ printf "%s: |-" ($path | replace "/" "-" | indent 2) }}
1414
{{ printf "%s" $bytes | indent 4 }}
1515
{{- end }}

0 commit comments

Comments
 (0)