Skip to content

Commit bf68115

Browse files
videlovLicense Botdefo89richardtief
authored
Initial (#1)
* Initial * Automatic application of license header * REUSE.toml * Automatic application of license header * Update * codeql not needed * Update charts/controlplane-operations/alerts/controlplane-node.yaml Co-authored-by: Dmitri Fedotov <13087245+defo89@users.noreply.github.com> * Update charts/controlplane-operations/alerts/controlplane-bond.yaml Co-authored-by: Dmitri Fedotov <13087245+defo89@users.noreply.github.com> * Update * SLO alerts not cp specifics, should be included in other plugin * apply common structure and templates Signed-off-by: Richard Tief <richard.tief@sap.com> * Automatic application of license header * remove Plutono and use Perses Signed-off-by: Richard Tief <richard.tief@sap.com> * example playbook for NodeBondDegradedMain Signed-off-by: Richard Tief <richard.tief@sap.com> * Update charts/controlplane-operations/values.yaml Co-authored-by: Richard Tief <56597015+richardtief@users.noreply.github.com> --------- Signed-off-by: Richard Tief <richard.tief@sap.com> Co-authored-by: License Bot <license_bot@github.com> Co-authored-by: Dmitri Fedotov <13087245+defo89@users.noreply.github.com> Co-authored-by: Richard Tief <richard.tief@sap.com> Co-authored-by: Richard Tief <56597015+richardtief@users.noreply.github.com>
1 parent 5de2fbf commit bf68115

File tree

18 files changed

+561
-50
lines changed

18 files changed

+561
-50
lines changed

.github/configs/helm-lint.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# See https://github.com/helm/chart-testing#configuration
5+
remote: origin
6+
target-branch: main
7+
validate-maintainers: false
8+
check-version-increment: false
9+
chart-dirs:
10+
- charts

.github/licenserc.yaml

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ header:
22
license:
33
spdx-id: Apache-2.0
44
content: |
5-
SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
5+
SPDX-FileCopyrightText: [year] SAP SE or an SAP affiliate company and Greenhouse contributors
66
SPDX-License-Identifier: Apache-2.0
7+
pattern: |
8+
SPDX-FileCopyrightText: [0-9]+ SAP SE or an SAP affiliate company and Greenhouse contributors
9+
SPDX-License-Identifier: Apache-2\.0
710
811
paths: # `paths` are the path list that will be checked (and fixed) by license-eye, default is ['**'].
912
- '**'
@@ -12,34 +15,17 @@ header:
1215
- '.github/**'
1316
- '.reuse/dep5'
1417
- 'LICENSES/*.txt'
15-
- 'grafana/*.json'
16-
- 'website/scripts/**'
17-
- 'hack/**'
18-
- 'hack/openapi-generator/openapi-info.yaml'
19-
- 'hack/boilerplate.go.txt'
20-
- 'docs/**'
2118
- '**/*.md'
2219
- 'LICENSE'
23-
- 'NOTICE'
24-
- 'PROJECT'
2520
- '**/*.gitignore'
2621
- '**/*.helmignore'
2722
- '**/*.tpl'
28-
- '**/go.mod'
29-
- '**/go.sum'
30-
- '**/*.lock'
31-
- '**/*.json'
3223
- '**/.gitkeep'
3324
- '**/*.txt'
3425
- '*Dockerfile*'
3526
- 'Makefile'
36-
- 'pkg/idproxy/web/**'
37-
- 'pkg/apis/scheme_builder.go' # Belongs to the Kubernetes authors
38-
- 'cmd/tcp-proxy/main.go' # MIT License
39-
- 'pkg/tcp-proxy/proxy/*.go' # MIT License
40-
- '**/zz_generated.deepcopy.go' # Generated by Kubebuilder
41-
- 'charts/**/templates/*.yaml' # license headers on helm templates are causing issues
42-
27+
- 'README.md.gotmpl'
28+
- 'charts/**/**/*.yaml' # license headers on helm templates are causing issues
4329

4430
comment: on-failure
4531

.github/workflows/codeql.yaml

Lines changed: 0 additions & 27 deletions
This file was deleted.

.github/workflows/helm-lint.yaml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
name: "Helm lint and tests"
2+
on:
3+
pull_request:
4+
types: [ opened, synchronize, reopened ]
5+
6+
env:
7+
REGISTRY: ghcr.io
8+
9+
jobs:
10+
helm-lint-test:
11+
runs-on: [ default ]
12+
steps:
13+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
14+
with:
15+
fetch-depth: 0
16+
token: ${{ secrets.GITHUB_TOKEN }}
17+
18+
- name: Set up Helm
19+
uses: azure/setup-helm@b9e51907a09c216f16ebe8536097933489208112 # v4.3.0
20+
21+
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5
22+
with:
23+
python-version: '3.9' # quoted so YAML keeps the version a string (e.g. 3.10 would otherwise parse as 3.1)
24+
check-latest: true
25+
token: ${{ secrets.GITHUB_TOKEN }}
26+
27+
- name: Set up chart-linting and chart-testing
28+
uses: helm/chart-testing-action@0d28d3144d3a25ea2cc349d6e59901c4ff469b3b # v2.7.0
29+
30+
- name: Run chart-linting (list-changed)
31+
id: list-changed
32+
run: |
33+
changed=$(ct list-changed --config .github/configs/helm-lint.yaml --target-branch ${{ github.event.repository.default_branch }})
34+
if [[ -n "$changed" ]]; then
35+
echo "changed=true" >> "$GITHUB_OUTPUT"
36+
fi
37+
38+
- name: Run chart-linting
39+
if: steps.list-changed.outputs.changed == 'true'
40+
run: ct lint --config .github/configs/helm-lint.yaml --target-branch ${{ github.event.repository.default_branch }}
41+
42+
- name: Check version bump
43+
id: check-bump
44+
if: steps.list-changed.outputs.changed == 'true'
45+
continue-on-error: true
46+
run: |
47+
for chart in $(ct list-changed --config .github/configs/helm-lint.yaml --target-branch ${{ github.event.repository.default_branch }}); do
48+
chart_version=$(yq .version "$chart/Chart.yaml")
49+
# ct list-changed prints chart paths such as charts/<name>; the published OCI artifact is named after the last path component, so use basename (dirname would yield "charts").
if helm pull "oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/$(basename "$chart")" --version "$chart_version"; then
50+
echo "chart=$(basename "$chart")" >> "$GITHUB_OUTPUT"
51+
echo "chart_version=${chart_version}" >> "$GITHUB_OUTPUT"
52+
echo "needsbump=true" >> "$GITHUB_OUTPUT"
53+
fi
54+
done
55+
56+
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
57+
if: steps.check-bump.outputs.needsbump == 'true'
58+
with:
59+
script: |
60+
github.rest.issues.createComment({
61+
issue_number: context.issue.number,
62+
owner: context.repo.owner,
63+
repo: context.repo.repo,
64+
body: ':warning: Chart `oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/${{ steps.check-bump.outputs.chart }}:${{ steps.check-bump.outputs.chart_version }}` already exists in OCI registry. Please increment the chart version.'
65+
})
66+
core.setFailed(`Action failed with error: Chart version bump required`);
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
name: Package Helm Chart and publish to GitHub Packages
2+
3+
on:
4+
workflow_dispatch: {}
5+
push:
6+
branches:
7+
- main
8+
paths:
9+
- .github/workflows/helm-release.yaml
10+
- charts/**
11+
12+
permissions:
13+
contents: write
14+
packages: write
15+
16+
env:
17+
REGISTRY: ghcr.io
18+
ACTIONS_RUNNER_DEBUG: false
19+
20+
jobs:
21+
helm-release:
22+
runs-on: [ default ]
23+
strategy:
24+
fail-fast: false
25+
matrix:
26+
include:
27+
- chartDir: charts/controlplane-operations
28+
chartName: controlplane-operations
29+
30+
steps:
31+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
32+
with:
33+
fetch-depth: 0
34+
token: ${{ secrets.GITHUB_TOKEN }}
35+
36+
- name: Configure Git
37+
run: |
38+
git config user.name "$GITHUB_ACTOR"
39+
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
40+
41+
- name: Set up Helm
42+
uses: azure/setup-helm@b9e51907a09c216f16ebe8536097933489208112 # v4.3.0
43+
44+
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5
45+
with:
46+
python-version: '3.9' # quoted so YAML keeps the version a string (e.g. 3.10 would otherwise parse as 3.1)
47+
check-latest: true
48+
token: ${{ secrets.GITHUB_TOKEN }}
49+
50+
- name: Log into registry ${{ env.REGISTRY }}
51+
if: github.event_name != 'pull_request'
52+
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3
53+
with:
54+
registry: ${{ env.REGISTRY }}
55+
username: ${{ github.actor }}
56+
password: ${{ secrets.GITHUB_TOKEN }}
57+
58+
- name: Get changed files
59+
id: changed-files
60+
uses: tj-actions/changed-files@2f7c5bfce28377bc069a65ba478de0a74aa0ca32 # v46
61+
with:
62+
files: |
63+
${{ matrix.chartDir }}/**
64+
65+
- name: Check if Helm chart with same version already exists
66+
id: check-chart
67+
if: steps.changed-files.outputs.all_changed_files != ''
68+
env:
69+
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
70+
run: |
71+
# List all changed
72+
echo "All changed files: $ALL_CHANGED_FILES"
73+
chartName=$(yq .name "${{ matrix.chartDir }}/Chart.yaml")
74+
chartVersion=$(yq .version "${{ matrix.chartDir }}/Chart.yaml")
75+
echo "chart_version=${chartVersion}" >> "$GITHUB_OUTPUT"
76+
# Test helm pull's exit status directly; wrapping it in $(...) would try to execute its stdout ("Pulled: ...") as a command and the branch would never be taken.
if helm pull "oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/${{ matrix.chartName }}" --version "$chartVersion"; then
77+
echo "bump=true" >> "$GITHUB_OUTPUT"
78+
fi
79+
80+
- name: Chart needs version bump
81+
if: steps.check-chart.outputs.bump == 'true'
82+
env:
83+
CHART_VERSION: ${{ steps.check-chart.outputs.chart_version }}
84+
run: |
85+
echo "Chart ${{ matrix.chartDir }}:${{ env.CHART_VERSION }} already exists in OCI registry. Skipping upload. Please increment the chart version."
86+
exit 1
87+
88+
- name: Push Charts to GHCR
89+
if: steps.changed-files.outputs.all_changed_files != '' && steps.check-chart.outputs.bump != 'true'
90+
run: |
91+
helm package ${{ matrix.chartDir }} -d ${{ matrix.chartDir }}
92+
PKG_NAME=`ls ${{ matrix.chartDir }}/*.tgz`
93+
helm push ${PKG_NAME} oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
94+
95+
- name: Run chart-releaser
96+
uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
97+
env:
98+
CR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
99+
CR_GENERATE_RELEASE_NOTES: true

README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,29 @@
66

77
A set of Perses dashboards and Prometheus alert rules combined with playbooks to ensure effective operation of Controlplane clusters within the ApeiroRA stack.
88

9+
# Content
10+
11+
The content is structured as follows:
12+
13+
```
14+
controlplane-operations
15+
16+
├── playbooks/ Step-by-step instructions for troubleshooting.
17+
18+
└── charts/
19+
20+
└── controlplane-operations
21+
22+
├── alerts Prometheus alerts for Kubernetes.
23+
24+
├── dashboards Perses dashboards for visualizing key metrics.
25+
26+
└── Chart.yaml Helm chart manifest.
27+
```
28+
929
## Requirements and Setup
1030

11-
*Insert a short description what is required to get your project running...*
31+
The content of the repository can be installed as a [Greenhouse](https://github.com/cloudoperators/greenhouse) Plugin.
1232

1333
## Support, Feedback, Contributing
1434

REUSE.toml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,14 @@
1-
# SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and Greenhouse contributors
2-
# SPDX-License-Identifier: Apache-2.0
1+
# SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and Greenhouse contributors
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
version = 1
5+
SPDX-PackageName = "controlplane-operations"
6+
SPDX-PackageSupplier = "Vladimir Videlov <vladimir.videlov@sap.com>"
7+
SPDX-PackageDownloadLocation = "https://github.com/cloudoperators/controlplane-operations"
8+
SPDX-PackageComment = "The code in this project may include calls to APIs (\"API Calls\") of\n SAP or third-party products or services developed outside of this project\n (\"External Products\").\n \"APIs\" means application programming interfaces, as well as their respective\n specifications and implementing code that allows software to communicate with\n other software.\n API Calls to External Products are not licensed under the open source license\n that governs this project. The use of such API Calls and related External\n Products are subject to applicable additional agreements with the relevant\n provider of the External Products. In no event shall the open source license\n that governs this project grant any rights in or to any External Products,or\n alter, expand or supersede any terms of the applicable additional agreements.\n If you have a valid license agreement with SAP for the use of a particular SAP\n External Product, then you may make use of any API Calls included in this\n project's code for that SAP External Product, subject to the terms of such\n license agreement. If you do not have a valid license agreement for the use of\n a particular SAP External Product, then you may only make use of any API Calls\n in this project for that SAP External Product for your internal, non-productive\n and non-commercial test and evaluation of such API Calls. Nothing herein grants\n you any rights to use or access any SAP External Product, or provide any third\n parties the right to use of access any SAP External Product, through API Calls."
9+
10+
[[annotations]]
11+
path = "**"
12+
precedence = "aggregate"
13+
SPDX-FileCopyrightText = "2025 SAP SE or an SAP affiliate company and Greenhouse contributors"
14+
SPDX-License-Identifier = "Apache-2.0"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
apiVersion: v2
2+
name: controlplane-operations
3+
version: 1.0.1
4+
description: A set of Perses dashboards and Prometheus alerting rules combined with playbooks to ensure effective operations of Controlplane clusters.
5+
maintainers:
6+
- name: Vladimir Videlov (d051408)
7+
email: vladimir.videlov@sap.com
8+
keywords:
9+
- Helm Chart
10+
- Controlplane operations
11+
- Perses Dashboards
12+
- Prometheus Alerting
13+
- Alert Rules
14+
- Playbooks
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
groups:
2+
- name: controlplane-bond
3+
rules:
4+
{{- if not (.Values.prometheusRules.disabled.NodeBondDegradedMain | default false) }}
5+
- alert: NodeBondDegradedMain
6+
expr: sum(node_bonding_active) by (master, node) < 2
7+
for: {{ dig "NodeBondDegradedMain" "for" "15m" .Values.prometheusRules }}
8+
labels:
9+
severity: {{ dig "NodeBondDegradedMain" "severity" "warning" .Values.prometheusRules }}
10+
# GitHub file URLs need the /blob/<branch>/ segment; without it the link returns 404.
playbook: https://github.com/cobaltcore-dev/controlplane-operations/blob/main/playbooks/NodeBondDegradedMain.md
11+
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
12+
annotations:
13+
description: Bond `{{`{{ $labels.master }}`}}` on `{{`{{ $labels.node }}`}}` is degraded. Imminent network outage for this node.
14+
summary: Bond `{{`{{ $labels.master }}`}}` is degraded. Node network connectivity is not HA. Switch failover or upgrade will cause an outage!
15+
{{- end }}
16+
17+
{{- if not (.Values.prometheusRules.disabled.NodeVirtualInterfaceDown | default false) }}
18+
- alert: NodeVirtualInterfaceDown
19+
expr: sum(node_network_up{device=~"bond.*|vlan.*"} == 0) by (node, device)
20+
for: {{ dig "NodeVirtualInterfaceDown" "for" "15m" .Values.prometheusRules }}
21+
labels:
22+
severity: {{ dig "NodeVirtualInterfaceDown" "severity" "warning" .Values.prometheusRules }}
23+
# GitHub file URLs need the /blob/<branch>/ segment; without it the link returns 404.
playbook: https://github.com/cobaltcore-dev/controlplane-operations/blob/main/playbooks/NodeVirtualInterfaceDown.md
24+
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
25+
annotations:
26+
description: Interface `{{`{{ $labels.device }}`}}` on `{{`{{ $labels.node }}`}}` is down. Tenant network outage for this node.
27+
summary: Interface `{{`{{ $labels.device }}`}}` is down. Node network connectivity is degraded.
28+
{{- end }}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
groups:
2+
- name: controlplane-node
3+
rules:
4+
{{- if not (.Values.prometheusRules.disabled.KubernetesNodeBridgeFilterVLANTagged | default false) }}
5+
- alert: KubernetesNodeBridgeFilterVLANTagged
6+
expr: kube_node_status_condition{condition="BridgeFilterVLANTagged", status="true"} == 1
7+
for: {{ dig "KubernetesNodeBridgeFilterVLANTagged" "for" "15m" .Values.prometheusRules }}
8+
labels:
9+
severity: {{ dig "KubernetesNodeBridgeFilterVLANTagged" "severity" "info" .Values.prometheusRules }}
10+
# GitHub file URLs need the /blob/<branch>/ segment; without it the link returns 404.
playbook: https://github.com/cobaltcore-dev/controlplane-operations/blob/main/playbooks/KubernetesNodeBridgeFilterVLANTagged.md
11+
{{ include "controlplane-operations.additionalRuleLabels" . | nindent 6 }}
12+
annotations:
13+
description: VLAN-tagged ARP/IP traffic is filtered by ARPtables/IPtables on `{{`{{ $labels.node }}`}}`. Network datapath threatened!
14+
summary: Bridged VLAN-tagged traffic is filtered by IPtables.
15+
{{- end }}

0 commit comments

Comments
 (0)