Skip to content

Commit 2bf0411

Browse files
authored
Add helm chart for kueue-populator (#7814)
* add helm chart and rename kueue-prepopulator -> kueue-populator * add helm unit tests * cleanup in Makefile * remove chart lock * cleanups and readme improvements * update image versions in tests * add comments in values.yaml * cleanup * run e2e tests against Kueue v0.14.4 * remove kueue manager config from values.yaml * cleanup Makefile * encapsulate configs * replace old component name * update deps version level * cleanup Makefile * adjust resources for kueue-prepopulator * keep separate settings for resources of cq and rf * update configs for cq and rf * add reminder to use --wait during installation
1 parent edb4198 commit 2bf0411

File tree

11 files changed

+723
-0
lines changed

11 files changed

+723
-0
lines changed

cmd/experimental/kueue-populator/Makefile

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ BASE_BUILDER_IMAGE ?= golang:1.25.0
2626
BUILDER_IMAGE ?= $(BASE_BUILDER_IMAGE)
2727
CGO_ENABLED ?= 0
2828

29+
HELM ?= helm
30+
2931
.PHONY: all
3032
all: fmt vet build
3133

@@ -72,6 +74,9 @@ ENVTEST_K8S_VERSION ?= 1.34
7274
INTEGRATION_NPROCS ?= 4
7375
CONTROLLER_GEN := $(BIN_DIR)/controller-gen
7476

77+
YAML_PROCESSOR = $(BIN_DIR)/yaml-processor
78+
YAML_PROCESSOR_LOG_LEVEL ?= info
79+
7580
.PHONY: manifests
7681
manifests:
7782
$(CONTROLLER_GEN) rbac:roleName=kueue-populator-role output:rbac:artifacts:config=config/rbac paths="./pkg/controller/..."
@@ -90,3 +95,29 @@ test-e2e:
9095
test: ## Run unit tests.
9196
GOWORK=off $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit-unit.xml -- \
9297
-race -coverpkg=./... -coverprofile $(ARTIFACTS)/cover-unit.out ./pkg/controller/...
98+
99+
100+
.PHONY: update-helm
101+
update-helm: manifests $(YAML_PROCESSOR) ## Regenerate manifests, then run yaml-processor on hack/processing-plan.yaml.
102+
$(YAML_PROCESSOR) -zap-log-level=$(YAML_PROCESSOR_LOG_LEVEL) hack/processing-plan.yaml
103+
104+
$(YAML_PROCESSOR):
105+
cd $(ROOT_DIR) && $(MAKE) yaml-processor
106+
107+
.PHONY: helm-lint
108+
helm-lint: ## Run Helm chart lint test.
109+
$(HELM) lint charts/kueue-populator
110+
111+
# helm-verify renders the chart twice: once with default values and once with the
# populator image overridden. The chart reads the image from the `kueuePopulator`
# values key, so the --set paths must carry that prefix; without it the override
# is silently ignored and the second render only repeats the first.
.PHONY: helm-verify
112+
helm-verify: helm-lint ## Run helm template and detect any rendering failures.
113+
$(HELM) template charts/kueue-populator > /dev/null
114+
$(HELM) template charts/kueue-populator --set kueuePopulator.image.repository=my-repo --set kueuePopulator.image.tag=v0.1.0 > /dev/null
115+
116+
.PHONY: helm-unit-test
117+
helm-unit-test: ## Run Helm chart unit tests (requires the helm-unittest plugin).
118+
$(HELM) unittest charts/kueue-populator --strict --debug
119+
120+
# Release name and namespace targeted by `helm test`. The defaults match the
# install commands documented in the chart README; override them on the command
# line when the chart was installed differently, e.g.
#   make helm-test HELM_RELEASE=my-release HELM_NAMESPACE=my-namespace
HELM_RELEASE ?= kueue-populator
HELM_NAMESPACE ?= kueue-system

.PHONY: helm-test
121+
helm-test: ## Run Helm chart integration tests (requires an active cluster).
122+
$(HELM) test $(HELM_RELEASE) --namespace $(HELM_NAMESPACE)
123+
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
apiVersion: v2
2+
name: kueue-populator
3+
description: A Helm chart for Kueue Populator setup including Kueue, LocalQueue Creator, and default resources.
4+
type: application
5+
version: 0.1.0
6+
appVersion: "0.1.0"
7+
dependencies:
8+
- name: kueue
9+
version: "~0.14.4"
10+
repository: "oci://registry.k8s.io/kueue/charts"
11+
condition: kueue.enabled
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
# Kueue Populator
2+
3+
This Helm chart installs the Kueue Populator, a component designed to automatically create default LocalQueue resources in namespaces, and sets up initial Kueue resources like a default ClusterQueue and ResourceFlavor. It includes the official Kueue chart as a dependency.
4+
5+
## Purpose
6+
7+
- Deploys the `kueue-populator` controller manager.
8+
- Installs Kueue (via subchart dependency).
9+
- Creates a default `ResourceFlavor` named `tas-gpu-default`.
10+
- Creates a default `ClusterQueue` (name configurable).
11+
- The populator then creates a default `LocalQueue` (name configurable) in namespaces matching the selector, pointing to the default ClusterQueue.
12+
13+
## Prerequisites
14+
15+
- [Helm](https://helm.sh/docs/intro/quickstart/#install-helm)
16+
- Kubernetes cluster
17+
- (Optional) [Cert-manager](https://cert-manager.io/docs/installation/)
18+
- Docker or a compatible container builder.
19+
- A container registry to push the image to.
20+
21+
## Building the Image
22+
23+
You need to build and push the image to your own registry.
24+
25+
From the `cmd/experimental/kueue-populator` directory:
26+
27+
```bash
28+
# Build the image
29+
make image-build IMAGE_REGISTRY=<YOUR_REGISTRY>
30+
31+
# Push the image
32+
make image-push IMAGE_REGISTRY=<YOUR_REGISTRY>
33+
```
34+
35+
These commands build and push an image named `<YOUR_REGISTRY>/kueue-populator:<GIT_TAG>`.
36+
37+
If you want to use a specific tag, you can override `GIT_TAG`:
38+
39+
```bash
40+
make image-build image-push IMAGE_REGISTRY=<YOUR_REGISTRY> GIT_TAG=latest
41+
```
42+
43+
## Installation
44+
45+
To install the chart, you MUST override the image repository and tag with the image you built and pushed.
46+
47+
The following commands assume you are in the `cmd/experimental/kueue-populator` directory.
48+
49+
Example using `--set`:
50+
51+
```bash
52+
helm install kueue-populator ./charts/kueue-populator --namespace kueue-system --create-namespace --wait \
53+
--set kueuePopulator.image.repository=<YOUR_REGISTRY>/kueue-populator \
54+
--set kueuePopulator.image.tag=latest
55+
```
56+
57+
> The `--wait` flag is required to ensure that the Kueue controller and webhooks are fully ready before the chart attempts to create Kueue resources (like ClusterQueue) via post-install hooks. Without it, the installation may fail.
58+
59+
Example using a custom `my-values.yaml`:
60+
61+
```yaml
62+
# my-values.yaml
63+
kueuePopulator:
64+
image:
65+
repository: <YOUR_REGISTRY>/kueue-populator
66+
tag: latest
67+
```
68+
69+
```bash
70+
helm install kueue-populator ./charts/kueue-populator --namespace kueue-system --create-namespace --wait -f my-values.yaml
71+
```
72+
73+
## Configuration
74+
75+
### Kueue Populator Configuration
76+
77+
The following table lists the configurable parameters under the `kueuePopulator` key in `values.yaml`:
78+
79+
| Key | Type | Default | Description |
80+
| -------------------------------------------------- | -------- | ----------------- | ---------------------------------------------------------------------------------------------------------- |
81+
| `image.repository` | string | `null` | **Required.** Image repository for the populator (e.g., `<YOUR_REGISTRY>/kueue-populator`) |
82+
| `image.tag` | string | `null` | **Required.** Image tag for the populator (e.g., `latest`) |
83+
| `image.pullPolicy` | string | `IfNotPresent` | Image pull policy |
84+
| `config.localQueue.name` | string | `default` | Name of the default LocalQueue to create in namespaces |
85+
| `config.clusterQueue.name` | string | `cluster-queue` | Name of the default ClusterQueue to create and reference in LocalQueues |
86+
| `config.clusterQueue.resources` | list | (see values.yaml) | Resources to configure in the default ResourceFlavor and ClusterQueue |
87+
| `config.topology.levels` | list | `[]` | Optional list of node labels for Topology Aware Scheduling levels. Enables Topology creation. |
88+
| `config.resourceFlavor.nodeLabels` | object | `{}` | Node labels to associate with the default ResourceFlavor. |
89+
| `config.managedJobsNamespaceSelector` | object | (see values.yaml) | Label selector to filter namespaces where the default LocalQueue will be created. Excludes system namespaces. |
90+
91+
### Kueue Subchart Configuration
92+
93+
This chart includes the official `kueue` chart as a dependency. You can configure it under the `kueue` key in `values.yaml`. Key overrides included in this chart:
94+
95+
- `kueue.enabled: true`: Enables the subchart installation.
96+
- `kueue.controllerManager.featureGates`: Enables `TopologyAwareScheduling`.
97+
- `kueue.managerConfig.controllerManagerConfigYaml`: Provides minimal necessary overrides for `apiVersion` and `managedJobsNamespaceSelector` to ensure compatibility and safe hook execution.
98+
99+
See the [Kueue chart README](https://github.com/kubernetes-sigs/kueue/blob/main/charts/kueue/README.md) for all possible Kueue configuration options.
100+
101+
## Testing
102+
103+
### Unit Tests & Linting
104+
105+
You can run unit tests and linting locally without a cluster using the Makefile targets:
106+
107+
```bash
108+
# Run Helm lint
109+
make helm-lint
110+
111+
# Verify Helm template rendering
112+
make helm-verify
113+
114+
# Run Helm unit tests (requires helm-unittest plugin)
115+
make helm-unit-test
116+
```
117+
118+
### Integration Tests
119+
120+
This chart includes tests to verify the installation. Assuming you have installed the chart as `kueue-populator` in the `kueue-system` namespace, you can run the tests using:
121+
122+
```bash
123+
helm test kueue-populator --namespace kueue-system
124+
```
125+
126+
Or use the Makefile target (requires an active cluster):
127+
128+
```bash
129+
make helm-test
130+
```
131+
132+
This will launch a few test pods that check for the health of the deployments and the existence of the expected resources.
133+
134+
## Uninstallation
135+
136+
To uninstall the chart:
137+
138+
```bash
139+
helm uninstall kueue-populator --namespace kueue-system
140+
```
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
apiVersion: v1
2+
kind: ServiceAccount
3+
metadata:
4+
name: {{ .Release.Name }}-kueue-populator
5+
namespace: {{ .Release.Namespace }}
6+
---
7+
apiVersion: rbac.authorization.k8s.io/v1
8+
kind: ClusterRole
9+
metadata:
10+
name: {{ .Release.Name }}-kueue-populator-role
11+
rules:
12+
- apiGroups:
13+
- ""
14+
resources:
15+
- events
16+
verbs:
17+
- create
18+
- patch
19+
- apiGroups:
20+
- ""
21+
resources:
22+
- namespaces
23+
verbs:
24+
- get
25+
- list
26+
- watch
27+
- apiGroups:
28+
- kueue.x-k8s.io
29+
resources:
30+
- clusterqueues
31+
verbs:
32+
- get
33+
- list
34+
- watch
35+
- apiGroups:
36+
- kueue.x-k8s.io
37+
resources:
38+
- localqueues
39+
verbs:
40+
- create
41+
- get
42+
- list
43+
- watch
44+
---
45+
apiVersion: rbac.authorization.k8s.io/v1
46+
kind: ClusterRoleBinding
47+
metadata:
48+
name: {{ .Release.Name }}-kueue-populator-role-binding
49+
roleRef:
50+
apiGroup: rbac.authorization.k8s.io
51+
kind: ClusterRole
52+
name: {{ .Release.Name }}-kueue-populator-role
53+
subjects:
54+
- kind: ServiceAccount
55+
name: {{ .Release.Name }}-kueue-populator
56+
namespace: {{ .Release.Namespace }}
57+
---
58+
apiVersion: v1
59+
kind: ConfigMap
60+
metadata:
61+
name: {{ .Release.Name }}-kueue-populator-config
62+
namespace: {{ .Release.Namespace }}
63+
data:
64+
controller_manager_config.yaml: |
65+
{{- /* Quote the name so values such as "123" or "true" stay strings in the rendered config. */}}
localQueueName: {{ .Values.kueuePopulator.config.localQueue.name | quote }}
66+
{{- if .Values.kueuePopulator.config.managedJobsNamespaceSelector }}
67+
managedJobsNamespaceSelector:
68+
{{- toYaml .Values.kueuePopulator.config.managedJobsNamespaceSelector | nindent 6 }}
69+
{{- end }}
70+
---
71+
apiVersion: apps/v1
72+
kind: Deployment
73+
metadata:
74+
name: {{ .Release.Name }}-kueue-populator
75+
namespace: {{ .Release.Namespace }}
76+
labels:
77+
app.kubernetes.io/name: kueue-populator
78+
app.kubernetes.io/instance: {{ .Release.Name }}
79+
control-plane: kueue-populator
80+
spec:
81+
replicas: 1
82+
selector:
83+
matchLabels:
84+
control-plane: kueue-populator
85+
app.kubernetes.io/name: kueue-populator
86+
app.kubernetes.io/instance: {{ .Release.Name }}
87+
template:
88+
metadata:
89+
labels:
90+
control-plane: kueue-populator
91+
app.kubernetes.io/name: kueue-populator
92+
app.kubernetes.io/instance: {{ .Release.Name }}
93+
spec:
94+
serviceAccountName: {{ .Release.Name }}-kueue-populator
95+
securityContext:
96+
runAsNonRoot: true
97+
seccompProfile:
98+
type: RuntimeDefault
99+
containers:
100+
- args:
101+
- "--config=/etc/kueue/controller_manager_config.yaml"
102+
- "--zap-log-level=2"
103+
image: "{{ .Values.kueuePopulator.image.repository }}:{{ .Values.kueuePopulator.image.tag }}"
104+
imagePullPolicy: {{ .Values.kueuePopulator.image.pullPolicy }}
105+
name: manager
106+
securityContext:
107+
allowPrivilegeEscalation: false
108+
readOnlyRootFilesystem: true
109+
capabilities:
110+
drop:
111+
- ALL
112+
livenessProbe:
113+
httpGet:
114+
path: /healthz
115+
port: 8081
116+
initialDelaySeconds: 15
117+
periodSeconds: 20
118+
readinessProbe:
119+
httpGet:
120+
path: /readyz
121+
port: 8081
122+
initialDelaySeconds: 5
123+
periodSeconds: 10
124+
resources:
125+
limits:
126+
cpu: 100m
127+
memory: 100Mi
128+
requests:
129+
cpu: 100m
130+
memory: 100Mi
131+
volumeMounts:
132+
- mountPath: /etc/kueue
133+
name: manager-config
134+
{{- with .Values.kueuePopulator.imagePullSecrets }}
135+
imagePullSecrets:
136+
{{- toYaml . | nindent 8 }}
137+
{{- end }}
138+
terminationGracePeriodSeconds: 10
139+
volumes:
140+
- configMap:
141+
name: {{ .Release.Name }}-kueue-populator-config
142+
name: manager-config

0 commit comments

Comments
 (0)