Skip to content

Commit b1fa43a

Browse files
committed
ci: isolate GPU runners, respect CUDA_VISIBLE_DEVICES, drop global prunes
* Pass CUDA_VISIBLE_DEVICES and use --gpus "device=${CUDA_VISIBLE_DEVICES:-all}" for NVIDIA jobs
* Tag images/containers with ${{ runner.name }} to avoid cross-runner collisions
* Remove docker system prune on shared nvidiagpu hosts (keep cache, avoid races)
* Add runner label to NVIDIA base builds for traceable cleanup
* Minor YAML tidy/comments across workflows
1 parent b54ca3f commit b1fa43a

File tree

3 files changed

+132
-111
lines changed

3 files changed

+132
-111
lines changed

.github/workflows/docker-bases.yml

Lines changed: 53 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ concurrency:
77
on:
88
push:
99
paths:
10-
- '/docker/Dockerfile.nvidia'
11-
- '/docker/Dockerfile.cpu'
12-
- '/docker/Dockerfile.amd'
13-
- '/docker/Dockerfile.intel'
10+
- "/docker/Dockerfile.nvidia"
11+
- "/docker/Dockerfile.cpu"
12+
- "/docker/Dockerfile.amd"
13+
- "/docker/Dockerfile.intel"
1414
workflow_dispatch:
1515
inputs:
1616
cpu:
@@ -27,15 +27,15 @@ on:
2727
default: false
2828

2929
tags:
30-
description: 'Build compiler bases'
30+
description: "Build compiler bases"
3131
schedule:
3232
# Run once a month
3333
- cron: "0 0 1 * *"
3434

3535
jobs:
36-
#######################################################
37-
############## Basic gcc CPU ##########################
38-
#######################################################
36+
#######################################################
37+
############## Basic gcc CPU ##########################
38+
#######################################################
3939
deploy-cpu-bases:
4040
if: inputs.cpu
4141
name: "cpu-base"
@@ -66,22 +66,18 @@ jobs:
6666
username: ${{ secrets.DOCKER_USERNAME }}
6767
password: ${{ secrets.DOCKER_PASSWORD }}
6868

69-
- name: cleanup
70-
run: docker system prune -a -f
71-
7269
- name: GCC image
7370
uses: docker/build-push-action@v6
7471
with:
7572
context: .
76-
file: './docker/Dockerfile.cpu'
73+
file: "./docker/Dockerfile.cpu"
7774
push: true
78-
build-args: 'gcc=${{ matrix.gcc }}'
79-
tags: 'devitocodes/bases:cpu-gcc${{ matrix.gcc }}'
80-
75+
build-args: "gcc=${{ matrix.gcc }}"
76+
tags: "devitocodes/bases:cpu-gcc${{ matrix.gcc }}"
8177

82-
#######################################################
83-
############## Intel OneApi CPU #######################
84-
#######################################################
78+
#######################################################
79+
############## Intel OneApi CPU #######################
80+
#######################################################
8581
deploy-oneapi-bases:
8682
if: inputs.intel
8783
name: "oneapi-base"
@@ -107,43 +103,39 @@ jobs:
107103
with:
108104
username: ${{ secrets.DOCKER_USERNAME }}
109105
password: ${{ secrets.DOCKER_PASSWORD }}
110-
111-
- name: cleanup
112-
run: docker system prune -a -f
113-
114106
- name: ICX image
115107
uses: docker/build-push-action@v6
116108
with:
117109
context: .
118-
file: './docker/Dockerfile.intel'
110+
file: "./docker/Dockerfile.intel"
119111
push: true
120-
target: 'icx'
121-
build-args: 'arch=icx'
122-
tags: 'devitocodes/bases:cpu-icx'
112+
target: "icx"
113+
build-args: "arch=icx"
114+
tags: "devitocodes/bases:cpu-icx"
123115

124116
- name: SYCL CPU image
125117
uses: docker/build-push-action@v6
126118
with:
127119
context: .
128-
file: './docker/Dockerfile.intel'
120+
file: "./docker/Dockerfile.intel"
129121
push: true
130-
target: 'cpu-sycl'
131-
build-args: 'arch=cpu-sycl'
132-
tags: 'devitocodes/bases:cpu-sycl'
122+
target: "cpu-sycl"
123+
build-args: "arch=cpu-sycl"
124+
tags: "devitocodes/bases:cpu-sycl"
133125

134126
- name: SYCL GPU image
135127
uses: docker/build-push-action@v6
136128
with:
137129
context: .
138-
file: './docker/Dockerfile.intel'
130+
file: "./docker/Dockerfile.intel"
139131
push: true
140-
target: 'gpu-sycl'
141-
build-args: 'arch=gpu-sycl'
142-
tags: 'devitocodes/bases:gpu-sycl'
132+
target: "gpu-sycl"
133+
build-args: "arch=gpu-sycl"
134+
tags: "devitocodes/bases:gpu-sycl"
143135

144-
#######################################################
145-
################### Nvidia nvhpc ######################
146-
#######################################################
136+
#######################################################
137+
################### Nvidia nvhpc ######################
138+
#######################################################
147139
deploy-nvidia-bases:
148140
if: inputs.nvidia
149141
name: "nvidia-bases"
@@ -170,42 +162,43 @@ jobs:
170162
username: ${{ secrets.DOCKER_USERNAME }}
171163
password: ${{ secrets.DOCKER_PASSWORD }}
172164

173-
- name: cleanup
174-
run: docker system prune -a -f
175-
176165
- name: NVC image
177166
uses: docker/build-push-action@v6
178167
with:
179168
context: .
180-
file: './docker/Dockerfile.nvidia'
169+
file: "./docker/Dockerfile.nvidia"
181170
push: true
182-
target: 'nvc'
183-
build-args: 'arch=nvc'
184-
tags: 'devitocodes/bases:nvidia-nvc'
171+
target: "nvc"
172+
build-args: "arch=nvc"
173+
# Label (not tag) with runner name for traceability without changing image tags
174+
labels: builder-runner=${{ runner.name }}
175+
tags: "devitocodes/bases:nvidia-nvc"
185176

186177
- name: NVCC image
187178
uses: docker/build-push-action@v6
188179
with:
189180
context: .
190-
file: './docker/Dockerfile.nvidia'
181+
file: "./docker/Dockerfile.nvidia"
191182
push: true
192-
target: 'nvcc'
193-
build-args: 'arch=nvcc'
194-
tags: 'devitocodes/bases:nvidia-nvcc'
183+
target: "nvcc"
184+
build-args: "arch=nvcc"
185+
labels: builder-runner=${{ runner.name }}
186+
tags: "devitocodes/bases:nvidia-nvcc"
195187

196188
- name: NVC host image
197189
uses: docker/build-push-action@v6
198190
with:
199191
context: .
200-
file: './docker/Dockerfile.nvidia'
192+
file: "./docker/Dockerfile.nvidia"
201193
push: true
202-
target: 'nvc-host'
203-
build-args: 'arch=nvc-host'
204-
tags: 'devitocodes/bases:cpu-nvc'
205-
206-
#######################################################
207-
##################### AMD #############################
208-
#######################################################
194+
target: "nvc-host"
195+
build-args: "arch=nvc-host"
196+
labels: builder-runner=${{ runner.name }}
197+
tags: "devitocodes/bases:cpu-nvc"
198+
199+
#######################################################
200+
##################### AMD #############################
201+
#######################################################
209202
deploy-amd-bases:
210203
if: inputs.amd
211204
name: "amd-base"
@@ -232,16 +225,13 @@ jobs:
232225
username: ${{ secrets.DOCKER_USERNAME }}
233226
password: ${{ secrets.DOCKER_PASSWORD }}
234227

235-
- name: cleanup
236-
run: docker system prune -a -f
237-
238228
- name: AMD image
239229
uses: docker/build-push-action@v6
240230
with:
241231
context: .
242-
file: './docker/Dockerfile.amd'
232+
file: "./docker/Dockerfile.amd"
243233
push: true
244-
target: 'amdclang'
234+
target: "amdclang"
245235
build-args: |
246236
ROCM_VERSION=5.5.1
247237
UCX_BRANCH=v1.13.1
@@ -252,9 +242,9 @@ jobs:
252242
uses: docker/build-push-action@v6
253243
with:
254244
context: .
255-
file: './docker/Dockerfile.amd'
245+
file: "./docker/Dockerfile.amd"
256246
push: true
257-
target: 'hip'
247+
target: "hip"
258248
build-args: |
259249
ROCM_VERSION=6.3.4
260250
tags: devitocodes/bases:amd-hip

.github/workflows/docker-devito.yml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,22 @@ jobs:
1313
env:
1414
# Use buildkit https://docs.docker.com/develop/develop-images/build_enhancements/ for better build
1515
DOCKER_BUILDKIT: "1"
16+
# Unique container name to avoid clashes across concurrent self-hosted runners
17+
CONTAINER_NAME: testrun-${{ matrix.tag }}-${{ runner.name }}
18+
1619

1720
strategy:
1821
fail-fast: false
1922
matrix:
2023
include:
2124
- base: 'bases:nvidia-nvc'
2225
tag: 'nvidia-nvc'
23-
flag: '--init --gpus all'
26+
# Respect CUDA_VISIBLE_DEVICES set by the runner and hard-limit docker to that device.
27+
# (--env without value forwards host var; --gpus maps only that device)
28+
flag: |
29+
--init
30+
--env CUDA_VISIBLE_DEVICES
31+
--gpus "device=${CUDA_VISIBLE_DEVICES:-all}"
2432
test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py'
2533
runner: ["self-hosted", "nvidiagpu"]
2634

@@ -102,8 +110,11 @@ jobs:
102110
build-args: base=devitocodes/${{ matrix.base }}
103111

104112
- name: Remove dangling layers
113+
if: ${{ !contains(matrix.runner, 'nvidiagpu') }}
105114
run: docker system prune -f
106115

107116
- name: Run tests
108117
run: |
109-
docker run ${{ matrix.flag }} --rm -t --name testrun 'devitocodes/devito:${{ matrix.tag }}-dev' pytest ${{ matrix.test }}
118+
docker run ${{ matrix.flag }} --rm -t --name "${CONTAINER_NAME}" \
119+
devitocodes/devito:${{ matrix.tag }}-dev \
120+
pytest ${{ matrix.test }}

0 commit comments

Comments
 (0)