Skip to content

Commit f11a1f5

Browse files
authored
[RayCluster] yunikorn batchscheduler respect gang scheduling (#4075)
Signed-off-by: fscnick <[email protected]>
1 parent 9a4de56 commit f11a1f5

File tree

2 files changed: 5 additions and 4 deletions.

ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_scheduler_test.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -142,11 +142,12 @@ func TestPopulateGangSchedulingAnnotations(t *testing.T) {
142142
// memory: 10Gi
143143
// nvidia.com/gpu: 1
144144
addWorkerPodSpec(rayClusterWithGangScheduling,
145-
"worker-group-1", 1, 1, 2, v1.ResourceList{
145+
"worker-group-1", 2, 2, 2, v1.ResourceList{
146146
v1.ResourceCPU: resource.MustParse("2"),
147147
v1.ResourceMemory: resource.MustParse("10Gi"),
148148
"nvidia.com/gpu": resource.MustParse("1"),
149149
})
150+
rayClusterWithGangScheduling.Spec.WorkerGroupSpecs[0].NumOfHosts = 3
150151

151152
// gang-scheduling enabled case, the plugin should populate the taskGroup annotation to the app
152153
rayPod := createPod("ray-pod", "default")
@@ -173,7 +174,7 @@ func TestPopulateGangSchedulingAnnotations(t *testing.T) {
173174
// verify the correctness of worker group
174175
workerGroup := taskGroups.getTaskGroup("worker-group-1")
175176
assert.NotNil(t, workerGroup)
176-
assert.Equal(t, int32(1), workerGroup.MinMember)
177+
assert.Equal(t, int32(6), workerGroup.MinMember)
177178
assert.Equal(t, resource.MustParse("2"), workerGroup.MinResource[v1.ResourceCPU.String()])
178179
assert.Equal(t, resource.MustParse("10Gi"), workerGroup.MinResource[v1.ResourceMemory.String()])
179180
assert.Equal(t, resource.MustParse("1"), workerGroup.MinResource["nvidia.com/gpu"])

ray-operator/controllers/ray/batchscheduler/yunikorn/yunikorn_task_groups.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,11 +52,11 @@ func newTaskGroupsFromApp(app *v1.RayCluster) *TaskGroups {
5252
// worker groups
5353
for _, workerGroupSpec := range app.Spec.WorkerGroupSpecs {
5454
workerMinResource := utils.CalculatePodResource(workerGroupSpec.Template.Spec)
55-
minWorkers := workerGroupSpec.MinReplicas
55+
minWorkers := (*workerGroupSpec.MinReplicas) * workerGroupSpec.NumOfHosts
5656
taskGroups.addTaskGroup(
5757
TaskGroup{
5858
Name: workerGroupSpec.GroupName,
59-
MinMember: *minWorkers,
59+
MinMember: minWorkers,
6060
MinResource: utils.ConvertResourceListToMapString(workerMinResource),
6161
NodeSelector: workerGroupSpec.Template.Spec.NodeSelector,
6262
Tolerations: workerGroupSpec.Template.Spec.Tolerations,

0 commit comments

Comments (0)