Skip to content

Commit 9e53f36

Browse files
committed
resource reservations
1 parent a9a720f commit 9e53f36

File tree

17 files changed

+2533
-152
lines changed

17 files changed

+2533
-152
lines changed

cmd/uncloud/service/run.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ type runOptions struct {
2626
image string
2727
machines []string
2828
memory dockeropts.MemBytes
29+
memoryReservation dockeropts.MemBytes
30+
cpuReservation dockeropts.NanoCPUs
2931
mode string
3032
name string
3133
privileged bool
@@ -62,6 +64,8 @@ func NewRunCommand() *cobra.Command {
6264
cmd.Flags().VarP(&opts.cpu, "cpu", "",
6365
"Maximum number of CPU cores a service container can use. Fractional values are allowed: "+
6466
"0.5 for half a core or 2.25 for two and a quarter cores.")
67+
cmd.Flags().Var(&opts.cpuReservation, "reserve-cpu",
68+
"Minimum CPU cores to reserve for placement (nanocores). Fractional values are allowed, e.g. 0.5 for half a core.")
6569
cmd.Flags().StringVar(&opts.entrypoint, "entrypoint", "",
6670
"Overwrite the default ENTRYPOINT of the image. Pass an empty string \"\" to reset it.")
6771
cmd.Flags().StringSliceVarP(&opts.env, "env", "e", nil,
@@ -78,6 +82,8 @@ func NewRunCommand() *cobra.Command {
7882
"Maximum amount of memory a service container can use. Value is a positive integer with optional unit suffix "+
7983
"(b, k, m, g). Default unit is bytes if no suffix specified.\n"+
8084
"Examples: 1073741824, 1024m, 1g (all equal 1 gibibyte)")
85+
cmd.Flags().Var(&opts.memoryReservation, "reserve-memory",
86+
"Minimum memory to reserve for placement. Value is a positive integer with optional unit suffix (b, k, m, g).")
8187
cmd.Flags().StringVarP(&opts.name, "name", "n", "",
8288
"Assign a name to the service. A random name is generated if not specified.")
8389
cmd.Flags().BoolVar(&opts.privileged, "privileged", false,
@@ -210,8 +216,10 @@ func prepareServiceSpec(opts runOptions) (api.ServiceSpec, error) {
210216
Privileged: opts.privileged,
211217
PullPolicy: opts.pull,
212218
Resources: api.ContainerResources{
213-
CPU: opts.cpu.Value(),
214-
Memory: opts.memory.Value(),
219+
CPU: opts.cpu.Value(),
220+
Memory: opts.memory.Value(),
221+
CPUReservation: opts.cpuReservation.Value(),
222+
MemoryReservation: opts.memoryReservation.Value(),
215223
},
216224
User: opts.user,
217225
VolumeMounts: mounts,

internal/machine/api/pb/machine.pb.go

Lines changed: 140 additions & 95 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/machine/api/pb/machine.proto

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@ message MachineInfo {
2525
string name = 2;
2626
NetworkConfig network = 3;
2727
IP public_ip = 4;
28+
// Resource capacity and reservations for scheduling.
29+
int64 total_cpu_nanos = 5; // Total CPU in nanocores (1e9 = 1 core)
30+
int64 total_memory_bytes = 6;
31+
int64 reserved_cpu_nanos = 7;
32+
int64 reserved_memory_bytes = 8;
2833
}
2934

3035
message NetworkConfig {

internal/machine/machine.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"strconv"
1616
"sync"
1717

18+
"github.com/docker/docker/api/types/container"
1819
"github.com/docker/docker/client"
1920
"github.com/docker/go-connections/sockets"
2021
"github.com/psviderski/uncloud/internal/corrosion"
@@ -864,16 +865,50 @@ func (m *Machine) Token(_ context.Context, _ *emptypb.Empty) (*pb.TokenResponse,
864865
return &pb.TokenResponse{Token: tokenStr}, nil
865866
}
866867

867-
func (m *Machine) Inspect(_ context.Context, _ *emptypb.Empty) (*pb.MachineInfo, error) {
868-
return &pb.MachineInfo{
868+
func (m *Machine) Inspect(ctx context.Context, _ *emptypb.Empty) (*pb.MachineInfo, error) {
869+
info := &pb.MachineInfo{
869870
Id: m.state.ID,
870871
Name: m.state.Name,
871872
Network: &pb.NetworkConfig{
872873
Subnet: pb.NewIPPrefix(m.state.Network.Subnet),
873874
ManagementIp: pb.NewIP(m.state.Network.ManagementIP),
874875
PublicKey: m.state.Network.PublicKey,
875876
},
876-
}, nil
877+
}
878+
879+
// Populate resource capacity and reservations for scheduling.
880+
if m.dockerService != nil {
881+
if err := m.populateResources(ctx, info); err != nil {
882+
slog.Warn("Failed to populate machine resources.", "err", err)
883+
}
884+
}
885+
886+
return info, nil
887+
}
888+
889+
// populateResources fills in the resource capacity and reservation fields of MachineInfo.
890+
func (m *Machine) populateResources(ctx context.Context, info *pb.MachineInfo) error {
891+
// Get system info for total CPU and memory.
892+
dockerInfo, err := m.dockerService.Client.Info(ctx)
893+
if err != nil {
894+
return fmt.Errorf("get docker info: %w", err)
895+
}
896+
897+
info.TotalCpuNanos = int64(dockerInfo.NCPU) * 1e9
898+
info.TotalMemoryBytes = dockerInfo.MemTotal
899+
900+
// Sum up reserved resources from running containers.
901+
containers, err := m.dockerService.ListServiceContainers(ctx, "", container.ListOptions{})
902+
if err != nil {
903+
return fmt.Errorf("list containers: %w", err)
904+
}
905+
906+
for _, ctr := range containers {
907+
info.ReservedCpuNanos += ctr.ServiceSpec.Container.Resources.CPUReservation
908+
info.ReservedMemoryBytes += ctr.ServiceSpec.Container.Resources.MemoryReservation
909+
}
910+
911+
return nil
877912
}
878913

879914
// IsNetworkReady returns true if the Docker network is ready for containers.

pkg/api/resources.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@ const (
1212
type ContainerResources struct {
1313
// CPU is the maximum amount of CPU nanocores (1000000000 = 1 CPU core) the container can use.
1414
CPU int64
15+
// CPUReservation is the minimum amount of CPU nanocores the container needs to run efficiently.
16+
// Used by the scheduler to ensure machines have sufficient available CPU before placement.
17+
CPUReservation int64
1518
// Memory is the maximum amount of memory (in bytes) the container can use.
1619
Memory int64
1720
// MemoryReservation is the minimum amount of memory (in bytes) the container needs to run efficiently.
18-
// TODO: implement a placement constraint that checks available memory on machines.
21+
// Used by the scheduler to ensure machines have sufficient available memory before placement.
1922
MemoryReservation int64
2023
// Device reservations/requests for access to things like GPUs
2124
DeviceReservations []container.DeviceRequest

pkg/client/compose/service.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ func resourcesFromCompose(service types.ServiceConfig) api.ContainerResources {
140140
}
141141
}
142142
if service.Deploy.Resources.Reservations != nil {
143+
if service.Deploy.Resources.Reservations.NanoCPUs > 0 {
144+
// Despite its name, the compose NanoCPUs field holds a fractional number of CPU cores (e.g. 0.5), so scale it to nanocores.
145+
resources.CPUReservation = int64(service.Deploy.Resources.Reservations.NanoCPUs * 1e9)
146+
}
143147
if service.Deploy.Resources.Reservations.MemoryBytes > 0 {
144148
resources.MemoryReservation = int64(service.Deploy.Resources.Reservations.MemoryBytes)
145149
}

pkg/client/deploy/scheduler/constraint.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package scheduler
22

33
import (
4+
"fmt"
45
"reflect"
56
"slices"
67
"strings"
@@ -45,6 +46,15 @@ func constraintsFromSpec(spec api.ServiceSpec) []Constraint {
4546
})
4647
}
4748

49+
// Add resource constraint if CPU or memory reservations are specified.
50+
resources := spec.Container.Resources
51+
if resources.CPUReservation > 0 || resources.MemoryReservation > 0 {
52+
constraints = append(constraints, &ResourceConstraint{
53+
RequiredCPU: resources.CPUReservation,
54+
RequiredMemory: resources.MemoryReservation,
55+
})
56+
}
57+
4858
return constraints
4959
}
5060

@@ -138,3 +148,45 @@ func (c *VolumesConstraint) Description() string {
138148

139149
return "Volumes: " + strings.Join(volumeNames, ", ")
140150
}
151+
152+
// ResourceConstraint restricts container placement to machines that have sufficient available CPU and memory.
153+
// This is opt-in: if no reservations are set (both values are 0), Evaluate always returns true.
154+
type ResourceConstraint struct {
155+
// RequiredCPU is the CPU reservation in nanocores (1e9 = 1 core). It is only checked when greater than 0.
156+
RequiredCPU int64
157+
// RequiredMemory is the memory reservation in bytes. It is only checked when greater than 0.
158+
RequiredMemory int64
159+
}
160+
161+
// Evaluate determines if a machine has sufficient available resources.
162+
// Returns true if the machine has enough unreserved CPU and memory, or if no reservations are required.
163+
// This accounts for both running containers and containers scheduled during this planning session.
164+
func (c *ResourceConstraint) Evaluate(machine *Machine) bool {
165+
// If no reservations are set, constraint always passes (opt-in behavior).
166+
if c.RequiredCPU == 0 && c.RequiredMemory == 0 {
167+
return true
168+
}
169+
170+
if c.RequiredCPU > 0 && machine.AvailableCPU() < c.RequiredCPU {
171+
return false
172+
}
173+
if c.RequiredMemory > 0 && machine.AvailableMemory() < c.RequiredMemory {
174+
return false
175+
}
176+
return true
177+
}
178+
179+
func (c *ResourceConstraint) Description() string {
180+
if c.RequiredCPU == 0 && c.RequiredMemory == 0 {
181+
return "No resource constraint"
182+
}
183+
184+
var parts []string
185+
if c.RequiredCPU > 0 {
186+
parts = append(parts, fmt.Sprintf("CPU: %.2f cores", float64(c.RequiredCPU)/1e9))
187+
}
188+
if c.RequiredMemory > 0 {
189+
parts = append(parts, fmt.Sprintf("Memory: %d MB", c.RequiredMemory/(1024*1024)))
190+
}
191+
return "Resource reservation: " + strings.Join(parts, ", ")
192+
}

0 commit comments

Comments
 (0)