Skip to content

Commit 83c5782

Browse files
Merge pull request #212 from circleci/cs/flaky-zombie-test
[ONPREM-2536] Fix child process cleanup on Windows
2 parents c035178 + 4179307 commit 83c5782

File tree

6 files changed

+98
-15
lines changed

6 files changed

+98
-15
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ By following these guidelines, we can easily determine which changes should be i
1010

1111
## Edge
1212

13+
- [#212](https://github.com/circleci/runner-init/pull/212) Fix child process cleanup on Windows using job objects. This ensures that child processes are destroyed when the parent process (task-agent) terminates.
1314
- [#197](https://github.com/circleci/runner-init/pull/197) Fix `%PATH%` on Windows by using the OS-specific path list separator.
1415
- [#133](https://github.com/circleci/runner-init/pull/133) Don't re-handle task errors. If GOAT handles a task error (either with an infra-fail or retry), don't exit with a nonzero status code. Doing so causes container agent to overwrite the original error message in the UI.
1516
- [#98](https://github.com/circleci/runner-init/pull/98) [INTERNAL] A small refactor to the builds and Dockerfiles in preparation for adding Windows support.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ require (
623623
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
624624
golang.org/x/net v0.43.0 // indirect
625625
golang.org/x/sync v0.16.0 // indirect
626-
golang.org/x/sys v0.35.0 // indirect
626+
golang.org/x/sys v0.35.0
627627
golang.org/x/text v0.28.0 // indirect
628628
google.golang.org/genproto/googleapis/api v0.0.0-20250728155136-f173205681a0 // indirect
629629
google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 // indirect

task/cmd/cmd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func New(ctx context.Context, cmd []string, forwardSignals bool, user string, en
3333
func (c *Command) Start() error {
3434
cmd := c.cmd
3535

36-
if err := cmd.Start(); err != nil {
36+
if err := c.start(); err != nil {
3737
return err
3838
}
3939

task/cmd/cmd_unix.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,7 @@ func additionalSetup(_ context.Context, cmd *exec.Cmd) {
4949
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
5050
}
5151
}
52+
53+
func (c *Command) start() error {
54+
return c.cmd.Start()
55+
}

task/cmd/cmd_windows.go

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,20 @@ import (
55
"os"
66
"os/exec"
77
"os/signal"
8+
"sync"
9+
"sync/atomic"
10+
"unsafe"
811

912
"github.com/circleci/ex/o11y"
13+
"golang.org/x/sys/windows"
1014
)
1115

1216
func forwardSignals(cmd *exec.Cmd) {
1317
ch := make(chan os.Signal, 1)
1418
signal.Notify(ch, os.Interrupt)
1519

1620
go func() {
21+
defer signal.Stop(ch)
1722
for range ch {
1823
_ = cmd.Process.Kill()
1924
}
@@ -24,4 +29,67 @@ func switchUser(ctx context.Context, _ *exec.Cmd, user string) {
2429
o11y.Log(ctx, "switching users is unsupported on windows", o11y.Field("username", user))
2530
}
2631

27-
func additionalSetup(_ context.Context, _ *exec.Cmd) {}
32+
func additionalSetup(_ context.Context, cmd *exec.Cmd) {
33+
cmd.SysProcAttr.CreationFlags = windows.CREATE_NEW_PROCESS_GROUP
34+
}
35+
36+
func (c *Command) start() error {
37+
g, err := newProcessExitGroup()
38+
if err != nil {
39+
return err
40+
}
41+
42+
c.cmd.Cancel = func() error {
43+
return g.Dispose()
44+
}
45+
46+
if err := c.cmd.Start(); err != nil {
47+
return err
48+
}
49+
50+
return g.AddProcess(c.cmd.Process)
51+
}
52+
53+
// Process struct matches os.Process layout to extract handle via unsafe pointer
54+
// https://stackoverflow.com/a/56739249
55+
type process struct {
56+
Pid int
57+
_ uint8
58+
_ atomic.Uint64
59+
_ sync.RWMutex
60+
Handle uintptr
61+
}
62+
63+
type processExitGroup windows.Handle
64+
65+
func newProcessExitGroup() (processExitGroup, error) {
66+
handle, err := windows.CreateJobObject(nil, nil)
67+
if err != nil {
68+
return 0, err
69+
}
70+
71+
info := windows.JOBOBJECT_EXTENDED_LIMIT_INFORMATION{
72+
BasicLimitInformation: windows.JOBOBJECT_BASIC_LIMIT_INFORMATION{
73+
LimitFlags: windows.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE,
74+
},
75+
}
76+
if _, err := windows.SetInformationJobObject(
77+
handle,
78+
windows.JobObjectExtendedLimitInformation,
79+
uintptr(unsafe.Pointer(&info)),
80+
uint32(unsafe.Sizeof(info))); err != nil {
81+
return 0, err
82+
}
83+
84+
return processExitGroup(handle), nil
85+
}
86+
87+
func (g processExitGroup) Dispose() error {
88+
return windows.CloseHandle(windows.Handle(g))
89+
}
90+
91+
func (g processExitGroup) AddProcess(p *os.Process) error {
92+
return windows.AssignProcessToJobObject(
93+
windows.Handle(g),
94+
windows.Handle((*process)(unsafe.Pointer(p)).Handle))
95+
}

task/orchestrator_test.go

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package task
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"io"
78
"net/http/httptest"
@@ -111,23 +112,26 @@ func TestOrchestrator(t *testing.T) {
111112
pid, err := strconv.Atoi(strings.TrimSpace(string(b)))
112113
assert.NilError(t, err)
113114

115+
time.Sleep(20 * time.Second)
116+
114117
check := func(poll.LogT) poll.Result {
118+
t.Logf("Checking for PID: %d", pid)
115119
p, err := os.FindProcess(pid)
116120
if runtime.GOOS == "windows" {
117-
assert.Check(t, cmp.Nil(p))
118-
} else {
119-
assert.NilError(t, err)
120-
121+
if err != nil {
122+
assert.Check(t, cmp.ErrorContains(err, "OpenProcess: The parameter is incorrect."),
123+
"an error implies the process is dead")
124+
return poll.Success()
125+
}
126+
} else if err == nil {
121127
err = p.Signal(syscall.Signal(0))
122-
assert.Check(t, cmp.ErrorIs(err, os.ErrProcessDone))
123-
}
124-
125-
if t.Failed() {
126-
return poll.Continue("process checks failed")
128+
if errors.Is(err, os.ErrProcessDone) {
129+
return poll.Success()
130+
}
127131
}
128-
return poll.Success()
132+
return poll.Continue("process checks failed")
129133
}
130-
poll.WaitOn(t, check, poll.WithTimeout(1*time.Minute))
134+
poll.WaitOn(t, check, poll.WithTimeout(2*time.Minute))
131135
},
132136
},
133137
},
@@ -402,7 +406,13 @@ func beFakeTaskAgent(t *testing.T) {
402406
}
403407

404408
if pidfile := os.Getenv("SIMULATE_A_ZOMBIE_PROCESS"); pidfile != "" {
405-
c := exec.Command(shell(t), "-c", "echo $$ >"+pidfile+" && sleep 300") //nolint:gosec // this is a test
409+
pidCmd := "echo $$ >"
410+
if runtime.GOOS == "windows" {
411+
// https://stackoverflow.com/a/77115597
412+
pidCmd = "ps -p $$ | awk 'NR ==2{print $4}' >"
413+
}
414+
pidCmd += pidfile + " && sleep 300"
415+
c := exec.Command(shell(t), "-c", pidCmd) //nolint:gosec // this is a test
406416
assert.NilError(t, c.Start())
407417
}
408418

0 commit comments

Comments
 (0)