Skip to content

Commit 64a3282

Browse files
committed
Fix exit non-zero exit codes when running as pid1
(v4 branch) Prior to this we would swallow the exit code and always exit(0).
1 parent 6f5c1a9 commit 64a3282

File tree

5 files changed

+71
-30
lines changed

5 files changed

+71
-30
lines changed

cmd/git-sync/main.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -269,12 +269,9 @@ func main() {
269269
// In case we come up as pid 1, act as init.
270270
if os.Getpid() == 1 {
271271
fmt.Fprintf(os.Stderr, "INFO: detected pid 1, running init handler\n")
272-
err := pid1.ReRun()
272+
code, err := pid1.ReRun()
273273
if err == nil {
274-
os.Exit(0)
275-
}
276-
if exerr, ok := err.(*exec.ExitError); ok {
277-
os.Exit(exerr.ExitCode())
274+
os.Exit(code)
278275
}
279276
fmt.Fprintf(os.Stderr, "ERROR: unhandled pid1 error: %v\n", err)
280277
os.Exit(127)

pkg/pid1/pid1.go

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,60 +11,73 @@ import (
1111
// ReRun converts the current process into a bare-bones init, runs the current
1212
// commandline as a child process, and waits for it to complete. The new child
1313
// process shares stdin/stdout/stderr with the parent. When the child exits,
14-
// this will return the same value as exec.Command.Run(). If there is an error
15-
// in reaping children that this can not handle, it will panic.
16-
func ReRun() error {
14+
// this will return the exit code. If this returns an error, the child process
15+
// may not be terminated.
16+
func ReRun() (int, error) {
1717
bin, err := os.Readlink("/proc/self/exe")
1818
if err != nil {
19-
return err
19+
return 0, err
2020
}
2121
cmd := exec.Command(bin, os.Args[1:]...)
2222
cmd.Stdin = os.Stdin
2323
cmd.Stdout = os.Stdout
2424
cmd.Stderr = os.Stderr
2525
if err := cmd.Start(); err != nil {
26-
return err
26+
return 0, err
2727
}
28-
runInit(cmd.Process.Pid)
29-
return nil
28+
return runInit(cmd.Process.Pid)
3029
}
3130

32-
// runInit runs a bare-bones init process. This will return when firstborn
33-
// exits. In case of truly unknown errors it will panic.
34-
func runInit(firstborn int) {
31+
// runInit runs a bare-bones init process. When firstborn exits, this will
32+
// return the exit code. If this returns an error, the child process may not
33+
// be terminated.
34+
func runInit(firstborn int) (int, error) {
3535
sigs := make(chan os.Signal, 8)
3636
signal.Notify(sigs)
3737
for sig := range sigs {
3838
if sig != syscall.SIGCHLD {
3939
// Pass it on to the real process.
40-
syscall.Kill(firstborn, sig.(syscall.Signal))
40+
if err := syscall.Kill(firstborn, sig.(syscall.Signal)); err != nil {
41+
return 0, err
42+
}
4143
}
4244
// Always try to reap a child - empirically, sometimes this gets missed.
43-
if sigchld(firstborn) {
44-
return
45+
die, status, err := sigchld(firstborn)
46+
if err != nil {
47+
return 0, err
48+
}
49+
if die {
50+
if status.Signaled() {
51+
return 128 + int(status.Signal()), nil
52+
}
53+
if status.Exited() {
54+
return status.ExitStatus(), nil
55+
}
56+
return 0, fmt.Errorf("unhandled exit status: 0x%x\n", status)
4557
}
4658
}
59+
return 0, fmt.Errorf("signal handler terminated unexpectedly")
4760
}
4861

49-
// sigchld handles a SIGCHLD. This will return true when firstborn exits. In
50-
// case of truly unknown errors it will panic.
51-
func sigchld(firstborn int) bool {
62+
// sigchld handles a SIGCHLD. This will return true only when firstborn exits.
63+
// For any other process thihs will return false and a 0 status.
64+
func sigchld(firstborn int) (bool, syscall.WaitStatus, error) {
5265
// Loop to handle multiple child processes.
5366
for {
5467
var status syscall.WaitStatus
5568
pid, err := syscall.Wait4(-1, &status, syscall.WNOHANG, nil)
5669
if err != nil {
57-
panic(fmt.Sprintf("failed to wait4(): %v\n", err))
70+
return false, 0, fmt.Errorf("wait4(): %v\n", err)
5871
}
5972

6073
if pid == firstborn {
61-
return true
74+
return true, status, nil
6275
}
6376
if pid <= 0 {
6477
// No more children to reap.
6578
break
6679
}
6780
// Must have found one, see if there are more.
6881
}
69-
return false
82+
return false, 0, nil
7083
}

pkg/pid1/test/fast-exit/main.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ package main
44
import (
55
"fmt"
66
"os"
7-
"os/exec"
87

98
"k8s.io/git-sync/pkg/pid1"
109
)
@@ -13,12 +12,9 @@ func main() {
1312
// In case we come up as pid 1, act as init.
1413
if os.Getpid() == 1 {
1514
fmt.Printf("detected pid 1, running as init\n")
16-
err := pid1.ReRun()
15+
code, err := pid1.ReRun()
1716
if err == nil {
18-
os.Exit(0)
19-
}
20-
if exerr, ok := err.(*exec.ExitError); ok {
21-
os.Exit(exerr.ExitCode())
17+
os.Exit(code)
2218
}
2319
fmt.Printf("unhandled pid1 error: %v\n", err)
2420
os.Exit(127)

pkg/pid1/test/fast-exit/test.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@
33
go build
44
docker build -t example.com/fast-exit .
55

6+
# This should exit(42)
7+
docker run -ti --rm example.com/fast-exit
8+
RET=$?
9+
if [ "$RET" != 42 ]; then
10+
echo "FAIL: exit code was not preserved: $RET"
11+
exit 1
12+
fi
13+
614
# In the past we have observed hangs and missed signals. This *should* run
715
# forever.
816
while true; do

test_e2e.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,33 @@ assert_file_eq "$ROOT"/link/file "$TESTCASE"
254254
# Wrap up
255255
pass
256256

257+
##############################################
258+
# Test non-zero exit
259+
##############################################
260+
testcase "non-zero-exit"
261+
echo "$TESTCASE" > "$REPO"/file
262+
git -C "$REPO" commit -qam "$TESTCASE"
263+
ln -s "$ROOT" "$DIR/rootlink" # symlink to test
264+
(
265+
set +o errexit
266+
GIT_SYNC \
267+
--one-time \
268+
--repo="file://$REPO" \
269+
--branch=e2e-branch \
270+
--rev=does-not-exit \
271+
--root="$DIR/rootlink" \
272+
--link="link" \
273+
> "$DIR"/log."$TESTCASE" 2>&1
274+
RET=$?
275+
if [[ "$RET" != 1 ]]; then
276+
fail "expected exit code 1, got $RET"
277+
fi
278+
assert_file_absent "$ROOT"/link
279+
assert_file_absent "$ROOT"/link/file
280+
)
281+
# Wrap up
282+
pass
283+
257284
##############################################
258285
# Test default syncing (master)
259286
##############################################

0 commit comments

Comments
 (0)