Skip to content

Commit 3ff9c4f

Browse files
author
Bryan C. Mills
committed
os/signal: make TestStop resilient to initially-blocked signals
For reasons unknown, SIGUSR1 appears to be blocked at process start for tests on the android-arm-corellium and android-arm64-corellium builders. (This has been observed before, too: see CL 203957.) Make the test resilient to blocked signals by always calling Notify and waiting for potential signal delivery after sending any signal that is not known to be unblocked. Also remove the initial SIGWINCH signal from testCancel. The behavior of an unhandled SIGWINCH is already tested in TestStop, so we don't need to re-test that same case: waiting for an unhandled signal takes a comparatively long time (because we necessarily don't know when it has been delivered), so this redundancy makes the overall test binary needlessly slow, especially since it is called from both TestReset and TestIgnore. Since each signal is always unblocked while we have a notification channel registered for it, we don't need to modify any other tests: TestStop and testCancel are the only functions that send signals without a registered channel. Fixes #38165 Updates #33174 Updates #15661 Change-Id: I215880894e954b62166024085050d34323431b63 Reviewed-on: https://go-review.googlesource.com/c/go/+/226461 Run-TryBot: Bryan C. Mills <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]> TryBot-Result: Gobot Gobot <[email protected]>
1 parent 2cb80bd commit 3ff9c4f

File tree

1 file changed

+49
-53
lines changed

1 file changed

+49
-53
lines changed

src/os/signal/signal_test.go

Lines changed: 49 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,6 @@ func TestStress(t *testing.T) {
152152
}
153153

154154
func testCancel(t *testing.T, ignore bool) {
155-
// Send SIGWINCH. By default this signal should be ignored.
156-
syscall.Kill(syscall.Getpid(), syscall.SIGWINCH)
157-
quiesce()
158-
159155
// Ask to be notified on c1 when a SIGWINCH is received.
160156
c1 := make(chan os.Signal, 1)
161157
Notify(c1, syscall.SIGWINCH)
@@ -175,25 +171,16 @@ func testCancel(t *testing.T, ignore bool) {
175171
waitSig(t, c2, syscall.SIGHUP)
176172

177173
// Ignore, or reset the signal handlers for, SIGWINCH and SIGHUP.
174+
// Either way, this should undo both calls to Notify above.
178175
if ignore {
179176
Ignore(syscall.SIGWINCH, syscall.SIGHUP)
177+
// Don't bother deferring a call to Reset: it is documented to undo Notify,
178+
// but its documentation says nothing about Ignore, and (as of the time of
179+
// writing) it empirically does not undo an Ignore.
180180
} else {
181181
Reset(syscall.SIGWINCH, syscall.SIGHUP)
182182
}
183183

184-
// At this point we do not expect any further signals on c1.
185-
// However, it is just barely possible that the initial SIGWINCH
186-
// at the start of this function was delivered after we called
187-
// Notify on c1. In that case the waitSig for SIGWINCH may have
188-
// picked up that initial SIGWINCH, and the second SIGWINCH may
189-
// then have been delivered on the channel. This sequence of events
190-
// may have caused issue 15661.
191-
// So, read any possible signal from the channel now.
192-
select {
193-
case <-c1:
194-
default:
195-
}
196-
197184
// Send this process a SIGWINCH. It should be ignored.
198185
syscall.Kill(syscall.Getpid(), syscall.SIGWINCH)
199186

@@ -206,20 +193,24 @@ func testCancel(t *testing.T, ignore bool) {
206193

207194
select {
208195
case s := <-c1:
209-
t.Fatalf("unexpected signal %v", s)
196+
t.Errorf("unexpected signal %v", s)
210197
default:
211198
// nothing to read - good
212199
}
213200

214201
select {
215202
case s := <-c2:
216-
t.Fatalf("unexpected signal %v", s)
203+
t.Errorf("unexpected signal %v", s)
217204
default:
218205
// nothing to read - good
219206
}
220207

221-
// Reset the signal handlers for all signals.
222-
Reset()
208+
// One or both of the signals may have been blocked for this process
209+
// by the calling process.
210+
// Discard any queued signals now to avoid interfering with other tests.
211+
Notify(c1, syscall.SIGWINCH)
212+
Notify(c2, syscall.SIGHUP)
213+
quiesce()
223214
}
224215

225216
// Test that Reset cancels registration for listed signals on all channels.
@@ -313,61 +304,66 @@ func TestStop(t *testing.T) {
313304
// Test the three different signals concurrently.
314305
t.Parallel()
315306

316-
// Send the signal.
307+
// If the signal is not ignored, send the signal before registering a
308+
// channel to verify the behavior of the default Go handler.
317309
// If it's SIGWINCH or SIGUSR1 we should not see it.
318310
// If it's SIGHUP, maybe we'll die. Let the flag tell us what to do.
319-
switch sig {
320-
case syscall.SIGHUP:
321-
if *sendUncaughtSighup == 1 {
322-
syscall.Kill(syscall.Getpid(), sig)
323-
for *dieFromSighup {
324-
quiesce()
325-
}
326-
}
327-
default:
311+
mayHaveBlockedSignal := false
312+
if !Ignored(sig) && (sig != syscall.SIGHUP || *sendUncaughtSighup == 1) {
328313
syscall.Kill(syscall.Getpid(), sig)
314+
quiesce()
315+
316+
// We don't know whether sig is blocked for this process; see
317+
// https://golang.org/issue/38165. Assume that it could be.
318+
mayHaveBlockedSignal = true
329319
}
330-
quiesce()
331320

332321
// Ask for signal
333322
c := make(chan os.Signal, 1)
334323
Notify(c, sig)
335324

336-
// Send this process that signal
325+
// Send this process the signal again.
337326
syscall.Kill(syscall.Getpid(), sig)
338327
waitSig(t, c, sig)
339328

329+
if mayHaveBlockedSignal {
330+
// We may have received a queued initial signal in addition to the one
331+
// that we sent after Notify. If so, waitSig may have observed that
332+
// initial signal instead of the second one, and we may need to wait for
333+
// the second signal to clear. Do that now.
334+
quiesce()
335+
select {
336+
case <-c:
337+
default:
338+
}
339+
}
340+
340341
// Stop watching for the signal and send it again.
341342
// If it's SIGHUP, maybe we'll die. Let the flag tell us what to do.
342343
Stop(c)
343-
switch sig {
344-
case syscall.SIGHUP:
345-
if *sendUncaughtSighup == 2 {
346-
syscall.Kill(syscall.Getpid(), sig)
347-
for *dieFromSighup {
348-
quiesce()
349-
}
350-
}
351-
default:
344+
if sig != syscall.SIGHUP || *sendUncaughtSighup == 2 {
352345
syscall.Kill(syscall.Getpid(), sig)
353-
}
346+
quiesce()
354347

355-
quiesce()
356-
select {
357-
case s := <-c:
358-
if sig == syscall.SIGUSR1 && s == syscall.SIGUSR1 && runtime.GOOS == "android" {
359-
testenv.SkipFlaky(t, 38165)
348+
select {
349+
case s := <-c:
350+
t.Errorf("unexpected signal %v", s)
351+
default:
352+
// nothing to read - good
360353
}
361-
t.Fatalf("unexpected signal %v", s)
362-
default:
363-
// nothing to read - good
354+
355+
// If we're going to receive a signal, it has almost certainly been
356+
// received by now. However, it may have been blocked for this process —
357+
// we don't know. Explicitly unblock it and wait for it to clear now.
358+
Notify(c, sig)
359+
quiesce()
360+
Stop(c)
364361
}
365362
})
366363
}
367364
}
368365

369-
// Test that when run under nohup, an uncaught SIGHUP does not kill the program,
370-
// but a
366+
// Test that when run under nohup, an uncaught SIGHUP does not kill the program.
371367
func TestNohup(t *testing.T) {
372368
// Ugly: ask for SIGHUP so that child will not have no-hup set
373369
// even if test is running under nohup environment.

0 commit comments

Comments
 (0)