Skip to content

Commit f29206e

Browse files
Merge pull request #15957 from danwinship/service-test-flake
Automatic merge from submit-queue (batch tested with PRs 15942, 15940, 15957, 15858, 15946). Make service e2e tests retry to avoid flakes. This is an attempt to fix #14385; given that our tests tend to flake but the upstream service tests don't, it seems like we should make our tests more like theirs. So this replaces our `checkConnectivityToHost` code with code mostly copied from the upstream `execSourceipTest` (which, among other things, retries on failure until the timeout is reached). There are actually a lot of changes we could make to our tests to use new upstream code, but I wanted to keep this simple for now to avoid introducing new flakes. Fixes #14385 (hopefully).
2 parents 0518674 + 460db29 commit f29206e

File tree

1 file changed

+32
-27
lines changed

1 file changed

+32
-27
lines changed

test/extended/networking/util.go

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package networking
33
import (
44
"fmt"
55
"os"
6+
"strings"
67
"time"
78

89
testexutil "github.com/openshift/origin/test/extended/util"
@@ -163,32 +164,36 @@ func launchWebserverService(f *e2e.Framework, serviceName string, nodeName strin
163164
return
164165
}
165166

166-
func checkConnectivityToHost(f *e2e.Framework, nodeName string, podName string, host string, timeout int) error {
167-
contName := fmt.Sprintf("%s-container", podName)
168-
pod := &kapiv1.Pod{
169-
TypeMeta: metav1.TypeMeta{
170-
Kind: "Pod",
171-
},
172-
ObjectMeta: metav1.ObjectMeta{
173-
Name: podName,
174-
},
175-
Spec: kapiv1.PodSpec{
176-
Containers: []kapiv1.Container{
177-
{
178-
Name: contName,
179-
Image: "gcr.io/google_containers/busybox",
180-
Command: []string{"wget", fmt.Sprintf("--timeout=%d", timeout), "-s", host},
181-
},
182-
},
183-
NodeName: nodeName,
184-
RestartPolicy: kapiv1.RestartPolicyNever,
185-
},
167+
func checkConnectivityToHost(f *e2e.Framework, nodeName string, podName string, host string, timeout time.Duration) error {
168+
e2e.Logf("Creating an exec pod on node %v", nodeName)
169+
execPodName := e2e.CreateExecPodOrFail(f.ClientSet, f.Namespace.Name, fmt.Sprintf("execpod-sourceip-%s", nodeName), func(pod *kapiv1.Pod) {
170+
pod.Spec.NodeName = nodeName
171+
})
172+
defer func() {
173+
e2e.Logf("Cleaning up the exec pod")
174+
err := f.ClientSet.Core().Pods(f.Namespace.Name).Delete(execPodName, nil)
175+
Expect(err).NotTo(HaveOccurred())
176+
}()
177+
execPod, err := f.ClientSet.Core().Pods(f.Namespace.Name).Get(execPodName, metav1.GetOptions{})
178+
e2e.ExpectNoError(err)
179+
180+
var stdout string
181+
e2e.Logf("Waiting up to %v to wget %s", timeout, host)
182+
cmd := fmt.Sprintf("wget -T 30 -qO- %s", host)
183+
for start := time.Now(); time.Since(start) < timeout; time.Sleep(2) {
184+
stdout, err = e2e.RunHostCmd(execPod.Namespace, execPod.Name, cmd)
185+
if err != nil {
186+
e2e.Logf("got err: %v, retry until timeout", err)
187+
continue
188+
}
189+
// Need to check output because wget -q might omit the error.
190+
if strings.TrimSpace(stdout) == "" {
191+
e2e.Logf("got empty stdout, retry until timeout")
192+
continue
193+
}
194+
break
186195
}
187-
podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name)
188-
_, err := podClient.Create(pod)
189-
expectNoError(err)
190-
defer podClient.Delete(podName, nil)
191-
return waitForPodSuccessInNamespace(f.ClientSet, podName, contName, f.Namespace.Name)
196+
return err
192197
}
193198

194199
func pluginIsolatesNamespaces() bool {
@@ -226,7 +231,7 @@ func checkPodIsolation(f1, f2 *e2e.Framework, nodeType NodeType) error {
226231
defer f1.ClientSet.CoreV1().Pods(f1.Namespace.Name).Delete(podName, nil)
227232
ip := e2e.LaunchWebserverPod(f1, podName, serverNode.Name)
228233

229-
return checkConnectivityToHost(f2, clientNode.Name, "isolation-wget", ip, 10)
234+
return checkConnectivityToHost(f2, clientNode.Name, "isolation-wget", ip, 10*time.Second)
230235
}
231236

232237
func checkServiceConnectivity(serverFramework, clientFramework *e2e.Framework, nodeType NodeType) error {
@@ -247,7 +252,7 @@ func checkServiceConnectivity(serverFramework, clientFramework *e2e.Framework, n
247252
defer serverFramework.ClientSet.CoreV1().Services(serverFramework.Namespace.Name).Delete(podName, nil)
248253
ip := launchWebserverService(serverFramework, podName, serverNode.Name)
249254

250-
return checkConnectivityToHost(clientFramework, clientNode.Name, "service-wget", ip, 10)
255+
return checkConnectivityToHost(clientFramework, clientNode.Name, "service-wget", ip, 10*time.Second)
251256
}
252257

253258
func InSingleTenantContext(body func()) {

0 commit comments

Comments
 (0)