Commit fd6dee8
sdn: kill containers that fail to update on node restart
With the move to remote runtimes, we can no longer get the pod's network namespace from kubelet (since we cannot insert ourselves into the remote runtime's plugin list and intercept network plugin calls). As kubelet does not call network plugins in any way on startup if a container is already running, we have no way to ensure the container is using the correct NetNamespace (as it may have changed while openshift-node was down) at startup, unless we encode the required information into OVS flows.

But if OVS was restarted around the same time OpenShift was, those flows are lost, and we have no information with which to recover the pod's networking on node startup. In this case, kill the infra container underneath kubelet so that it will be restarted and we can set its network up again.

NOTE: this is somewhat hacky and will not work with other remote runtimes like CRI-O, but OpenShift 3.6 hardcodes dockershim so this isn't a problem yet. The "correct" solution is either to checkpoint our network configuration at container setup time and recover that ourselves, or to add a GET/STATUS call to CNI and make kubelet call that operation on startup when recovering running containers.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1453113
1 parent 234eaab commit fd6dee8
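For context, the "checkpoint our network configuration" alternative mentioned in the message might look roughly like the sketch below. The podNetworkCheckpoint type, its fields, and the one-file-per-pod layout are hypothetical illustrations of the idea, not anything this commit adds.

    package main

    import (
    	"encoding/json"
    	"fmt"
    	"os"
    	"path/filepath"
    )

    // podNetworkCheckpoint is a hypothetical record written at pod setup time
    // so pod networking could be recovered on node restart without relying on
    // OVS flows or the container runtime.
    type podNetworkCheckpoint struct {
    	PodNamespace string `json:"podNamespace"`
    	PodName      string `json:"podName"`
    	NetNS        string `json:"netns"` // e.g. /proc/<pid>/ns/net
    	VNID         uint32 `json:"vnid"`
    }

    func writeCheckpoint(dir string, cp podNetworkCheckpoint) error {
    	data, err := json.Marshal(cp)
    	if err != nil {
    		return err
    	}
    	// One file per pod, read back by the SDN on startup.
    	path := filepath.Join(dir, fmt.Sprintf("%s_%s.json", cp.PodNamespace, cp.PodName))
    	return os.WriteFile(path, data, 0600)
    }

    func main() {
    	cp := podNetworkCheckpoint{PodNamespace: "default", PodName: "nginx1", NetNS: "/proc/1234/ns/net", VNID: 42}
    	if err := writeCheckpoint(os.TempDir(), cp); err != nil {
    		fmt.Println("checkpoint failed:", err)
    	}
    }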

File tree

3 files changed: +219 −18 lines changed

pkg/sdn/plugin/node.go

Lines changed: 41 additions & 18 deletions
@@ -20,8 +20,6 @@ import (
 	"github.com/openshift/origin/pkg/util/netutils"
 	"github.com/openshift/origin/pkg/util/ovs"
 
-	docker "github.com/fsouza/go-dockerclient"
-
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/apimachinery/pkg/labels"
@@ -32,6 +30,7 @@ import (
 	"k8s.io/kubernetes/pkg/apis/componentconfig"
 	kclientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
 	kinternalinformers "k8s.io/kubernetes/pkg/client/informers/informers_generated/internalversion"
+	"k8s.io/kubernetes/pkg/kubelet/dockertools"
 	knetwork "k8s.io/kubernetes/pkg/kubelet/network"
 	kexec "k8s.io/kubernetes/pkg/util/exec"
 )
@@ -195,31 +194,41 @@ func (node *OsdnNode) dockerPreCNICleanup() error {
 		itx.EndTransaction()
 	}
 
-	// Wait until docker has restarted since kubelet will exit it docker isn't running
-	dockerClient, err := docker.NewClientFromEnv()
-	if err != nil {
-		return fmt.Errorf("failed to get docker client: %v", err)
+	// Wait until docker has restarted since kubelet will exit if docker isn't running
+	if _, err := ensureDockerClient(); err != nil {
+		return err
+	}
+
+	log.Infof("Cleaned up left-over openshift-sdn docker bridge and interfaces")
+
+	return nil
+}
+
+func ensureDockerClient() (dockertools.DockerInterface, error) {
+	endpoint := os.Getenv("DOCKER_HOST")
+	if endpoint == "" {
+		endpoint = "unix:///var/run/docker.sock"
 	}
-	err = kwait.ExponentialBackoff(
+	dockerClient := dockertools.ConnectToDockerOrDie(endpoint, time.Minute, time.Minute)
+
+	// Wait until docker has restarted since kubelet will exit if docker isn't running
+	err := kwait.ExponentialBackoff(
 		kwait.Backoff{
 			Duration: 100 * time.Millisecond,
 			Factor:   1.2,
 			Steps:    6,
 		},
 		func() (bool, error) {
-			if err := dockerClient.Ping(); err != nil {
+			if _, err := dockerClient.Version(); err != nil {
 				// wait longer
 				return false, nil
 			}
 			return true, nil
 		})
 	if err != nil {
-		return fmt.Errorf("failed to connect to docker after SDN cleanup restart: %v", err)
+		return nil, fmt.Errorf("failed to connect to docker: %v", err)
 	}
-
-	log.Infof("Cleaned up left-over openshift-sdn docker bridge and interfaces")
-
-	return nil
+	return dockerClient, nil
 }
 
 func (node *OsdnNode) Start() error {
@@ -271,21 +280,35 @@ func (node *OsdnNode) Start() error {
 	}
 
 	if networkChanged {
-		var pods []kapi.Pod
+		var pods, podsToKill []kapi.Pod
+
 		pods, err = node.GetLocalPods(metav1.NamespaceAll)
 		if err != nil {
 			return err
 		}
 		for _, p := range pods {
-			err = node.UpdatePod(p)
-			if err != nil {
-				log.Warningf("Could not update pod %q: %s", p.Name, err)
+			// Ignore HostNetwork pods since they don't go through OVS
+			if p.Spec.SecurityContext != nil && p.Spec.SecurityContext.HostNetwork {
 				continue
 			}
-			if vnid, err := node.policy.GetVNID(p.Namespace); err == nil {
+			if err := node.UpdatePod(p); err != nil {
+				log.Warningf("will restart pod '%s/%s' due to update failure on restart: %s", p.Namespace, p.Name, err)
+				podsToKill = append(podsToKill, p)
+			} else if vnid, err := node.policy.GetVNID(p.Namespace); err == nil {
 				node.policy.EnsureVNIDRules(vnid)
 			}
 		}
+
+		// Kill pods we couldn't recover; they will get restarted and then
+		// we'll be able to set them up correctly
+		if len(podsToKill) > 0 {
+			docker, err := ensureDockerClient()
+			if err != nil {
+				log.Warningf("failed to get docker client: %v", err)
+			} else if err := killUpdateFailedPods(docker, podsToKill); err != nil {
+				log.Warningf("failed to restart pods that failed to update at startup: %v", err)
+			}
+		}
 	}
 
 	go kwait.Forever(node.policy.SyncVNIDRules, time.Hour)
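The backoff in ensureDockerClient makes at most 6 attempts starting at 100 ms and growing by a factor of 1.2, so it gives up after roughly one second of waiting (100 + 120 + 144 + 173 + 207 + 249 ms). A stdlib-only sketch of the same retry shape, with a placeholder probe standing in for dockerClient.Version(); the exact sleep/step ordering of kwait.ExponentialBackoff may differ slightly:

    package main

    import (
    	"errors"
    	"fmt"
    	"time"
    )

    // probe stands in for dockerClient.Version(); swap in a real health check.
    func probe() error { return errors.New("docker not up yet") }

    func main() {
    	delay := 100 * time.Millisecond
    	const factor = 1.2
    	const steps = 6

    	for i := 0; i < steps; i++ {
    		if err := probe(); err == nil {
    			fmt.Println("docker is up")
    			return
    		}
    		time.Sleep(delay) // wait longer, then try again
    		delay = time.Duration(float64(delay) * factor)
    	}
    	fmt.Println("gave up waiting for docker")
    }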

pkg/sdn/plugin/update.go

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
+package plugin
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/golang/glog"
+
+	dockertypes "github.com/docker/engine-api/types"
+
+	kapi "k8s.io/kubernetes/pkg/api"
+	kcontainer "k8s.io/kubernetes/pkg/kubelet/container"
+	"k8s.io/kubernetes/pkg/kubelet/dockertools"
+	"k8s.io/kubernetes/pkg/kubelet/leaky"
+)
+
+func formatPod(pod *kapi.Pod) string {
+	return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
+}
+
+// Copied from pkg/kubelet/dockershim/naming.go::parseSandboxName()
+func dockerSandboxNameToInfraPodNamePrefix(name string) (string, error) {
+	// Docker adds a "/" prefix to names, so trim it.
+	name = strings.TrimPrefix(name, "/")
+
+	parts := strings.Split(name, "_")
+	// Tolerate the random suffix.
+	// TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated.
+	if len(parts) != 6 && len(parts) != 7 {
+		return "", fmt.Errorf("failed to parse the sandbox name: %q", name)
+	}
+	if parts[0] != "k8s" {
+		return "", fmt.Errorf("container is not managed by kubernetes: %q", name)
+	}
+
+	// Return /k8s_POD_name_namespace_uid
+	return fmt.Sprintf("/k8s_%s_%s_%s_%s", leaky.PodInfraContainerName, parts[2], parts[3], parts[4]), nil
+}
+
+func killInfraContainerForPod(docker dockertools.DockerInterface, containers []dockertypes.Container, cid kcontainer.ContainerID) error {
+	// FIXME: handle CRI-O; but unfortunately CRI-O supports multiple
+	// "runtimes" which depend on the filename of that runtime binary,
+	// so we have no idea what cid.Type will be.
+	if cid.Type != "docker" {
+		return fmt.Errorf("unhandled runtime %q", cid.Type)
+	}
+
+	var err error
+	var infraPrefix string
+	for _, c := range containers {
+		if c.ID == cid.ID {
+			infraPrefix, err = dockerSandboxNameToInfraPodNamePrefix(c.Names[0])
+			if err != nil {
+				return err
+			}
+			break
+		}
+	}
+	if infraPrefix == "" {
+		return fmt.Errorf("failed to generate infra container prefix from %q", cid.ID)
+	}
+	// Find and kill the infra container
+	for _, c := range containers {
+		if strings.HasPrefix(c.Names[0], infraPrefix) {
+			if err := docker.StopContainer(c.ID, 10); err != nil {
+				glog.Warningf("failed to stop infra container %q", c.ID)
+			}
+		}
+	}
+
+	return nil
+}
+
+// This function finds the ContainerID of a failed pod, parses it, and kills
+// any matching infra container for that pod.
+func killUpdateFailedPods(docker dockertools.DockerInterface, pods []kapi.Pod) error {
+	containers, err := docker.ListContainers(dockertypes.ContainerListOptions{All: true})
+	if err != nil {
+		return fmt.Errorf("failed to list docker containers: %v", err)
+	}
+
+	for _, pod := range pods {
+		// Find the first running container in the pod and use it to find the infra container
+		var cid kcontainer.ContainerID
+		for i := range pod.Status.ContainerStatuses {
+			if pod.Status.ContainerStatuses[i].State.Running != nil && pod.Status.ContainerStatuses[i].ContainerID != "" {
+				cid = kcontainer.ParseContainerID(pod.Status.ContainerStatuses[i].ContainerID)
+				break
+			}
+		}
+		if cid.IsEmpty() {
+			continue
+		}
+		glog.V(5).Infof("Killing pod %q sandbox on restart", formatPod(&pod))
+		if err := killInfraContainerForPod(docker, containers, cid); err != nil {
+			glog.Warningf("Failed to kill pod %q sandbox: %v", formatPod(&pod), err)
+			continue
+		}
+	}
+	return nil
+}
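To make the naming convention above concrete: dockershim names containers k8s_<container>_<pod>_<namespace>_<uid>_<attempt>, and leaky.PodInfraContainerName is the "POD" segment used for sandbox (infra) containers. A self-contained sketch of the same parsing, with "POD" inlined so it needs no kubernetes imports:

    package main

    import (
    	"fmt"
    	"strings"
    )

    // Mirrors dockerSandboxNameToInfraPodNamePrefix above; "POD" stands in
    // for leaky.PodInfraContainerName.
    func infraPrefix(name string) (string, error) {
    	name = strings.TrimPrefix(name, "/")
    	parts := strings.Split(name, "_")
    	if len(parts) != 6 && len(parts) != 7 {
    		return "", fmt.Errorf("failed to parse the sandbox name: %q", name)
    	}
    	if parts[0] != "k8s" {
    		return "", fmt.Errorf("container is not managed by kubernetes: %q", name)
    	}
    	// k8s_<container>_<pod>_<namespace>_<uid>_<attempt> -> /k8s_POD_<pod>_<namespace>_<uid>
    	return fmt.Sprintf("/k8s_POD_%s_%s_%s", parts[2], parts[3], parts[4]), nil
    }

    func main() {
    	prefix, err := infraPrefix("/k8s_nginx1_nginx1_default_379e14d9-562e-11e7-b251-0242ac110003_0")
    	if err != nil {
    		panic(err)
    	}
    	// Prints /k8s_POD_nginx1_default_379e14d9-562e-11e7-b251-0242ac110003,
    	// which matches the infra container name used in the test below.
    	fmt.Println(prefix)
    }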

pkg/sdn/plugin/update_test.go

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+package plugin
+
+import (
+	"fmt"
+	"testing"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	kapi "k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/kubelet/dockertools"
+)
+
+func TestPodKillOnFailedUpdate(t *testing.T) {
+	fakeDocker := dockertools.NewFakeDockerClient()
+	id := "509383712c59ee328a78ae99d0f9411aa99f0bdf1ecf304aa83afb58f16f0768"
+	name := "/k8s_nginx1_nginx1_default_379e14d9-562e-11e7-b251-0242ac110003_0"
+	infraId := "0e7ff50ca5399654fe3b93a21dae1d264560bc018d5f0b13e79601c1a7948d6e"
+	randomId := "71167588cc97636d2f269081579fb9668b4e42acdfdd1e1cea220f6de86a8b50"
+	fakeDocker.SetFakeRunningContainers([]*dockertools.FakeContainer{
+		{
+			ID:   id,
+			Name: name,
+		},
+		{
+			// Infra container for the above container
+			ID:   infraId,
+			Name: "/k8s_POD_nginx1_default_379e14d9-562e-11e7-b251-0242ac110003_1",
+		},
+		{
+			// Random container unrelated to first two
+			ID:   randomId,
+			Name: "/k8s_POD_blah_default_fef9db05-f5c2-4361-9244-2eb505bc61e7_1",
+		},
+	})
+
+	pods := []kapi.Pod{
+		{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "testpod1",
+				Namespace: "namespace1",
+			},
+			Status: kapi.PodStatus{
+				ContainerStatuses: []kapi.ContainerStatus{
+					{
+						Name:        "container1",
+						ContainerID: fmt.Sprintf("docker://%s", id),
+						State: kapi.ContainerState{
+							Running: &kapi.ContainerStateRunning{},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	err := killUpdateFailedPods(fakeDocker, pods)
+	if err != nil {
+		t.Fatalf("Unexpected error killing update failed pods: %v", err)
+	}
+
+	// Infra container should be stopped
+	result, err := fakeDocker.InspectContainer(infraId)
+	if err != nil {
+		t.Fatalf("Unexpected error inspecting container: %v", err)
+	}
+	if result.State.Running != false {
+		t.Fatalf("Infra container was not stopped")
+	}
+
+	// Unrelated container should still be running
+	result, err = fakeDocker.InspectContainer(randomId)
+	if err != nil {
+		t.Fatalf("Unexpected error inspecting container: %v", err)
+	}
+	if result.State.Running != true {
+		t.Fatalf("Unrelated container was stopped")
+	}
+}
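Assuming a checkout of openshift/origin at this commit, the test above should run with the standard Go tooling from the repository root:

    go test ./pkg/sdn/plugin -run TestPodKillOnFailedUpdate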
