Skip to content

Commit 7a7770e

Browse files
committed
deploy: remove deployer pods on cancellation
1 parent df69a8b commit 7a7770e

File tree

2 files changed

+67
-84
lines changed

2 files changed

+67
-84
lines changed

pkg/deploy/controller/deployment/controller.go

Lines changed: 50 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -61,17 +61,19 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
6161
switch currentStatus {
6262
case deployapi.DeploymentStatusNew:
6363
// If the deployment has been cancelled, don't create a deployer pod.
64-
// Transition the deployment to Pending so that re-syncs will check
65-
// up on the deployer pods and so that the deployment config controller
66-
// continues to see the deployment as in-flight (which it is until we
67-
// have deployer pod outcomes).
64+
// Instead try to delete any deployer pods found and transition the
65+
// deployment to Pending so that the deployment config controller
66+
// continues to see the deployment as in-flight. Eventually the deletion
67+
// of the deployer pod should cause a requeue of this deployment and
68+
// then it can be transitioned to Failed by this controller.
6869
if deployutil.IsDeploymentCancelled(deployment) {
6970
nextStatus = deployapi.DeploymentStatusPending
70-
if err := c.cancelDeployerPods(deployment); err != nil {
71+
if err := c.cleanupDeployerPods(deployment); err != nil {
7172
return err
7273
}
7374
break
7475
}
76+
7577
// If the pod already exists, it's possible that a previous CreatePod
7678
// succeeded but the deployment state update failed and now we're re-
7779
// entering. Ensure that the pod is the one we created by verifying the
@@ -126,29 +128,34 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
126128
case deployapi.DeploymentStatusPending, deployapi.DeploymentStatusRunning:
127129
// If the deployer pod has vanished, consider the deployment a failure.
128130
deployerPodName := deployutil.DeployerPodNameForDeployment(deployment.Name)
129-
if _, err := c.podClient.getPod(deployment.Namespace, deployerPodName); err != nil {
130-
if kerrors.IsNotFound(err) {
131-
nextStatus = deployapi.DeploymentStatusFailed
131+
_, err := c.podClient.getPod(deployment.Namespace, deployerPodName)
132+
switch {
133+
case kerrors.IsNotFound(err):
134+
nextStatus = deployapi.DeploymentStatusFailed
135+
// If the deployment is cancelled here then we deleted the deployer in a previous
136+
// resync of the deployment.
137+
if !deployutil.IsDeploymentCancelled(deployment) {
132138
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(nextStatus)
133139
deployment.Annotations[deployapi.DeploymentStatusReasonAnnotation] = deployapi.DeploymentFailedDeployerPodNoLongerExists
134140
c.emitDeploymentEvent(deployment, kapi.EventTypeWarning, "Failed", fmt.Sprintf("Deployer pod %q has gone missing", deployerPodName))
135141
glog.V(4).Infof("Failing deployment %q because its deployer pod %q disappeared", deployutil.LabelForDeployment(deployment), deployerPodName)
136-
break
137-
} else {
138-
// We'll try again later on resync. Continue to process cancellations.
139-
glog.V(2).Infof("Error getting deployer pod %s for deployment %s: %#v", deployerPodName, deployutil.LabelForDeployment(deployment), err)
140142
}
141-
}
142143

143-
// If the deployment is cancelled, terminate any deployer/hook pods.
144-
// NOTE: Do not mark the deployment as Failed just yet.
145-
// The deployment will be marked as Failed by the deployer pod controller
146-
// when the deployer pod failure state is picked up.
147-
// Then, the deployment config controller will scale down the failed deployment
148-
// and scale back up the last successful completed deployment.
149-
if deployutil.IsDeploymentCancelled(deployment) {
150-
if err := c.cancelDeployerPods(deployment); err != nil {
151-
return err
144+
case err != nil:
145+
// We'll try again later on resync. Continue to process cancellations.
146+
glog.V(4).Infof("Error getting deployer pod %s for deployment %s: %#v", deployerPodName, deployutil.LabelForDeployment(deployment), err)
147+
148+
default: /* err == nil */
149+
// If the deployment has been cancelled, delete any deployer pods
150+
// found and transition the deployment to Pending so that the
151+
// deployment config controller continues to see the deployment
152+
// as in-flight. Eventually the deletion of the deployer pod should
153+
// cause a requeue of this deployment and then it can be transitioned
154+
// to Failed by this controller.
155+
if deployutil.IsDeploymentCancelled(deployment) {
156+
if err := c.cleanupDeployerPods(deployment); err != nil {
157+
return err
158+
}
152159
}
153160
}
154161
case deployapi.DeploymentStatusFailed:
@@ -157,38 +164,22 @@ func (c *DeploymentController) Handle(deployment *kapi.ReplicationController) er
157164
deploymentScaled = deployment.Spec.Replicas != 0
158165
deployment.Spec.Replicas = 0
159166
}
167+
// Try to cleanup once more a cancelled deployment in case hook pods
168+
// were created just after we issued the first cleanup request.
169+
if deployutil.IsDeploymentCancelled(deployment) {
170+
if err := c.cleanupDeployerPods(deployment); err != nil {
171+
return err
172+
}
173+
}
160174
case deployapi.DeploymentStatusComplete:
161175
// Check for test deployment and ensure the deployment scale matches
162176
if config, err := c.decodeConfig(deployment); err == nil && config.Spec.Test {
163177
deploymentScaled = deployment.Spec.Replicas != 0
164178
deployment.Spec.Replicas = 0
165179
}
166180

167-
// now list any pods in the namespace that have the specified label
168-
deployerPods, err := c.podClient.getDeployerPodsFor(deployment.Namespace, deployment.Name)
169-
if err != nil {
170-
return fmt.Errorf("couldn't fetch deployer pods for %s after successful completion: %v", deployutil.LabelForDeployment(deployment), err)
171-
}
172-
if len(deployerPods) > 0 {
173-
glog.V(4).Infof("Deleting %d deployer pods for deployment %s", len(deployerPods), deployutil.LabelForDeployment(deployment))
174-
}
175-
cleanedAll := true
176-
for _, deployerPod := range deployerPods {
177-
if err := c.podClient.deletePod(deployerPod.Namespace, deployerPod.Name); err != nil {
178-
if !kerrors.IsNotFound(err) {
179-
// if the pod deletion failed, then log the error and continue
180-
// we will try to delete any remaining deployer pods and return an error later
181-
utilruntime.HandleError(fmt.Errorf("couldn't delete completed deployer pod %s/%s for deployment %s: %v", deployment.Namespace, deployerPod.Name, deployutil.LabelForDeployment(deployment), err))
182-
cleanedAll = false
183-
}
184-
// Already deleted
185-
} else {
186-
glog.V(4).Infof("Deleted completed deployer pod %s/%s for deployment %s", deployment.Namespace, deployerPod.Name, deployutil.LabelForDeployment(deployment))
187-
}
188-
}
189-
190-
if !cleanedAll {
191-
return actionableError(fmt.Sprintf("couldn't clean up all deployer pods for %s", deployment.Name))
181+
if err := c.cleanupDeployerPods(deployment); err != nil {
182+
return err
192183
}
193184
}
194185

@@ -264,27 +255,24 @@ func (c *DeploymentController) makeDeployerPod(deployment *kapi.ReplicationContr
264255
return pod, nil
265256
}
266257

267-
func (c *DeploymentController) cancelDeployerPods(deployment *kapi.ReplicationController) error {
258+
func (c *DeploymentController) cleanupDeployerPods(deployment *kapi.ReplicationController) error {
268259
deployerPods, err := c.podClient.getDeployerPodsFor(deployment.Namespace, deployment.Name)
269260
if err != nil {
270-
return fmt.Errorf("couldn't fetch deployer pods for %s while trying to cancel deployment: %v", deployutil.LabelForDeployment(deployment), err)
261+
return fmt.Errorf("couldn't fetch deployer pods for %q: %v", deployutil.LabelForDeployment(deployment), err)
271262
}
272-
glog.V(4).Infof("Cancelling %d deployer pods for deployment %s", len(deployerPods), deployutil.LabelForDeployment(deployment))
273-
zeroDelay := int64(1)
274-
cleanedAll := len(deployerPods) > 0
263+
264+
cleanedAll := true
275265
for _, deployerPod := range deployerPods {
276-
// Set the ActiveDeadlineSeconds on the pod so it's terminated very soon.
277-
if deployerPod.Spec.ActiveDeadlineSeconds == nil || *deployerPod.Spec.ActiveDeadlineSeconds != zeroDelay {
278-
deployerPod.Spec.ActiveDeadlineSeconds = &zeroDelay
279-
if _, err := c.podClient.updatePod(deployerPod.Namespace, &deployerPod); err != nil {
280-
cleanedAll = false
281-
utilruntime.HandleError(fmt.Errorf("couldn't cancel deployer pod %s for deployment %s: %v", deployerPod.Name, deployutil.LabelForDeployment(deployment), err))
282-
}
283-
glog.V(4).Infof("Cancelled deployer pod %s for deployment %s", deployerPod.Name, deployutil.LabelForDeployment(deployment))
266+
if err := c.podClient.deletePod(deployerPod.Namespace, deployerPod.Name); err != nil && !kerrors.IsNotFound(err) {
267+
// if the pod deletion failed, then log the error and continue
268+
// we will try to delete any remaining deployer pods and return an error later
269+
utilruntime.HandleError(fmt.Errorf("couldn't delete completed deployer pod %q for deployment %q: %v", deployerPod.Name, deployutil.LabelForDeployment(deployment), err))
270+
cleanedAll = false
284271
}
285272
}
286-
if cleanedAll {
287-
c.emitDeploymentEvent(deployment, kapi.EventTypeNormal, "Cancelled", "Cancelled all deployer pods")
273+
274+
if !cleanedAll {
275+
return actionableError(fmt.Sprintf("couldn't clean up all deployer pods for %s", deployment.Name))
288276
}
289277
return nil
290278
}

pkg/deploy/controller/deployment/controller_test.go

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -673,9 +673,9 @@ func TestHandle_cancelNew(t *testing.T) {
673673
}
674674
}
675675

676-
func TestHandle_cancelNewWithDeployers(t *testing.T) {
676+
func TestHandle_cleanupNewWithDeployers(t *testing.T) {
677677
var updatedDeployment *kapi.ReplicationController
678-
updatedDeployer := false
678+
deletedDeployer := false
679679

680680
deployment, _ := deployutil.MakeDeployment(deploytest.OkDeploymentConfig(1), kapi.Codecs.LegacyCodec(deployapi.SchemeGroupVersion))
681681
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(deployapi.DeploymentStatusNew)
@@ -696,13 +696,13 @@ func TestHandle_cancelNewWithDeployers(t *testing.T) {
696696
t.Fatalf("unexpected call to make container")
697697
return nil, nil
698698
},
699-
updatePodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
700-
updatedDeployer = true
701-
return nil, nil
702-
},
703699
getDeployerPodsForFunc: func(namespace, name string) ([]kapi.Pod, error) {
704700
return []kapi.Pod{*relatedPod(deployment)}, nil
705701
},
702+
deletePodFunc: func(namespace, name string) error {
703+
deletedDeployer = true
704+
return nil
705+
},
706706
},
707707
makeContainer: func(strategy *deployapi.DeploymentStrategy) *kapi.Container {
708708
return okContainer()
@@ -718,16 +718,16 @@ func TestHandle_cancelNewWithDeployers(t *testing.T) {
718718
if e, a := deployapi.DeploymentStatusPending, deployutil.DeploymentStatusFor(updatedDeployment); e != a {
719719
t.Fatalf("expected deployment status %s, got %s", e, a)
720720
}
721-
if !updatedDeployer {
722-
t.Fatalf("expected deployer update")
721+
if !deletedDeployer {
722+
t.Fatalf("expected deployer delete")
723723
}
724724
}
725725

726-
// TestHandle_cancelPendingRunning ensures that deployer pods are terminated
726+
// TestHandle_cleanupPendingRunning ensures that deployer pods are deleted
727727
// for deployments in post-New phases.
728-
func TestHandle_cancelPendingRunning(t *testing.T) {
728+
func TestHandle_cleanupPendingRunning(t *testing.T) {
729729
deployerPodCount := 3
730-
updatedPods := []kapi.Pod{}
730+
deletedPods := 0
731731

732732
controller := &DeploymentController{
733733
decodeConfig: func(deployment *kapi.ReplicationController) (*deployapi.DeploymentConfig, error) {
@@ -744,9 +744,9 @@ func TestHandle_cancelPendingRunning(t *testing.T) {
744744
getPodFunc: func(namespace, name string) (*kapi.Pod, error) {
745745
return ttlNonZeroPod(), nil
746746
},
747-
updatePodFunc: func(namespace string, pod *kapi.Pod) (*kapi.Pod, error) {
748-
updatedPods = append(updatedPods, *pod)
749-
return pod, nil
747+
deletePodFunc: func(namespace, name string) error {
748+
deletedPods++
749+
return nil
750750
},
751751
getDeployerPodsForFunc: func(namespace, name string) ([]kapi.Pod, error) {
752752
pods := []kapi.Pod{}
@@ -768,7 +768,7 @@ func TestHandle_cancelPendingRunning(t *testing.T) {
768768
}
769769

770770
for _, status := range cases {
771-
updatedPods = []kapi.Pod{}
771+
deletedPods = 0
772772
deployment, _ := deployutil.MakeDeployment(deploytest.OkDeploymentConfig(1), kapi.Codecs.LegacyCodec(deployapi.SchemeGroupVersion))
773773
deployment.Annotations[deployapi.DeploymentStatusAnnotation] = string(status)
774774
deployment.Annotations[deployapi.DeploymentCancelledAnnotation] = deployapi.DeploymentCancelledAnnotationValue
@@ -777,13 +777,8 @@ func TestHandle_cancelPendingRunning(t *testing.T) {
777777
t.Fatalf("unexpected error: %v", err)
778778
}
779779

780-
if e, a := len(updatedPods), deployerPodCount; e != a {
781-
t.Fatalf("expected %d updated pods, got %d", e, a)
782-
}
783-
for _, pod := range updatedPods {
784-
if e, a := int64(1), *pod.Spec.ActiveDeadlineSeconds; e != a {
785-
t.Errorf("expected ActiveDeadlineSeconds %d, got %d", e, a)
786-
}
780+
if e, a := deletedPods, deployerPodCount; e != a {
781+
t.Fatalf("expected %d deleted pods, got %d", e, a)
787782
}
788783
}
789784
}

0 commit comments

Comments
 (0)