Skip to content

Commit 5edc6a9

Browse files
Merge pull request #14954 from tnozicka/fix-minreadyseconds-for-dc-final
Automatic merge from submit-queue. Fix minReadySeconds for DC. Follow-up to #14936 (which needs to be merged first). Makes AvailableReplicas work with MinReadySeconds set. Removes the obsolete pod counting, which overlapped with the AvailableReplicas reported by the RC. This was causing the RC to be in a state where AvailableReplicas=0 and deployment-phase=Complete with about a 50% chance; this state lasts for a very short time. [Outdated] At this time, ignore the first 2 commits, which are part of #14936 (because that isn't merged yet).
2 parents 2cbd190 + 50c9a66 commit 5edc6a9

File tree

6 files changed

+40
-255
lines changed

6 files changed

+40
-255
lines changed

pkg/deploy/strategy/recreate/recreate.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ func NewRecreateDeploymentStrategy(client kclientset.Interface, tagClient client
9191
eventClient: client.Core(),
9292
podClient: client.Core(),
9393
getUpdateAcceptor: func(timeout time.Duration, minReadySeconds int32) strat.UpdateAcceptor {
94-
return stratsupport.NewAcceptAvailablePods(out, client.Core(), timeout, acceptorInterval, minReadySeconds)
94+
return stratsupport.NewAcceptAvailablePods(out, client.Core(), timeout)
9595
},
9696
scaler: scaler,
9797
decoder: decoder,

pkg/deploy/strategy/rolling/rolling.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ func NewRollingDeploymentStrategy(namespace string, client kclientset.Interface,
114114
},
115115
hookExecutor: stratsupport.NewHookExecutor(client.Core(), tags, client.Core(), os.Stdout, decoder),
116116
getUpdateAcceptor: func(timeout time.Duration, minReadySeconds int32) strat.UpdateAcceptor {
117-
return stratsupport.NewAcceptAvailablePods(out, client.Core(), timeout, acceptorInterval, minReadySeconds)
117+
return stratsupport.NewAcceptAvailablePods(out, client.Core(), timeout)
118118
},
119119
}
120120
}

pkg/deploy/strategy/support/lifecycle.go

Lines changed: 26 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,16 @@ import (
77
"sync"
88
"time"
99

10-
"github.com/golang/glog"
11-
1210
kerrors "k8s.io/apimachinery/pkg/api/errors"
1311
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1412
"k8s.io/apimachinery/pkg/fields"
15-
"k8s.io/apimachinery/pkg/labels"
1613
"k8s.io/apimachinery/pkg/runtime"
1714
utilerrors "k8s.io/apimachinery/pkg/util/errors"
1815
"k8s.io/apimachinery/pkg/util/sets"
1916
"k8s.io/apimachinery/pkg/util/wait"
2017
"k8s.io/apimachinery/pkg/watch"
2118
"k8s.io/client-go/tools/cache"
2219
kapi "k8s.io/kubernetes/pkg/api"
23-
kapipod "k8s.io/kubernetes/pkg/api/pod"
2420
kcoreclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/internalversion"
2521

2622
"github.com/openshift/origin/pkg/client"
@@ -482,117 +478,53 @@ func newPodWatch(client kcoreclient.PodInterface, namespace, name, resourceVersi
482478
// NewAcceptAvailablePods makes a new acceptAvailablePods from a real client.
483479
func NewAcceptAvailablePods(
484480
out io.Writer,
485-
kclient kcoreclient.PodsGetter,
481+
kclient kcoreclient.ReplicationControllersGetter,
486482
timeout time.Duration,
487-
interval time.Duration,
488-
minReadySeconds int32,
489483
) *acceptAvailablePods {
490-
491484
return &acceptAvailablePods{
492-
out: out,
493-
timeout: timeout,
494-
interval: interval,
495-
minReadySeconds: minReadySeconds,
496-
acceptedPods: sets.NewString(),
497-
getRcPodStore: func(rc *kapi.ReplicationController) (cache.Store, chan struct{}) {
498-
selector := labels.Set(rc.Spec.Selector).AsSelector()
499-
store := cache.NewStore(cache.MetaNamespaceKeyFunc)
500-
lw := &cache.ListWatch{
501-
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
502-
options.LabelSelector = selector.String()
503-
return kclient.Pods(rc.Namespace).List(options)
504-
},
505-
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
506-
options.LabelSelector = selector.String()
507-
return kclient.Pods(rc.Namespace).Watch(options)
508-
},
509-
}
510-
stop := make(chan struct{})
511-
cache.NewReflector(lw, &kapi.Pod{}, store, 10*time.Second).RunUntil(stop)
512-
return store, stop
513-
},
485+
out: out,
486+
kclient: kclient,
487+
timeout: timeout,
514488
}
515489
}
516490

517491
// acceptAvailablePods will accept a replication controller if all the pods
518492
// for the replication controller become available.
519-
//
520-
// acceptAvailablePods keeps track of the pods it has accepted for a
521-
// replication controller so that the acceptor can be reused across multiple
522-
// batches of updates to a single controller. For example, if during the first
523-
// acceptance call the replication controller has 3 pods, the acceptor will
524-
// validate those 3 pods. If the same acceptor instance is used again for the
525-
// same replication controller which now has 6 pods, only the latest 3 pods
526-
// will be considered for acceptance. The status of the original 3 pods becomes
527-
// irrelevant.
528-
//
529-
// Note that this struct is stateful and intended for use with a single
530-
// replication controller and should be discarded and recreated between
531-
// rollouts.
532493
type acceptAvailablePods struct {
533-
out io.Writer
534-
// getRcPodStore should return a Store containing all the pods for the
535-
// replication controller, and a channel to stop whatever process is
536-
// feeding the store.
537-
getRcPodStore func(*kapi.ReplicationController) (cache.Store, chan struct{})
538-
// timeout is how long to wait for pod readiness.
494+
out io.Writer
495+
kclient kcoreclient.ReplicationControllersGetter
496+
// timeout is how long to wait for pods to become available from ready state.
539497
timeout time.Duration
540-
// interval is how often to check for pod readiness
541-
interval time.Duration
542-
// minReadySeconds is the minimum number of seconds for which a newly created
543-
// pod should be ready without any of its container crashing, for it to be
544-
// considered available.
545-
minReadySeconds int32
546-
// acceptedPods keeps track of pods which have been previously accepted for
547-
// a replication controller.
548-
acceptedPods sets.String
549498
}
550499

551500
// Accept all pods for a replication controller once they are available.
552501
func (c *acceptAvailablePods) Accept(rc *kapi.ReplicationController) error {
553-
// Make a pod store to poll and ensure it gets cleaned up.
554-
podStore, stopStore := c.getRcPodStore(rc)
555-
defer close(stopStore)
502+
allReplicasAvailable := func(r *kapi.ReplicationController) bool {
503+
return r.Status.AvailableReplicas == r.Spec.Replicas
504+
}
556505

557-
// Start checking for pod updates.
558-
if c.acceptedPods.Len() > 0 {
559-
fmt.Fprintf(c.out, "--> Waiting up to %s for pods in rc %s to become ready (%d pods previously accepted)\n", c.timeout, rc.Name, c.acceptedPods.Len())
560-
} else {
561-
fmt.Fprintf(c.out, "--> Waiting up to %s for pods in rc %s to become ready\n", c.timeout, rc.Name)
562-
}
563-
err := wait.Poll(c.interval, c.timeout, func() (done bool, err error) {
564-
// Check for pod readiness.
565-
unready := sets.NewString()
566-
for _, obj := range podStore.List() {
567-
pod := obj.(*kapi.Pod)
568-
// Skip previously accepted pods; we only want to verify newly observed
569-
// and unaccepted pods.
570-
if c.acceptedPods.Has(pod.Name) {
571-
continue
572-
}
573-
if kapipod.IsPodAvailable(pod, c.minReadySeconds, metav1.NewTime(time.Now())) {
574-
// If the pod is ready, track it as accepted.
575-
c.acceptedPods.Insert(pod.Name)
576-
} else {
577-
// Otherwise, track it as unready.
578-
unready.Insert(pod.Name)
579-
}
580-
}
581-
// Check to see if we're done.
582-
if unready.Len() == 0 {
583-
return true, nil
506+
if allReplicasAvailable(rc) {
507+
return nil
508+
}
509+
510+
watcher, err := c.kclient.ReplicationControllers(rc.Namespace).Watch(metav1.SingleObject(metav1.ObjectMeta{Name: rc.Name, ResourceVersion: rc.ResourceVersion}))
511+
if err != nil {
512+
return fmt.Errorf("acceptAvailablePods failed to watch ReplicationController %s/%s: %v", rc.Namespace, rc.Name, err)
513+
}
514+
515+
_, err = watch.Until(c.timeout, watcher, func(event watch.Event) (bool, error) {
516+
if t := event.Type; t != watch.Modified {
517+
return false, fmt.Errorf("acceptAvailablePods failed watching for ReplicationController %s/%s: received event %v", rc.Namespace, rc.Name, t)
584518
}
585-
// Otherwise, try again later.
586-
glog.V(4).Infof("Still waiting for %d pods to become ready for rc %s", unready.Len(), rc.Name)
587-
return false, nil
519+
newRc := event.Object.(*kapi.ReplicationController)
520+
return allReplicasAvailable(newRc), nil
588521
})
589-
590522
// Handle acceptance failure.
591523
if err != nil {
592524
if err == wait.ErrWaitTimeout {
593-
return fmt.Errorf("pods for rc %q took longer than %.f seconds to become ready", rc.Name, c.timeout.Seconds())
525+
return fmt.Errorf("pods for rc '%s/%s' took longer than %.f seconds to become available", rc.Namespace, rc.Name, c.timeout.Seconds())
594526
}
595-
return fmt.Errorf("pod readiness check failed for rc %q: %v", rc.Name, err)
527+
return err
596528
}
597529
return nil
598530
}

pkg/deploy/strategy/support/lifecycle_test.go

Lines changed: 0 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,8 @@ import (
1515
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1616
"k8s.io/apimachinery/pkg/runtime"
1717
"k8s.io/apimachinery/pkg/util/diff"
18-
"k8s.io/apimachinery/pkg/util/sets"
1918
"k8s.io/apimachinery/pkg/watch"
2019
clientgotesting "k8s.io/client-go/testing"
21-
"k8s.io/client-go/tools/cache"
2220
kapi "k8s.io/kubernetes/pkg/api"
2321
kapihelper "k8s.io/kubernetes/pkg/api/helper"
2422
"k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
@@ -542,116 +540,6 @@ func TestHookExecutor_makeHookPodRestart(t *testing.T) {
542540
}
543541
}
544542

545-
func TestAcceptAvailablePods_scenarios(t *testing.T) {
546-
scenarios := []struct {
547-
name string
548-
// any pods which are previously accepted
549-
acceptedPods []string
550-
// the current pods which will be in the store; pod name -> ready
551-
currentPods map[string]bool
552-
// whether or not the scenario should result in acceptance
553-
accepted bool
554-
}{
555-
{
556-
name: "all ready, none previously accepted",
557-
accepted: true,
558-
acceptedPods: []string{},
559-
currentPods: map[string]bool{
560-
"pod-1": true,
561-
"pod-2": true,
562-
},
563-
},
564-
{
565-
name: "some ready, none previously accepted",
566-
accepted: false,
567-
acceptedPods: []string{},
568-
currentPods: map[string]bool{
569-
"pod-1": false,
570-
"pod-2": true,
571-
},
572-
},
573-
{
574-
name: "previously accepted has become unready, new are ready",
575-
accepted: true,
576-
acceptedPods: []string{"pod-1"},
577-
currentPods: map[string]bool{
578-
// this pod should be ignored because it was previously accepted
579-
"pod-1": false,
580-
"pod-2": true,
581-
},
582-
},
583-
{
584-
name: "previously accepted all ready, new is unready",
585-
accepted: false,
586-
acceptedPods: []string{"pod-1"},
587-
currentPods: map[string]bool{
588-
"pod-1": true,
589-
"pod-2": false,
590-
},
591-
},
592-
}
593-
for _, s := range scenarios {
594-
t.Logf("running scenario: %s", s.name)
595-
596-
// Populate the store with real pods with the desired ready condition.
597-
store := cache.NewStore(cache.MetaNamespaceKeyFunc)
598-
for podName, ready := range s.currentPods {
599-
status := kapi.ConditionTrue
600-
if !ready {
601-
status = kapi.ConditionFalse
602-
}
603-
pod := &kapi.Pod{
604-
ObjectMeta: metav1.ObjectMeta{
605-
Name: podName,
606-
},
607-
Status: kapi.PodStatus{
608-
Conditions: []kapi.PodCondition{
609-
{
610-
Type: kapi.PodReady,
611-
Status: status,
612-
},
613-
},
614-
},
615-
}
616-
store.Add(pod)
617-
}
618-
619-
// Set up accepted pods for the scenario.
620-
acceptedPods := sets.NewString()
621-
for _, podName := range s.acceptedPods {
622-
acceptedPods.Insert(podName)
623-
}
624-
625-
acceptorLogs := &bytes.Buffer{}
626-
acceptor := &acceptAvailablePods{
627-
out: acceptorLogs,
628-
timeout: 10 * time.Millisecond,
629-
interval: 1 * time.Millisecond,
630-
getRcPodStore: func(deployment *kapi.ReplicationController) (cache.Store, chan struct{}) {
631-
return store, make(chan struct{})
632-
},
633-
acceptedPods: acceptedPods,
634-
}
635-
636-
deployment, _ := deployutil.MakeDeployment(deploytest.OkDeploymentConfig(1), kapi.Codecs.LegacyCodec(deployv1.SchemeGroupVersion))
637-
deployment.Spec.Replicas = 1
638-
639-
acceptor.out = &bytes.Buffer{}
640-
err := acceptor.Accept(deployment)
641-
642-
if s.accepted {
643-
if err != nil {
644-
t.Fatalf("unexpected error: %s", err)
645-
}
646-
} else {
647-
if err == nil {
648-
t.Fatalf("expected an error")
649-
}
650-
t.Logf("got expected error: %s", err)
651-
}
652-
}
653-
}
654-
655543
func deployment(name, namespace string, strategyLabels, strategyAnnotations map[string]string) (*deployapi.DeploymentConfig, *kapi.ReplicationController) {
656544
config := &deployapi.DeploymentConfig{
657545
ObjectMeta: metav1.ObjectMeta{

test/extended/deployments/deployments.go

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
package deployments
22

33
import (
4-
//"errors"
4+
"errors"
55
"fmt"
66
"math/rand"
77
"strings"
@@ -941,31 +941,22 @@ var _ = g.Describe("deploymentconfigs", func() {
941941
return true, nil
942942
}
943943

944-
// FIXME: There is a race between deployer pod updating phase and RC updating AvailableReplicas
945-
// FIXME: Enable this when we switch pod acceptors to use RC AvailableReplicas with MinReadySecondsSet
946-
//if deployutil.DeploymentStatusFor(rc) == deployapi.DeploymentStatusComplete {
947-
// e2e.Logf("Failed RC: %#v", rc)
948-
// return false, errors.New("deployment shouldn't be completed before ReadyReplicas become AvailableReplicas")
949-
//}
944+
if deployutil.DeploymentStatusFor(rc) == deployapi.DeploymentStatusComplete {
945+
e2e.Logf("Failed RC: %#v", rc)
946+
return false, errors.New("deployment shouldn't be completed before ReadyReplicas become AvailableReplicas")
947+
}
950948
return false, nil
951949
})
952950
o.Expect(err).NotTo(o.HaveOccurred())
953951
o.Expect(rc1.Status.AvailableReplicas).To(o.Equal(dc.Spec.Replicas))
954-
// FIXME: There is a race between deployer pod updating phase and RC updating AvailableReplicas
955-
// FIXME: Enable this when we switch pod acceptors to use RC AvailableReplicas with MinReadySecondsSet
956-
//// Deployment status can't be updated yet but should be right after
957-
//o.Expect(deployutil.DeploymentStatusFor(rc1)).To(o.Equal(deployapi.DeploymentStatusRunning))
952+
// Deployment status can't be updated yet but should be right after
953+
o.Expect(deployutil.DeploymentStatusFor(rc1)).To(o.Equal(deployapi.DeploymentStatusRunning))
958954
// It should finish right after
959-
// FIXME: remove this condition when the above is fixed
960-
if deployutil.DeploymentStatusFor(rc1) != deployapi.DeploymentStatusComplete {
961-
// FIXME: remove this assertion when the above is fixed
962-
o.Expect(deployutil.DeploymentStatusFor(rc1)).To(o.Equal(deployapi.DeploymentStatusRunning))
963-
rc1, err = waitForRCModification(oc, namespace, rc1.Name, deploymentChangeTimeout,
964-
rc1.GetResourceVersion(), func(rc *kapiv1.ReplicationController) (bool, error) {
965-
return deployutil.DeploymentStatusFor(rc) == deployapi.DeploymentStatusComplete, nil
966-
})
967-
o.Expect(err).NotTo(o.HaveOccurred())
968-
}
955+
rc1, err = waitForRCModification(oc, namespace, rc1.Name, deploymentChangeTimeout,
956+
rc1.GetResourceVersion(), func(rc *kapiv1.ReplicationController) (bool, error) {
957+
return deployutil.DeploymentStatusFor(rc) == deployapi.DeploymentStatusComplete, nil
958+
})
959+
o.Expect(err).NotTo(o.HaveOccurred())
969960

970961
// We might check that minReadySecond passed between pods becoming ready
971962
// and available but I don't think there is a way to get a timestamp from events

vendor/k8s.io/kubernetes/pkg/api/pod/util_patch.go

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)