openshift · openshift-merge-robot · Apr 12, 2018 · Apr 9, 2018 · mfojtik · Apr 11, 2018
diff --git a/pkg/apps/strategy/recreate/recreate.go b/pkg/apps/strategy/recreate/recreate.go
@@ -14,11 +14,12 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apimachinery/pkg/watch"
+	"k8s.io/client-go/scale"
 	"k8s.io/client-go/tools/record"
+	"k8s.io/client-go/util/retry"
 	kapi "k8s.io/kubernetes/pkg/apis/core"
 	kclientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
 	kcoreclient "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/typed/core/internalversion"
-	"k8s.io/kubernetes/pkg/kubectl"
 
 	appsapi "github.com/openshift/origin/pkg/apps/apis/apps"
 	strat "github.com/openshift/origin/pkg/apps/strategy"
@@ -41,30 +42,27 @@ type RecreateDeploymentStrategy struct {
 	until string
 	// rcClient is a client to access replication controllers
 	rcClient kcoreclient.ReplicationControllersGetter
+	// scaleClient is a client to access scaling
+	scaleClient scale.ScalesGetter
 	// podClient is used to list and watch pods.
 	podClient kcoreclient.PodsGetter
 	// eventClient is a client to access events
 	eventClient kcoreclient.EventsGetter
 	// getUpdateAcceptor returns an UpdateAcceptor to verify the first replica
 	// of the deployment.
 	getUpdateAcceptor func(time.Duration, int32) strat.UpdateAcceptor
-	// scaler is used to scale replication controllers.
-	scaler kubectl.Scaler
 	// codec is used to decode DeploymentConfigs contained in deployments.
 	decoder runtime.Decoder
 	// hookExecutor can execute a lifecycle hook.
 	hookExecutor stratsupport.HookExecutor
-	// retryPeriod is how often to try updating the replica count.
-	retryPeriod time.Duration
-	// retryParams encapsulates the retry parameters
-	retryParams *kubectl.RetryParams
 	// events records the events
 	events record.EventSink
 }
 
 // NewRecreateDeploymentStrategy makes a RecreateDeploymentStrategy backed by
 // a real HookExecutor and client.
-func NewRecreateDeploymentStrategy(client kclientset.Interface, tagClient imageclient.ImageStreamTagsGetter, events record.EventSink, decoder runtime.Decoder, out, errOut io.Writer, until string) *RecreateDeploymentStrategy {
+func NewRecreateDeploymentStrategy(client kclientset.Interface, tagClient imageclient.ImageStreamTagsGetter,
+	events record.EventSink, decoder runtime.Decoder, out, errOut io.Writer, until string) *RecreateDeploymentStrategy {
 	if out == nil {
 		out = ioutil.Discard
 	}
@@ -78,15 +76,14 @@ func NewRecreateDeploymentStrategy(client kclientset.Interface, tagClient imagec
 		events:      events,
 		until:       until,
 		rcClient:    client.Core(),
+		scaleClient: appsutil.NewReplicationControllerV1ScaleClient(client),
 		eventClient: client.Core(),
 		podClient:   client.Core(),
 		getUpdateAcceptor: func(timeout time.Duration, minReadySeconds int32) strat.UpdateAcceptor {
 			return stratsupport.NewAcceptAvailablePods(out, client.Core(), timeout)
 		},
-		scaler:       appsutil.NewReplicationControllerV1Scaler(client),
 		decoder:      decoder,
 		hookExecutor: stratsupport.NewHookExecutor(client.Core(), tagClient, client.Core(), os.Stdout, decoder),
-		retryPeriod:  1 * time.Second,
 	}
 }
 
@@ -108,25 +105,22 @@ func (s *RecreateDeploymentStrategy) DeployWithAcceptor(from *kapi.ReplicationCo
 		return fmt.Errorf("couldn't decode config from deployment %s: %v", to.Name, err)
 	}
 
-	retryTimeout := time.Duration(appsapi.DefaultRecreateTimeoutSeconds) * time.Second
+	recreateTimeout := time.Duration(appsapi.DefaultRecreateTimeoutSeconds) * time.Second
 	params := config.Spec.Strategy.RecreateParams
 	rollingParams := config.Spec.Strategy.RollingParams
 
 	if params != nil && params.TimeoutSeconds != nil {
-		retryTimeout = time.Duration(*params.TimeoutSeconds) * time.Second
+		recreateTimeout = time.Duration(*params.TimeoutSeconds) * time.Second
 	}
 
 	// When doing the initial rollout for rolling strategy we use recreate and for that we
 	// have to set the TimeoutSecond based on the rollling strategy parameters.
 	if rollingParams != nil && rollingParams.TimeoutSeconds != nil {
-		retryTimeout = time.Duration(*rollingParams.TimeoutSeconds) * time.Second
+		recreateTimeout = time.Duration(*rollingParams.TimeoutSeconds) * time.Second
 	}
 
-	s.retryParams = kubectl.NewRetryParams(s.retryPeriod, retryTimeout)
-	waitParams := kubectl.NewRetryParams(s.retryPeriod, retryTimeout)
-
 	if updateAcceptor == nil {
-		updateAcceptor = s.getUpdateAcceptor(retryTimeout, config.Spec.MinReadySeconds)
+		updateAcceptor = s.getUpdateAcceptor(recreateTimeout, config.Spec.MinReadySeconds)
 	}
 
 	// Execute any pre-hook.
@@ -147,7 +141,7 @@ func (s *RecreateDeploymentStrategy) DeployWithAcceptor(from *kapi.ReplicationCo
 	// Scale down the from deployment.
 	if from != nil {
 		fmt.Fprintf(s.out, "--> Scaling %s down to zero\n", from.Name)
-		_, err := s.scaleAndWait(from, 0, s.retryParams, waitParams)
+		_, err := s.scaleAndWait(from, 0, recreateTimeout)
 		if err != nil {
 			return fmt.Errorf("couldn't scale %s to 0: %v", from.Name, err)
 		}
@@ -177,7 +171,7 @@ func (s *RecreateDeploymentStrategy) DeployWithAcceptor(from *kapi.ReplicationCo
 			// Scale up to 1 and validate the replica,
 			// aborting if the replica isn't acceptable.
 			fmt.Fprintf(s.out, "--> Scaling %s to 1 before performing acceptance check\n", to.Name)
-			updatedTo, err := s.scaleAndWait(to, 1, s.retryParams, waitParams)
+			updatedTo, err := s.scaleAndWait(to, 1, recreateTimeout)
 			if err != nil {
 				return fmt.Errorf("couldn't scale %s to 1: %v", to.Name, err)
 			}
@@ -195,7 +189,7 @@ func (s *RecreateDeploymentStrategy) DeployWithAcceptor(from *kapi.ReplicationCo
 		// Complete the scale up.
 		if to.Spec.Replicas != int32(desiredReplicas) {
 			fmt.Fprintf(s.out, "--> Scaling %s to %d\n", to.Name, desiredReplicas)
-			updatedTo, err := s.scaleAndWait(to, desiredReplicas, s.retryParams, waitParams)
+			updatedTo, err := s.scaleAndWait(to, desiredReplicas, recreateTimeout)
 			if err != nil {
 				return fmt.Errorf("couldn't scale %s to %d: %v", to.Name, desiredReplicas, err)
 			}
@@ -224,32 +218,48 @@ func (s *RecreateDeploymentStrategy) DeployWithAcceptor(from *kapi.ReplicationCo
 	return nil
 }
 
-func (s *RecreateDeploymentStrategy) scaleAndWait(deployment *kapi.ReplicationController, replicas int, retry *kubectl.RetryParams, retryParams *kubectl.RetryParams) (*kapi.ReplicationController, error) {
+func (s *RecreateDeploymentStrategy) scaleAndWait(deployment *kapi.ReplicationController, replicas int, retryTimeout time.Duration) (*kapi.ReplicationController, error) {
 	if int32(replicas) == deployment.Spec.Replicas && int32(replicas) == deployment.Status.Replicas {
 		return deployment, nil
 	}
-	var scaleErr error
-	err := wait.PollImmediate(1*time.Second, 30*time.Second, func() (bool, error) {
-		scaleErr = s.scaler.Scale(deployment.Namespace, deployment.Name, uint(replicas), &kubectl.ScalePrecondition{Size: -1, ResourceVersion: ""}, retry, retryParams, kapi.Resource("replicationcontrollers"))
-		if scaleErr == nil {
-			return true, nil
-		}
-		// This error is returned when the lifecycle admission plugin cache is not fully
-		// synchronized. In that case the scaling should be retried.
-		//
-		// FIXME: The error returned from admission should not be forbidden but come-back-later error.
-		if errors.IsForbidden(scaleErr) && strings.Contains(scaleErr.Error(), "not yet ready to handle request") {
+	alreadyScaled := false
+	// Scale the replication controller.
+	// In case the cache is not fully synced, retry the scaling.
+	err := wait.PollImmediate(1*time.Second, retryTimeout, func() (bool, error) {
+		updateScaleErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
+			curScale, err := s.scaleClient.Scales(deployment.Namespace).Get(kapi.Resource("replicationcontrollers"), deployment.Name)
+			if err != nil {
+				return err
+			}
+			if curScale.Status.Replicas == int32(replicas) {
+				alreadyScaled = true
+				return nil
+			}
+			curScaleCopy := curScale.DeepCopy()
+			curScaleCopy.Spec.Replicas = int32(replicas)
+			_, scaleErr := s.scaleClient.Scales(deployment.Namespace).Update(kapi.Resource("replicationcontrollers"), curScaleCopy)
+			return scaleErr
+		})
+		// FIXME: The error admission returns here should be 503 (come back later) or similar.
+		if errors.IsForbidden(updateScaleErr) && strings.Contains(updateScaleErr.Error(), "not yet ready to handle request") {
 			return false, nil
 		}
-		return false, scaleErr
+		return true, updateScaleErr
 	})
-	if err == wait.ErrWaitTimeout {
-		return nil, fmt.Errorf("%v: %v", err, scaleErr)
-	}
 	if err != nil {
 		return nil, err
 	}
-
+	// Wait for the scale to take effect.
+	if !alreadyScaled {
+		// FIXME: This should really be a watch, however the scaler client does not implement the watch interface atm.
+		err = wait.PollImmediate(1*time.Second, retryTimeout, func() (bool, error) {
+			curScale, err := s.scaleClient.Scales(deployment.Namespace).Get(kapi.Resource("replicationcontrollers"), deployment.Name)
+			if err != nil {
+				return false, err
+			}
+			return curScale.Status.Replicas == int32(replicas), nil
+		})
+	}
 	return s.rcClient.ReplicationControllers(deployment.Namespace).Get(deployment.Name, metav1.GetOptions{})
 }