
ensure next build is kicked off when a build completes #13670


Merged · 1 commit · Apr 9, 2017
39 changes: 29 additions & 10 deletions pkg/build/controller/buildpod/controller.go
@@ -121,6 +121,9 @@ func (bc *BuildPodController) HandlePod(pod *kapi.Pod) error {
 	nextStatus := build.Status.Phase
 	currentReason := build.Status.Reason
 
+	// if the build is marked failed, the build status reason has already been
+	// set (probably by the build pod itself), so don't do any updating here
+	// or we'll overwrite the correct value.

[inline review]
Contributor: Should we have utility functions that delineate which non-successful BuildPhases are set by the build pod (where it is currently just Failed) and which are set outside of the build pod (i.e. BuildPhaseError), in case those two lists change over time?

Contributor (Author): I don't anticipate that set changing. If it does, that would be a good time to introduce a utility function, but I don't think it needs to be done here and now.

Contributor: ok
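
For concreteness, a minimal sketch of the helpers proposed in this thread might look like the following (hypothetical names, not part of this PR; assumes the usual buildapi import for pkg/build/api):

// isPhaseSetByBuildPod reports whether a non-successful build phase is
// written by the build pod itself; per the discussion above, that is
// currently only BuildPhaseFailed.
func isPhaseSetByBuildPod(phase buildapi.BuildPhase) bool {
	return phase == buildapi.BuildPhaseFailed
}

// isPhaseSetOutsideBuildPod reports whether a non-successful build phase is
// written by the controllers instead, e.g. BuildPhaseError.
func isPhaseSetOutsideBuildPod(phase buildapi.BuildPhase) bool {
	return phase == buildapi.BuildPhaseError
}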

 	if build.Status.Phase != buildapi.BuildPhaseFailed {
 		switch pod.Status.Phase {
 		case kapi.PodPending:
@@ -176,33 +179,48 @@ func (bc *BuildPodController) HandlePod(pod *kapi.Pod) error {
build.Status.Message = ""
}
}

needsUpdate := false
// Update the build object when it progress to a next state or the reason for
// the current state changed.
if (!common.HasBuildPodNameAnnotation(build) || build.Status.Phase != nextStatus || build.Status.Phase == buildapi.BuildPhaseFailed) && !buildutil.IsBuildComplete(build) {
// the current state changed. Do not touch builds that are complete
// because we'd potentially be overwriting build state information set by the
// build pod directly.
if (!common.HasBuildPodNameAnnotation(build) || build.Status.Phase != nextStatus) && !buildutil.IsBuildComplete(build) {
needsUpdate = true
common.SetBuildPodNameAnnotation(build, pod.Name)
reason := ""
if len(build.Status.Reason) > 0 {
reason = " (" + string(build.Status.Reason) + ")"
}
glog.V(4).Infof("Updating build %s/%s status %s -> %s%s", build.Namespace, build.Name, build.Status.Phase, nextStatus, reason)
build.Status.Phase = nextStatus

if buildutil.IsBuildComplete(build) {
common.SetBuildCompletionTimeAndDuration(build)
}
if build.Status.Phase == buildapi.BuildPhaseRunning {
now := unversioned.Now()
build.Status.StartTimestamp = &now
}
}

// we're going to get pod relist events for completed/failed pods,
// there's no reason to re-update the build and rerun
// HandleBuildCompletion if we've already done it for this
// build previously.
buildWasComplete := build.Status.CompletionTimestamp != nil

[inline review]
Contributor: just a nit ... maybe a better name for the variable would be completionTimestampIsSet? or something shorter

+	if !buildWasComplete && buildutil.IsBuildComplete(build) && build.Status.Phase != buildapi.BuildPhaseCancelled {
+		needsUpdate = common.SetBuildCompletionTimeAndDuration(build)
+	}
+	if needsUpdate {
 		if err := bc.buildUpdater.Update(build.Namespace, build); err != nil {
 			return fmt.Errorf("failed to update build %s/%s: %v", build.Namespace, build.Name, err)
 		}
 		glog.V(4).Infof("Build %s/%s status was updated to %s", build.Namespace, build.Name, build.Status.Phase)
-
-		if buildutil.IsBuildComplete(build) {
-			common.HandleBuildCompletion(build, bc.runPolicies)
-		}
 	}
+	// if the build was not previously marked complete but it's complete now,
+	// handle completion for it. otherwise ignore it because we've already
+	// handled its completion previously.
+	if !buildWasComplete && buildutil.IsBuildComplete(build) {
+		common.HandleBuildCompletion(build, bc.runPolicies)
+	}
+
 	return nil
 }

@@ -244,6 +262,7 @@ func (bc *BuildPodController) HandleBuildPodDeletion(pod *kapi.Pod) error {
 		if err := bc.buildUpdater.Update(build.Namespace, build); err != nil {
 			return fmt.Errorf("Failed to update build %s/%s: %v", build.Namespace, build.Name, err)
 		}
+		common.HandleBuildCompletion(build, bc.runPolicies)
 	}
 	return nil
 }
6 changes: 3 additions & 3 deletions pkg/build/controller/buildpod/controller_test.go
@@ -158,7 +158,7 @@ func TestHandlePod(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseComplete,
 			podStatus:           kapi.PodSucceeded,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 4
@@ -167,7 +167,7 @@ func TestHandlePod(t *testing.T) {
 			outStatus:           buildapi.BuildPhaseFailed,
 			podStatus:           kapi.PodFailed,
 			exitCode:            -1,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 5
@@ -177,7 +177,7 @@ func TestHandlePod(t *testing.T) {
 			podStatus:           kapi.PodSucceeded,
 			exitCode:            0,
 			buildUpdater:        &errBuildUpdater{},
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 6
17 changes: 14 additions & 3 deletions pkg/build/controller/common/util.go
@@ -11,12 +11,23 @@ import (
 	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
 )
 
-func SetBuildCompletionTimeAndDuration(build *buildapi.Build) {
+// SetBuildCompletionTimeAndDuration will set the build completion timestamp
+// to the current time if it is nil. It will also set the start timestamp to
+// the same value if it is nil. Returns true if the build object was
+// modified.
+func SetBuildCompletionTimeAndDuration(build *buildapi.Build) bool {
+	if build.Status.CompletionTimestamp != nil {
+		return false
+	}
 	now := unversioned.Now()
 	build.Status.CompletionTimestamp = &now
-	if build.Status.StartTimestamp != nil {
-		build.Status.Duration = build.Status.CompletionTimestamp.Rfc3339Copy().Time.Sub(build.Status.StartTimestamp.Rfc3339Copy().Time)
+	// apparently this build completed so fast we didn't see the pod running event,
+	// so just use the completion time as the start time.
+	if build.Status.StartTimestamp == nil {
+		build.Status.StartTimestamp = &now
 	}
+	build.Status.Duration = build.Status.CompletionTimestamp.Rfc3339Copy().Time.Sub(build.Status.StartTimestamp.Rfc3339Copy().Time)
+	return true
 }
 
 func HandleBuildCompletion(build *buildapi.Build, runPolicies []policy.RunPolicy) {
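
To see why the nil check and the new boolean return matter for the relist handling in HandlePod, here is a self-contained toy version of the same logic (stand-in struct and plain time.Time instead of the real buildapi/unversioned types; not code from this PR):

package main

import (
	"fmt"
	"time"
)

// buildStatus stands in for buildapi.BuildStatus.
type buildStatus struct {
	StartTimestamp      *time.Time
	CompletionTimestamp *time.Time
	Duration            time.Duration
}

// setCompletionTimeAndDuration mirrors the patched helper: it is a no-op
// (returning false) once a completion timestamp exists, and it backfills a
// missing start timestamp so Duration is always computable.
func setCompletionTimeAndDuration(s *buildStatus) bool {
	if s.CompletionTimestamp != nil {
		return false
	}
	now := time.Now()
	s.CompletionTimestamp = &now
	if s.StartTimestamp == nil {
		// the build finished before a running pod was ever observed,
		// so use the completion time as the start time
		s.StartTimestamp = &now
	}
	s.Duration = s.CompletionTimestamp.Sub(*s.StartTimestamp)
	return true
}

func main() {
	var s buildStatus
	fmt.Println(setCompletionTimeAndDuration(&s)) // true: first completion event sets both timestamps
	fmt.Println(setCompletionTimeAndDuration(&s)) // false: a pod relist finds nothing to change
}

Calling the helper a second time is a no-op, which is exactly what lets the controller skip redundant updates when completed or failed pods are relisted.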
6 changes: 3 additions & 3 deletions pkg/build/controller/controller_test.go
@@ -447,23 +447,23 @@ func TestCancelBuild(t *testing.T) {
 			inStatus:            buildapi.BuildPhaseNew,
 			outStatus:           buildapi.BuildPhaseCancelled,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 1
 			inStatus:            buildapi.BuildPhasePending,
 			outStatus:           buildapi.BuildPhaseCancelled,
 			podStatus:           kapi.PodRunning,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 2
 			inStatus:            buildapi.BuildPhaseRunning,
 			outStatus:           buildapi.BuildPhaseCancelled,
 			podStatus:           kapi.PodRunning,
 			exitCode:            0,
-			startTimestamp:      nil,
+			startTimestamp:      curtime,
 			completionTimestamp: curtime,
 		},
 		{ // 3
24 changes: 14 additions & 10 deletions pkg/build/controller/policy/policy.go
@@ -142,17 +142,21 @@ func handleComplete(lister buildclient.BuildLister, updater buildclient.BuildUpdater
 	}
 	for _, build := range nextBuilds {
 		// TODO: replace with informer notification requeueing in the future
-		build.Annotations[buildapi.BuildAcceptedAnnotation] = uuid.NewRandom().String()
-		err := wait.Poll(500*time.Millisecond, 5*time.Second, func() (bool, error) {
-			err := updater.Update(build.Namespace, build)
-			if err != nil && errors.IsConflict(err) {
-				glog.V(5).Infof("Error updating build %s/%s: %v (will retry)", build.Namespace, build.Name, err)
-				return false, nil
+
+		// only set the annotation once.
+		if _, ok := build.Annotations[buildapi.BuildAcceptedAnnotation]; !ok {
+			build.Annotations[buildapi.BuildAcceptedAnnotation] = uuid.NewRandom().String()
+			err := wait.Poll(500*time.Millisecond, 5*time.Second, func() (bool, error) {
+				err := updater.Update(build.Namespace, build)
+				if err != nil && errors.IsConflict(err) {
+					glog.V(5).Infof("Error updating build %s/%s: %v (will retry)", build.Namespace, build.Name, err)
+					return false, nil
+				}
+				return true, err
+			})
+			if err != nil {
+				return err
 			}
-			return true, err
-		})
-		if err != nil {
-			return err
 		}
 	}
 	return nil
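A note on the guarded block above: because BuildAcceptedAnnotation is written with a freshly generated UUID, unconditionally re-setting it on every pass through handleComplete would dirty the build object on each pod relist and keep triggering updates; checking for the key first makes the function idempotent. The wait.Poll loop then retries the update every 500ms for up to 5 seconds, but only on conflict errors; any other error aborts handleComplete immediately.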
84 changes: 84 additions & 0 deletions test/extended/builds/run_policy.go
@@ -132,10 +132,21 @@ var _ = g.Describe("[builds][Slow] using build configuration runPolicy", func() {
 			defer buildWatch.Stop()
 			o.Expect(err).NotTo(o.HaveOccurred())
 
+			sawCompletion := false
 			for {
 				event := <-buildWatch.ResultChan()
 				build := event.Object.(*buildapi.Build)
+				var lastCompletion time.Time
+				if build.Status.Phase == buildapi.BuildPhaseComplete {
+					lastCompletion = time.Now()
+					o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "completed builds should have a valid start time")
+					o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "completed builds should have a valid completion time")
+					sawCompletion = true
+				}
 				if build.Status.Phase == buildapi.BuildPhaseRunning {
+					latency := lastCompletion.Sub(time.Now())
+					o.Expect(latency).To(o.BeNumerically("<", 10*time.Second), "next build should have started less than 10s after last completed build")
+
 					// Ignore events from complete builds (if there are any) if we already
 					// verified the build.
 					if _, exists := buildVerified[build.Name]; exists {
@@ -166,6 +177,7 @@ var _ = g.Describe("[builds][Slow] using build configuration runPolicy", func() {
 					break
 				}
 			}
+			o.Expect(sawCompletion).To(o.BeTrue(), "should have seen at least one build complete")
 		})
 	})
 
@@ -212,6 +224,78 @@ var _ = g.Describe("[builds][Slow] using build configuration runPolicy", func() {
 		})
 	})
 
+	g.Describe("build configuration with Serial build run policy handling failure", func() {
+		g.It("starts the next build immediately after one fails", func() {
+			g.By("starting multiple builds")
+			bcName := "sample-serial-build-fail"
+
+			for i := 0; i < 3; i++ {
+				_, _, err := exutil.StartBuild(oc, bcName)
+				o.Expect(err).NotTo(o.HaveOccurred())
+			}
+
+			buildWatch, err := oc.Client().Builds(oc.Namespace()).Watch(kapi.ListOptions{
+				LabelSelector: buildutil.BuildConfigSelector(bcName),
+			})
+			defer buildWatch.Stop()
+			o.Expect(err).NotTo(o.HaveOccurred())
+
+			var failTime, failTime2 time.Time
+			done, timestamps1, timestamps2, timestamps3 := false, false, false, false
+
+			for done == false {
+				select {
+				case event := <-buildWatch.ResultChan():
+					build := event.Object.(*buildapi.Build)
+					if build.Status.Phase == buildapi.BuildPhaseFailed {
+						if build.Name == "sample-serial-build-fail-1" {
+							// this may not be set on the first build modified to failed event because
+							// the build gets marked failed by the build pod, but the timestamps get
+							// set by the buildpod controller.
+							if build.Status.CompletionTimestamp != nil {
+								o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "failed builds should have a valid start time")
+								o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "failed builds should have a valid completion time")
+								timestamps1 = true
+							}
+							failTime = time.Now()
+						}
+						if build.Name == "sample-serial-build-fail-2" {
+							duration := time.Now().Sub(failTime)
+							o.Expect(duration).To(o.BeNumerically("<", 10*time.Second), "next build should have started less than 10s after failed build")
+							if build.Status.CompletionTimestamp != nil {
+								o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "failed builds should have a valid start time")
+								o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "failed builds should have a valid completion time")
+								timestamps2 = true
+							}
+							failTime2 = time.Now()
+						}
+						if build.Name == "sample-serial-build-fail-3" {
+							duration := time.Now().Sub(failTime2)
+							o.Expect(duration).To(o.BeNumerically("<", 10*time.Second), "next build should have started less than 10s after failed build")
+							if build.Status.CompletionTimestamp != nil {
+								o.Expect(build.Status.StartTimestamp).ToNot(o.BeNil(), "failed builds should have a valid start time")
+								o.Expect(build.Status.CompletionTimestamp).ToNot(o.BeNil(), "failed builds should have a valid completion time")
+								timestamps3 = true
+							}
+						}
+					}
+					// once we have all the expected timestamps, or we run out of time, we can bail.
+					if timestamps1 && timestamps2 && timestamps3 {
+						done = true
+					}
+				case <-time.After(2 * time.Minute):
+					// we've waited as long as we dare, go see if we got all the timestamp data we expected.
+					// if we have the timestamp data, we also know that we checked the next build start latency.
+					done = true
+				}
+			}
+			o.Expect(timestamps1).To(o.BeTrue(), "failed builds should have start and completion timestamps set")
+			o.Expect(timestamps2).To(o.BeTrue(), "failed builds should have start and completion timestamps set")
+			o.Expect(timestamps3).To(o.BeTrue(), "failed builds should have start and completion timestamps set")
+
+		})
+	})
+
 	g.Describe("build configuration with SerialLatestOnly build run policy", func() {
 		g.It("runs the builds in serial order but cancel previous builds", func() {
 			g.By("starting multiple builds")
25 changes: 21 additions & 4 deletions test/extended/testdata/bindata.go

Some generated files are not rendered by default.

25 changes: 21 additions & 4 deletions test/extended/testdata/run_policy/serial-bc.yaml
@@ -24,7 +24,24 @@
         from:
           kind: "DockerImage"
           name: "centos/ruby-22-centos7"
-  resources: {}
-  status:
-    lastVersion: 0
-
+-
+  kind: "BuildConfig"
+  apiVersion: "v1"
+  metadata:
+    name: "sample-serial-build-fail"
+  spec:
+    runPolicy: "Serial"
+    triggers:
+      -
+        type: "imageChange"
+        imageChange: {}
+    source:
+      type: "Git"
+      git:
+        uri: "git://github.com/openshift/invalidrepo.git"
+    strategy:
+      type: "Source"
+      sourceStrategy:
+        from:
+          kind: "DockerImage"
+          name: "centos/ruby-22-centos7"
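
With this testdata in place, the extended test above starts three builds from the sample-serial-build-fail BuildConfig; the invalid git URI guarantees each one fails, which is what exercises the next-build kickoff after a failure. Outside the suite, the same config could be driven manually with, for example, `oc start-build sample-serial-build-fail` (assuming the testdata has been created in the current project).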