Skip to content

cancel binary builds if they hang #12484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 24, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkg/build/generator/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type GeneratorClient interface {
UpdateBuildConfig(ctx kapi.Context, buildConfig *buildapi.BuildConfig) error
GetBuild(ctx kapi.Context, name string) (*buildapi.Build, error)
CreateBuild(ctx kapi.Context, build *buildapi.Build) error
UpdateBuild(ctx kapi.Context, build *buildapi.Build) error
GetImageStream(ctx kapi.Context, name string) (*imageapi.ImageStream, error)
GetImageStreamImage(ctx kapi.Context, name string) (*imageapi.ImageStreamImage, error)
GetImageStreamTag(ctx kapi.Context, name string) (*imageapi.ImageStreamTag, error)
Expand All @@ -68,6 +69,7 @@ type Client struct {
UpdateBuildConfigFunc func(ctx kapi.Context, buildConfig *buildapi.BuildConfig) error
GetBuildFunc func(ctx kapi.Context, name string) (*buildapi.Build, error)
CreateBuildFunc func(ctx kapi.Context, build *buildapi.Build) error
UpdateBuildFunc func(ctx kapi.Context, build *buildapi.Build) error
GetImageStreamFunc func(ctx kapi.Context, name string) (*imageapi.ImageStream, error)
GetImageStreamImageFunc func(ctx kapi.Context, name string) (*imageapi.ImageStreamImage, error)
GetImageStreamTagFunc func(ctx kapi.Context, name string) (*imageapi.ImageStreamTag, error)
Expand All @@ -93,6 +95,11 @@ func (c Client) CreateBuild(ctx kapi.Context, build *buildapi.Build) error {
return c.CreateBuildFunc(ctx, build)
}

// UpdateBuild updates a build
func (c Client) UpdateBuild(ctx kapi.Context, build *buildapi.Build) error {
return c.UpdateBuildFunc(ctx, build)
}

// GetImageStream retrieves a named image stream
func (c Client) GetImageStream(ctx kapi.Context, name string) (*imageapi.ImageStream, error) {
return c.GetImageStreamFunc(ctx, name)
Expand Down
71 changes: 55 additions & 16 deletions pkg/build/registry/buildconfiginstantiate/rest.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ import (
buildutil "github.com/openshift/origin/pkg/build/util"
)

var (
cancelPollInterval = 500 * time.Millisecond
cancelPollDuration = 30 * time.Second
)

// NewStorage creates a new storage object for build generation
func NewStorage(generator *generator.BuildGenerator) *InstantiateREST {
return &InstantiateREST{generator: generator}
Expand Down Expand Up @@ -178,24 +183,39 @@ func (h *binaryInstantiateHandler) handle(r io.Reader) (runtime.Object, error) {
}
remaining := h.r.Timeout - time.Now().Sub(start)

latest, ok, err := registry.WaitForRunningBuild(h.r.Watcher, h.ctx, build, remaining)
if err != nil {
switch {
case latest.Status.Phase == buildapi.BuildPhaseError:
return nil, errors.NewBadRequest(fmt.Sprintf("build %s encountered an error: %s", build.Name, buildutil.NoBuildLogsMessage))
case latest.Status.Phase == buildapi.BuildPhaseCancelled:
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was cancelled: %s", build.Name, buildutil.NoBuildLogsMessage))
case err == registry.ErrBuildDeleted:
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was deleted before it started: %s", build.Name, buildutil.NoBuildLogsMessage))
default:
return nil, errors.NewBadRequest(fmt.Sprintf("unable to wait for build %s to run: %v", build.Name, err))
// Attempt to cancel the build if it did not start running
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Create an e2e test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any suggestions on how to create one that doesn't have to wait 5 minutes to timeout?

since the timeout isn't configurable and can't easily be made configurable from the client side.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hrm. Certainly ugly to test, but also ugly not to have any sort of certainly this code works.

// before we gave up.
cancel := true
defer func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this a method.

if !cancel {
return
}
}
if !ok {
return nil, errors.NewTimeoutError(fmt.Sprintf("timed out waiting for build %s to start after %s", build.Name, h.r.Timeout), 0)
}
if latest.Status.Phase != buildapi.BuildPhaseRunning {
h.cancelBuild(build)
}()

latest, ok, err := registry.WaitForRunningBuild(h.r.Watcher, h.ctx, build, remaining)

switch {
case latest.Status.Phase == buildapi.BuildPhaseError:
// don't cancel the build if it reached a terminal state on its own
cancel = false
return nil, errors.NewBadRequest(fmt.Sprintf("build %s encountered an error: %s", build.Name, buildutil.NoBuildLogsMessage))
case latest.Status.Phase == buildapi.BuildPhaseFailed:
// don't cancel the build if it reached a terminal state on its own
cancel = false
return nil, errors.NewBadRequest(fmt.Sprintf("build %s failed: %s: %s", build.Name, build.Status.Reason, build.Status.Message))
case latest.Status.Phase == buildapi.BuildPhaseCancelled:
// don't cancel the build if it reached a terminal state on its own
cancel = false
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was cancelled: %s", build.Name, buildutil.NoBuildLogsMessage))
case latest.Status.Phase != buildapi.BuildPhaseRunning:
return nil, errors.NewBadRequest(fmt.Sprintf("cannot upload file to build %s with status %s", build.Name, latest.Status.Phase))
case err == registry.ErrBuildDeleted:
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was deleted before it started: %s", build.Name, buildutil.NoBuildLogsMessage))
case err != nil:
return nil, errors.NewBadRequest(fmt.Sprintf("unable to wait for build %s to run: %v", build.Name, err))
case !ok:
return nil, errors.NewTimeoutError(fmt.Sprintf("timed out waiting for build %s to start after %s", build.Name, h.r.Timeout), 0)
}

// The container should be the default build container, so setting it to blank
Expand Down Expand Up @@ -226,9 +246,28 @@ func (h *binaryInstantiateHandler) handle(r io.Reader) (runtime.Object, error) {
if err := exec.Stream(streamOptions); err != nil {
return nil, errors.NewInternalError(err)
}
cancel = false
return latest, nil
}

// cancelBuild will mark a build for cancellation unless
// cancel is false in which case it is a no-op.
func (h *binaryInstantiateHandler) cancelBuild(build *buildapi.Build) {
build.Status.Cancelled = true
h.r.Generator.Client.UpdateBuild(h.ctx, build)
wait.Poll(cancelPollInterval, cancelPollDuration, func() (bool, error) {
build.Status.Cancelled = true
err := h.r.Generator.Client.UpdateBuild(h.ctx, build)
switch {
case err != nil && errors.IsConflict(err):
build, err = h.r.Generator.Client.GetBuild(h.ctx, build.Name)
return false, err
default:
return true, err
}
})
}

type podGetter struct {
podsNamespacer kcoreclient.PodsGetter
}
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/server/origin/master.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ func (c *MasterConfig) GetRestStorage() map[string]rest.Storage {
UpdateBuildConfigFunc: buildConfigRegistry.UpdateBuildConfig,
GetBuildFunc: buildRegistry.GetBuild,
CreateBuildFunc: buildRegistry.CreateBuild,
UpdateBuildFunc: buildRegistry.UpdateBuild,
GetImageStreamFunc: imageStreamRegistry.GetImageStream,
GetImageStreamImageFunc: imageStreamImageRegistry.GetImageStreamImage,
GetImageStreamTagFunc: imageStreamTagRegistry.GetImageStreamTag,
Expand Down
4 changes: 2 additions & 2 deletions test/extended/builds/dockerfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ USER 1001

buildName := "jenkins-1"
g.By("expecting the Dockerfile build is in Complete phase")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, nil, nil, nil)
//debug for failures on jenkins
if err != nil {
exutil.DumpBuildLogs("jenkins", oc)
Expand Down Expand Up @@ -75,7 +75,7 @@ USER 1001

buildName := "centos-1"
g.By("expecting the Dockerfile build is in Complete phase")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, nil, nil, nil)
//debug for failures on jenkins
if err != nil {
exutil.DumpBuildLogs("centos", oc)
Expand Down
2 changes: 1 addition & 1 deletion test/extended/builds/gitauth.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ var _ = g.Describe("[builds][Slow] can use private repositories as build input",
o.Expect(err).NotTo(o.HaveOccurred())

g.By(fmt.Sprintf("expecting build %s to complete successfully", buildName))
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, nil, nil, nil)
if err != nil {
exutil.DumpBuildLogs(buildConfigName, oc)
}
Expand Down
19 changes: 18 additions & 1 deletion test/extended/builds/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
g "github.com/onsi/ginkgo"
o "github.com/onsi/gomega"

buildapi "github.com/openshift/origin/pkg/build/api"
exutil "github.com/openshift/origin/test/extended/util"
)

Expand Down Expand Up @@ -103,7 +104,6 @@ var _ = g.Describe("[builds][Slow] starting a build using CLI", func() {
o.Expect(br.StartBuildStdErr).To(o.ContainSubstring("Uploading file"))
o.Expect(br.StartBuildStdErr).To(o.ContainSubstring("as binary input for the build ..."))
o.Expect(buildLog).To(o.ContainSubstring("Your bundle is complete"))

})

g.It("should accept --from-dir as input", func() {
Expand Down Expand Up @@ -194,6 +194,23 @@ var _ = g.Describe("[builds][Slow] starting a build using CLI", func() {
})
})

g.Describe("cancel a binary build that doesn't start running in 5 minutes", func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add "[slow]"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the whole suite is already tagged slow.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

g.It("should start a build and wait for the build to be cancelled", func() {
g.By("starting a build with a nodeselector that can't be matched")
go func() {
exutil.StartBuild(oc, "sample-build-binary-invalidnodeselector", fmt.Sprintf("--from-file=%s", exampleGemfile))
}()
build := &buildapi.Build{}
cancelFn := func(b *buildapi.Build) bool {
build = b
return exutil.CheckBuildCancelledFn(b)
}
err := exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), "sample-build-binary-invalidnodeselector-1", nil, nil, cancelFn)
o.Expect(err).NotTo(o.HaveOccurred())
o.Expect(build.Status.Phase).To(o.Equal(buildapi.BuildPhaseCancelled))
})
})

g.Describe("cancelling build started by oc start-build --wait", func() {
g.It("should start a build and wait for the build to cancel", func() {
g.By("starting the build with --wait flag")
Expand Down
2 changes: 1 addition & 1 deletion test/extended/image_ecosystem/s2i_perl.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ var _ = g.Describe("[image_ecosystem][perl][Slow] hot deploy for openshift perl
o.Expect(err).NotTo(o.HaveOccurred())

g.By("waiting for build to finish")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), "dancer-mysql-example-1", exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), "dancer-mysql-example-1", nil, nil, nil)
if err != nil {
exutil.DumpBuildLogs("dancer-mysql-example", oc)
}
Expand Down
2 changes: 1 addition & 1 deletion test/extended/image_ecosystem/s2i_php.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ var _ = g.Describe("[image_ecosystem][php][Slow] hot deploy for openshift php im
o.Expect(err).NotTo(o.HaveOccurred())

g.By("waiting for build to finish")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), dcName, exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), dcName, nil, nil, nil)
if err != nil {
exutil.DumpBuildLogs("cakephp-mysql-example", oc)
}
Expand Down
2 changes: 1 addition & 1 deletion test/extended/image_ecosystem/s2i_python.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ var _ = g.Describe("[image_ecosystem][python][Slow] hot deploy for openshift pyt
o.Expect(err).NotTo(o.HaveOccurred())

g.By("waiting for build to finish")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), "django-ex-1", exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), "django-ex-1", nil, nil, nil)
if err != nil {
exutil.DumpBuildLogs("django-ex", oc)
}
Expand Down
2 changes: 1 addition & 1 deletion test/extended/image_ecosystem/s2i_ruby.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ var _ = g.Describe("[image_ecosystem][ruby][Slow] hot deploy for openshift ruby
o.Expect(err).NotTo(o.HaveOccurred())

g.By("waiting for build to finish")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), dcName, exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), dcName, nil, nil, nil)
if err != nil {
exutil.DumpBuildLogs("rails-postgresql-example", oc)
}
Expand Down
2 changes: 1 addition & 1 deletion test/extended/image_ecosystem/sample_repos.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func NewSampleRepoTest(c SampleRepoConfig) func() {
buildName := c.buildConfigName + "-1"

g.By("expecting the build is in the Complete phase")
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, exutil.CheckBuildSuccessFn, exutil.CheckBuildFailedFn)
err = exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), buildName, nil, nil, nil)
if err != nil {
exutil.DumpBuildLogs(c.buildConfigName, oc)
}
Expand Down
39 changes: 39 additions & 0 deletions test/extended/testdata/test-build.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,45 @@
"status": {
"lastVersion": 0
}
},
{
"kind": "BuildConfig",
"apiVersion": "v1",
"metadata": {
"name": "sample-build-binary-invalidnodeselector",
"creationTimestamp": null
},
"spec": {
"triggers": [
{
"type": "imageChange",
"imageChange": {}
}
],
"source": {
"type": "Binary",
"binary": {}
},
"strategy": {
"type": "Docker",
"dockerStrategy": {
"env": [
{ "name": "FOO", "value": "test" },
{ "name": "BAR", "value": "test" },
{ "name": "BUILD_LOGLEVEL", "value": "5" }
],
"from": {
"kind": "DockerImage",
"name": "centos/ruby-22-centos7"
}
}
},
"resources": {},
"nodeSelector": {"nodelabelkey":"nodelabelvalue"}
},
"status": {
"lastVersion": 0
}
}
]
}
29 changes: 26 additions & 3 deletions test/extended/util/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,8 @@ type BuildResult struct {
BuildSuccess bool
// BuildFailure is true if the build was finished with an error.
BuildFailure bool
// BuildCancelled is true if the build was canceled.
BuildCancelled bool
// BuildTimeout is true if there was a timeout waiting for the build to finish.
BuildTimeout bool
// The openshift client which created this build.
Expand Down Expand Up @@ -400,6 +402,7 @@ func StartBuildAndWait(oc *CLI, args ...string) (result *BuildResult, err error)
BuildAttempt: false,
BuildSuccess: false,
BuildFailure: false,
BuildCancelled: false,
BuildTimeout: false,
oc: oc,
}
Expand Down Expand Up @@ -428,6 +431,11 @@ func StartBuildAndWait(oc *CLI, args ...string) (result *BuildResult, err error)
result.BuildFailure = CheckBuildFailedFn(b)
return result.BuildFailure
},
func(b *buildapi.Build) bool {
result.Build = b
result.BuildCancelled = CheckBuildCancelledFn(b)
return result.BuildCancelled
},
)

if result.Build == nil {
Expand All @@ -436,14 +444,24 @@ func StartBuildAndWait(oc *CLI, args ...string) (result *BuildResult, err error)
}

result.BuildAttempt = true
result.BuildTimeout = !(result.BuildFailure || result.BuildSuccess)
result.BuildTimeout = !(result.BuildFailure || result.BuildSuccess || result.BuildCancelled)

fmt.Fprintf(g.GinkgoWriter, "Done waiting for %s: %#v\n", buildPath, *result)
return result, nil
}

// WaitForABuild waits for a Build object to match either isOK or isFailed conditions.
func WaitForABuild(c client.BuildInterface, name string, isOK, isFailed func(*buildapi.Build) bool) error {
func WaitForABuild(c client.BuildInterface, name string, isOK, isFailed, isCanceled func(*buildapi.Build) bool) error {
if isOK == nil {
isOK = CheckBuildSuccessFn
}
if isFailed == nil {
isFailed = CheckBuildFailedFn
}
if isCanceled == nil {
isCanceled = CheckBuildCancelledFn
}

// wait 2 minutes for build to exist
err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) {
if _, err := c.Get(name); err != nil {
Expand All @@ -464,7 +482,7 @@ func WaitForABuild(c client.BuildInterface, name string, isOK, isFailed func(*bu
return false, err
}
for i := range list.Items {
if name == list.Items[i].Name && isOK(&list.Items[i]) {
if name == list.Items[i].Name && (isOK(&list.Items[i]) || isCanceled(&list.Items[i])) {
return true, nil
}
if name != list.Items[i].Name || isFailed(&list.Items[i]) {
Expand All @@ -489,6 +507,11 @@ var CheckBuildFailedFn = func(b *buildapi.Build) bool {
return b.Status.Phase == buildapi.BuildPhaseFailed || b.Status.Phase == buildapi.BuildPhaseError
}

// CheckBuildCancelledFn return true if the build was canceled
var CheckBuildCancelledFn = func(b *buildapi.Build) bool {
return b.Status.Phase == buildapi.BuildPhaseCancelled
}

// WaitForBuilderAccount waits until the builder service account gets fully
// provisioned
func WaitForBuilderAccount(c kcoreclient.ServiceAccountInterface) error {
Expand Down