-
Notifications
You must be signed in to change notification settings - Fork 4.7k
cancel binary builds if they hang #12484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,11 @@ import ( | |
buildutil "github.com/openshift/origin/pkg/build/util" | ||
) | ||
|
||
var ( | ||
cancelPollInterval = 500 * time.Millisecond | ||
cancelPollDuration = 30 * time.Second | ||
) | ||
|
||
// NewStorage creates a new storage object for build generation | ||
func NewStorage(generator *generator.BuildGenerator) *InstantiateREST { | ||
return &InstantiateREST{generator: generator} | ||
|
@@ -178,24 +183,39 @@ func (h *binaryInstantiateHandler) handle(r io.Reader) (runtime.Object, error) { | |
} | ||
remaining := h.r.Timeout - time.Now().Sub(start) | ||
|
||
latest, ok, err := registry.WaitForRunningBuild(h.r.Watcher, h.ctx, build, remaining) | ||
if err != nil { | ||
switch { | ||
case latest.Status.Phase == buildapi.BuildPhaseError: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s encountered an error: %s", build.Name, buildutil.NoBuildLogsMessage)) | ||
case latest.Status.Phase == buildapi.BuildPhaseCancelled: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was cancelled: %s", build.Name, buildutil.NoBuildLogsMessage)) | ||
case err == registry.ErrBuildDeleted: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was deleted before it started: %s", build.Name, buildutil.NoBuildLogsMessage)) | ||
default: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("unable to wait for build %s to run: %v", build.Name, err)) | ||
// Attempt to cancel the build if it did not start running | ||
// before we gave up. | ||
cancel := true | ||
defer func() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make this a method. |
||
if !cancel { | ||
return | ||
} | ||
} | ||
if !ok { | ||
return nil, errors.NewTimeoutError(fmt.Sprintf("timed out waiting for build %s to start after %s", build.Name, h.r.Timeout), 0) | ||
} | ||
if latest.Status.Phase != buildapi.BuildPhaseRunning { | ||
h.cancelBuild(build) | ||
}() | ||
|
||
latest, ok, err := registry.WaitForRunningBuild(h.r.Watcher, h.ctx, build, remaining) | ||
|
||
switch { | ||
case latest.Status.Phase == buildapi.BuildPhaseError: | ||
// don't cancel the build if it reached a terminal state on its own | ||
cancel = false | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s encountered an error: %s", build.Name, buildutil.NoBuildLogsMessage)) | ||
case latest.Status.Phase == buildapi.BuildPhaseFailed: | ||
// don't cancel the build if it reached a terminal state on its own | ||
cancel = false | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s failed: %s: %s", build.Name, build.Status.Reason, build.Status.Message)) | ||
case latest.Status.Phase == buildapi.BuildPhaseCancelled: | ||
// don't cancel the build if it reached a terminal state on its own | ||
cancel = false | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was cancelled: %s", build.Name, buildutil.NoBuildLogsMessage)) | ||
case latest.Status.Phase != buildapi.BuildPhaseRunning: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("cannot upload file to build %s with status %s", build.Name, latest.Status.Phase)) | ||
case err == registry.ErrBuildDeleted: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("build %s was deleted before it started: %s", build.Name, buildutil.NoBuildLogsMessage)) | ||
case err != nil: | ||
return nil, errors.NewBadRequest(fmt.Sprintf("unable to wait for build %s to run: %v", build.Name, err)) | ||
case !ok: | ||
return nil, errors.NewTimeoutError(fmt.Sprintf("timed out waiting for build %s to start after %s", build.Name, h.r.Timeout), 0) | ||
} | ||
|
||
// The container should be the default build container, so setting it to blank | ||
|
@@ -226,9 +246,28 @@ func (h *binaryInstantiateHandler) handle(r io.Reader) (runtime.Object, error) { | |
if err := exec.Stream(streamOptions); err != nil { | ||
return nil, errors.NewInternalError(err) | ||
} | ||
cancel = false | ||
return latest, nil | ||
} | ||
|
||
// cancelBuild will mark a build for cancellation unless | ||
// cancel is false in which case it is a no-op. | ||
func (h *binaryInstantiateHandler) cancelBuild(build *buildapi.Build) { | ||
build.Status.Cancelled = true | ||
h.r.Generator.Client.UpdateBuild(h.ctx, build) | ||
wait.Poll(cancelPollInterval, cancelPollDuration, func() (bool, error) { | ||
build.Status.Cancelled = true | ||
err := h.r.Generator.Client.UpdateBuild(h.ctx, build) | ||
switch { | ||
case err != nil && errors.IsConflict(err): | ||
build, err = h.r.Generator.Client.GetBuild(h.ctx, build.Name) | ||
return false, err | ||
default: | ||
return true, err | ||
} | ||
}) | ||
} | ||
|
||
type podGetter struct { | ||
podsNamespacer kcoreclient.PodsGetter | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ import ( | |
g "github.com/onsi/ginkgo" | ||
o "github.com/onsi/gomega" | ||
|
||
buildapi "github.com/openshift/origin/pkg/build/api" | ||
exutil "github.com/openshift/origin/test/extended/util" | ||
) | ||
|
||
|
@@ -103,7 +104,6 @@ var _ = g.Describe("[builds][Slow] starting a build using CLI", func() { | |
o.Expect(br.StartBuildStdErr).To(o.ContainSubstring("Uploading file")) | ||
o.Expect(br.StartBuildStdErr).To(o.ContainSubstring("as binary input for the build ...")) | ||
o.Expect(buildLog).To(o.ContainSubstring("Your bundle is complete")) | ||
|
||
}) | ||
|
||
g.It("should accept --from-dir as input", func() { | ||
|
@@ -194,6 +194,23 @@ var _ = g.Describe("[builds][Slow] starting a build using CLI", func() { | |
}) | ||
}) | ||
|
||
g.Describe("cancel a binary build that doesn't start running in 5 minutes", func() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add "[slow]" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the whole suite is already tagged slow. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
g.It("should start a build and wait for the build to be cancelled", func() { | ||
g.By("starting a build with a nodeselector that can't be matched") | ||
go func() { | ||
exutil.StartBuild(oc, "sample-build-binary-invalidnodeselector", fmt.Sprintf("--from-file=%s", exampleGemfile)) | ||
}() | ||
build := &buildapi.Build{} | ||
cancelFn := func(b *buildapi.Build) bool { | ||
build = b | ||
return exutil.CheckBuildCancelledFn(b) | ||
} | ||
err := exutil.WaitForABuild(oc.Client().Builds(oc.Namespace()), "sample-build-binary-invalidnodeselector-1", nil, nil, cancelFn) | ||
o.Expect(err).NotTo(o.HaveOccurred()) | ||
o.Expect(build.Status.Phase).To(o.Equal(buildapi.BuildPhaseCancelled)) | ||
}) | ||
}) | ||
|
||
g.Describe("cancelling build started by oc start-build --wait", func() { | ||
g.It("should start a build and wait for the build to cancel", func() { | ||
g.By("starting the build with --wait flag") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Create an e2e test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any suggestions on how to create one that doesn't have to wait 5 minutes to timeout?
since the timeout isn't configurable and can't easily be made configurable from the client side.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hrm. Certainly ugly to test, but also ugly not to have any sort of certainly this code works.