Skip to content

Commit 6b0c540

Browse files
Merge pull request #20297 from wozniakjan/bug-1596440/surface_oomkilled_in_build
Bug 1596440 - surface OOMKilled pod to build
2 parents 656dd2f + 01eab5b commit 6b0c540

File tree

6 files changed

+115
-2
lines changed

6 files changed

+115
-2
lines changed

pkg/build/apis/build/types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,9 @@ const (
514514
// range of build failures.
515515
StatusReasonGenericBuildFailed StatusReason = "GenericBuildFailed"
516516

517+
// StatusReasonOutOfMemoryKilled indicates that the build pod was killed for its memory consumption
518+
StatusReasonOutOfMemoryKilled StatusReason = "OutOfMemoryKilled"
519+
517520
// StatusReasonCannotRetrieveServiceAccount is the reason associated with a failure
518521
// to look up the service account associated with the BuildConfig.
519522
StatusReasonCannotRetrieveServiceAccount StatusReason = "CannotRetrieveServiceAccount"
@@ -540,6 +543,7 @@ const (
540543
StatusMessageNoBuildContainerStatus = "The pod for this build has no container statuses indicating success or failure."
541544
StatusMessageFailedContainer = "The pod for this build has at least one container with a non-zero exit status."
542545
StatusMessageGenericBuildFailed = "Generic Build failure - check logs for details."
546+
StatusMessageOutOfMemoryKilled = "The build pod was killed due to an out of memory condition."
543547
StatusMessageUnresolvableEnvironmentVariable = "Unable to resolve build environment variable reference."
544548
StatusMessageCannotRetrieveServiceAccount = "Unable to look up the service account secrets for this build."
545549
)

pkg/build/controller/build/build_controller.go

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1001,7 +1001,9 @@ func (bc *BuildController) handleActiveBuild(build *buildapi.Build, pod *v1.Pod)
10011001
}
10021002
}
10031003
case v1.PodFailed:
1004-
if build.Status.Phase != buildapi.BuildPhaseFailed {
1004+
if isOOMKilled(pod) {
1005+
update = transitionToPhase(buildapi.BuildPhaseFailed, buildapi.StatusReasonOutOfMemoryKilled, buildapi.StatusMessageOutOfMemoryKilled)
1006+
} else if build.Status.Phase != buildapi.BuildPhaseFailed {
10051007
// If a DeletionTimestamp has been set, it means that the pod will
10061008
// soon be deleted. The build should be transitioned to the Error phase.
10071009
if pod.DeletionTimestamp != nil {
@@ -1014,11 +1016,33 @@ func (bc *BuildController) handleActiveBuild(build *buildapi.Build, pod *v1.Pod)
10141016
return update, nil
10151017
}
10161018

1019+
func isOOMKilled(pod *v1.Pod) bool {
1020+
if pod.Status.Reason == "OOMKilled" {
1021+
return true
1022+
}
1023+
for _, c := range pod.Status.InitContainerStatuses {
1024+
terminated := c.State.Terminated
1025+
if terminated != nil && terminated.Reason == "OOMKilled" {
1026+
return true
1027+
}
1028+
}
1029+
for _, c := range pod.Status.ContainerStatuses {
1030+
terminated := c.State.Terminated
1031+
if terminated != nil && terminated.Reason == "OOMKilled" {
1032+
return true
1033+
}
1034+
}
1035+
return false
1036+
}
1037+
10171038
// handleCompletedBuild will only be called on builds that are already in a terminal phase. It is used to set up the
10181039
// completion timestamp and failure log snippet as needed.
10191040
func (bc *BuildController) handleCompletedBuild(build *buildapi.Build, pod *v1.Pod) (*buildUpdate, error) {
10201041

10211042
update := &buildUpdate{}
1043+
if isOOMKilled(pod) {
1044+
update = transitionToPhase(buildapi.BuildPhaseFailed, buildapi.StatusReasonOutOfMemoryKilled, buildapi.StatusMessageOutOfMemoryKilled)
1045+
}
10221046
setBuildCompletionData(build, pod, update)
10231047

10241048
return update, nil

pkg/build/registry/build/strategy.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ func (strategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object) {
5353
// of the reason and message. This is to prevent the build controller from
5454
// overwriting the reason and message that was set by the builder pod
5555
// when it updated the build's details.
56-
if oldBuild.Status.Phase == buildapi.BuildPhaseFailed {
56+
// The one exception is OutOfMemoryKilled: any process in a container can be
57+
// OOM-killed, which can lead the builder to record a different failure reason
58+
// prematurely, so an OutOfMemoryKilled reason is still allowed to override it.
59+
if oldBuild.Status.Phase == buildapi.BuildPhaseFailed &&
60+
newBuild.Status.Reason != buildapi.StatusReasonOutOfMemoryKilled {
5761
newBuild.Status.Reason = oldBuild.Status.Reason
5862
newBuild.Status.Message = oldBuild.Status.Message
5963
}

test/extended/builds/failure_status.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ var _ = g.Describe("[Feature:Builds][Slow] update failure status", func() {
2727
fetchDockerSrc = exutil.FixturePath("testdata", "builds", "statusfail-fetchsourcedocker.yaml")
2828
fetchS2ISrc = exutil.FixturePath("testdata", "builds", "statusfail-fetchsources2i.yaml")
2929
badContextDirS2ISrc = exutil.FixturePath("testdata", "builds", "statusfail-badcontextdirs2i.yaml")
30+
oomkilled = exutil.FixturePath("testdata", "builds", "statusfail-oomkilled.yaml")
3031
builderImageFixture = exutil.FixturePath("testdata", "builds", "statusfail-fetchbuilderimage.yaml")
3132
pushToRegistryFixture = exutil.FixturePath("testdata", "builds", "statusfail-pushtoregistry.yaml")
3233
failedAssembleFixture = exutil.FixturePath("testdata", "builds", "statusfail-failedassemble.yaml")
@@ -130,6 +131,32 @@ var _ = g.Describe("[Feature:Builds][Slow] update failure status", func() {
130131
})
131132
})
132133

134+
// Verifies that a build whose pod is killed for memory consumption ends up
// with the OutOfMemoryKilled status reason and message, and that the build
// failed event is emitted.
g.Describe("Build status OutOfMemoryKilled", func() {
	g.It("should contain OutOfMemoryKilled failure reason and message", func() {
		err := oc.Run("create").Args("-f", oomkilled).Execute()
		o.Expect(err).NotTo(o.HaveOccurred())

		br, err := exutil.StartBuildAndWait(oc, "statusfail-oomkilled", "--build-loglevel=5")
		o.Expect(err).NotTo(o.HaveOccurred())
		br.AssertFailure()
		br.DumpLogs()

		// Poll until the build reports the OutOfMemoryKilled reason. The
		// poll result is captured so that a timeout or a failed Get is
		// surfaced by the assertion below instead of being dropped.
		var build *buildapi.Build
		err = wait.PollImmediate(200*time.Millisecond, 30*time.Second, func() (bool, error) {
			current, getErr := oc.BuildClient().Build().Builds(oc.Namespace()).Get(br.Build.Name, metav1.GetOptions{})
			if getErr != nil {
				// Do not inspect the build when the lookup itself failed.
				return false, getErr
			}
			build = current
			return build.Status.Reason == buildapi.StatusReasonOutOfMemoryKilled, nil
		})
		o.Expect(err).NotTo(o.HaveOccurred())
		o.Expect(build.Status.Reason).To(o.Equal(buildapi.StatusReasonOutOfMemoryKilled))
		o.Expect(build.Status.Message).To(o.Equal(buildapi.StatusMessageOutOfMemoryKilled))

		exutil.CheckForBuildEvent(oc.KubeClient().Core(), br.Build, buildapi.BuildFailedEventReason, buildapi.BuildFailedEventMessage)
	})
})
159+
133160
g.Describe("Build status S2I bad context dir failure", func() {
134161
g.It("should contain the S2I bad context dir failure reason and message", func() {
135162
err := oc.Run("create").Args("-f", badContextDirS2ISrc).Execute()

test/extended/testdata/bindata.go

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
kind: BuildConfig
2+
apiVersion: v1
3+
metadata:
4+
name: statusfail-oomkilled
5+
spec:
6+
resources:
7+
limits:
8+
memory: 10Mi
9+
source:
10+
git:
11+
uri: "https://github.com/openshift/ruby-hello-world"
12+
strategy:
13+
type: Source
14+
sourceStrategy:
15+
from:
16+
kind: DockerImage
17+
name: centos/ruby-23-centos7:latest

0 commit comments

Comments
 (0)