Skip to content

fix imagePullBackOff doc #7679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions docs/additional-configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -678,9 +678,10 @@ with the tekton controller and the affinity assistant is enabled.
Tekton pipelines has adopted a fail fast strategy with a taskRun failing with `TaskRunImagePullFailed` in case of an
`imagePullBackOff`. This can be limited in some cases, and it generally depends on the infrastructure. To allow the
cluster operators to decide whether to wait in case of an `imagePullBackOff`, a setting is available to configure
the wait time in minutes such that the controller will wait for the specified duration before declaring a failure.
the wait time such that the controller will wait for the specified duration before declaring a failure.
For example, with the following `config-defaults`, the controller does not mark the taskRun as failure for 5 minutes since
the pod is scheduled in case the image pull fails with `imagePullBackOff`.
the pod is scheduled in case the image pull fails with `imagePullBackOff`. The `default-imagepullbackoff-timeout` is
of type `time.Duration` and can be set to a duration such as "1m", "5m", "10s", "1h", etc.
See issue https://github.com/tektoncd/pipeline/issues/5987 for more details.

```yaml
Expand All @@ -690,7 +691,7 @@ metadata:
name: config-defaults
namespace: tekton-pipelines
data:
default-imagepullbackoff-timeout: "5"
default-imagepullbackoff-timeout: "5m"
```

## Next steps
Expand Down
8 changes: 4 additions & 4 deletions pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func (c *Reconciler) checkPodFailed(ctx context.Context, tr *v1.TaskRun) (bool,
if imagePullBackOffTimeOut.Seconds() != 0 {
p, err := c.KubeClientSet.CoreV1().Pods(tr.Namespace).Get(ctx, tr.Status.PodName, metav1.GetOptions{})
if err != nil {
message := fmt.Sprintf(`The step %q in TaskRun %q failed to pull the image %q and the pod with error: "%s."`, step.Name, tr.Name, step.ImageID, err)
message := fmt.Sprintf(`the step %q in TaskRun %q failed to pull the image %q and the pod with error: "%s."`, step.Name, tr.Name, step.ImageID, err)
return true, v1.TaskRunReasonImagePullFailed, message
}
for _, condition := range p.Status.Conditions {
Expand All @@ -249,7 +249,7 @@ func (c *Reconciler) checkPodFailed(ctx context.Context, tr *v1.TaskRun) (bool,
}
}
image := step.ImageID
message := fmt.Sprintf(`The step %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, step.Name, tr.Name, image, step.Waiting.Message)
message := fmt.Sprintf(`the step %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, step.Name, tr.Name, image, step.Waiting.Message)
return true, v1.TaskRunReasonImagePullFailed, message
}
}
Expand All @@ -263,7 +263,7 @@ func (c *Reconciler) checkPodFailed(ctx context.Context, tr *v1.TaskRun) (bool,
if imagePullBackOffTimeOut.Seconds() != 0 {
p, err := c.KubeClientSet.CoreV1().Pods(tr.Namespace).Get(ctx, tr.Status.PodName, metav1.GetOptions{})
if err != nil {
message := fmt.Sprintf(`The sidecar %q in TaskRun %q failed to pull the image %q and the pod with error: "%s."`, sidecar.Name, tr.Name, sidecar.ImageID, err)
message := fmt.Sprintf(`the sidecar %q in TaskRun %q failed to pull the image %q and the pod with error: "%s."`, sidecar.Name, tr.Name, sidecar.ImageID, err)
return true, v1.TaskRunReasonImagePullFailed, message
}
for _, condition := range p.Status.Conditions {
Expand All @@ -278,7 +278,7 @@ func (c *Reconciler) checkPodFailed(ctx context.Context, tr *v1.TaskRun) (bool,
}
}
image := sidecar.ImageID
message := fmt.Sprintf(`The sidecar %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, sidecar.Name, tr.Name, image, sidecar.Waiting.Message)
message := fmt.Sprintf(`the sidecar %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, sidecar.Name, tr.Name, image, sidecar.Waiting.Message)
return true, v1.TaskRunReasonImagePullFailed, message
}
}
Expand Down
12 changes: 6 additions & 6 deletions pkg/reconciler/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ status:
- reason: ToBeRetried
status: Unknown
type: Succeeded
message: "The step \"unamed-0\" in TaskRun \"test-taskrun-run-retry-pod-failure\" failed to pull the image \"\". The pod errored with the message: \".\""
message: "the step \"unamed-0\" in TaskRun \"test-taskrun-run-retry-pod-failure\" failed to pull the image \"\". The pod errored with the message: \".\""
steps:
- container: step-unamed-0
name: unamed-0
Expand All @@ -1592,7 +1592,7 @@ status:
- reason: "TaskRunImagePullFailed"
status: "False"
type: Succeeded
message: "The step \"unamed-0\" in TaskRun \"test-taskrun-run-retry-pod-failure\" failed to pull the image \"\". The pod errored with the message: \".\""
message: "the step \"unamed-0\" in TaskRun \"test-taskrun-run-retry-pod-failure\" failed to pull the image \"\". The pod errored with the message: \".\""
startTime: "2021-12-31T23:59:59Z"
completionTime: "2022-01-01T00:00:00Z"
podName: test-taskrun-run-retry-pod-failure-pod
Expand Down Expand Up @@ -2785,12 +2785,12 @@ status:
Type: apis.ConditionSucceeded,
Status: corev1.ConditionFalse,
Reason: "TaskRunImagePullFailed",
Message: fmt.Sprintf(`The %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "%s."`, tc.failure, stepNumber, tc.message),
Message: fmt.Sprintf(`the %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "%s."`, tc.failure, stepNumber, tc.message),
}

wantEvents := []string{
"Normal Started ",
fmt.Sprintf(`Warning Failed The %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "%s.`, tc.failure, stepNumber, tc.message),
fmt.Sprintf(`Warning Failed the %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "%s.`, tc.failure, stepNumber, tc.message),
}

d := test.Data{
Expand Down Expand Up @@ -2851,8 +2851,8 @@ status:
}
// the error message includes the error if the pod is not found
if tc.podNotFound {
expectedStatus.Message = fmt.Sprintf(`The %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever" and the pod with error: "%s."`, tc.failure, stepNumber, tc.message)
wantEvents[1] = fmt.Sprintf(`Warning Failed The %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever" and the pod with error: "%s.`, tc.failure, stepNumber, tc.message)
expectedStatus.Message = fmt.Sprintf(`the %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever" and the pod with error: "%s."`, tc.failure, stepNumber, tc.message)
wantEvents[1] = fmt.Sprintf(`Warning Failed the %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever" and the pod with error: "%s.`, tc.failure, stepNumber, tc.message)
}
condition := newTr.Status.GetCondition(apis.ConditionSucceeded)
if d := cmp.Diff(expectedStatus, condition, ignoreLastTransitionTime); d != "" {
Expand Down