Skip to content

Commit cb83d14

Browse files
tenzen-y and mszadkow authored
Release-1.8: Cherry-pick of #2243 (#2244)
* No cleaning up a job if the job is suspended.

Signed-off-by: Michal Szadkowski <[email protected]>
Signed-off-by: Yuki Iwai <[email protected]>
Co-authored-by: Michał Szadkowski <[email protected]>
1 parent a822688 commit cb83d14

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

pkg/controller.v1/common/job.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ func (jc *JobController) CleanupJob(runPolicy *apiv1.RunPolicy, jobStatus apiv1.
421421
currentTime := time.Now()
422422
metaObject, _ := job.(metav1.Object)
423423
ttl := runPolicy.TTLSecondsAfterFinished
424-
if ttl == nil {
424+
if ttl == nil || trainutil.IsJobSuspended(runPolicy) {
425425
return nil
426426
}
427427
duration := time.Second * time.Duration(*ttl)

pkg/controller.v1/tensorflow/job_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,30 @@ var _ = Describe("Test for controller.v1/common", func() {
663663
wantTFJobIsRemoved: false,
664664
wantErr: false,
665665
}),
666+
Entry("No error with completionTime is nil if suspended", &cleanUpCases{
667+
tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, nil),
668+
runPolicy: &kubeflowv1.RunPolicy{
669+
TTLSecondsAfterFinished: nil,
670+
Suspend: ptr.To(true),
671+
},
672+
jobStatus: kubeflowv1.JobStatus{
673+
CompletionTime: nil,
674+
},
675+
wantTFJobIsRemoved: false,
676+
wantErr: false,
677+
}),
678+
Entry("No error with TTL is set and completionTime is nil, if suspended", &cleanUpCases{
679+
tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](10)),
680+
runPolicy: &kubeflowv1.RunPolicy{
681+
TTLSecondsAfterFinished: ptr.To[int32](10),
682+
Suspend: ptr.To(true),
683+
},
684+
jobStatus: kubeflowv1.JobStatus{
685+
CompletionTime: nil,
686+
},
687+
wantTFJobIsRemoved: false,
688+
wantErr: false,
689+
}),
666690
Entry("Error is occurred since completionTime is nil", &cleanUpCases{
667691
tfJob: tftestutil.NewTFJobWithCleanupJobDelay(1, 2, 0, ptr.To[int32](10)),
668692
runPolicy: &kubeflowv1.RunPolicy{

0 commit comments

Comments
 (0)