Skip to content

Commit fba853b

Browse files
author
Akshay Chitneni
committed
Adding cel validations on trainjob crd
Signed-off-by: Akshay Chitneni <[email protected]>
1 parent 126110f commit fba853b

File tree

4 files changed

+223
-8
lines changed

4 files changed

+223
-8
lines changed

manifests/v2/base/crds/kubeflow.org_trainjobs.yaml

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -197,15 +197,14 @@ spec:
197197
They will be merged with the TrainingRuntime values.
198198
type: object
199199
managedBy:
200-
description: |-
201-
ManagedBy is used to indicate the controller or entity that manages a TrainJob.
202-
The value must be either an empty, `kubeflow.org/trainjob-controller` or
203-
`kueue.x-k8s.io/multikueue`. The built-in TrainJob controller reconciles TrainJob which
204-
don't have this field at all or the field value is the reserved string
205-
`kubeflow.org/trainjob-controller`, but delegates reconciling TrainJobs
206-
with a 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable.
207-
Defaults to `kubeflow.org/trainjob-controller`
200+
default: kubeflow.org/trainjob-controller
208201
type: string
202+
x-kubernetes-validations:
203+
- message: ManagedBy must be kubeflow.org/trainjob-controller or kueue.x-k8s.io/multikueue
204+
if set
205+
rule: self in ['kubeflow.org/trainjob-controller', 'kueue.x-k8s.io/multikueue']
206+
- message: ManagedBy value is immutable
207+
rule: self == oldSelf
209208
modelConfig:
210209
description: Configuration of the pre-trained and trained model.
211210
properties:
@@ -2733,6 +2732,7 @@ spec:
27332732
type: object
27342733
type: array
27352734
suspend:
2735+
default: false
27362736
description: |-
27372737
Whether the controller should suspend the running TrainJob.
27382738
Defaults to false.
@@ -2941,16 +2941,22 @@ spec:
29412941
description: Reference to the training runtime.
29422942
properties:
29432943
apiGroup:
2944+
default: kubeflow.org
29442945
description: |-
29452946
APIGroup of the runtime being referenced.
29462947
Defaults to `kubeflow.org`.
29472948
type: string
29482949
kind:
2950+
default: ClusterTrainingRuntime
29492951
description: |-
29502952
Kind of the runtime being referenced.
29512953
It must be one of TrainingRuntime or ClusterTrainingRuntime.
29522954
Defaults to ClusterTrainingRuntime.
29532955
type: string
2956+
x-kubernetes-validations:
2957+
- message: Kind must be ClusterTrainingRuntime or TrainingRuntime
2958+
if set
2959+
rule: self in ['ClusterTrainingRuntime', 'TrainingRuntime']
29542960
name:
29552961
description: |-
29562962
Name of the runtime being referenced.
@@ -2963,6 +2969,9 @@ spec:
29632969
required:
29642970
- trainingRuntimeRef
29652971
type: object
2972+
x-kubernetes-validations:
2973+
- message: ManagedBy is required once set
2974+
rule: '!has(oldSelf.managedBy) || has(self.managedBy)'
29662975
status:
29672976
description: Current status of TrainJob.
29682977
properties:

pkg/apis/kubeflow.org/v2alpha1/trainjob_types.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type TrainJobList struct {
5656
}
5757

5858
// TrainJobSpec represents specification of the desired TrainJob.
59+
// +kubebuilder:validation:XValidation:rule="!has(oldSelf.managedBy) || has(self.managedBy)", message="ManagedBy is required once set"
5960
type TrainJobSpec struct {
6061
// Reference to the training runtime.
6162
TrainingRuntimeRef TrainingRuntimeRef `json:"trainingRuntimeRef"`
@@ -82,6 +83,7 @@ type TrainJobSpec struct {
8283

8384
// Whether the controller should suspend the running TrainJob.
8485
// Defaults to false.
86+
// +kubebuilder:default=false
8587
Suspend *bool `json:"suspend,omitempty"`
8688

8789
// ManagedBy is used to indicate the controller or entity that manages a TrainJob.
@@ -91,6 +93,10 @@ type TrainJobSpec struct {
9193
// `kubeflow.org/trainjob-controller`, but delegates reconciling TrainJobs
9294
// with a 'kueue.x-k8s.io/multikueue' to the Kueue. The field is immutable.
9395
// Defaults to `kubeflow.org/trainjob-controller`
96+
//
// +kubebuilder:default="kubeflow.org/trainjob-controller"
98+
// +kubebuilder:validation:XValidation:rule="self in ['kubeflow.org/trainjob-controller', 'kueue.x-k8s.io/multikueue']", message="ManagedBy must be kubeflow.org/trainjob-controller or kueue.x-k8s.io/multikueue if set"
99+
// +kubebuilder:validation:XValidation:rule="self == oldSelf", message="ManagedBy value is immutable"
94100
ManagedBy *string `json:"managedBy,omitempty"`
95101
}
96102

@@ -103,11 +109,14 @@ type TrainingRuntimeRef struct {
103109

104110
// APIGroup of the runtime being referenced.
105111
// Defaults to `kubeflow.org`.
112+
// +kubebuilder:default="kubeflow.org"
106113
APIGroup *string `json:"apiGroup,omitempty"`
107114

108115
// Kind of the runtime being referenced.
109116
// It must be one of TrainingRuntime or ClusterTrainingRuntime.
110117
// Defaults to ClusterTrainingRuntime.
118+
// +kubebuilder:default="ClusterTrainingRuntime"
119+
// +kubebuilder:validation:XValidation:rule="self in ['ClusterTrainingRuntime', 'TrainingRuntime']", message="Kind must be ClusterTrainingRuntime or TrainingRuntime if set"
111120
Kind *string `json:"kind,omitempty"`
112121
}
113122

test/integration/cel.v2/suite_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
Copyright 2024 The Kubeflow Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package cel_v2
18+
19+
import (
20+
"context"
21+
"testing"
22+
23+
"github.com/onsi/ginkgo/v2"
24+
"github.com/onsi/gomega"
25+
"k8s.io/client-go/rest"
26+
"sigs.k8s.io/controller-runtime/pkg/client"
27+
28+
"github.com/kubeflow/training-operator/test/integration/framework"
29+
)
30+
31+
// Shared suite state. These variables are assigned by the BeforeAll hook of
// the "TrainJob CRD" Describe container in this package and then used by the
// specs (k8sClient and ctx for API calls, fwk for teardown).
var (
32+
cfg *rest.Config
33+
k8sClient client.Client
34+
ctx context.Context
35+
fwk *framework.Framework
36+
)
37+
38+
// TestAPIs is the go test entry point for this package: it registers
// ginkgo.Fail as the Gomega fail handler and runs the specs under the
// suite description "v2 CRD Suite".
func TestAPIs(t *testing.T) {
39+
gomega.RegisterFailHandler(ginkgo.Fail)
40+
41+
ginkgo.RunSpecs(t, "v2 CRD Suite")
42+
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
package cel_v2
2+
3+
import (
4+
kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1"
5+
"github.com/onsi/ginkgo/v2"
6+
"github.com/onsi/gomega"
7+
corev1 "k8s.io/api/core/v1"
8+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
9+
"sigs.k8s.io/controller-runtime/pkg/client"
10+
11+
"github.com/kubeflow/training-operator/test/integration/framework"
12+
)
13+
14+
// Specs exercising the validation rules on the TrainJob CRD. The container is
// declared with ginkgo.Ordered; BeforeAll/AfterAll start and tear down the
// shared framework, and every spec gets its own generated namespace.
var _ = ginkgo.Describe("TrainJob CRD", ginkgo.Ordered, func() {
15+
// Namespace created for the current spec by BeforeEach.
var ns *corev1.Namespace
16+
17+
// Initialise the framework and manager; the results are stored in variables
// declared outside this block (plain assignment, not :=).
ginkgo.BeforeAll(func() {
18+
fwk = &framework.Framework{}
19+
cfg = fwk.Init()
20+
ctx, k8sClient = fwk.RunManager(cfg)
21+
})
22+
ginkgo.AfterAll(func() {
23+
fwk.Teardown()
24+
})
25+
26+
// Create a namespace with a generated "trainjob-validation-" name before each spec.
ginkgo.BeforeEach(func() {
27+
ns = &corev1.Namespace{
28+
TypeMeta: metav1.TypeMeta{
29+
APIVersion: corev1.SchemeGroupVersion.String(),
30+
Kind: "Namespace",
31+
},
32+
ObjectMeta: metav1.ObjectMeta{
33+
GenerateName: "trainjob-validation-",
34+
},
35+
}
36+
gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())
37+
})
38+
39+
ginkgo.When("TrainJob CR Validation", func() {
40+
// Remove every TrainJob in the spec's namespace after each spec.
// NOTE(review): the namespace itself is not deleted anywhere in this block.
ginkgo.AfterEach(func() {
41+
gomega.Expect(k8sClient.DeleteAllOf(ctx, &kubeflowv2.TrainJob{}, client.InNamespace(ns.Name))).Should(gomega.Succeed())
42+
})
43+
44+
// Kind "TrainingRuntime" and managedBy "kubeflow.org/trainjob-controller"
// are both set; creation is expected to succeed.
ginkgo.It("Should succeed in creating TrainJob", func() {
45+
46+
apiGroup := "kubeflow.org"
47+
kind := "TrainingRuntime"
48+
managedBy := "kubeflow.org/trainjob-controller"
49+
50+
trainingRuntimeRef := kubeflowv2.TrainingRuntimeRef{
51+
Name: "InvalidRuntimeRef",
52+
APIGroup: &apiGroup,
53+
Kind: &kind,
54+
}
55+
jobSpec := kubeflowv2.TrainJobSpec{
56+
TrainingRuntimeRef: trainingRuntimeRef,
57+
ManagedBy: &managedBy,
58+
}
59+
trainJob := &kubeflowv2.TrainJob{
60+
TypeMeta: metav1.TypeMeta{
61+
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
62+
Kind: "TrainJob",
63+
},
64+
ObjectMeta: metav1.ObjectMeta{
65+
Name: "alpha",
66+
Namespace: ns.Name,
67+
},
68+
Spec: jobSpec,
69+
}
70+
71+
err := k8sClient.Create(ctx, trainJob)
72+
gomega.Expect(err).Should(gomega.Succeed())
73+
})
74+
75+
// Kind is set to "InvalidRuntime"; creation is expected to return an error.
ginkgo.It("Should fail in creating TrainJob with invalid spec.trainingRuntimeRef", func() {
76+
77+
apiGroup := "kubeflow.org"
78+
kind := "InvalidRuntime"
79+
80+
trainingRuntimeRef := kubeflowv2.TrainingRuntimeRef{
81+
Name: "InvalidRuntimeRef",
82+
APIGroup: &apiGroup,
83+
Kind: &kind,
84+
}
85+
jobSpec := kubeflowv2.TrainJobSpec{
86+
TrainingRuntimeRef: trainingRuntimeRef,
87+
}
88+
trainJob := &kubeflowv2.TrainJob{
89+
TypeMeta: metav1.TypeMeta{
90+
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
91+
Kind: "TrainJob",
92+
},
93+
ObjectMeta: metav1.ObjectMeta{
94+
Name: "invalid-trainjob",
95+
Namespace: ns.Name,
96+
},
97+
Spec: jobSpec,
98+
}
99+
gomega.Expect(k8sClient.Create(ctx, trainJob)).To(gomega.HaveOccurred())
100+
})
101+
102+
// managedBy is set to "invalidManagedBy"; creation is expected to return an error.
// NOTE(review): TrainingRuntimeRef is left zero-valued here and the assertion
// accepts any error, so the failure is not pinned to the managedBy rule — confirm.
ginkgo.It("Should fail in creating TrainJob with invalid spec.managedBy", func() {
103+
managedBy := "invalidManagedBy"
104+
jobSpec := kubeflowv2.TrainJobSpec{
105+
ManagedBy: &managedBy,
106+
}
107+
trainJob := &kubeflowv2.TrainJob{
108+
TypeMeta: metav1.TypeMeta{
109+
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
110+
Kind: "TrainJob",
111+
},
112+
ObjectMeta: metav1.ObjectMeta{
113+
Name: "invalid-trainjob",
114+
Namespace: ns.Name,
115+
},
116+
Spec: jobSpec,
117+
}
118+
gomega.Expect(k8sClient.Create(ctx, trainJob)).To(gomega.HaveOccurred())
119+
})
120+
121+
// Create a TrainJob with managedBy "kubeflow.org/trainjob-controller", then
// try to change it to "kueue.x-k8s.io/multikueue"; the update is expected to fail.
ginkgo.It("Should fail in updating spec.managedBy", func() {
122+
123+
apiGroup := "kubeflow.org"
124+
kind := "TrainingRuntime"
125+
managedBy := "kubeflow.org/trainjob-controller"
126+
127+
trainingRuntimeRef := kubeflowv2.TrainingRuntimeRef{
128+
Name: "InvalidRuntimeRef",
129+
APIGroup: &apiGroup,
130+
Kind: &kind,
131+
}
132+
jobSpec := kubeflowv2.TrainJobSpec{
133+
TrainingRuntimeRef: trainingRuntimeRef,
134+
ManagedBy: &managedBy,
135+
}
136+
trainJob := &kubeflowv2.TrainJob{
137+
TypeMeta: metav1.TypeMeta{
138+
APIVersion: kubeflowv2.SchemeGroupVersion.String(),
139+
Kind: "TrainJob",
140+
},
141+
ObjectMeta: metav1.ObjectMeta{
142+
Name: "alpha",
143+
Namespace: ns.Name,
144+
},
145+
Spec: jobSpec,
146+
}
147+
148+
gomega.Expect(k8sClient.Create(ctx, trainJob)).Should(gomega.Succeed())
149+
updatedManagedBy := "kueue.x-k8s.io/multikueue"
150+
jobSpec.ManagedBy = &updatedManagedBy
151+
trainJob.Spec = jobSpec
152+
gomega.Expect(k8sClient.Update(ctx, trainJob)).To(gomega.HaveOccurred())
153+
})
154+
})
155+
})

0 commit comments

Comments
 (0)