-
Notifications
You must be signed in to change notification settings - Fork 786
support monitor podgroup #1688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
support monitor podgroup #1688
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package util | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this file used? |
||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
|
||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
) | ||
|
||
// GvkListFlag is the custom flag to parse GroupVersionKind list for trial resources. | ||
type GvkListFlag []schema.GroupVersionKind | ||
|
||
// Set is the method to convert gvk to string value | ||
func (flag *GvkListFlag) String() string { | ||
gvkStrings := []string{} | ||
for _, x := range []schema.GroupVersionKind(*flag) { | ||
gvkStrings = append(gvkStrings, x.String()) | ||
} | ||
return strings.Join(gvkStrings, ",") | ||
} | ||
|
||
// Set is the method to set gvk from string flag value | ||
func (flag *GvkListFlag) Set(value string) error { | ||
gvk, _ := schema.ParseKindArg(value) | ||
if gvk == nil { | ||
return fmt.Errorf("Invalid GroupVersionKind: %v", value) | ||
} | ||
*flag = append(*flag, *gvk) | ||
return nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,9 @@ import ( | |
"fmt" | ||
"time" | ||
|
||
"github.com/kubeflow/training-operator/pkg/config" | ||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||
|
||
"github.com/go-logr/logr" | ||
commonv1 "github.com/kubeflow/common/pkg/apis/common/v1" | ||
"github.com/kubeflow/common/pkg/controller.v1/common" | ||
|
@@ -215,6 +218,51 @@ func (r *PyTorchJobReconciler) SetupWithManager(mgr ctrl.Manager) error { | |
return err | ||
} | ||
|
||
// inject watching for job related service | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this duplicate? |
||
if err = c.Watch(&source.Kind{Type: &corev1.Service{}}, &handler.EnqueueRequestForOwner{ | ||
IsController: true, | ||
OwnerType: &kubeflowv1.PyTorchJob{}, | ||
}, predicate.Funcs{ | ||
CreateFunc: util.OnDependentCreateFunc(r.Expectations), | ||
UpdateFunc: util.OnDependentUpdateFunc(&r.JobController), | ||
DeleteFunc: util.OnDependentDeleteFunc(r.Expectations), | ||
}); err != nil { | ||
return err | ||
} | ||
|
||
// inject watching for job related objects,such as PodGroup | ||
if config.Config.WatchedResources != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you rebase your code and add it for all frameworks? Also, the volcano static watches in the current code should be removed for this dynamic watcher. #1666 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I wrote this feature, I did not realize that it was already supported and merged. @_@ |
||
gvkList := config.Config.WatchedResources | ||
|
||
// Watch for changes in custom resources | ||
for _, gvk := range gvkList { | ||
// Check if CRD is installed on the cluster. | ||
_, err := mgr.GetRESTMapper().RESTMapping(gvk.GroupKind(), gvk.Version) | ||
if err != nil { | ||
if meta.IsNoMatchError(err) { | ||
logrus.Info("Job watch error. CRD might be missing. Please install CRD and restart pytorchjob-controller", | ||
"CRD Group ", gvk.Group, ",CRD Version ", gvk.Version, ",CRD Kind ", gvk.Kind) | ||
continue | ||
} | ||
return err | ||
} | ||
// Watch for the CRD changes. | ||
unstructuredJob := &unstructured.Unstructured{} | ||
unstructuredJob.SetGroupVersionKind(gvk) | ||
err = c.Watch( | ||
&source.Kind{Type: unstructuredJob}, | ||
&handler.EnqueueRequestForOwner{ | ||
IsController: true, | ||
OwnerType: &kubeflowv1.PyTorchJob{}, | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
logrus.Info("Job watch added successfully", | ||
"CRD Group ", gvk.Group, ",CRD Version ", gvk.Version, ",CRD Kind ", gvk.Kind) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the example, you can provide pod group resource for easy reference