diff --git a/src/codeflare_sdk/cli/codeflare_cli.py b/src/codeflare_sdk/cli/codeflare_cli.py index f8a5cbab7..78354695f 100644 --- a/src/codeflare_sdk/cli/codeflare_cli.py +++ b/src/codeflare_sdk/cli/codeflare_cli.py @@ -1,13 +1,21 @@ import click -import sys import os +from codeflare_sdk.cli.cli_utils import load_auth + cmd_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "commands")) class CodeflareContext: - def __init__(self, codeflare_path): - self.codeflare_path = codeflare_path + def __init__(self): + self.codeflare_path = _initialize_codeflare_folder() + + +def _initialize_codeflare_folder(): + codeflare_folder = os.path.expanduser("~/.codeflare") + if not os.path.exists(codeflare_folder): + os.makedirs(codeflare_folder) + return codeflare_folder class CodeflareCLI(click.MultiCommand): @@ -31,18 +39,11 @@ def get_command(self, ctx, name): return -def initialize_cli(ctx): - # Make .codeflare folder - codeflare_folder = os.path.expanduser("~/.codeflare") - if not os.path.exists(codeflare_folder): - os.makedirs(codeflare_folder) - ctx.obj = CodeflareContext(codeflare_folder) - - @click.command(cls=CodeflareCLI) @click.pass_context def cli(ctx): - initialize_cli(ctx) # Ran on every command + load_auth() + ctx.obj = CodeflareContext() # Ran on every command pass diff --git a/src/codeflare_sdk/cli/commands/delete.py b/src/codeflare_sdk/cli/commands/delete.py new file mode 100644 index 000000000..c1ec12451 --- /dev/null +++ b/src/codeflare_sdk/cli/commands/delete.py @@ -0,0 +1,23 @@ +import click + +from codeflare_sdk.cluster.cluster import get_cluster + + +@click.group() +def cli(): + """ + Delete a specified resource from the Kubernetes cluster + """ + pass + + +@cli.command() +@click.argument("name", type=str) +@click.option("--namespace", type=str, default="default") +def raycluster(name, namespace): + """ + Delete a specified RayCluster from the Kubernetes cluster + """ + cluster = get_cluster(name, namespace) + cluster.down() + click.echo(f"Cluster deleted successfully") diff --git a/src/codeflare_sdk/cli/commands/submit.py b/src/codeflare_sdk/cli/commands/submit.py new file mode 100644 index 000000000..8a476d602 --- /dev/null +++ b/src/codeflare_sdk/cli/commands/submit.py @@ -0,0 +1,32 @@ +import click + +from codeflare_sdk.cluster.cluster import Cluster + + +@click.group() +def cli(): + """ + Submit a defined resource to the Kubernetes cluster + """ + pass + + +@cli.command() +@click.argument("name", type=str) +@click.option("--wait", is_flag=True) +def raycluster(name, wait): + """ + Submit a defined RayCluster to the Kubernetes cluster + """ + cluster = Cluster.from_definition_yaml(name + ".yaml") + if not cluster: + click.echo( + "Error submitting RayCluster. Make sure the RayCluster is defined before submitting it" + ) + return + if not wait: + cluster.up() + click.echo("Cluster submitted successfully") + return + cluster.up() + cluster.wait_ready() diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index ff92bfcf0..b0075dfc3 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -307,7 +307,8 @@ def torchx_config( def from_k8_cluster_object(rc): machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") - if "orderedinstance" in rc["metadata"]["labels"] + if "labels" in rc["metadata"] + and "orderedinstance" in rc["metadata"]["labels"] else [] ) local_interactive = ( @@ -347,6 +348,58 @@ def from_k8_cluster_object(rc): ) return Cluster(cluster_config) + def from_definition_yaml(yaml_path): + try: + with open(yaml_path) as yaml_file: + rc = yaml.load(yaml_file, Loader=yaml.FullLoader) + machine_types = ( + rc["metadata"]["labels"]["orderedinstance"].split("_") + if "labels" in rc["metadata"] + and "orderedinstance" in rc["metadata"]["labels"] + else [] + ) + worker_group_specs = rc["spec"]["resources"]["GenericItems"][0][ + "generictemplate" + ]["spec"]["workerGroupSpecs"][0] + local_interactive = ( + "volumeMounts" + in worker_group_specs["template"]["spec"]["containers"][0] + ) + cluster_config = ClusterConfiguration( + name=rc["metadata"]["name"], + namespace=rc["metadata"]["namespace"], + machine_types=machine_types, + min_worker=worker_group_specs["minReplicas"], + max_worker=worker_group_specs["maxReplicas"], + min_cpus=worker_group_specs["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["cpu"], + max_cpus=worker_group_specs["template"]["spec"]["containers"][0][ + "resources" + ]["limits"]["cpu"], + min_memory=int( + worker_group_specs["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["memory"][:-1] + ), + max_memory=int( + worker_group_specs["template"]["spec"]["containers"][0][ + "resources" + ]["limits"]["memory"][:-1] + ), + gpu=worker_group_specs["template"]["spec"]["containers"][0][ + "resources" + ]["requests"]["nvidia.com/gpu"], + instascale=True if machine_types else False, + image=worker_group_specs["template"]["spec"]["containers"][0][ + "image" + ], + local_interactive=local_interactive, + ) + return Cluster(cluster_config) + except IOError: + return None + def local_client_url(self): if self.config.local_interactive == True: ingress_domain = _get_ingress_domain() @@ -406,8 +459,8 @@ def get_current_namespace(): # pragma: no cover def get_cluster(cluster_name: str, namespace: str = "default"): try: - config.load_kube_config() - api_instance = client.CustomObjectsApi() + config_check() + api_instance = client.CustomObjectsApi(api_config_handler()) rcs = api_instance.list_namespaced_custom_object( group="ray.io", version="v1alpha1", diff --git a/tests/cli-test-case.yaml b/tests/cli-test-case.yaml index 0788996a7..c312abfaa 100644 --- a/tests/cli-test-case.yaml +++ b/tests/cli-test-case.yaml @@ -4,7 +4,7 @@ metadata: labels: orderedinstance: cpu.small_gpu.large name: cli-test-cluster - namespace: ns + namespace: default spec: priority: 9 resources: @@ -36,7 +36,7 @@ spec: appwrapper.mcad.ibm.com: cli-test-cluster controller-tools.k8s.io: '1.0' name: cli-test-cluster - namespace: ns + namespace: default spec: autoscalerOptions: idleTimeoutSeconds: 60 @@ -184,7 +184,7 @@ spec: labels: odh-ray-cluster-service: cli-test-cluster-head-svc name: ray-dashboard-cli-test-cluster - namespace: ns + namespace: default spec: port: targetPort: dashboard diff --git a/tests/unit_test.py b/tests/unit_test.py index f8d12580a..783ec928f 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -87,12 +87,13 @@ def test_cli_working(): assert result.exit_code == 0 -def test_cluster_definition_cli(): +def test_cluster_definition_cli(mocker): + mocker.patch.object(client, "ApiClient") runner = CliRunner() define_cluster_command = """ define raycluster --name=cli-test-cluster - --namespace=ns + --namespace=default --min_worker=1 --max_worker=2 --min_cpus=3 @@ -105,7 +106,10 @@ def test_cluster_definition_cli(): --image_pull_secrets='["cli-test-pull-secret"]' """ result = runner.invoke(cli, define_cluster_command) - assert result.output == "Written to: cli-test-cluster.yaml\n" + assert ( + result.output + == "No authentication found, trying default kubeconfig\nWritten to: cli-test-cluster.yaml\n" + ) assert filecmp.cmp( "cli-test-cluster.yaml", f"{parent}/tests/cli-test-case.yaml", shallow=True ) @@ -120,7 +124,10 @@ def test_login_cli(mocker): --token=testtoken """ login_result = runner.invoke(cli, k8s_login_command) - assert login_result.output == "Logged into 'testserver:6443'\n" + assert ( + login_result.output + == "No authentication found, trying default kubeconfig\nLogged into 'testserver:6443'\n" + ) try: auth_file_path = os.path.expanduser("~/.codeflare/auth") with open(auth_file_path, "rb") as file: @@ -170,6 +177,37 @@ def test_load_auth(): assert sdk_auth.api_client is not None +def test_cluster_submission_cli(mocker): + mocker.patch.object(client, "ApiClient") + runner = CliRunner() + submit_cluster_command = """ + submit raycluster + cli-test-cluster + """ + result = runner.invoke(cli, submit_cluster_command) + + assert result.exit_code == 0 + assert "Cluster submitted successfully" in result.output + + +def test_cluster_deletion_cli(mocker): + mocker.patch.object(client, "ApiClient") + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch( + "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", + side_effect=get_ray_obj, + ) + runner = CliRunner() + delete_cluster_command = """ + delete raycluster + quicktest + """ + result = runner.invoke(cli, delete_cluster_command) + + assert result.exit_code == 0 + assert "Cluster deleted successfully" in result.output + + # For mocking openshift client results fake_res = openshift.Result("fake")