@@ -17,39 +17,27 @@ limitations under the License.
17
17
package odh
18
18
19
19
import (
20
+ "fmt"
21
+ "os"
22
+ "strings"
20
23
"testing"
21
24
22
25
. "github.com/onsi/gomega"
23
26
. "github.com/project-codeflare/codeflare-common/support"
24
27
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
25
28
)
26
29
27
- func TestRayFinetuneDemo (t * testing.T ) {
28
- mnistRayLlmFinetune (t , 1 )
30
// TestRayFinetuneLlmDeepspeedDemo runs the rayFinetuneLlmDeepspeed scenario with numGpus set to 1.
+ func TestRayFinetuneLlmDeepspeedDemo (t * testing.T ) {
31
+ rayFinetuneLlmDeepspeed (t , 1 )
29
32
}
30
33
31
- func mnistRayLlmFinetune (t * testing.T , numGpus int ) {
34
+ func rayFinetuneLlmDeepspeed (t * testing.T , numGpus int ) {
32
35
test := With (t )
33
36
34
37
// Create a namespace
35
38
namespace := test .NewTestNamespace ()
36
-
37
- // Test configuration
38
- jupyterNotebookConfigMapFileName := "ray_finetune_llm_deepspeed.ipynb"
39
-
40
- // Test configuration
41
- configMap := map [string ][]byte {
42
- // MNIST Ray Notebook
43
- jupyterNotebookConfigMapFileName : ReadFile (test , "resources/ray_finetune_demo/ray_finetune_llm_deepspeed.ipynb" ),
44
- "ray_finetune_llm_deepspeed.py" : ReadFile (test , "resources/ray_finetune_demo/ray_finetune_llm_deepspeed.py" ),
45
- "ray_finetune_requirements.txt" : ReadRayFinetuneRequirementsTxt (test ),
46
- "create_dataset.py" : ReadFile (test , "resources/ray_finetune_demo/create_dataset.py" ),
47
- "lora.json" : ReadFile (test , "resources/ray_finetune_demo/lora.json" ),
48
- "zero_3_llama_2_7b.json" : ReadFile (test , "resources/ray_finetune_demo/zero_3_llama_2_7b.json" ),
49
- "utils.py" : ReadFile (test , "resources/ray_finetune_demo/utils.py" ),
50
- }
51
-
52
- config := CreateConfigMap (test , namespace .Name , configMap )
39
+ var workingDirectory , err = os .Getwd ()
40
+ test .Expect (err ).ToNot (HaveOccurred ())
53
41
54
42
// Define the regular (non-admin) user
55
43
userName := GetNotebookUserName (test )
@@ -58,6 +46,53 @@ func mnistRayLlmFinetune(t *testing.T, numGpus int) {
58
46
// Create role binding with Namespace specific admin cluster role
59
47
CreateUserRoleBindingWithClusterRole (test , userName , namespace .Name , "admin" )
60
48
49
+ // Substitutions (original text -> replacement) to apply to the ray_finetune_llm_deepspeed.ipynb notebook content
50
+ requiredChangesInNotebook := map [string ]string {
51
+ "import os" : "import os,time,sys" ,
52
+ "import sys" : "!cp /opt/app-root/notebooks/* ./" ,
53
+ "from codeflare_sdk.cluster.auth import TokenAuthentication" : "from codeflare_sdk.cluster.auth import TokenAuthentication\\ n\" ,\n \t \" from codeflare_sdk.job import RayJobClient" ,
54
+ "token = ''" : fmt .Sprintf ("token = '%s'" , userToken ),
55
+ "server = ''" : fmt .Sprintf ("server = '%s'" , GetOpenShiftApiUrl (test )),
56
+ "namespace='ray-finetune-llm-deepspeed'" : fmt .Sprintf ("namespace='%s'" , namespace .Name ),
57
+ "head_cpus=16" : "head_cpus=2" ,
58
+ "head_gpus=1" : "head_gpus=0" ,
59
+ "num_workers=7" : "num_workers=1" ,
60
+ "min_cpus=16" : "min_cpus=4" ,
61
+ "max_cpus=16" : "max_cpus=4" ,
62
+ "min_memory=128" : "min_memory=48" ,
63
+ "max_memory=256" : "max_memory=48" ,
64
+ "head_memory=128" : "head_memory=48" ,
65
+ "num_gpus=1" : fmt .Sprintf ("worker_extended_resource_requests={'nvidia.com/gpu': %d},\\ n\" ,\n \t \" write_to_file=True,\\ n\" ,\n \t \" verify_tls=False" , numGpus ),
66
+ "image='quay.io/rhoai/ray:2.23.0-py39-cu121'" : fmt .Sprintf ("image='%s'" , GetRayImage ()),
67
+ "client = cluster.job_client" : "ray_dashboard = cluster.cluster_dashboard_uri()\\ n\" ,\n \t \" header = {\\ \" Authorization\\ \" : \\ \" Bearer " + userToken + "\\ \" }\\ n\" ,\n \t \" client = RayJobClient(address=ray_dashboard, headers=header, verify=False)\\ n" ,
68
+ "--num-devices=8" : fmt .Sprintf ("--num-devices=%d" , numGpus ),
69
+ "--num-epochs=3" : fmt .Sprintf ("--num-epochs=%d" , 1 ),
70
+ "--ds-config=./deepspeed_configs/zero_3_llama_2_7b.json" : "--ds-config=./zero_3_llama_2_7b.json \\ \" \\ n\" ,\n \t \" \\ \" --lora-config=./lora.json \\ \" \\ n\" ,\n \t \" \\ \" --as-test" ,
71
+ "'pip': 'requirements.txt'" : "'pip': '/opt/app-root/src/requirements.txt'" ,
72
+ "'working_dir': './'" : "'working_dir': '/opt/app-root/src'" ,
73
+ "client.stop_job(submission_id)" : "finished = False\\ n\" ,\n \t \" while not finished:\\ n\" ,\n \t \" time.sleep(1)\\ n\" ,\n \t \" status = client.get_job_status(submission_id)\\ n\" ,\n \t \" finished = (status == \\ \" SUCCEEDED\\ \" )\\ n\" ,\n \t \" if finished:\\ n\" ,\n \t \" print(\\ \" Job completed Successfully !\\ \" )\\ n\" ,\n \t \" else:\\ n\" ,\n \t \" print(\\ \" Job failed !\\ \" )\\ n\" ,\n \t \" time.sleep(10)\\ n" ,
74
+ }
75
+
76
+ updatedNotebookContent := string (ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/ray_finetune_llm_deepspeed.ipynb" ))
77
+ for oldValue , newValue := range requiredChangesInNotebook {
78
+ updatedNotebookContent = strings .Replace (updatedNotebookContent , oldValue , newValue , - 1 )
79
+ }
80
+ updatedNotebook := []byte (updatedNotebookContent )
81
+
82
+ // Test configuration
83
+ jupyterNotebookConfigMapFileName := "ray_finetune_llm_deepspeed.ipynb"
84
+ configMap := map [string ][]byte {
85
+ jupyterNotebookConfigMapFileName : updatedNotebook ,
86
+ "ray_finetune_llm_deepspeed.py" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/ray_finetune_llm_deepspeed.py" ),
87
+ "requirements.txt" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/requirements.txt" ),
88
+ "create_dataset.py" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/create_dataset.py" ),
89
+ "lora.json" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/lora_configs/lora.json" ),
90
+ "zero_3_llama_2_7b.json" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/deepspeed_configs/zero_3_llama_2_7b.json" ),
91
+ "utils.py" : ReadFileExt (test , workingDirectory + "/../../examples/ray-finetune-llm-deepspeed/utils.py" ),
92
+ }
93
+
94
+ config := CreateConfigMap (test , namespace .Name , configMap )
95
+
61
96
// Create Notebook CR
62
97
createNotebook (test , namespace , userToken , config .Name , jupyterNotebookConfigMapFileName , numGpus )
63
98
@@ -77,26 +112,6 @@ func mnistRayLlmFinetune(t *testing.T, numGpus int) {
77
112
)
78
113
79
114
// Make sure the RayCluster finishes and is deleted
80
- test .Eventually (RayClusters (test , namespace .Name ), TestTimeoutGpuProvisioning ).
115
+ test .Eventually (RayClusters (test , namespace .Name ), TestTimeoutMedium ).
81
116
Should (HaveLen (0 ))
82
117
}
83
-
84
- func ReadRayFinetuneRequirementsTxt (test Test ) []byte {
85
- // Read the requirements.txt from resources and perform replacements for custom values using go template
86
- props := struct {
87
- PipIndexUrl string
88
- PipTrustedHost string
89
- }{
90
- PipIndexUrl : "--index " + string (GetPipIndexURL ()),
91
- }
92
-
93
- // Provide trusted host only if defined
94
- if len (GetPipTrustedHost ()) > 0 {
95
- props .PipTrustedHost = "--trusted-host " + GetPipTrustedHost ()
96
- }
97
-
98
- template , err := files .ReadFile ("resources/ray_finetune_demo/ray_finetune_requirements.txt" )
99
- test .Expect (err ).NotTo (HaveOccurred ())
100
-
101
- return ParseTemplate (test , template , props )
102
- }
0 commit comments