Skip to content

Commit 1ae623a

Browse files
tkatila and tenzen-y committed
Add default Intel MPI env variables to MPIJob
Co-authored-by: Yuki Iwai <[email protected]> Signed-off-by: Tuomas Katila <[email protected]>
1 parent 485b1fb commit 1ae623a

File tree

3 files changed

+128
-0
lines changed

3 files changed

+128
-0
lines changed

pkg/controller.v1/mpi/mpijob.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ const (
4545
initContainerCpu = "100m"
4646
initContainerEphStorage = "5Gi"
4747
initContainerMem = "512Mi"
48+
iMPIDefaultBootstrap = "rsh"
4849
)
4950

5051
const (
@@ -218,6 +219,26 @@ func isGPULauncher(mpiJob *kubeflowv1.MPIJob) bool {
218219
return false
219220
}
220221

222+
// hasIntelMPIBootstrapValues returns the existence of I_MPI_HYDRA_BOOTSTRAP
223+
// and I_MPI_HYDRA_BOOTSTRAP_EXEC values.
224+
// There are also _EXEC_EXTRA_ARGS and _AUTOFORK under the I_MPI_HYDRA_BOOTSTRAP
225+
// prefix but those are not checked on purpose.
226+
func hasIntelMPIBootstrapValues(envs []corev1.EnvVar) (bootstrap, exec bool) {
227+
for _, env := range envs {
228+
if env.Name == "I_MPI_HYDRA_BOOTSTRAP" {
229+
bootstrap = true
230+
} else if env.Name == "I_MPI_HYDRA_BOOTSTRAP_EXEC" {
231+
exec = true
232+
}
233+
234+
if bootstrap && exec {
235+
break
236+
}
237+
}
238+
239+
return bootstrap, exec
240+
}
241+
221242
func defaultReplicaLabels(genericLabels map[string]string, roleLabelVal string) map[string]string {
222243
replicaLabels := map[string]string{}
223244
for k, v := range genericLabels {

pkg/controller.v1/mpi/mpijob_controller.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,26 @@ func (jc *MPIJobReconciler) newLauncher(mpiJob *kubeflowv1.MPIJob, kubectlDelive
11521152
})
11531153
}
11541154

1155+
// Add default Intel MPI bootstrap variables if not provided by the user.
1156+
if bootstrap, exec := hasIntelMPIBootstrapValues(container.Env); !bootstrap || !exec {
1157+
if !bootstrap {
1158+
container.Env = append(container.Env,
1159+
corev1.EnvVar{
1160+
Name: "I_MPI_HYDRA_BOOTSTRAP",
1161+
Value: iMPIDefaultBootstrap,
1162+
},
1163+
)
1164+
}
1165+
if !exec {
1166+
container.Env = append(container.Env,
1167+
corev1.EnvVar{
1168+
Name: "I_MPI_HYDRA_BOOTSTRAP_EXEC",
1169+
Value: fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName),
1170+
},
1171+
)
1172+
}
1173+
}
1174+
11551175
container.VolumeMounts = append(container.VolumeMounts,
11561176
corev1.VolumeMount{
11571177
Name: kubectlVolumeName,

pkg/controller.v1/mpi/mpijob_controller_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,93 @@ var _ = Describe("MPIJob controller", func() {
709709
})
710710
})
711711

712+
Context("Test launcher's Intel MPI handling", func() {
713+
It("Should create a launcher job with Intel MPI env variables", func() {
714+
getIMPIValues := func(envs []corev1.EnvVar) (bootstrap, exec string) {
715+
for _, env := range envs {
716+
if env.Name == "I_MPI_HYDRA_BOOTSTRAP" {
717+
bootstrap = env.Value
718+
}
719+
if env.Name == "I_MPI_HYDRA_BOOTSTRAP_EXEC" {
720+
exec = env.Value
721+
}
722+
}
723+
724+
return bootstrap, exec
725+
}
726+
727+
By("By creating MPIJobs with and without preset env variables")
728+
729+
testCases := map[string]struct {
730+
envVariables map[string]string
731+
expectedAdded int
732+
}{
733+
"withoutIMPIValues": {
734+
envVariables: map[string]string{"X_MPI_HYDRA_BOOTSTRAP": "foo"},
735+
expectedAdded: 2,
736+
},
737+
"withIMPIBootstrap": {
738+
envVariables: map[string]string{"I_MPI_HYDRA_BOOTSTRAP": "RSH"},
739+
expectedAdded: 1,
740+
},
741+
"withIMPIBootstrapExec": {
742+
envVariables: map[string]string{"I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh"},
743+
expectedAdded: 1,
744+
},
745+
"withIMPIBootstrapAndExec": {
746+
envVariables: map[string]string{
747+
"I_MPI_HYDRA_BOOTSTRAP": "RSH",
748+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh",
749+
},
750+
expectedAdded: 0,
751+
},
752+
}
753+
754+
for testName, testCase := range testCases {
755+
ctx := context.Background()
756+
startTime := metav1.Now()
757+
completionTime := metav1.Now()
758+
759+
jobName := "test-launcher-creation-" + strings.ToLower(testName)
760+
761+
var replicas int32 = 1
762+
mpiJob := newMPIJob(jobName, &replicas, 1, gpuResourceName, &startTime, &completionTime)
763+
Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed())
764+
765+
template := &mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template
766+
Expect(len(template.Spec.Containers) == 1).To(BeTrue())
767+
768+
cont := &template.Spec.Containers[0]
769+
770+
for k, v := range testCase.envVariables {
771+
cont.Env = append(cont.Env,
772+
corev1.EnvVar{
773+
Name: k,
774+
Value: v,
775+
},
776+
)
777+
}
778+
779+
launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", false)
780+
781+
Expect(len(launcher.Spec.Containers) == 1).To(BeTrue())
782+
783+
bootstrap, exec := getIMPIValues(launcher.Spec.Containers[0].Env)
784+
785+
if val, found := testCase.envVariables["I_MPI_HYDRA_BOOTSTRAP"]; found {
786+
Expect(bootstrap).To(BeEquivalentTo(val))
787+
} else {
788+
Expect(bootstrap).To(BeEquivalentTo(iMPIDefaultBootstrap))
789+
}
790+
791+
if val, found := testCase.envVariables["I_MPI_HYDRA_BOOTSTRAP_EXEC"]; found {
792+
Expect(exec).To(BeEquivalentTo(val))
793+
} else {
794+
Expect(exec).To(BeEquivalentTo("/etc/mpi/kubexec.sh"))
795+
}
796+
}
797+
})
798+
})
712799
})
713800

714801
func ReplicaStatusMatch(replicaStatuses map[common.ReplicaType]*common.ReplicaStatus,

0 commit comments

Comments
 (0)