13
13
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
14
14
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
15
15
* reserved.
16
- * Copyright (c) 2013-2015 Intel, Inc. All rights reserved
16
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved
17
17
* Copyright (c) 2017 Research Organization for Information Science
18
18
* and Technology (RIST). All rights reserved.
19
19
* $COPYRIGHT$
28
28
#include "opal/mca/event/event.h"
29
29
#include "opal/util/output.h"
30
30
#include "opal/util/show_help.h"
31
+ #include "opal/util/opal_environ.h"
31
32
#include "ompi/proc/proc.h"
32
33
33
34
#include "mtl_psm2.h"
@@ -45,6 +46,10 @@ static int param_priority;
45
46
/* MPI_THREAD_MULTIPLE_SUPPORT */
46
47
opal_mutex_t mtl_psm2_mq_mutex = OPAL_MUTEX_STATIC_INIT ;
47
48
49
+ #if OPAL_CUDA_SUPPORT
50
+ static int cuda_envvar_set ;
51
+ #endif
52
+
48
53
static int ompi_mtl_psm2_component_open (void );
49
54
static int ompi_mtl_psm2_component_close (void );
50
55
static int ompi_mtl_psm2_component_query (mca_base_module_t * * module , int * priority );
@@ -201,9 +206,6 @@ static int
201
206
ompi_mtl_psm2_component_register (void )
202
207
{
203
208
int num_local_procs , num_total_procs ;
204
- #if OPAL_CUDA_SUPPORT
205
- char * cuda_env ;
206
- #endif
207
209
208
210
ompi_mtl_psm2 .connect_timeout = 180 ;
209
211
(void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
@@ -228,30 +230,6 @@ ompi_mtl_psm2_component_register(void)
228
230
param_priority = 40 ;
229
231
}
230
232
231
- #if OPAL_CUDA_SUPPORT
232
- /*
233
- * If using CUDA enabled OpenMPI, the user likely intends to
234
- * run with CUDA buffers. So, force-set the envvar here if user failed
235
- * to set it.
236
- */
237
- cuda_env = getenv ("PSM2_CUDA" );
238
- if (!cuda_env ) {
239
- opal_show_help ("help-mtl-psm2.txt" ,
240
- "no psm2 cuda env" , true,
241
- "not set" ,
242
- "Host buffers,\nthere will be a performance penalty"
243
- " due to OMPI force setting this variable now.\n"
244
- "Set environment variable to 0 if using Host buffers" );
245
- setenv ("PSM2_CUDA" , "1" , 0 );
246
- } else if (strcmp (cuda_env , "0" ) == 0 ) {
247
- opal_show_help ("help-mtl-psm2.txt" ,
248
- "no psm2 cuda env" , true,
249
- "set to 0" ,
250
- "CUDA buffers,\nthe execution will SEGFAULT."
251
- " Set environment variable to 1 if using CUDA buffers" );
252
- }
253
- #endif
254
-
255
233
(void ) mca_base_component_var_register (& mca_mtl_psm2_component .super .mtl_version ,
256
234
"priority" , "Priority of the PSM2 MTL component" ,
257
235
MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
@@ -336,6 +314,11 @@ ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority)
336
314
static int
337
315
ompi_mtl_psm2_component_close (void )
338
316
{
317
+ #if OPAL_CUDA_SUPPORT
318
+ if (cuda_envvar_set ) {
319
+ opal_unsetenv ("PSM2_CUDA" , & environ );
320
+ }
321
+ #endif
339
322
return OMPI_SUCCESS ;
340
323
}
341
324
@@ -362,6 +345,12 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
362
345
int verno_major = PSM2_VERNO_MAJOR ;
363
346
int verno_minor = PSM2_VERNO_MINOR ;
364
347
int local_rank = -1 , num_local_procs = 0 ;
348
+ #if OPAL_CUDA_SUPPORT
349
+ int ret ;
350
+ char * cuda_env ;
351
+ glob_t globbuf ;
352
+ globbuf .gl_offs = 0 ;
353
+ #endif
365
354
366
355
/* Compute the total number of processes on this host and our local rank
367
356
* on that node. We need to provide PSM2 with these values so it can
@@ -389,6 +378,27 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
389
378
ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i );
390
379
}
391
380
381
+ #if OPAL_CUDA_SUPPORT
382
+ /*
383
+ * If using CUDA enabled OpenMPI, the user likely intends to
384
+ * run with CUDA buffers. So, force-set the envvar here if user failed
385
+ * to set it.
386
+ */
387
+ ret = glob ("/sys/module/nvidia" , GLOB_DOOFFS , NULL , & globbuf );
388
+ if (0 == ret || GLOB_NOMATCH == ret ) {
389
+ globfree (& globbuf );
390
+ }
391
+
392
+ cuda_env = getenv ("PSM2_CUDA" );
393
+ if (!cuda_env && (0 == ret )) {
394
+ opal_show_help ("help-mtl-psm2.txt" ,
395
+ "no psm2 cuda env" , true,
396
+ ompi_process_info .nodename );
397
+ opal_setenv ("PSM2_CUDA" , "1" , false, & environ );
398
+ cuda_envvar_set = 1 ;
399
+ }
400
+ #endif
401
+
392
402
err = psm2_init (& verno_major , & verno_minor );
393
403
if (err ) {
394
404
opal_show_help ("help-mtl-psm2.txt" ,
0 commit comments