Skip to content

Commit 63e4e8d

Browse files
committed
Move help text output regarding PSM2_CUDA envvar to component init phase
The messages should be printed only in the event of CUDA builds and when PSM2 MTL has actually been selected for use. To this end, move help text output to component init phase. Also use opal_setenv/unsetenv() for safer setting, unsetting of the environment variable and sanitize the help text message. Signed-off-by: Aravind Gopalakrishnan <[email protected]>
1 parent 1ae78e2 commit 63e4e8d

File tree

2 files changed

+50
-31
lines changed

2 files changed

+50
-31
lines changed

ompi/mca/mtl/psm2/help-mtl-psm2.txt

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- text -*-
22
#
33
# Copyright (C) 2009. QLogic Corporation. All rights reserved.
4-
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
4+
# Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
55
# $COPYRIGHT$
66
#
77
# Additional copyrights may follow
@@ -47,5 +47,19 @@ Unknown path record query mechanism %s. Supported mechanisms are %s.
4747
Message size %llu bigger than supported by PSM2 API. Max = %llu
4848
#
4949
[no psm2 cuda env]
50-
Using CUDA enabled OpenMPI but PSM2_CUDA environment variable is %s.
51-
This is not a recommended combination. If the application uses %s.
50+
Warning: Running an Open MPI build with CUDA support over Intel(r) Omni-Path
51+
without enabling CUDA support in the PSM2 library. Open MPI has therefore defaulted
52+
to setting PSM2_CUDA=1. This may impact performance if NOT running CUDA aware
53+
applications. Set the environment variable PSM2_CUDA to 1 to clear
54+
this message, or set it to 0 to hint PSM2 that no CUDA support is needed.
55+
56+
Local hostname: %s
57+
#
58+
[psm2 cuda env zero]
59+
Warning: Open MPI configured with CUDA and running over Intel(r) Omni-Path, but
60+
the PSM2_CUDA environment variable was set to 0.
61+
62+
If your application uses CUDA buffers, the execution will SEGFAULT. You should
63+
set the PSM2_CUDA environment variable to 1 if the application is using CUDA buffers.
64+
65+
Local hostname: %s

ompi/mca/mtl/psm2/mtl_psm2_component.c

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
* Copyright (c) 2006-2010 QLogic Corporation. All rights reserved.
1414
* Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
1515
* reserved.
16-
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
16+
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
1717
* Copyright (c) 2017 Research Organization for Information Science
1818
* and Technology (RIST). All rights reserved.
1919
* $COPYRIGHT$
@@ -28,6 +28,7 @@
2828
#include "opal/mca/event/event.h"
2929
#include "opal/util/output.h"
3030
#include "opal/util/show_help.h"
31+
#include "opal/util/opal_environ.h"
3132
#include "ompi/proc/proc.h"
3233

3334
#include "mtl_psm2.h"
@@ -43,6 +44,10 @@
4344

4445
static int param_priority;
4546

47+
#if OPAL_CUDA_SUPPORT
48+
static int cuda_envvar_set;
49+
#endif
50+
4651
static int ompi_mtl_psm2_component_open(void);
4752
static int ompi_mtl_psm2_component_close(void);
4853
static int ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority);
@@ -199,9 +204,6 @@ static int
199204
ompi_mtl_psm2_component_register(void)
200205
{
201206
int num_local_procs, num_total_procs;
202-
#if OPAL_CUDA_SUPPORT
203-
char *cuda_env;
204-
#endif
205207

206208
ompi_mtl_psm2.connect_timeout = 180;
207209
(void) mca_base_component_var_register(&mca_mtl_psm2_component.super.mtl_version,
@@ -226,30 +228,6 @@ ompi_mtl_psm2_component_register(void)
226228
param_priority = 40;
227229
}
228230

229-
#if OPAL_CUDA_SUPPORT
230-
/*
231-
* If using CUDA enabled OpenMPI, the user likely intends to
232-
* run with CUDA buffers. So, force-set the envvar here if user failed
233-
* to set it.
234-
*/
235-
cuda_env = getenv("PSM2_CUDA");
236-
if (!cuda_env) {
237-
opal_show_help("help-mtl-psm2.txt",
238-
"no psm2 cuda env", true,
239-
"not set",
240-
"Host buffers,\nthere will be a performance penalty"
241-
" due to OMPI force setting this variable now.\n"
242-
"Set environment variable to 0 if using Host buffers" );
243-
setenv("PSM2_CUDA", "1", 0);
244-
} else if (strcmp(cuda_env, "0") == 0) {
245-
opal_show_help("help-mtl-psm2.txt",
246-
"no psm2 cuda env", true,
247-
"set to 0",
248-
"CUDA buffers,\nthe execution will SEGFAULT."
249-
" Set environment variable to 1 if using CUDA buffers");
250-
}
251-
#endif
252-
253231
(void) mca_base_component_var_register (&mca_mtl_psm2_component.super.mtl_version,
254232
"priority", "Priority of the PSM2 MTL component",
255233
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@@ -334,6 +312,10 @@ ompi_mtl_psm2_component_query(mca_base_module_t **module, int *priority)
334312
static int
335313
ompi_mtl_psm2_component_close(void)
336314
{
315+
#if OPAL_CUDA_SUPPORT
316+
if (cuda_envvar_set)
317+
opal_unsetenv("PSM2_CUDA", &environ);
318+
#endif
337319
return OMPI_SUCCESS;
338320
}
339321

@@ -360,6 +342,9 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
360342
int verno_major = PSM2_VERNO_MAJOR;
361343
int verno_minor = PSM2_VERNO_MINOR;
362344
int local_rank = -1, num_local_procs = 0;
345+
#if OPAL_CUDA_SUPPORT
346+
char *cuda_env;
347+
#endif
363348

364349
/* Compute the total number of processes on this host and our local rank
365350
* on that node. We need to provide PSM2 with these values so it can
@@ -387,6 +372,26 @@ ompi_mtl_psm2_component_init(bool enable_progress_threads,
387372
ompi_mtl_psm2_set_shadow_env (ompi_mtl_psm2_shadow_variables + i);
388373
}
389374

375+
#if OPAL_CUDA_SUPPORT
376+
/*
377+
* If using CUDA enabled OpenMPI, the user likely intends to
378+
* run with CUDA buffers. So, force-set the envvar here if user failed
379+
* to set it.
380+
*/
381+
cuda_env = getenv("PSM2_CUDA");
382+
if (!cuda_env) {
383+
opal_show_help("help-mtl-psm2.txt",
384+
"no psm2 cuda env", true,
385+
ompi_process_info.nodename);
386+
opal_setenv("PSM2_CUDA", "1", false, &environ);
387+
cuda_envvar_set = 1;
388+
} else if (strcmp(cuda_env, "0") == 0) {
389+
opal_show_help("help-mtl-psm2.txt",
390+
"psm2 cuda env zero", true,
391+
ompi_process_info.nodename);
392+
}
393+
#endif
394+
390395
err = psm2_init(&verno_major, &verno_minor);
391396
if (err) {
392397
opal_show_help("help-mtl-psm2.txt",

0 commit comments

Comments
 (0)