From fa499d78ab82cca4b4993c2521543e701e5fb3c3 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Thu, 3 Oct 2024 18:14:08 +0200 Subject: [PATCH 01/12] Update KhiopsLocalRunner to use the Khiops 10.2.3 khiops_env script Thusly, various `KhiopsLocalRunner` attributes' values are sourced from the `khiops_env` script on all platforms: - `mpi_command_args`: set according to `KHIOPS_MPI_COMMAND` - `max_cores`: set according to `KHIOPS_PROC_NUMBER` Note: as `KHIOPS_MPI_COMMAND` depends on `KHIOPS_PROC_NUMBER`, `max_cores` cannot be updated directly on an existing `KhiopsLocalRunner` instance; to update this parameter, one needs to: 1. set the `KHIOPS_PROC_NUMBER` environment variable 2. create a new `KhiopsLocalRunner` instance - `max_memory_mb`: set according to `KHIOPS_MEMORY_LIMIT` - `khiops_temp_dir`: set according to `KHIOPS_TMP_DIR` - `khiops_path`: set according to `KHIOPS_PATH` - `khiops_coclustering_path`: set according to `KHIOPS_COCLUSTERING_PATH` This change also entails that: - the following environment variables are no longer used: - `KHIOPS_MPI_COMMAND_ARGS` - `KHIOPS_MPIEXEC_PATH` - `KHIOPS_MPI_LIB` - `KHIOPS_HOME` on Windows operating systems - the MPI command can be further customized, on distributed environments only, according to the new `khiops_env` script, via the `KHIOPS_MPI_HOST_FILE` environment variable, that is only used by `khiops_env` for customizing the MPI command - the MPI command is exposed as the `KHIOPS_MPI_COMMAND` environment variable that is set by the `khiops_env` script and cannot be changed by the user directly --- doc/notes.rst | 27 +- khiops/core/internals/runner.py | 651 ++++++++---------------------- tests/test_core.py | 153 +------ tests/test_khiops_integrations.py | 69 +--- 4 files changed, 190 insertions(+), 710 deletions(-) diff --git a/doc/notes.rst b/doc/notes.rst index af9e2261..4f5dc136 100644 --- a/doc/notes.rst +++ b/doc/notes.rst @@ -46,29 +46,14 @@ Environment Variables --------------------- The default Khiops local runner used by the `khiops.core.api` functions can be customized via the -environment variables listed below. They can be split into three groups: - -- part of the Khiops API: - - - ``KHIOPS_PROC_NUMBER``: number of processes launched by Khiops - - ``KHIOPS_MEMORY_LIMIT``: memory limit of the Khiops executables in megabytes; - ignored if set above the system memory limit - - ``KHIOPS_TMP_DIR``: path to Khiops' temporary directory - -- other environment variables: - - - ``KHIOPS_HOME``: *Windows only* path to the Khiops installation directory - - ``KHIOPS_SAMPLES_DIR``: path to the Khiops sample datasets directory - -- advanced configuration variables. Most of the time the user does not need modify - them: - - - ``KHIOPS_MPI_COMMAND_ARGS``: arguments to the ``mpiexec`` command - - ``KHIOPS_MPIEXEC_PATH``: path to the ``mpiexec`` command - - ``KHIOPS_MPI_LIB``: *Linux and MacOS only* path to the MPI library; added to - the beginning of ``LD_LIBRARY_PATH`` +environment variables listed below: +- ``KHIOPS_PROC_NUMBER``: number of processes launched by Khiops +- ``KHIOPS_MEMORY_LIMIT``: memory limit of the Khiops executables in megabytes; + ignored if set above the system memory limit +- ``KHIOPS_TMP_DIR``: path to Khiops' temporary directory +- ``KHIOPS_SAMPLES_DIR``: path to the Khiops sample datasets directory (only for the Khiops Python library) .. _core-api-input-types: diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index b0823318..b98d646d 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -15,7 +15,6 @@ import io import os import platform -import re import shlex import shutil import subprocess @@ -50,7 +49,7 @@ def _isdir_without_all_perms(dir_path): ) -def get_dir_status(a_dir): +def _get_dir_status(a_dir): """Returns the status of a local or remote directory Against a local directory a real check is performed. A remote directory is detected @@ -78,9 +77,9 @@ def get_dir_status(a_dir): return status -def check_samples_dir(samples_dir): +def _check_samples_dir(samples_dir): # Warn if there are problems with the samples_dir - samples_dir_status = get_dir_status(samples_dir) + samples_dir_status = _get_dir_status(samples_dir) download_msg = ( "Execute the kh-download-datasets script or " "the khiops.tools.download_datasets function to download them." @@ -124,98 +123,12 @@ def _extract_path_from_uri(uri): return path -def _get_system_cpu_cores(): - """Portably obtains the number of cpu cores (no hyperthreading)""" - # Set the cpu info command and arguments for each platform - if platform.system() == "Linux": - cpu_system_info_args = ["lscpu", "-b", "-p=Core,Socket"] - elif platform.system() == "Windows": - cpu_system_info_args = [ - "powershell.exe", - "-Command", - "$OutputEncoding = [System.Text.Encoding]::UTF8;", - "Get-CimInstance -ClassName 'Win32_Processor' " - "| Select-Object -Property 'NumberOfCores'", - "| Format-Table -HideTableHeaders ", - "| Write-Output", - ] - elif platform.system() == "Darwin": - cpu_system_info_args = ["sysctl", "-n", "hw.physicalcpu"] - else: - raise KhiopsEnvironmentError( - f"Unsupported OS {platform.system()}. " - "Check the supported OSes at https://khiops.org." - ) - - # Execute the cpu info process - with subprocess.Popen( - cpu_system_info_args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - ) as cpu_system_info_process: - cpu_system_info_output, _ = cpu_system_info_process.communicate() - - # Count the cpus for each system - if platform.system() == "Linux": - # Ignore the comment lines starting with '#' tha lscpu puts in the message - # Each non commented line is for a cpu - cpu_entries = { - entry - for entry in cpu_system_info_output.splitlines() - if not entry.startswith("#") - } - cpu_core_count = len(cpu_entries) - elif platform.system() == "Windows": - # Each line of the cpu count command contains a number of cores of a socket - cores_per_socket = [ - int(line.strip()) for line in cpu_system_info_output.strip().splitlines() - ] - cpu_core_count = sum(cores_per_socket) - elif platform.system() == "Darwin": - cpu_core_count = int(cpu_system_info_output.strip()) - else: - raise KhiopsEnvironmentError( - f"Unsupported OS {platform.system()}. " - "Check the supported OSes at https://khiops.org." - ) - - return cpu_core_count - - -def _compute_max_cores_from_proc_number(proc_number): - # if KHIOPS_PROC_NUMBER is 0 we set max_cores to the system's core number - if proc_number == 0: - max_cores = _get_system_cpu_cores() - # Otherwise we set max_cores to KHIOPS_PROC_NUMBER - else: - max_cores = proc_number - - return max_cores - - -def _mpiexec_path(bin_dir): - mpiexec_exe = "mpiexec" +def _khiops_env_file_exists(env_dir): + """Check ``khiops_env`` exists relative to the specified environment dir""" + khiops_env_path = os.path.join(env_dir, "khiops_env") if platform.system() == "Windows": - mpiexec_exe += ".exe" - return os.path.join(bin_dir, mpiexec_exe) - - -def _modl_and_mpiexec_executables_exist(bin_dir): - """Check MODL* executables exist relative to the specified binary dir""" - modl_path = os.path.join(bin_dir, "MODL") - modl_cc_path = os.path.join(bin_dir, "MODL_Coclustering") - mpiexec_path = _mpiexec_path(bin_dir) - if platform.system() == "Windows": - modl_path += ".exe" - modl_cc_path += ".exe" - if not os.path.exists(mpiexec_path): - mpiexec_path = _mpiexec_path( - Path(bin_dir).parent.joinpath("Library", "bin").as_posix() - ) - return all( - os.path.exists(exe_path) for exe_path in (modl_path, modl_cc_path, mpiexec_path) - ) + khiops_env_path += ".cmd" + return os.path.exists(khiops_env_path) and os.path.isfile(khiops_env_path) def _infer_env_bin_dir_for_conda_based_installations(): @@ -275,7 +188,7 @@ def _infer_khiops_installation_method(trace=False): # Note: The check that MODL and MODL_Coclustering are actually executable is done # afterwards by the initializations method. # We are in a conda env if the Khiops binaries exists within `$CONDA_PREFIX/bin` - if "CONDA_PREFIX" in os.environ and _modl_and_mpiexec_executables_exist( + if "CONDA_PREFIX" in os.environ and _khiops_env_file_exists( os.path.join(os.environ["CONDA_PREFIX"], "bin") ): installation_method = "conda" @@ -284,9 +197,9 @@ def _infer_khiops_installation_method(trace=False): env_bin_dir = _infer_env_bin_dir_for_conda_based_installations() if trace: print(f"Environment binary dir: '{env_bin_dir}'") - if _check_conda_env_bin_dir( + if _check_conda_env_bin_dir(env_bin_dir) and _khiops_env_file_exists( env_bin_dir - ) and _modl_and_mpiexec_executables_exist(env_bin_dir): + ): installation_method = "conda-based" else: installation_method = "binary+pip" @@ -305,62 +218,6 @@ def _check_executable(bin_path): ) -def get_linux_distribution_name(): - """Detect Linux distribution name - - Parses the `NAME` variable defined in the `/etc/os-release` or - `/usr/lib/os-release` files and converts it to lowercase. - - Returns - ------- - str - Name of the Linux distribution, converted to lowecase - - Raises - ------ - OSError - If neither `/etc/os-release` nor `/usr/lib/os-release` are found - """ - - def get_linux_distribution_from_os_release_file(os_release_file_path): - # The `NAME` variable is always defined according to the freedesktop.org - # standard: - # https://www.freedesktop.org/software/systemd/man/latest/os-release.html - with open(os_release_file_path, encoding="ascii") as os_release_info_file: - for entry in os_release_info_file: - if entry.startswith("NAME"): - linux_distribution = entry.split("=")[-1].strip('"\n') - break - return linux_distribution - - assert platform.system() == "Linux" - - # If Python version >= 3.10, use standard library support; see - # https://docs.python.org/3/library/platform.html#platform.freedesktop_os_release - python_ver_major, python_ver_minor, _ = platform.python_version_tuple() - if int(python_ver_major) >= 3 and int(python_ver_minor) >= 10: - linux_distribution = platform.freedesktop_os_release()["NAME"] - - # If Python version < 3.10, determine the Linux distribution manually, - # but mimic the behavior of Python >= 3.10 standard library support - else: - # First try to parse /etc/os-release - try: - linux_distribution = get_linux_distribution_from_os_release_file( - os.path.join(os.sep, "etc", "os-release") - ) - except FileNotFoundError: - # Fallback on parsing /usr/lib/os-release - try: - linux_distribution = get_linux_distribution_from_os_release_file( - os.path.join(os.sep, "usr", "lib", "os-release") - ) - # Mimic `platform.freedesktop_os_release` function behavior - except FileNotFoundError as error: - raise OSError from error - return linux_distribution.lower() - - class KhiopsRunner(ABC): """Abstract Khiops Python runner to be re-implemented""" @@ -523,7 +380,9 @@ def scenario_prologue(self, prologue): def max_cores(self): """int: Maximum number of cores for Khiops executions - If set to 0 it uses the system's default. + You may not set this value directly. Instead, set the ``KHIOPS_PROC_NUMBER`` + environment variable and then create another instance of + `~.KhiopsLocalRunner`. Raises ------ @@ -534,10 +393,6 @@ def max_cores(self): """ return self.general_options.max_cores - @max_cores.setter - def max_cores(self, core_number): - self._set_max_cores(core_number) - def _set_max_cores(self, core_number): self.general_options.max_cores = core_number self.general_options.check() @@ -1028,29 +883,17 @@ class KhiopsLocalRunner(KhiopsRunner): Requires either: - This package installed through Conda and run from a Conda environment, or - - the Khiops desktop app installed on the local machine + - the ``khiops-core`` Linux native package installed on the local machine, or + - the Windows Khiops desktop application installed on the local machine .. rubric:: Environment variables taken into account by the runner: - - part of the Khiops API: - - - ``KHIOPS_PROC_NUMBER``: number of processes launched by Khiops - - ``KHIOPS_MEMORY_LIMIT``: memory limit of the Khiops executables in megabytes; - ignored if set above the system memory limit - - ``KHIOPS_TMP_DIR``: path to Khiops' temporary directory - - - other environment variables: - - - ``KHIOPS_HOME``: *Windows only* path to the Khiops installation directory - - ``KHIOPS_SAMPLES_DIR``: path to the Khiops sample datasets directory - - - advanced configuration variables. Most of the time the user does not need modify - them: - - - ``KHIOPS_MPI_COMMAND_ARGS``: arguments to the ``mpiexec`` command - - ``KHIOPS_MPIEXEC_PATH``: path to the ``mpiexec`` command - - ``KHIOPS_MPI_LIB``: *Linux and MacOS only* path to the MPI library; added to - the beginning of ``LD_LIBRARY_PATH`` + - ``KHIOPS_PROC_NUMBER``: number of processes launched by Khiops + - ``KHIOPS_MEMORY_LIMIT``: memory limit of the Khiops executables in megabytes; + ignored if set above the system memory limit + - ``KHIOPS_TMP_DIR``: path to Khiops' temporary directory + - ``KHIOPS_SAMPLES_DIR``: path to the Khiops sample datasets directory + (only for the Khiops Python library) .. rubric:: Samples directory settings @@ -1072,8 +915,9 @@ class KhiopsLocalRunner(KhiopsRunner): def __init__(self): # Define specific attributes - self.mpi_command_args = None - self._khiops_bin_dir = None + self._mpi_command_args = None + self._khiops_path = None + self._khiops_coclustering_path = None self._khiops_version = None self._samples_dir = None self._samples_dir_checked = False @@ -1082,50 +926,93 @@ def __init__(self): super().__init__() # Initialize Khiops environment - self._start_khiops_environment_initialization() + self._initialize_khiops_environment() - def _set_max_cores(self, core_number): - super()._set_max_cores(core_number) - self._initialize_mpi_command_args() - - def _start_khiops_environment_initialization(self): - # Set the Khiops process number according to the `KHIOPS_PROC_NUMBER` env var - if "KHIOPS_PROC_NUMBER" in os.environ: - self.max_cores = _compute_max_cores_from_proc_number( - int(os.environ["KHIOPS_PROC_NUMBER"]) - ) - # If not defined, set it to the number of system cores - else: - self.max_cores = _get_system_cpu_cores() - os.environ["KHIOPS_PROC_NUMBER"] = str(self.max_cores) - - # Set the Khiops memory limit - if "KHIOPS_MEMORY_LIMIT" in os.environ: - self.max_memory_mb = int(os.environ["KHIOPS_MEMORY_LIMIT"]) - else: - self.max_memory_mb = 0 - - # Set MPI command - self._initialize_mpi_command_args() - - # Add custom path to MPI library to LD_LIBRARY_PATH, to be used in priority - if "KHIOPS_MPI_LIB" in os.environ: - custom_mpi_lib_path = os.environ["KHIOPS_MPI_LIB"] - if "LD_LIBRARY_PATH" in os.environ: - old_ld_library_path = os.environ["LD_LIBRARY_PATH"] - os.environ["LD_LIBRARY_PATH"] = ( - custom_mpi_lib_path + os.pathsep + old_ld_library_path - ) + def _initialize_khiops_environment(self): + # Check the `khiops_env` script + # khiops_env is always in path for a proper installation + khiops_env_path = shutil.which("khiops_env") + if khiops_env_path is not None: + if platform.system() == "Windows": + command = f"cmd /C call {khiops_env_path} --env" else: - os.environ["LD_LIBRARY_PATH"] = custom_mpi_lib_path - - # Set the default Khiops temporary directory ("" means system's default) - if "KHIOPS_TMP_DIR" in os.environ: - self.khiops_temp_dir = os.environ["KHIOPS_TMP_DIR"] + command = shlex.split(f"bash -c '{khiops_env_path} --env'") + + with subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ) as khiops_env_process: + stdout, _ = khiops_env_process.communicate() + for line in stdout.split("\n"): + tokens = line.rstrip().split(maxsplit=1) + if len(tokens) == 2: + var_name, var_value = tokens + elif len(tokens) == 1: + var_name = tokens[0] + var_value = "" + else: + continue + # Set paths to Khiops binaries + if var_name == "KHIOPS_PATH": + self.khiops_path = var_value + os.environ["KHIOPS_PATH"] = var_value + elif var_name == "KHIOPS_COCLUSTERING_PATH": + self.khiops_coclustering_path = var_value + os.environ["KHIOPS_COCLUSTERING_PATH"] = var_value + # Set MPI command + elif var_name == "KHIOPS_MPI_COMMAND": + self._mpi_command_args = shlex.split(var_value) + os.environ["KHIOPS_MPI_COMMAND"] = var_value + # Set the Khiops process number + elif var_name == "KHIOPS_PROC_NUMBER": + if var_value: + self._set_max_cores(int(var_value)) + os.environ["KHIOPS_PROC_NUMBER"] = var_value + # If `KHIOPS_PROC_NUMBER` is not set, then default to `0` + # (use all cores) + else: + self._set_max_cores(0) + # Set the Khiops memory limit + elif var_name == "KHIOPS_MEMORY_LIMIT": + if var_value: + self.max_memory_mb = int(var_value) + os.environ["KHIOPS_MEMORY_LIMIT"] = var_value + else: + self.max_memory_mb = 0 + os.environ["KHIOPS_MEMORY_LIMIT"] = "" + # Set the default Khiops temporary directory + # ("" means system's default) + elif var_name == "KHIOPS_TMP_DIR": + if var_value: + self.khiops_temp_dir = var_value + os.environ["KHIOPS_TMP_DIR"] = var_value + else: + self.khiops_temp_dir = "" + os.environ["KHIOPS_TEMP_DIR"] = self.khiops_temp_dir + # Propagate all the other environment variables to Khiops binaries + else: + os.environ[var_name] = var_value else: - self.khiops_temp_dir = "" + raise KhiopsEnvironmentError( + "The 'khiops_env' script not found. Make sure you have " + "installed khiops >= 10.2.3." + ) + + # Check the tools exist and are executable + self._check_tools() installation_method = _infer_khiops_installation_method() + + # Switch to sequential mode if 0 < max_cores < 3 + if self.max_cores in (1, 2): + warnings.warn( + f"Too few cores: {self.max_cores}. " + "To efficiently run Khiops in parallel at least 3 processes " + "are needed. Khiops will run in a single process." + ) + if platform.system() == "Linux" and installation_method == "binary+pip": # Set the OpenMPI variable OMPI_MCA_plm_rsh_agent to the empty string # if not set @@ -1133,178 +1020,9 @@ def _start_khiops_environment_initialization(self): if "OMPI_MCA_plm_rsh_agent" not in os.environ: os.environ["OMPI_MCA_plm_rsh_agent"] = "" - # Set the OpenMPI variable OMPI_MCA_btl_vader_single_copy_mechanism - # to the "none" string value to remove the mpi message - # "Read -1, expected 65536, errno = 1" that appears on Docker - if "OMPI_MCA_btl_vader_single_copy_mechanism" not in os.environ: - os.environ["OMPI_MCA_btl_vader_single_copy_mechanism"] = "none" - - # Set the OpenMPI variable PSM3_DEVICES to the "self" string value to - # fix issue https://github.com/KhiopsML/khiops/issues/307 on Rocky - if ( - get_linux_distribution_name() == "rocky linux" - and "PSM3_DEVICES" not in os.environ - ): - os.environ["PSM3_DEVICES"] = "self" - # Initialize the default samples dir self._initialize_default_samples_dir() - def _initialize_mpi_command_args(self): - """Creates the mpiexec call arguments for each platform""" - # Note: Unless enforced by `KHIOPS_MPIEXEC_PATH`, the mpiexec - # accessible in the path takes precedence over any other mpiexec install. - installation_method = _infer_khiops_installation_method() - # In Conda-based, but non-Conda environment, specify mpiexec path - if installation_method == "conda-based": - # Python `os.path.realpath` resolves symlinks recursively, like GNU - # `readlink -f`; Python `os.readlink` does not - mpiexec_path = os.environ.get("KHIOPS_MPIEXEC_PATH") or os.path.realpath( - os.path.join( - _infer_env_bin_dir_for_conda_based_installations(), "mpiexec" - ) - ) - if platform.system() == "Windows" and not os.path.splitext(mpiexec_path): - mpiexec_path += ".exe" - # If mpiexec[.exe] is not in the Conda environment's bin path, - # update mpiexec's path to the expected Windows path to mpiexec: - # - replace "bin" with "Library/bin" in the binary directory - # - re-add mpiexec[.exe] to the path: look it up in the original - # path (so that extension calculation is not repeated) - if not os.path.exists(mpiexec_path): - mpiexec_path = ( - Path(mpiexec_path) - .parents[1] - .joinpath("Library", "bin", Path(mpiexec_path).parts[-1]) - .as_posix() - ) - # In Conda or local installations, expect mpiexec in the PATH - else: - link_to_mpiexec = shutil.which("mpiexec") - mpiexec_path = ( - os.environ.get("KHIOPS_MPIEXEC_PATH") - or link_to_mpiexec - and os.path.realpath(link_to_mpiexec) - ) - # If mpiexec is not in the path, and the installation method is local, - # then try to load MPI environment module so that mpiexec is in the path - if mpiexec_path is None and installation_method == "binary+pip": - # If environment modules are installed, then load the MPI module - module_init_script_path = os.path.join( - os.path.sep, "etc", "profile.d", "modules.sh" - ) - - # List available environment modules - if os.path.exists(module_init_script_path): - shell_command = shlex.split( - f"sh -c 'source {module_init_script_path} && module avail'" - ) - with subprocess.Popen( - shell_command, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as env_modules_init_process: - stdout, stderr = env_modules_init_process.communicate() - - # If environment modules listing worked, look-up the MPI module - if env_modules_init_process.returncode == 0: - for line in sorted( - re.sub( - "\\s+", "\\n", (stdout + stderr).decode("utf-8") - ).splitlines(), - reverse=True, - ): - # If MPI environment module is found, attempt to load it - if f"openmpi-{platform.machine()}" in line: - mpi_module = line - # Use 'type -P' to get the path to executable, - # as 'which' is non-portable - shell_command = shlex.split( - f"sh -c 'source {module_init_script_path} && " - f"module unload mpi && module load {mpi_module} && " - "type -P mpiexec'" - ) - with subprocess.Popen( - shell_command, - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - ) as mpi_module_load_process: - stdout, _ = mpi_module_load_process.communicate() - - # If MPI module is loaded, get the path to mpiexec - if mpi_module_load_process.returncode == 0: - for line in stdout.decode("utf-8").splitlines(): - if "mpiexec" in line: - mpiexec_path = line - break - if mpiexec_path is not None: - self._set_mpi_command_args_with_mpiexec( - mpiexec_path, installation_method - ) - break - - # If MPI is found, then set the path to mpiexec accordingly - if mpiexec_path is not None: - self._set_mpi_command_args_with_mpiexec(mpiexec_path, installation_method) - # If MPI is still not found, then do not use MPI and warn the user - else: - self.mpi_command_args = [] - warnings.warn( - "mpiexec is not in PATH, Khiops will run with just one CPU. " - "We recommend you to reinstall khiops. " - "Go to https://khiops.org for more information." - ) - - def _set_mpi_command_args_with_mpiexec(self, mpiexec_path, installation_method): - assert mpiexec_path is not None - # User-specified MPI command args take precendence over automatic setting - if "KHIOPS_MPI_COMMAND_ARGS" in os.environ: - self.mpi_command_args = [mpiexec_path] + shlex.split( - os.environ["KHIOPS_MPI_COMMAND_ARGS"] - ) - # With only 1 or 2 processes run sequentially (without MPI) - elif self.max_cores in (1, 2): - self.mpi_command_args = [] - warnings.warn( - f"Too few cores: {self.max_cores}. " - "To efficiently run Khiops in parallel at least 3 processes " - "are needed. Khiops will run in a single process." - ) - # Otherwise, build the mpiexec command arguments - else: - self.mpi_command_args = [mpiexec_path] - if platform.system() == "Windows": - self.mpi_command_args += [ - "-al", - "spr:P", - "-n", - str(self.max_cores), - "/priority", - "1", - ] - elif platform.system() == "Linux": - # For Linux native installations we use OpenMPI - if installation_method == "binary+pip": - self.mpi_command_args += ["--allow-run-as-root", "--quiet"] - self.mpi_command_args += [ - "-n", - str(self.max_cores), - ] - elif platform.system() == "Darwin": - self.mpi_command_args += [ - "-host", - "localhost", - "-n", - str(self.max_cores), - ] - else: - raise KhiopsEnvironmentError( - f"Unsupported OS {platform.system()}. " - "Check the supported OSes at https://khiops.org." - ) - def _initialize_default_samples_dir(self): """See class docstring""" # Set the fallback value for the samples directory @@ -1328,7 +1046,7 @@ def _initialize_default_samples_dir(self): ok_statuses = ["ok", "remote"] if ( public_samples_dir is not None - and get_dir_status(public_samples_dir) in ok_statuses + and _get_dir_status(public_samples_dir) in ok_statuses ): self._samples_dir = public_samples_dir else: @@ -1342,73 +1060,15 @@ def _initialize_default_samples_dir(self): assert self._samples_dir is not None def _finish_khiops_environment_initialization(self): - # Initialize Khiops binary directory - self._initialize_khiops_bin_dir() - # Check the tools exist and are executable self._check_tools() # Initialize the khiops version self._initialize_khiops_version() - def _initialize_khiops_bin_dir(self): - # Initialize Khiops bin dir, according to the environment - installation_method = _infer_khiops_installation_method() - - # Conda case - if installation_method == "conda": - self._khiops_bin_dir = os.path.join(os.environ["CONDA_PREFIX"], "bin") - # Conda "based" (fallback) - elif installation_method == "conda-based": - self._khiops_bin_dir = _infer_env_bin_dir_for_conda_based_installations() - # System-wide installations - else: - self._initialize_default_system_wide_khiops_bin_dir() - assert self.khiops_bin_dir is not None - - def _initialize_default_system_wide_khiops_bin_dir(self): - # Warn if both KHIOPS_HOME and KhiopsHome are set - if "KHIOPS_HOME" in os.environ and "KhiopsHome" in os.environ: - warnings.warn( - "Both KHIOPS_HOME and KhiopsHome environment variables " - "are set. Only the KHIOPS_HOME will be used." - ) - # Windows: KHIOPS_HOME value - if platform.system() == "Windows": - # KHIOPS_HOME variable by default - if "KHIOPS_HOME" in os.environ: - self._khiops_bin_dir = os.path.join(os.environ["KHIOPS_HOME"], "bin") - # Look for KhiopsHome to support Khiops 9 - elif "KhiopsHome" in os.environ: - self._khiops_bin_dir = os.path.join(os.environ["KhiopsHome"], "bin") - # Raise error if KHIOPS_HOME is not set - else: - raise KhiopsEnvironmentError( - "No environment variable named 'KHIOPS_HOME' or " - "'KhiopsHome' found. We recommend you to reinstall Khiops. " - "Go to https://khiops.org for more information." - ) - # MacOS: /usr/local/bin - elif platform.system() == "Darwin": - self._khiops_bin_dir = os.path.join(os.path.sep, "usr", "local", "bin") - # Linux/Unix: /usr/bin - elif platform.system() == "Linux": - self._khiops_bin_dir = os.path.join(os.path.sep, "usr", "bin") - # Raise an error for unknown platforms - else: - raise KhiopsEnvironmentError( - f"Unsupported OS {platform.system()}. " - "Check the supported OSes at https://khiops.org." - ) - def _check_tools(self): """Checks the that the tool binaries exist and are executable""" - assert self.khiops_bin_dir is not None for tool_name in ["khiops", "khiops_coclustering"]: - if not os.path.exists(self._tool_path(tool_name)): - raise KhiopsEnvironmentError( - f"Inexistent Khiops executable path: {self._tool_path(tool_name)}" - ) _check_executable(self._tool_path(tool_name)) def _initialize_khiops_version(self): @@ -1447,7 +1107,7 @@ def _initialize_khiops_version(self): def _build_status_message(self): # Initialize if necessary with warnings.catch_warnings(record=True) as warning_list: - if self.khiops_bin_dir is None: + if self._khiops_version is None: self._finish_khiops_environment_initialization() # Call the parent's method @@ -1460,26 +1120,27 @@ def _build_status_message(self): else: khiops_temp_dir_msg = " (system's default)" install_type_msg = _infer_khiops_installation_method() - if self.mpi_command_args: - mpi_command_args_msg = " ".join(self.mpi_command_args) + if self._mpi_command_args: + mpi_command_args_msg = " ".join(self._mpi_command_args) else: mpi_command_args_msg = "" # Build the message status_msg += "\n\n" status_msg += "khiops local installation settings\n" - status_msg += f"version : {self._khiops_version}\n" - status_msg += f"executables dir : {self._khiops_bin_dir}\n" - status_msg += f"temp dir : {khiops_temp_dir_msg}\n" - status_msg += f"install type : {install_type_msg}\n" - status_msg += f"MPI command : {mpi_command_args_msg}\n" + status_msg += f"version : {self.khiops_version}\n" + status_msg += f"Khiops path : {self.khiops_path}\n" + status_msg += f"Khiops CC path : {self.khiops_coclustering_path}\n" + status_msg += f"temp dir : {khiops_temp_dir_msg}\n" + status_msg += f"install type : {install_type_msg}\n" + status_msg += f"MPI command : {mpi_command_args_msg}\n" status_msg += "\n" return status_msg, warning_list def _get_khiops_version(self): # Initialize the first time it is called - if self.khiops_bin_dir is None: + if self._khiops_version is None: self._finish_khiops_environment_initialization() assert isinstance(self._khiops_version, KhiopsVersion), type_error_message( self._khiops_version, "khiops_version", KhiopsVersion @@ -1487,67 +1148,81 @@ def _get_khiops_version(self): return self._khiops_version @property - def khiops_bin_dir(self): - r"""str: Path of the directory containing Khiops' binaries + def mpi_command_args(self): + return self._mpi_command_args - Default values: + @property + def khiops_path(self): + """str: Path to the ``MODL*`` Khiops binary - - conda installation: ``$CONDA_PREFIX/bin`` - - system-wide installations : + Set by the ``khiops_env`` script from the ``khiops-core`` package. - - Windows: + """ + return self._khiops_path - - ``%KHIOPS_HOME%\bin`` - - ``%KhiopsHome%\bin`` (deprecated) + @khiops_path.setter + def khiops_path(self, modl_path): + # Check that the path is a directory and it exists + if not os.path.exists(modl_path): + raise KhiopsEnvironmentError(f"Inexistent Khiops path: '{modl_path}'") + if not os.path.isfile(modl_path): + raise KhiopsEnvironmentError( + f"Khiops file path is a directory: {modl_path}" + ) - - Linux: ``/usr/bin`` - - Mac OS: ``/usr/local/bin`` + # Set the MODL path + self._khiops_path = modl_path + + @property + def khiops_coclustering_path(self): + """str: Path to the ``MODL_Coclustering`` Khiops Coclustering binary + + Set by the ``khiops_env`` script from the ``khiops-core`` package. """ - return self._khiops_bin_dir + return self._khiops_coclustering_path - @khiops_bin_dir.setter - def khiops_bin_dir(self, bin_dir): + @khiops_coclustering_path.setter + def khiops_coclustering_path(self, modl_coclustering_path): # Check that the path is a directory and it exists - if not os.path.exists(bin_dir): + if not os.path.exists(modl_coclustering_path): raise KhiopsEnvironmentError( - f"Inexistent Khiops binaries directory {bin_dir}" + f"Inexistent Khiops coclustering path: '{modl_coclustering_path}'" ) - if not os.path.isdir(bin_dir): + if not os.path.isfile(modl_coclustering_path): raise KhiopsEnvironmentError( - f"Khiops binaries directory is a file: {bin_dir}" + "Khiops coclustering file path is a directory: " + f"{modl_coclustering_path}" ) - # Set the directory, check and initialize the version - self._khiops_bin_dir = bin_dir - self._check_tools() - self._initialize_khiops_version() + # Set the MODL_Coclustering path + self._khiops_coclustering_path = modl_coclustering_path def _tool_path(self, tool_name): """Full path of a Khiops tool binary""" - assert self.khiops_bin_dir is not None + assert ( + self.khiops_path is not None and self.khiops_coclustering_path is not None + ) tool_name = tool_name.lower() if tool_name not in ["khiops", "khiops_coclustering"]: raise ValueError(f"Invalid tool name: {tool_name}") modl_binaries = { - "khiops": "MODL", - "khiops_coclustering": "MODL_Coclustering", + "khiops": self.khiops_path, + "khiops_coclustering": self.khiops_coclustering_path, } - bin_path = os.path.join(self.khiops_bin_dir, modl_binaries[tool_name]) - if platform.system() == "Windows": - bin_path += ".exe" + bin_path = modl_binaries[tool_name] return bin_path def _set_samples_dir(self, samples_dir): """Checks and sets the samples directory""" - check_samples_dir(samples_dir) + _check_samples_dir(samples_dir) super()._set_samples_dir(samples_dir) def _get_samples_dir(self): # Check the samples dir once (the check emmits only warnings) if not self._samples_dir_checked: - check_samples_dir(self._samples_dir) + _check_samples_dir(self._samples_dir) self._samples_dir_checked = True return self._samples_dir @@ -1583,7 +1258,7 @@ def raw_run(self, tool_name, command_line_args=None, use_mpi=True, trace=False): # Nota: Khiops Coclustering is executed without MPI khiops_process_args = [] if tool_name == "khiops" and use_mpi: - khiops_process_args += self.mpi_command_args + khiops_process_args += self._mpi_command_args khiops_process_args += [self._tool_path(tool_name)] if command_line_args: khiops_process_args += command_line_args @@ -1623,7 +1298,7 @@ def _run( trace, ): # Initialize if necessary (lazy initialization) - if self.khiops_bin_dir is None: + if self._khiops_version is None: self._finish_khiops_environment_initialization() # Execute the tool diff --git a/tests/test_core.py b/tests/test_core.py index 9361d423..9fc3dcde 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8,7 +8,6 @@ import glob import io import os -import shlex import shutil import sys import textwrap @@ -21,11 +20,7 @@ import khiops.core as kh from khiops.core.internals.common import create_unambiguous_khiops_path from khiops.core.internals.io import KhiopsOutputWriter -from khiops.core.internals.runner import ( - KhiopsLocalRunner, - KhiopsRunner, - _get_system_cpu_cores, -) +from khiops.core.internals.runner import KhiopsLocalRunner, KhiopsRunner from khiops.core.internals.scenario import ConfigurableKhiopsScenario from khiops.core.internals.version import KhiopsVersion from tests.test_helper import KhiopsTestHelper @@ -663,7 +658,8 @@ def test_general_options(self): kh.set_runner(test_runner) # Set the general options - test_runner.max_cores = 10 + # Call private method for setting max_cores: + test_runner._set_max_cores(10) test_runner.max_memory_mb = 1000 test_runner.khiops_temp_dir = "/another/tmp" test_runner.scenario_prologue = "// Scenario prologue test" @@ -783,13 +779,22 @@ def __enter__(self): # Create the mock runner, patch the `raw_run` function, enter its context self.mocked_runner = KhiopsLocalRunner() - self.mocked_runner._finish_khiops_environment_initialization() kh.set_runner(self.mocked_runner) self.mock_context = mock.patch.object( self.mocked_runner, "raw_run", new=self.mocked_raw_run ) self.mock_context.__enter__() + # The original `KhiopsLocalRunner._get_khiops_version` method needs to + # call into the Khiops binary via `raw_run` which is mocked; hence, it + # needs to be mocked as well + self.mock_context = mock.patch.object( + self.mocked_runner, + "_get_khiops_version", + return_value=KhiopsVersion("10.2.2"), + ) + self.mock_context.__enter__() + # Return the runner to be used in the context return self.mocked_runner @@ -2614,12 +2619,6 @@ def test_khiops_environment_variables_basic(self): "runner_field": "khiops_temp_dir", "expected_field_value": os.path.join("", "mytmp"), }, - { - "variable": "KHIOPS_PROC_NUMBER", - "value": 0, - "runner_field": "max_cores", - "expected_field_value": _get_system_cpu_cores(), - }, { "variable": "KHIOPS_PROC_NUMBER", "value": 1, @@ -2692,132 +2691,6 @@ def test_khiops_environment_variables_basic(self): else: os.environ[fixture["variable"]] = old_value - def test_mpi_command_is_updated_on_max_cores_update(self): - """Test MPI command is updated on max_cores update""" - # Create a fresh runner and initialize its env - with MockedRunnerContext(create_mocked_raw_run(False, False, 0)) as runner: - pass - - # Update max_cores - max_cores_updated_value = 100 - runner.max_cores = max_cores_updated_value - - # Check MPI command arguments contain the updated max_cores - # The number of cores in the MPI command is the value after '-n' - mpi_command_args = runner.mpi_command_args - max_cores_in_mpi_command = int( - mpi_command_args[mpi_command_args.index("-n") + 1] - ) - self.assertEqual(max_cores_in_mpi_command, max_cores_updated_value) - - def test_undefined_khiops_proc_number_env_var(self): - """Test default value for KHIOPS_PROC_NUMBER env var - - If undefined, `KHIOPS_PROC_NUMBER` is set to the number of system CPU - cores. - """ - old_khiops_proc_number = None - if "KHIOPS_PROC_NUMBER" in os.environ: - old_khiops_proc_number = os.environ["KHIOPS_PROC_NUMBER"] - del os.environ["KHIOPS_PROC_NUMBER"] - - # Create a fresh runner and initialize its env - with MockedRunnerContext(create_mocked_raw_run(False, False, 0)) as runner: - pass - # Define default `KHIOPS_PROC_NUMBER` and check the `maxcores` attribute - # is set accordingly - default_khiops_proc_number = _get_system_cpu_cores() - self.assertEqual(runner.max_cores, default_khiops_proc_number) - - # Check default environment variable value is added - self.assertTrue("KHIOPS_PROC_NUMBER" in os.environ) - self.assertEqual( - os.environ["KHIOPS_PROC_NUMBER"], str(default_khiops_proc_number) - ) - - # Restore the old environment variable value (if applicable) - if old_khiops_proc_number is not None: - os.environ["KHIOPS_PROC_NUMBER"] = old_khiops_proc_number - else: - del os.environ["KHIOPS_PROC_NUMBER"] - - def test_khiops_mpi_lib_env_var(self): - """Test defined KHIOPS_MPI_LIB env var is added to LD_LIBRARY_PATH""" - variables = ("KHIOPS_MPI_LIB", "LD_LIBRARY_PATH") - old_variable_values = {} - - # Save current environment and update to new environment - for variable in variables: - if variable in os.environ: - old_variable_values[variable] = os.environ[variable] - os.environ[variable] = f"my{variable.lower()}" - - # Store the LD_LIBRARY_PATH before Khiops environment initialization - initial_ld_library_path = os.environ["LD_LIBRARY_PATH"] - - # Create a fresh runner and intialize its env - with MockedRunnerContext(create_mocked_raw_run(False, False, 0)): - pass - - # Check that `LD_LIBRARY_PATH` has been updated - self.assertEqual( - os.environ["LD_LIBRARY_PATH"], - f"{os.environ['KHIOPS_MPI_LIB']}{os.pathsep}{initial_ld_library_path}", - ) - - # Restore the old environment variable values (if any) - for variable in variables: - if variable in old_variable_values: - os.environ[variable] = old_variable_values[variable] - else: - del os.environ[variable] - - def test_khiops_mpiexec_path_var(self): - """Test defined KHIOPS_MPIEXEC_PATH env var specifies the path to `mpiexec`""" - - # Save current environment's `KHIOPS_MPIEXEC_PATH` and set it to test value - old_mpiexec_path = None - new_mpiexec_path = "/mypath/to/mpiexec" - if "KHIOPS_MPIEXEC_PATH" in os.environ: - old_mpiexec_path = os.environ["KHIOPS_MPIEXEC_PATH"] - os.environ["KHIOPS_MPIEXEC_PATH"] = new_mpiexec_path - - # Create a fresh runner and initialize its env - with MockedRunnerContext(create_mocked_raw_run(False, False, 0)) as runner: - pass - - # Check custom KHIOPS_MPIEXEC_PATH is used - self.assertEqual(runner.mpi_command_args[0], new_mpiexec_path) - - # Restore old KHIOPS_MPIEXEC_PATH if defined - if old_mpiexec_path is not None: - os.environ["KHIOPS_MPIEXEC_PATH"] = old_mpiexec_path - else: - del os.environ["KHIOPS_MPIEXEC_PATH"] - - def test_khiops_mpi_command_args_var(self): - """Test defined KHIOPS_MPI_COMMAND_ARGS env var customizes args to `mpiexec`""" - - # Save current environment's `KHIOPS_MPI_COMMAND_ARGS` and set it to test value - old_mpi_command_args = None - new_mpi_command_args = "-a foo -b bar -c-d baz" - if "KHIOPS_MPI_COMMAND_ARGS" in os.environ: - old_mpi_command_args = os.environ["KHIOPS_MPI_COMMAND_ARGS"] - os.environ["KHIOPS_MPI_COMMAND_ARGS"] = new_mpi_command_args - - # Create a fresh runner - with MockedRunnerContext(create_mocked_raw_run(False, False, 0)) as runner: - pass - - # Check custom MPI_COMMAND_ARGS is used - self.assertEqual(runner.mpi_command_args[1:], shlex.split(new_mpi_command_args)) - - # Restore old MPI_COMMAND_ARGS if defined - if old_mpi_command_args is not None: - os.environ["KHIOPS_MPI_COMMAND_ARGS"] = old_mpi_command_args - else: - del os.environ["KHIOPS_MPI_COMMAND_ARGS"] - if __name__ == "__main__": unittest.main() diff --git a/tests/test_khiops_integrations.py b/tests/test_khiops_integrations.py index 02a18392..52472517 100644 --- a/tests/test_khiops_integrations.py +++ b/tests/test_khiops_integrations.py @@ -10,12 +10,11 @@ import platform import shutil import subprocess -import tempfile import unittest import khiops.core as kh from khiops.core.exceptions import KhiopsEnvironmentError -from khiops.core.internals.runner import KhiopsLocalRunner, get_linux_distribution_name +from khiops.core.internals.runner import KhiopsLocalRunner from khiops.extras.docker import KhiopsDockerRunner from khiops.sklearn.estimators import KhiopsClassifier from tests.test_helper import KhiopsTestHelper @@ -34,7 +33,7 @@ def test_runner_has_mpiexec_on_linux(self): """Test that local runner has executable mpiexec on Linux if MPI is installed""" # Check package is installed on supported platform: # Check /etc/os-release for Linux version - linux_distribution = get_linux_distribution_name() + linux_distribution = None openmpi_found = None with open( os.path.join(os.sep, "etc", "os-release"), encoding="ascii" @@ -110,20 +109,14 @@ def test_runner_with_conda_based_environment(self): if "CONDA_PREFIX" in os.environ: del os.environ["CONDA_PREFIX"] - # Create a fresh local runner and initialize its default Khiops binary dir + # Create a fresh local runner runner = KhiopsLocalRunner() - runner._initialize_khiops_bin_dir() - # Get runner's default Khiops binary directory - default_bin_dir = runner.khiops_bin_dir - - # Check that MODL* are indeed in the runner's Khiops binary directory - self.assertTrue( - all( - binary_file in os.listdir(default_bin_dir) - for binary_file in ("MODL", "MODL_Coclustering") - ) - ) + # Check that MODL* files as set in the runner exist and are executable + self.assertTrue(os.path.isfile(runner.khiops_path)) + self.assertTrue(os.access(runner.khiops_path, os.X_OK)) + self.assertTrue(os.path.isfile(runner.khiops_coclustering_path)) + self.assertTrue(os.access(runner.khiops_coclustering_path, os.X_OK)) # Check that mpiexec is set correctly in the runner: mpi_command_args = runner.mpi_command_args @@ -133,52 +126,6 @@ def test_runner_with_conda_based_environment(self): self.assertTrue(os.path.isfile(mpiexec_path)) self.assertTrue(os.access(mpiexec_path, os.X_OK)) - def test_runner_with_custom_khiops_binary_directory(self): - """Test that local runner works with custom Khiops binary directory""" - # Get default runner - default_runner = kh.get_runner() - - # Test in a try block to restore the runner if there are unexpected errors - try: - # Create a fresh local runner and initialize its default Khiops binary dir - runner = KhiopsLocalRunner() - runner._initialize_khiops_bin_dir() - - # Get runner's default Khiops binary directory - default_bin_dir = runner.khiops_bin_dir - - # Create temporary directory - with tempfile.TemporaryDirectory() as tmp_khiops_bin_dir: - # Copy Khiops binaries into the temporary directory - for binary_file in os.listdir(default_bin_dir): - if binary_file.startswith("MODL"): - shutil.copy( - os.path.join(default_bin_dir, binary_file), - os.path.join(tmp_khiops_bin_dir, binary_file), - ) - - # Change runner's Khiops binary directory to the temporary directory - runner.khiops_bin_dir = tmp_khiops_bin_dir - - # Set current runner to the fresh runner - kh.set_runner(runner) - - # Test the core API works - # Call check_database (could be any other method) - with self.assertRaises(kh.KhiopsRuntimeError) as cm: - kh.check_database("a.kdic", "dict_name", "data.txt") - - # Test that MODL executable can be found and launched - # Note: The return code is not specified to support older khiops - # versions that returned 2 instead of 0 in this case. - self.assertIn( - "khiops execution had errors (return code ", str(cm.exception) - ) - - # Always set back to the default runner - finally: - kh.set_runner(default_runner) - class KhiopsMultitableFitTests(unittest.TestCase): """Test if Khiops estimator can be fitted on multi-table data""" From 8aecaa64d19d47282525499a2f12c87723422010 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 4 Oct 2024 15:23:11 +0200 Subject: [PATCH 02/12] Update estimator checks for sklearn >= 1.5 --- tests/test_sklearn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 0024f1b3..463fae80 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -2277,6 +2277,8 @@ def test_sklearn_check_estimator(self): if check_name in [ "check_fit_score_takes_y", "check_fit_idempotent", + # yields "empty" deployed table as of sklearn >= 1.5: + "check_estimators_dtypes", ] and isinstance(estimator, KhiopsEncoder): continue with self.subTest( From 45c5078412eef1c00c01b478c741ae245b3629b8 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 4 Oct 2024 18:57:10 +0200 Subject: [PATCH 03/12] Just look for `mpiexec` in `kh-status` `bin` is not necessarily present; `mpiexec` can be anywhere. --- .github/workflows/pip.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index 3628277d..ad11b5c0 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -100,11 +100,8 @@ jobs: kh-samples sklearn -i khiops_classifier -e # Test that the line containing "MPI command" also contains - # an executable name under a /bin directory - # Note: this executable name can be different, depending on the MPI - # backend and OS; for instance, "orterun" for OpenMPI on Ubuntu Linux, but - # "mpiexec" for OpenMPI on Rocky Linux - kh-status | grep "MPI command" | grep -Ewq "(/.+?)/bin/.+" + # an executable named "mpiexec" + kh-status | grep "MPI command" | grep -wq "mpiexec" release: if: github.ref_type == 'tag' needs: [build, test] From 1bfdef4638424d4101c59672b112faf84ff3be9b Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:18:03 +0200 Subject: [PATCH 04/12] Initialize Khiops environment upon creating KhiopsLocalRunner instance Namely: - call `_initialize_khiops_environment` only once, in `KhiopsLocalRunner`'s `__init__` method - initially set the global `_khiops_runner` to `None` - in the `get_runner` function: - if the global `khiops_runner` is `None`, then: - create `KhiopsLocalRunner` instance, which initializes the environment - set the global `_khiops_runner` to the `KhiopsLocalRunner` instance - return `_khiops_runner`, as before Thusly, `KhiopsLocalRunner`'s environment is fully initialized upon creation. Only `khiops_version` is initialized lazily because this needs a call to Khiops binaries, which incurs a time / performance cost and hence is done only when needed. --- khiops/core/internals/runner.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index b98d646d..717c4ee8 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -1059,13 +1059,6 @@ def _initialize_default_samples_dir(self): assert self._samples_dir is not None - def _finish_khiops_environment_initialization(self): - # Check the tools exist and are executable - self._check_tools() - - # Initialize the khiops version - self._initialize_khiops_version() - def _check_tools(self): """Checks the that the tool binaries exist and are executable""" for tool_name in ["khiops", "khiops_coclustering"]: @@ -1105,14 +1098,8 @@ def _initialize_khiops_version(self): ) def _build_status_message(self): - # Initialize if necessary - with warnings.catch_warnings(record=True) as warning_list: - if self._khiops_version is None: - self._finish_khiops_environment_initialization() - # Call the parent's method - status_msg, parent_warning_list = super()._build_status_message() - warning_list += parent_warning_list + status_msg, warning_list = super()._build_status_message() # Build the messages for temp_dir, install type and mpi if self.khiops_temp_dir: @@ -1141,7 +1128,7 @@ def _build_status_message(self): def _get_khiops_version(self): # Initialize the first time it is called if self._khiops_version is None: - self._finish_khiops_environment_initialization() + self._initialize_khiops_version() assert isinstance(self._khiops_version, KhiopsVersion), type_error_message( self._khiops_version, "khiops_version", KhiopsVersion ) @@ -1297,10 +1284,6 @@ def _run( command_line_options, trace, ): - # Initialize if necessary (lazy initialization) - if self._khiops_version is None: - self._finish_khiops_environment_initialization() - # Execute the tool khiops_args = command_line_options.build_command_line_options(scenario_path) stdout, stderr, return_code = self.raw_run( @@ -1317,8 +1300,8 @@ def _run( # Disable pylint UPPER_CASE convention: _khiops_runner is non-constant # pylint: disable=invalid-name -# Runner (backend) of Khiops Python, by default one for a local Khiops installation -_khiops_runner = KhiopsLocalRunner() +# Runner (backend) of Khiops Python, by default None for lazy initialization +_khiops_runner = None def set_runner(runner): @@ -1337,6 +1320,10 @@ def get_runner(): `.KhiopsRunner` The current Khiops Python runner of the module. """ + # Define and initialize a runner for a local Khiops installation + global _khiops_runner + if _khiops_runner is None: + _khiops_runner = KhiopsLocalRunner() return _khiops_runner From a5be89c6af7116c110a21eccb839360c379d0f17 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:51:10 +0200 Subject: [PATCH 05/12] Keep vanilla `PATH` as on the supported Linux OSes This path change was meant to test issue https://github.com/KhiopsML/khiops-python/issues/209, which is moot in our case, because: - all currently supported Linux distributions (Ubuntu 20.04, 22.04, 24.04; Rocky 8, 9; Debian 10, 11, 12) have `/usr/bin` before `/bin` in `PATH` - the Arch Linux distribution that is mentioned in the above-cited issue is not currently supported. --- .github/workflows/unit-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 25647bda..dbf80c9a 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -106,9 +106,6 @@ jobs: OMPI_MCA_rmaps_base_oversubscribe: true PRTE_MCA_rmaps_default_mapping_policy: :oversubscribe run: | - # Make sure '/bin' is before '/usr/bin' in PATH - PATH=$(echo "/bin:"$PATH | sed 's#:/bin##') - # This is needed so that the Git tag is parsed and the khiops-python # version is retrieved git config --global --add safe.directory $(realpath .) @@ -191,9 +188,6 @@ jobs: # Force > 2 CPU cores to launch mpiexec KHIOPS_PROC_NUMBER: 4 run: |- - # Make sure '/bin' is before '/usr/bin' in PATH - PATH=$(echo "/bin:"$PATH | sed 's#:/bin##') - # Make sure MPI support is not loaded through env modules # Note: As Docker container's shell is non-interactive, environment # modules are currently not initializing the shell anyway From 6a3f72f0a086abdb95716dcfbf420f9c7bb4351c Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:10:57 +0200 Subject: [PATCH 06/12] Handle the Windows native + pip install case via KHIOPS_HOME --- khiops/core/internals/runner.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index 717c4ee8..b60048ff 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -930,16 +930,29 @@ def __init__(self): def _initialize_khiops_environment(self): # Check the `khiops_env` script - # khiops_env is always in path for a proper installation - khiops_env_path = shutil.which("khiops_env") - if khiops_env_path is not None: - if platform.system() == "Windows": - command = f"cmd /C call {khiops_env_path} --env" + # On Windows native installations, rely on the `KHIOPS_HOME` environment + # variable set by the Khiops Desktop Application installer + installation_method = _infer_khiops_installation_method() + if platform.system() == "Windows" and installation_method == "binary+pip": + # KHIOPS_HOME variable by default + if "KHIOPS_HOME" in os.environ: + khiops_env_path = os.path.join( + os.environ["KHIOPS_HOME"], "bin", "khiops_env.cmd" + ) + # Raise error if KHIOPS_HOME is not set else: - command = shlex.split(f"bash -c '{khiops_env_path} --env'") + raise KhiopsEnvironmentError( + "No environment variable named 'KHIOPS_HOME' found. " + "Make sure you have installed Khiops >= 10.2.3. " + "Go to https://khiops.org for more information." + ) + # On UNIX or Conda, khiops_env is always in path for a proper installation + else: + khiops_env_path = shutil.which("khiops_env") + if khiops_env_path is not None: with subprocess.Popen( - command, + [khiops_env_path, "--env"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, @@ -997,14 +1010,13 @@ def _initialize_khiops_environment(self): else: raise KhiopsEnvironmentError( "The 'khiops_env' script not found. Make sure you have " - "installed khiops >= 10.2.3." + "installed khiops >= 10.2.3. " + "Go to https://khiops.org for more information." ) # Check the tools exist and are executable self._check_tools() - installation_method = _infer_khiops_installation_method() - # Switch to sequential mode if 0 < max_cores < 3 if self.max_cores in (1, 2): warnings.warn( From 018558ca9a016220243c8b544d9b8cb23f491b20 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:19:49 +0200 Subject: [PATCH 07/12] Add the binary status to the library status --- khiops/core/internals/runner.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/khiops/core/internals/runner.py b/khiops/core/internals/runner.py index b60048ff..7262bd28 100644 --- a/khiops/core/internals/runner.py +++ b/khiops/core/internals/runner.py @@ -1133,6 +1133,18 @@ def _build_status_message(self): status_msg += f"temp dir : {khiops_temp_dir_msg}\n" status_msg += f"install type : {install_type_msg}\n" status_msg += f"MPI command : {mpi_command_args_msg}\n" + + # Add output of khiops -s which gives the MODL_* binary status + status_msg += "\n\n" + khiops_executable = os.path.join(os.path.dirname(self.khiops_path), "khiops") + status_msg += f"Khiops executable status (output of '{khiops_executable} -s')\n" + stdout, stderr, return_code = self.raw_run("khiops", ["-s"], use_mpi=True) + + # On success retrieve the status and added to the message + if return_code == 0: + status_msg += stdout + else: + warning_list.append(stderr) status_msg += "\n" return status_msg, warning_list From 7d01a8dd74011c8544179c3890ca1f79aac55b3a Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:11:35 +0200 Subject: [PATCH 08/12] Add Windows integration check CI job --- .github/workflows/unit-tests.yml | 60 ++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index dbf80c9a..bffcedb0 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -2,6 +2,7 @@ name: Unit Tests env: DEFAULT_SAMPLES_REVISION: main + DEFAULT_KHIOPS_DESKTOP_REVISION: 10.2.3-b.3 on: workflow_dispatch: inputs: @@ -11,6 +12,9 @@ on: image-tag: default: latest description: Development Docker Image Tag + khiops-desktop-revision: + default: 10.2.3-b.3 + description: Khiops Windows Desktop Application Version run-long-tests: type: boolean required: false @@ -144,6 +148,62 @@ jobs: tests/resources/dictionary/copy_output_kdic/*.kdic tests/resources/general_options/general_options/*/*._kh retention-days: 7 + check-khiops-integration-on-windows: + runs-on: windows-2019 + steps: + - name: Download the Khiops Desktop NSIS Installer + shell: pwsh + run: | + $KHIOPS_DESKTOP_REVISION = '${{ inputs.khiops-desktop-revision || env.DEFAULT_KHIOPS_DESKTOP_REVISION }}' + $KHIOPS_DOWNLOAD_URL = "https://github.com/KhiopsML/khiops/releases/download/${KHIOPS_DESKTOP_REVISION}/khiops-${KHIOPS_DESKTOP_REVISION}-setup.exe" + Invoke-WebRequest "${KHIOPS_DOWNLOAD_URL}" ` + -OutFile .\khiops-setup.exe ` + -UseBasicParsing; ` + Unblock-File .\khiops-setup.exe + - name: Install the Khiops Desktop Application + shell: pwsh + run: | + # Execute the installer + $ErrorActionPreference = 'Stop' + $ProgressPreference = 'SilentlyContinue' + Start-Process ` + -FilePath .\khiops-setup.exe ` + -ArgumentList '/S' ` + -Wait + - name: Checkout sources + uses: actions/checkout@v4 + with: + # Get Git tags so that versioneer can function correctly + # See issue https://github.com/actions/checkout/issues/701 + fetch-depth: 0 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install khiops-python dev dependencies + shell: pwsh + run: | + # The following git command is required, + # as the Git repository is in a directory the current user does not own, + # Python versioneer fails to compute the current version correctly otherwise + git config --global --add safe.directory $(Resolve-Path '.' | % {$_.toString()}); ` + python setup.py egg_info; ` + Get-Content .\khiops.egg-info\requires.txt ` + | Select-String -Pattern '^\[' -NotMatch ` + | % {$_.Line} ` + | ForEach-Object {pip install $_.toString()}; ` + Remove-Item -r -force khiops.egg-info + - name: Setup and Install Test Requirements + run: pip install -r test-requirements.txt + - name: Test Khiops Integration + shell: pwsh + run: | + # Refresh environment variables by using the Chocolatey tool + # Otherwise, the env vars set in the registry by the Khiops installer do not get updated + # See also https://github.com/actions/runner-images/discussions/6065#discussioncomment-3517318 + Import-Module $env:ChocolateyInstall\helpers\chocolateyProfile.psm1 + refreshenv + python -c "import khiops.core as kh; kh.get_runner().print_status()" check-mpiexec-on-linux: strategy: fail-fast: false From f2887c3968cdbcf73e466ef8535b28a695b353f2 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Wed, 9 Oct 2024 19:12:09 +0200 Subject: [PATCH 09/12] Set KHIOPS_PROC_NUMBER > 2 so that mpiexec gets set to non-empty string --- .github/workflows/unit-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index bffcedb0..5aac3580 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -196,6 +196,8 @@ jobs: - name: Setup and Install Test Requirements run: pip install -r test-requirements.txt - name: Test Khiops Integration + env: + KHIOPS_PROC_NUMBER: 4 shell: pwsh run: | # Refresh environment variables by using the Chocolatey tool From 6942fd9f193398c4d64aa360057222dc2a40a011 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 4 Oct 2024 15:41:50 +0200 Subject: [PATCH 10/12] Update Conda dependency on khiops-core to 10.2.3b.4 Thus, we ensure a `khiops-core` version that uses the updated `khiops_env` is installed. --- packaging/conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/conda/meta.yaml b/packaging/conda/meta.yaml index 06de61a3..1a11492f 100644 --- a/packaging/conda/meta.yaml +++ b/packaging/conda/meta.yaml @@ -26,7 +26,7 @@ requirements: - python run: - python - - khiops-core >=10.2.2b.3,<11.0.0 + - khiops-core >=10.2.3b.4,<11.0.0 - pandas >=0.25.3 - scikit-learn >=0.22.2 run_constrained: From 81acfaeceb978224bd9f753945a810b9bd134968 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Fri, 4 Oct 2024 18:49:48 +0200 Subject: [PATCH 11/12] Add conda-forge to Windows test workflow (to comply with upstream khiops-core) TODO: Revert this just before merging PR https://github.com/KhiopsML/khiops-python/pull/223 --- .github/workflows/conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index 9f26b6be..b0dcd88f 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -95,7 +95,7 @@ jobs: - name: Install the Khiops Conda package (Windows) if: runner.os == 'Windows' run: | - conda install --channel khiops-dev khiops-core=$KHIOPS_CORE_VERSION + conda install --channel conda-forge --channel khiops-dev khiops-core=$KHIOPS_CORE_VERSION conda install --override-channels --channel conda-forge --channel ./khiops-conda/ khiops # In Linux/macOS we need the conda-forge channel to install their pinned versions - name: Install the Khiops Conda package (Linux/macOS) From 91ccb464009837990b03bb038813b5bd796798e6 Mon Sep 17 00:00:00 2001 From: Popescu V <136721202+popescu-v@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:37:23 +0200 Subject: [PATCH 12/12] Update default Khiops Core version in the CI for tests to 10.2.3[-]b.4 TODO: Revert after: - building `latest` on `dev` with the proper `10.2.3` Khiops version (once the Khiops 10.2.3 release is done) - merging this PR --- .github/workflows/api-docs.yml | 2 +- .github/workflows/conda.yml | 2 +- .github/workflows/pip.yml | 2 +- .github/workflows/unit-tests.yml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/api-docs.yml b/.github/workflows/api-docs.yml index 62e7aafd..8828bdcd 100644 --- a/.github/workflows/api-docs.yml +++ b/.github/workflows/api-docs.yml @@ -39,7 +39,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || 'latest' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '10.2.3-b.4.with_native_10.2.3-b.4' }} # Use the 'runner' user (1001) from github so checkout actions work properly # https://github.com/actions/runner/issues/2033#issuecomment-1598547465 options: --user 1001 diff --git a/.github/workflows/conda.yml b/.github/workflows/conda.yml index b0dcd88f..030a88b4 100644 --- a/.github/workflows/conda.yml +++ b/.github/workflows/conda.yml @@ -4,7 +4,7 @@ env: DEFAULT_SAMPLES_VERSION: 10.2.0 # Note: The default Khiops version must never be an alpha release as they are # ephemeral. To test alpha versions run the workflow manually. - DEFAULT_KHIOPS_CORE_VERSION: 10.2.2 + DEFAULT_KHIOPS_CORE_VERSION: 10.2.3b.4 on: workflow_dispatch: inputs: diff --git a/.github/workflows/pip.yml b/.github/workflows/pip.yml index ad11b5c0..3564ff46 100644 --- a/.github/workflows/pip.yml +++ b/.github/workflows/pip.yml @@ -56,7 +56,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || 'latest' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || '10.2.3-b.4.with_native_10.2.3-b.4' }} steps: - name: Set parameters as env run: | diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 5aac3580..40451d18 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -36,7 +36,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || 'latest' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-ubuntu22.04:${{ inputs.image-tag || '10.2.3-b.4.with_native_10.2.3-b.4' }} credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} @@ -217,7 +217,7 @@ jobs: # because the `env` context is only accessible at the step level; # hence, it is hard-coded image: |- - ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || 'latest' }} + ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.container }}:${{ inputs.image-tag || '10.2.3-b.4.with_native_10.2.3-b.4' }} credentials: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }}