diff --git a/pyproject.toml b/pyproject.toml
index 4253b1dff5f..07948240e91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -476,5 +476,7 @@ module = [
     "prodigy.components.db.*",
     "transformers.*",
     "langchain_community.*",
+    "vllm.*",
+    "numba.*",
 ]
 ignore_missing_imports = true
diff --git a/src/zenml/integrations/evidently/__init__.py b/src/zenml/integrations/evidently/__init__.py
index a87ee13b747..0d79edbfe0c 100644
--- a/src/zenml/integrations/evidently/__init__.py
+++ b/src/zenml/integrations/evidently/__init__.py
@@ -33,7 +33,7 @@

 # Fix numba errors in Docker and suppress logs and deprecation warning spam
 try:
-    from numba.core.errors import (  # type: ignore[import-not-found]
+    from numba.core.errors import (
         NumbaDeprecationWarning,
         NumbaPendingDeprecationWarning,
     )
diff --git a/src/zenml/integrations/vllm/services/vllm_deployment.py b/src/zenml/integrations/vllm/services/vllm_deployment.py
index d33eb22061e..d86cb3d1bbd 100644
--- a/src/zenml/integrations/vllm/services/vllm_deployment.py
+++ b/src/zenml/integrations/vllm/services/vllm_deployment.py
@@ -13,6 +13,7 @@
 # permissions and limitations under the License.
 """Implementation of the vLLM Inference Server Service."""

+import argparse
 import os
 from typing import Any, List, Optional, Union

@@ -137,15 +138,23 @@ def run(self) -> None:
         self.endpoint.prepare_for_start()

         import uvloop
-        from vllm.entrypoints.openai.api_server import run_server
-        from vllm.entrypoints.openai.cli_args import make_arg_parser
-        from vllm.utils import FlexibleArgumentParser
+        from vllm.entrypoints.openai.api_server import (
+            run_server,
+        )
+        from vllm.entrypoints.openai.cli_args import (
+            make_arg_parser,
+        )
+        from vllm.utils import (
+            FlexibleArgumentParser,
+        )

         try:
-            parser = make_arg_parser(FlexibleArgumentParser())
-            args = parser.parse_args()
+            parser: argparse.ArgumentParser = make_arg_parser(
+                FlexibleArgumentParser()
+            )
+            args: argparse.Namespace = parser.parse_args()
             # Override port with the available port
-            self.config.port = self.endpoint.status.port
+            self.config.port = self.endpoint.status.port or self.config.port
             # Update the arguments in place
             args.__dict__.update(self.config.model_dump())
             uvloop.run(run_server(args=args))
@@ -161,7 +170,7 @@ def prediction_url(self) -> Optional[str]:
         """
         if not self.is_running:
             return None
-        return self.endpoint.prediction_url_path
+        return self.endpoint.prediction_url

     def predict(self, data: "Any") -> "Any":
         """Make a prediction using the service.
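
For context on the `run()` hunk above: the service builds vLLM's CLI parser, parses an empty command line, and then overwrites the resulting namespace with the ZenML service config before handing it to `run_server`. Below is a minimal, standalone sketch of that parse-then-override pattern using only the standard library `argparse` in place of vLLM's `make_arg_parser`/`FlexibleArgumentParser`; the option names and config values are illustrative assumptions, not the actual ZenML config fields.

```python
import argparse

# Stand-in for make_arg_parser(FlexibleArgumentParser()): every server
# option becomes a CLI flag with a default value. (Illustrative flags only.)
parser = argparse.ArgumentParser()
parser.add_argument("--model", default=None)
parser.add_argument("--port", type=int, default=8000)

# Parse with no CLI input, then update the namespace in place from a config
# mapping -- mirroring args.__dict__.update(self.config.model_dump()).
args: argparse.Namespace = parser.parse_args([])
config = {"model": "facebook/opt-125m", "port": 8001}  # assumed example values
args.__dict__.update(config)

print(args.model, args.port)  # -> facebook/opt-125m 8001
```

Because the config dict wins over any parsed defaults, keeping `self.config.port` populated (hence the `or self.config.port` fallback when the endpoint has not reported a port yet) determines which port the server ultimately binds to.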