ModelCloud · Qubitium · Dec 6, 2024 · Dec 6, 2024
diff --git a/gptqmodel/nn_modules/qlinear/dynamic_cuda.py b/gptqmodel/nn_modules/qlinear/dynamic_cuda.py
@@ -8,8 +8,13 @@
 
 logger = setup_logger()
 
-import gptqmodel_cuda_64  # noqa: E402
-import gptqmodel_cuda_256  # noqa: E402
+
+gptqmodel_cuda_import_exception = None
+try:
+    import gptqmodel_cuda_64  # noqa: E402
+    import gptqmodel_cuda_256  # noqa: E402
+except ImportError as e:
+    gptqmodel_cuda_import_exception = e
 
 
 class DynamicCudaQuantLinear(TorchQuantLinear):
@@ -40,6 +45,10 @@ def __init__(
             kernel_switch_threshold=128,
             **kwargs,
     ):
+        if gptqmodel_cuda_import_exception is not None:
+            raise ValueError(
+                f"Trying to use the cuda backend, but could not import the C++/CUDA dependencies with the following error: {gptqmodel_cuda_import_exception}"
+            )
         super().__init__(bits=bits, group_size=group_size, sym=sym, desc_act=desc_act, infeatures=infeatures,
                          outfeatures=outfeatures, bias=bias, weight_dtype=weight_dtype, **kwargs)
 

diff --git a/gptqmodel/utils/importer.py b/gptqmodel/utils/importer.py
@@ -104,17 +104,23 @@ def select_quant_linear(
         allow_backends = format_dict[format]
         allow_quant_linears = backend_dict
         err = None
+        # Assume all quant linears in the model use the same backend, so the choice only needs to be logged once.
+        has_logged = False
         for k, v in allow_quant_linears.items():
             in_allow_backends = k in allow_backends
             validate, err = v.validate(bits, group_size, desc_act, sym, dynamic=dynamic, device=device, trainable=trainable)
             if in_allow_backends and validate:
                 if pack:
                     check_pack_func = hasattr(v, "pack")
                     if check_pack_func:
-                        logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
+                        if not has_logged:
+                            logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
+                            has_logged = True
                         return v
                 else:
-                    logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
+                    if not has_logged:
+                        logger.info(f"Auto choose the fastest one based on quant model compatibility: {v}")
+                        has_logged = True
                     return v
 
         if err: