ModelCloud · Qubitium · Mar 11, 2025 · Mar 11, 2025 · Mar 11, 2025
diff --git a/gptqmodel/adapter/adapter.py b/gptqmodel/adapter/adapter.py
@@ -45,7 +45,7 @@ def remove(cls, path):
 class Adapter():
     def __init__(self, rank: int = None, path: str = None):
         self.rank = rank # rank may be zero, when loading, and rank will be re-populated by loading saved LoraConfig file
-        self.path = path.lower().strip() if isinstance(path, str) else path
+        self.path = path.strip() if isinstance(path, str) else path
 
     def validate_path(self, local=False):
         if not self.path or not isinstance(self.path, str):

diff --git a/gptqmodel/nn_modules/qlinear/__init__.py b/gptqmodel/nn_modules/qlinear/__init__.py
@@ -199,7 +199,7 @@ def post_init(self):
         if self.adapter is not None:
             self.adapter.post_init(
                 weight_key=self.name,
-                device=self.qweight.device,
+                device=next(self.parameters()).device,
                 lora_A=getattr(self, "lora_A", None),
                 lora_B=getattr(self, "lora_B", None))
 

diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py
@@ -535,7 +535,7 @@ def convert_gptq_v2_to_v1_format(
         for _, submodule in model.named_modules():
             # sym=False has underflow probability of ~<=13% during testing. No underflow possible for sym=True.
             if isinstance(submodule, qlinear_kernel):
-                convert_gptq_v2_to_v1_format_module(module=submodule, cfg=cfg)
+                convert_gptq_v2_to_v1_format_module(module=submodule, quantize_config=quantize_config)
 
     return model
 

diff --git a/tests/test_quant_and_eora.py b/tests/test_quant_and_eora.py
@@ -16,6 +16,10 @@
 # -- do not touch
 import os
 
+from parameterized import parameterized
+
+from gptqmodel.quantization import QUANT_METHOD, FORMAT
+
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
 # -- end do not touch
 
@@ -45,7 +49,13 @@ class Test(ModelTest):
     def setUpClass(cls):
         pass
 
-    def test_quant_and_eora(self):
+    @parameterized.expand(
+        [
+            # (QUANT_METHOD.GPTQ, FORMAT.GPTQ),
+            (QUANT_METHOD.QQQ, FORMAT.QQQ),
+        ]
+    )
+    def test_quant_and_eora(self, quant_method: QUANT_METHOD, format: FORMAT):
         bits = 4
         group_size = 128
         desc_act = True
@@ -91,6 +101,8 @@ def test_quant_and_eora(self):
                 group_size=group_size,
                 desc_act=desc_act,  # bitblas only supports DESC_ACT=False
                 adapter=eora,
+                format=format,
+                quant_method=quant_method,
             )
 
             model = GPTQModel.load(