Commit 88e2a3a

Split base.py file (#465)

* Move the from_quantized loading function to Loader.py
* Clean up code
* Remove unused code
1 parent 10b9949 · commit 88e2a3a

File tree

4 files changed: +451 −383 lines

examples/quantization/basic_usage.py

Lines changed: 4 additions & 0 deletions
@@ -1,6 +1,10 @@
+import os
+
 from gptqmodel import GPTQModel, QuantizeConfig
 from transformers import AutoTokenizer

+os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+
 pretrained_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 quantized_model_id = "TinyLlama-1.1B-Chat-v1.0-4bit-128g"
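
For context on the added environment variable: CUDA_DEVICE_ORDER=PCI_BUS_ID makes CUDA enumerate GPUs in PCI bus order, so device indices line up with nvidia-smi output. It only takes effect if set before the process first initializes CUDA (importing torch alone does not initialize CUDA, so setting it after the imports, as the example does, still works). A minimal sketch of the usual pairing; the CUDA_VISIBLE_DEVICES value is an illustrative assumption, not part of this commit:

import os

# Set before anything creates a CUDA context: enumerate devices
# in PCI bus order so indices match nvidia-smi.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Hypothetical: expose only the GPU reported as index 0 by nvidia-smi.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from gptqmodel import GPTQModel, QuantizeConfig
from transformers import AutoTokenizer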

gptqmodel/models/auto.py

Lines changed: 0 additions & 2 deletions
@@ -127,7 +127,6 @@ def from_quantized(
         device: Optional[Union[str, int]] = None,
         backend: BACKEND = BACKEND.AUTO,
         quantize_config: Optional[QuantizeConfig | Dict] = None,
-        model_basename: Optional[str] = None,
         use_safetensors: bool = True,
         trust_remote_code: bool = False,
         # verify weight files matches predefined hash during loading
@@ -146,7 +145,6 @@ def from_quantized(
             device=device,
             backend=backend,
             quantize_config=quantize_config,
-            model_basename=model_basename,
             use_safetensors=use_safetensors,
             trust_remote_code=trust_remote_code,
             verify_hash=verify_hash,
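
With model_basename removed, callers must drop that keyword and rely on standard weight-file resolution; the remaining arguments are unchanged. A minimal sketch of a post-change call, assuming the model id is the first positional argument (the hunk does not show it) and reusing the quantized_model_id from the example above; the device choice is illustrative:

from gptqmodel import GPTQModel

# Load a quantized model after this change: model_basename is no longer
# accepted, and the remaining keywords mirror the signature in the diff.
model = GPTQModel.from_quantized(
    "TinyLlama-1.1B-Chat-v1.0-4bit-128g",  # quantized_model_id from the example
    device="cuda:0",        # illustrative device
    use_safetensors=True,
    trust_remote_code=False,
)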
