File tree Expand file tree Collapse file tree 4 files changed +451
-383
lines changed Expand file tree Collapse file tree 4 files changed +451
-383
lines changed Original file line number Diff line number Diff line change
1
+ import os
2
+
1
3
from gptqmodel import GPTQModel , QuantizeConfig
2
4
from transformers import AutoTokenizer
3
5
6
+ os .environ ["CUDA_DEVICE_ORDER" ] = "PCI_BUS_ID"
7
+
4
8
pretrained_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
5
9
quantized_model_id = "TinyLlama-1.1B-Chat-v1.0-4bit-128g"
6
10
Original file line number Diff line number Diff line change @@ -127,7 +127,6 @@ def from_quantized(
127
127
device : Optional [Union [str , int ]] = None ,
128
128
backend : BACKEND = BACKEND .AUTO ,
129
129
quantize_config : Optional [QuantizeConfig | Dict ] = None ,
130
- model_basename : Optional [str ] = None ,
131
130
use_safetensors : bool = True ,
132
131
trust_remote_code : bool = False ,
133
132
# verify weight files matches predefined hash during loading
@@ -146,7 +145,6 @@ def from_quantized(
146
145
device = device ,
147
146
backend = backend ,
148
147
quantize_config = quantize_config ,
149
- model_basename = model_basename ,
150
148
use_safetensors = use_safetensors ,
151
149
trust_remote_code = trust_remote_code ,
152
150
verify_hash = verify_hash ,
You can’t perform that action at this time.
0 commit comments