
Commit f63b3b8

[SECURITY] drop support of loading unsafe .bin weights (#460)
* remove .bin file
* delete allow_unsafe_loading variable
* add file extension
* Update base.py
* add test_pt unit test
* add .pth extension
* clean code
* modify test_pt unit test

Co-authored-by: Qubitium-ModelCloud <[email protected]>
1 parent 3762668 commit f63b3b8

3 files changed: +44 -12 lines changed

gptqmodel/models/base.py

Lines changed: 6 additions & 11 deletions
@@ -691,7 +691,8 @@ def save_quantized(
             state_dict = {k: v.clone().contiguous() for k, v in state_dict.items()}
             model_save_name = model_base_name + ".safetensors"
         else:
-            model_save_name = model_base_name + ".bin"
+            model_save_name = model_base_name + ".pt"
+
         if not self.qlinear_kernel.SUPPORTS_SHARDS and max_shard_size is not None:
             logger.warning("Sharding is not supported for this quant. Disabling sharding.")
             max_shard_size = None
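
With this hunk, `save_quantized` falls back to a `.pt` file name instead of `.bin` when safetensors output is disabled. A minimal sketch of that save path, reusing the model id, quantize config, and calibration text from tests/test_pt.py below (the output directory name is illustrative):

    from transformers import AutoTokenizer
    from gptqmodel import GPTQModel, QuantizeConfig

    # Model id, config, and calibration text mirror tests/test_pt.py.
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m", use_fast=True)
    calibration_dataset = [
        tokenizer("gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm.")
    ]

    model = GPTQModel.from_pretrained("facebook/opt-125m", QuantizeConfig(bits=4, group_size=128))
    model.quantize(calibration_dataset)

    # After this commit the non-safetensors fallback is written as "<basename>.pt"
    # rather than "<basename>.bin"; the directory name is illustrative.
    model.save_quantized("opt-125m-gptq-4bit", use_safetensors=False)
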
@@ -1106,7 +1107,6 @@ def from_quantized(
         use_safetensors: bool = True,
         trust_remote_code: bool = False,
         format: Optional[FORMAT] = None,
-        allow_unsafe_loading: bool = False,
         verify_hash: Optional[Union[str, List[str]]] = None,
         **kwargs,
     ):
@@ -1247,7 +1247,7 @@ def from_quantized(
         if use_safetensors:
             extensions.append(".safetensors")
         else:
-            extensions += [".bin", ".pt"]
+            extensions += [".pt", ".pth"]

         model_name_or_path = str(model_name_or_path)

@@ -1260,14 +1260,9 @@ def from_quantized(

         # bin files have security issues: disable loading by default
         if ".bin" in resolved_archive_file:
-            if allow_unsafe_loading:
-                logger.warning(
-                    "There are security risks when loading tensors from .bin files. Make sure you are loading model only from a trusted source."
-                )
-            else:
-                raise ValueError(
-                    "Loading of unsafe .bin files are not allowed by default. Pass allow_unsafe_loading=True to bypass."
-                )
+            raise ValueError(
+                "Loading of .bin files are not allowed due to safety. Please convert your model to safetensor or pytorch format."
+            )

         quantize_config.runtime_format = quantize_config.format

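Since `from_quantized` now refuses `.bin` checkpoints outright, existing pickle-based weights have to be converted before they can be loaded again. A minimal conversion sketch, assuming the `.bin` file holds a plain torch state dict and comes from a source you trust (file names are illustrative):

    import torch
    from safetensors.torch import save_file

    # torch.load on a pickle file can execute arbitrary code: only do this for trusted files.
    state_dict = torch.load("gptq_model-4bit-128g.bin", map_location="cpu")

    # safetensors wants contiguous tensors without shared storage,
    # mirroring the clone().contiguous() step in save_quantized above.
    state_dict = {k: v.clone().contiguous() for k, v in state_dict.items()}

    save_file(state_dict, "gptq_model-4bit-128g.safetensors")
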
gptqmodel/utils/model.py

Lines changed: 1 addition & 1 deletion
@@ -543,7 +543,7 @@ def get_checkpoints(
     model_name_or_path: str, extensions: List[str], **cached_file_kwargs
 ):
     """
-    Retrives (and if necessary downloads from Hugging Face Hub) the model checkpoint. Sharding is supported. All the `possible_model_basenames` (e.g. `["model", "model-4bit-gptq"]`) will be explored over all `extensions` (e.g. `[".bin", ".safetensors"]`).
+    Retrives (and if necessary downloads from Hugging Face Hub) the model checkpoint. Sharding is supported. All the `possible_model_basenames` (e.g. `["model", "model-4bit-gptq"]`) will be explored over all `extensions` (e.g. `[".safetensors"]`).
     """
     searched_files = []
     resolved_archive_file = None

tests/test_pt.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+import torch
+import unittest
+
+from transformers import AutoTokenizer
+
+from gptqmodel import GPTQModel, QuantizeConfig
+
+pretrained_model_id = "facebook/opt-125m"
+quantized_model_id = "facebook-opt-125m"
+
+class Test_save_load_pt_weight(unittest.TestCase):
+    def test_pt(self):
+        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_id, use_fast=True)
+        calibration_dataset = [
+            tokenizer(
+                "gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."
+            )
+        ]
+
+        reference_output = "</s>gptqmodel is an easy-to-use model for creating a variety of a variety"
+
+        quantize_config = QuantizeConfig(
+            bits=4,
+            group_size=128,
+        )
+
+        model = GPTQModel.from_pretrained(pretrained_model_id, quantize_config)
+
+        model.quantize(calibration_dataset)
+
+        model.save_quantized(quantized_model_id, use_safetensors=False)
+
+        model = GPTQModel.from_quantized(quantized_model_id, device="cuda:0", use_safetensors=False)
+
+        result = tokenizer.decode(model.generate(**tokenizer("gptqmodel is an easy-to-use model", return_tensors="pt").to(model.device))[0])
+
+        self.assertEqual(result, reference_output)

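The new test exercises the `.pt` round trip end to end: quantize, save with use_safetensors=False, reload, and compare the generation output. It needs a CUDA device because the quantized model is reloaded on cuda:0, and it can be run on its own with a standard runner such as `python -m pytest tests/test_pt.py`.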