diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py index 12eacac1d..a45338fd7 100644 --- a/gptqmodel/models/auto.py +++ b/gptqmodel/models/auto.py @@ -4,47 +4,47 @@ from ..utils import BACKEND from ..utils.model import check_and_get_model_type -from .baichuan import BaiChuanGPTQ +from .definitions.baichuan import BaiChuanGPTQ from .base import BaseGPTQModel, QuantizeConfig -from .bloom import BloomGPTQ -from .chatglm import ChatGLM -from .codegen import CodeGenGPTQ -from .cohere import CohereGPTQ -from .dbrx import DbrxGPTQ -from .dbrx_converted import DbrxConvertedGPTQ -from .decilm import DeciLMGPTQ -from .deepseek_v2 import DeepSeekV2GPTQ -from .exaone import ExaoneGPTQ -from .gemma import GemmaGPTQ -from .gemma2 import Gemma2GPTQ -from .gpt2 import GPT2GPTQ -from .gpt_bigcode import GPTBigCodeGPTQ -from .gpt_neox import GPTNeoXGPTQ -from .gptj import GPTJGPTQ -from .granite import GraniteGPTQ -from .grinmoe import GrinMOEGPTQ -from .internlm import InternLMGPTQ -from .internlm2 import InternLM2GPTQ -from .llama import LlamaGPTQ -from .longllama import LongLlamaGPTQ -from .minicpm import MiniCPMGPTQ -from .minicpm3 import MiniCPM3GPTQ -from .mistral import MistralGPTQ -from .mixtral import MixtralGPTQ -from .mllama import MLlamaGPTQ -from .moss import MOSSGPTQ -from .mpt import MPTGPTQ -from .opt import OPTGPTQ -from .phi import PhiGPTQ -from .phi3 import Phi3GPTQ -from .qwen import QwenGPTQ -from .qwen2 import Qwen2GPTQ -from .qwen2_moe import Qwen2MoeGPTQ -from .rw import RWGPTQ -from .stablelmepoch import StableLMEpochGPTQ -from .starcoder2 import Starcoder2GPTQ -from .xverse import XverseGPTQ -from .yi import YiGPTQ +from .definitions.bloom import BloomGPTQ +from .definitions.chatglm import ChatGLM +from .definitions.codegen import CodeGenGPTQ +from .definitions.cohere import CohereGPTQ +from .definitions.dbrx import DbrxGPTQ +from .definitions.dbrx_converted import DbrxConvertedGPTQ +from .definitions.decilm import DeciLMGPTQ +from .definitions.deepseek_v2 import DeepSeekV2GPTQ +from .definitions.exaone import ExaoneGPTQ +from .definitions.gemma import GemmaGPTQ +from .definitions.gemma2 import Gemma2GPTQ +from .definitions.gpt2 import GPT2GPTQ +from .definitions.gpt_bigcode import GPTBigCodeGPTQ +from .definitions.gpt_neox import GPTNeoXGPTQ +from .definitions.gptj import GPTJGPTQ +from .definitions.granite import GraniteGPTQ +from .definitions.grinmoe import GrinMOEGPTQ +from .definitions.internlm import InternLMGPTQ +from .definitions.internlm2 import InternLM2GPTQ +from .definitions.llama import LlamaGPTQ +from .definitions.longllama import LongLlamaGPTQ +from .definitions.minicpm import MiniCPMGPTQ +from .definitions.minicpm3 import MiniCPM3GPTQ +from .definitions.mistral import MistralGPTQ +from .definitions.mixtral import MixtralGPTQ +from .definitions.mllama import MLlamaGPTQ +from .definitions.moss import MOSSGPTQ +from .definitions.mpt import MPTGPTQ +from .definitions.opt import OPTGPTQ +from .definitions.phi import PhiGPTQ +from .definitions.phi3 import Phi3GPTQ +from .definitions.qwen import QwenGPTQ +from .definitions.qwen2 import Qwen2GPTQ +from .definitions.qwen2_moe import Qwen2MoeGPTQ +from .definitions.rw import RWGPTQ +from .definitions.stablelmepoch import StableLMEpochGPTQ +from .definitions.starcoder2 import Starcoder2GPTQ +from .definitions.xverse import XverseGPTQ +from .definitions.yi import YiGPTQ MODEL_MAP = { "bloom": BloomGPTQ, diff --git a/gptqmodel/models/definitions/baichuan.py b/gptqmodel/models/definitions/baichuan.py index 3719fc1e9..54f8ae14d 100644 --- a/gptqmodel/models/definitions/baichuan.py +++ b/gptqmodel/models/definitions/baichuan.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class BaiChuanGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/bloom.py b/gptqmodel/models/definitions/bloom.py index 85712bd81..d5ba23093 100644 --- a/gptqmodel/models/definitions/bloom.py +++ b/gptqmodel/models/definitions/bloom.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class BloomGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/chatglm.py b/gptqmodel/models/definitions/chatglm.py index 50d61767a..43b0285eb 100644 --- a/gptqmodel/models/definitions/chatglm.py +++ b/gptqmodel/models/definitions/chatglm.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class ChatGLM(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/codegen.py b/gptqmodel/models/definitions/codegen.py index 0d167aca8..55f6622dd 100644 --- a/gptqmodel/models/definitions/codegen.py +++ b/gptqmodel/models/definitions/codegen.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class CodeGenGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/cohere.py b/gptqmodel/models/definitions/cohere.py index bc1d2a280..75b14b44c 100644 --- a/gptqmodel/models/definitions/cohere.py +++ b/gptqmodel/models/definitions/cohere.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class CohereGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/dbrx.py b/gptqmodel/models/definitions/dbrx.py index bf206e767..1701da545 100644 --- a/gptqmodel/models/definitions/dbrx.py +++ b/gptqmodel/models/definitions/dbrx.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel # placer=holder only as dbrx original models are not supported diff --git a/gptqmodel/models/definitions/dbrx_converted.py b/gptqmodel/models/definitions/dbrx_converted.py index 5eb3f488f..a45aac4d5 100644 --- a/gptqmodel/models/definitions/dbrx_converted.py +++ b/gptqmodel/models/definitions/dbrx_converted.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class DbrxConvertedGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/decilm.py b/gptqmodel/models/definitions/decilm.py index 416be21e0..2eb17156b 100644 --- a/gptqmodel/models/definitions/decilm.py +++ b/gptqmodel/models/definitions/decilm.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class DeciLMGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/deepseek_v2.py b/gptqmodel/models/definitions/deepseek_v2.py index 9f9dbb8d3..915a080fe 100644 --- a/gptqmodel/models/definitions/deepseek_v2.py +++ b/gptqmodel/models/definitions/deepseek_v2.py @@ -1,5 +1,5 @@ -from ._const import EXPERT_INDEX_PLACEHOLDER -from .base import BaseGPTQModel +from .._const import EXPERT_INDEX_PLACEHOLDER +from ..base import BaseGPTQModel # Both DeepSeek-v2 and DeepSeek-v2-lite are supported in this model def diff --git a/gptqmodel/models/definitions/exaone.py b/gptqmodel/models/definitions/exaone.py index 014e176bb..669f19bb7 100644 --- a/gptqmodel/models/definitions/exaone.py +++ b/gptqmodel/models/definitions/exaone.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class ExaoneGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/gemma.py b/gptqmodel/models/definitions/gemma.py index 379c553fe..de38bd544 100644 --- a/gptqmodel/models/definitions/gemma.py +++ b/gptqmodel/models/definitions/gemma.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class GemmaGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/gemma2.py b/gptqmodel/models/definitions/gemma2.py index bef7af1d6..ede434ee0 100644 --- a/gptqmodel/models/definitions/gemma2.py +++ b/gptqmodel/models/definitions/gemma2.py @@ -1,8 +1,8 @@ import logging from logging import getLogger -from ..utils import BACKEND -from .base import BaseGPTQModel +from ...utils import BACKEND +from ..base import BaseGPTQModel logger = getLogger(__name__) handler = logging.StreamHandler() @@ -38,13 +38,13 @@ def __init__(self, *args, **kwargs): return # quantized gemma-2 27b model only support vLLM/SGLang load. - from ..utils.vllm import VLLM_AVAILABLE + from ...utils.vllm import VLLM_AVAILABLE if VLLM_AVAILABLE: from vllm import LLM if isinstance(self.model, LLM): backend = BACKEND.VLLM - from ..utils.sglang import SGLANG_AVAILABLE + from ...utils.sglang import SGLANG_AVAILABLE if SGLANG_AVAILABLE: from sglang.srt.server import Runtime if isinstance(self.model, Runtime): diff --git a/gptqmodel/models/definitions/gpt2.py b/gptqmodel/models/definitions/gpt2.py index ebd1b92be..e50a479b7 100644 --- a/gptqmodel/models/definitions/gpt2.py +++ b/gptqmodel/models/definitions/gpt2.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class GPT2GPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/gpt_bigcode.py b/gptqmodel/models/definitions/gpt_bigcode.py index 11e6ab2bd..89330b587 100644 --- a/gptqmodel/models/definitions/gpt_bigcode.py +++ b/gptqmodel/models/definitions/gpt_bigcode.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class GPTBigCodeGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/gpt_neox.py b/gptqmodel/models/definitions/gpt_neox.py index 2805684b2..1a6ae71da 100644 --- a/gptqmodel/models/definitions/gpt_neox.py +++ b/gptqmodel/models/definitions/gpt_neox.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class GPTNeoXGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/gptj.py b/gptqmodel/models/definitions/gptj.py index 0d46ea0d6..125f7f3d2 100644 --- a/gptqmodel/models/definitions/gptj.py +++ b/gptqmodel/models/definitions/gptj.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class GPTJGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/granite.py b/gptqmodel/models/definitions/granite.py index 8bcbd91db..a8072718b 100644 --- a/gptqmodel/models/definitions/granite.py +++ b/gptqmodel/models/definitions/granite.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class GraniteGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/grinmoe.py b/gptqmodel/models/definitions/grinmoe.py index 694b2021b..65df7c245 100644 --- a/gptqmodel/models/definitions/grinmoe.py +++ b/gptqmodel/models/definitions/grinmoe.py @@ -1,5 +1,5 @@ -from ._const import EXPERT_INDEX_PLACEHOLDER -from .base import BaseGPTQModel +from .._const import EXPERT_INDEX_PLACEHOLDER +from ..base import BaseGPTQModel class GrinMOEGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/internlm.py b/gptqmodel/models/definitions/internlm.py index bb5d1cc4d..cab84f969 100644 --- a/gptqmodel/models/definitions/internlm.py +++ b/gptqmodel/models/definitions/internlm.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class InternLMGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/internlm2.py b/gptqmodel/models/definitions/internlm2.py index 41ec20042..a0e824043 100644 --- a/gptqmodel/models/definitions/internlm2.py +++ b/gptqmodel/models/definitions/internlm2.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class InternLM2GPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/llama.py b/gptqmodel/models/definitions/llama.py index 390b2cdc7..83f2bde5a 100644 --- a/gptqmodel/models/definitions/llama.py +++ b/gptqmodel/models/definitions/llama.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class LlamaGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/longllama.py b/gptqmodel/models/definitions/longllama.py index 5bdf1796d..c1c76941a 100644 --- a/gptqmodel/models/definitions/longllama.py +++ b/gptqmodel/models/definitions/longllama.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class LongLlamaGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/minicpm.py b/gptqmodel/models/definitions/minicpm.py index 0d965a8af..3750025de 100644 --- a/gptqmodel/models/definitions/minicpm.py +++ b/gptqmodel/models/definitions/minicpm.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class MiniCPMGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/minicpm3.py b/gptqmodel/models/definitions/minicpm3.py index 5977fc1b8..0332c1f08 100644 --- a/gptqmodel/models/definitions/minicpm3.py +++ b/gptqmodel/models/definitions/minicpm3.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class MiniCPM3GPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/mistral.py b/gptqmodel/models/definitions/mistral.py index a740ffec0..54e4bdd54 100644 --- a/gptqmodel/models/definitions/mistral.py +++ b/gptqmodel/models/definitions/mistral.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class MistralGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/mixtral.py b/gptqmodel/models/definitions/mixtral.py index 91a3ff69c..a45d4f3c1 100644 --- a/gptqmodel/models/definitions/mixtral.py +++ b/gptqmodel/models/definitions/mixtral.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class MixtralGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/mllama.py b/gptqmodel/models/definitions/mllama.py index 6aa1843e6..d27c78396 100644 --- a/gptqmodel/models/definitions/mllama.py +++ b/gptqmodel/models/definitions/mllama.py @@ -1,6 +1,6 @@ from transformers import AutoModelForPreTraining -from .base import BaseGPTQModel +from ..base import BaseGPTQModel # TODO FIXME: we currently do not support quantizing cross attention layer (pixel_values) diff --git a/gptqmodel/models/definitions/moss.py b/gptqmodel/models/definitions/moss.py index a97aab2ca..b4b59c639 100644 --- a/gptqmodel/models/definitions/moss.py +++ b/gptqmodel/models/definitions/moss.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class MOSSGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/mpt.py b/gptqmodel/models/definitions/mpt.py index 8e60e5198..15aa1df48 100644 --- a/gptqmodel/models/definitions/mpt.py +++ b/gptqmodel/models/definitions/mpt.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class MPTGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/opt.py b/gptqmodel/models/definitions/opt.py index 1cbf407e4..0af97fb76 100644 --- a/gptqmodel/models/definitions/opt.py +++ b/gptqmodel/models/definitions/opt.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class OPTGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/phi.py b/gptqmodel/models/definitions/phi.py index f417131c3..31c30d44f 100644 --- a/gptqmodel/models/definitions/phi.py +++ b/gptqmodel/models/definitions/phi.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class PhiGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/phi3.py b/gptqmodel/models/definitions/phi3.py index 9c3cb20fb..a77a1e9b7 100644 --- a/gptqmodel/models/definitions/phi3.py +++ b/gptqmodel/models/definitions/phi3.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class Phi3GPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/qwen.py b/gptqmodel/models/definitions/qwen.py index 1dda67180..c6ec6c496 100644 --- a/gptqmodel/models/definitions/qwen.py +++ b/gptqmodel/models/definitions/qwen.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class QwenGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/qwen2.py b/gptqmodel/models/definitions/qwen2.py index e006d486e..be1ad1fbb 100644 --- a/gptqmodel/models/definitions/qwen2.py +++ b/gptqmodel/models/definitions/qwen2.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class Qwen2GPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/qwen2_moe.py b/gptqmodel/models/definitions/qwen2_moe.py index 08b2934f5..2900128d8 100644 --- a/gptqmodel/models/definitions/qwen2_moe.py +++ b/gptqmodel/models/definitions/qwen2_moe.py @@ -1,5 +1,5 @@ -from ._const import EXPERT_INDEX_PLACEHOLDER -from .base import BaseGPTQModel +from .._const import EXPERT_INDEX_PLACEHOLDER +from ..base import BaseGPTQModel class Qwen2MoeGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/rw.py b/gptqmodel/models/definitions/rw.py index ad624a0b3..1a77d60ba 100644 --- a/gptqmodel/models/definitions/rw.py +++ b/gptqmodel/models/definitions/rw.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class RWGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/stablelmepoch.py b/gptqmodel/models/definitions/stablelmepoch.py index 8aab61bae..2ad7dd80f 100644 --- a/gptqmodel/models/definitions/stablelmepoch.py +++ b/gptqmodel/models/definitions/stablelmepoch.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class StableLMEpochGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/starcoder2.py b/gptqmodel/models/definitions/starcoder2.py index 7af620fd1..3ab375f99 100644 --- a/gptqmodel/models/definitions/starcoder2.py +++ b/gptqmodel/models/definitions/starcoder2.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class Starcoder2GPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/xverse.py b/gptqmodel/models/definitions/xverse.py index 9133008e5..b6d6bdbda 100644 --- a/gptqmodel/models/definitions/xverse.py +++ b/gptqmodel/models/definitions/xverse.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class XverseGPTQ(BaseGPTQModel): diff --git a/gptqmodel/models/definitions/yi.py b/gptqmodel/models/definitions/yi.py index 1f2cc51b9..b4252263b 100644 --- a/gptqmodel/models/definitions/yi.py +++ b/gptqmodel/models/definitions/yi.py @@ -1,4 +1,4 @@ -from .base import BaseGPTQModel +from ..base import BaseGPTQModel class YiGPTQ(BaseGPTQModel):