Skip to content

chore(autogptq): drop archived backend #5214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 1 commit on Apr 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ updates:
schedule:
# Check for updates to GitHub Actions every weekday
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/autogptq"
schedule:
interval: "weekly"
- package-ecosystem: "pip"
directory: "/backend/python/bark"
schedule:
Expand Down
5 changes: 1 addition & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ ARG TARGETARCH
ARG TARGETVARIANT

ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"

RUN apt-get update && \
apt-get install -y --no-install-recommends \
Expand Down Expand Up @@ -431,9 +431,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMA
RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/vllm \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "autogptq" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/autogptq \
; fi && \
if [[ ( "${EXTRA_BACKENDS}" =~ "bark" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
make -C backend/python/bark \
; fi && \
Expand Down
13 changes: 2 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -505,18 +505,10 @@ protogen-go-clean:
$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
$(MAKE) -C backend/python/autogptq protogen

.PHONY: autogptq-protogen-clean
autogptq-protogen-clean:
$(MAKE) -C backend/python/autogptq protogen-clean
protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean

.PHONY: bark-protogen
bark-protogen:
Expand Down Expand Up @@ -593,7 +585,6 @@ vllm-protogen-clean:
## GRPC
# Note: it is duplicated in the Dockerfile
prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/autogptq
$(MAKE) -C backend/python/bark
$(MAKE) -C backend/python/coqui
$(MAKE) -C backend/python/diffusers
Expand Down
6 changes: 1 addition & 5 deletions backend/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -190,11 +190,7 @@ message ModelOptions {
int32 NGQA = 20;
string ModelFile = 21;

// AutoGPTQ
string Device = 22;
bool UseTriton = 23;
string ModelBaseName = 24;
bool UseFastTokenizer = 25;


// Diffusers
string PipelineType = 26;
Expand Down
17 changes: 0 additions & 17 deletions backend/python/autogptq/Makefile

This file was deleted.

5 changes: 0 additions & 5 deletions backend/python/autogptq/README.md

This file was deleted.

158 changes: 0 additions & 158 deletions backend/python/autogptq/backend.py

This file was deleted.

14 changes: 0 additions & 14 deletions backend/python/autogptq/install.sh

This file was deleted.

2 changes: 0 additions & 2 deletions backend/python/autogptq/requirements-cublas11.txt

This file was deleted.

1 change: 0 additions & 1 deletion backend/python/autogptq/requirements-cublas12.txt

This file was deleted.

2 changes: 0 additions & 2 deletions backend/python/autogptq/requirements-hipblas.txt

This file was deleted.

6 changes: 0 additions & 6 deletions backend/python/autogptq/requirements-intel.txt

This file was deleted.

6 changes: 0 additions & 6 deletions backend/python/autogptq/requirements.txt

This file was deleted.

4 changes: 0 additions & 4 deletions backend/python/autogptq/run.sh

This file was deleted.

6 changes: 0 additions & 6 deletions backend/python/autogptq/test.sh

This file was deleted.

5 changes: 0 additions & 5 deletions core/backend/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,6 @@ func grpcModelOpts(c config.BackendConfig) *pb.ModelOptions {
MainGPU: c.MainGPU,
Threads: int32(*c.Threads),
TensorSplit: c.TensorSplit,
// AutoGPTQ
ModelBaseName: c.AutoGPTQ.ModelBaseName,
Device: c.AutoGPTQ.Device,
UseTriton: c.AutoGPTQ.Triton,
UseFastTokenizer: c.AutoGPTQ.UseFastTokenizer,
// RWKV
Tokenizer: c.Tokenizer,
}
Expand Down
11 changes: 0 additions & 11 deletions core/config/backend_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ type BackendConfig struct {
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig `yaml:",inline"`

// AutoGPTQ specifics
AutoGPTQ AutoGPTQ `yaml:"autogptq"`

// Diffusers
Diffusers Diffusers `yaml:"diffusers"`
Step int `yaml:"step"`
Expand Down Expand Up @@ -176,14 +173,6 @@ type LimitMMPerPrompt struct {
LimitAudioPerPrompt int `yaml:"audio"`
}

// AutoGPTQ is a struct that holds the configuration specific to the AutoGPTQ backend
type AutoGPTQ struct {
ModelBaseName string `yaml:"model_base_name"`
Device string `yaml:"device"`
Triton bool `yaml:"triton"`
UseFastTokenizer bool `yaml:"use_fast_tokenizer"`
}

// TemplateConfig is a struct that holds the configuration of the templating system
type TemplateConfig struct {
// Chat is the template used in the chat completion endpoint
Expand Down
8 changes: 0 additions & 8 deletions core/http/middleware/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,10 @@ func mergeOpenAIRequestAndBackendConfig(config *config.BackendConfig, input *sch
config.Diffusers.ClipSkip = input.ClipSkip
}

if input.ModelBaseName != "" {
config.AutoGPTQ.ModelBaseName = input.ModelBaseName
}

if input.NegativePromptScale != 0 {
config.NegativePromptScale = input.NegativePromptScale
}

if input.UseFastTokenizer {
config.UseFastTokenizer = input.UseFastTokenizer
}

if input.NegativePrompt != "" {
config.NegativePrompt = input.NegativePrompt
}
Expand Down
1 change: 0 additions & 1 deletion core/schema/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ type OpenAIRequest struct {

Backend string `json:"backend" yaml:"backend"`

// AutoGPTQ
ModelBaseName string `json:"model_base_name" yaml:"model_base_name"`
}

Expand Down
2 changes: 0 additions & 2 deletions core/schema/prediction.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ type PredictionOptions struct {
RopeFreqBase float32 `json:"rope_freq_base" yaml:"rope_freq_base"`
RopeFreqScale float32 `json:"rope_freq_scale" yaml:"rope_freq_scale"`
NegativePromptScale float32 `json:"negative_prompt_scale" yaml:"negative_prompt_scale"`
// AutoGPTQ
UseFastTokenizer bool `json:"use_fast_tokenizer" yaml:"use_fast_tokenizer"`

// Diffusers
ClipSkip int `json:"clip_skip" yaml:"clip_skip"`
Expand Down
8 changes: 0 additions & 8 deletions docs/content/docs/advanced/advanced-usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,6 @@ yarn_ext_factor: 0
yarn_attn_factor: 0
yarn_beta_fast: 0
yarn_beta_slow: 0

# AutoGPT-Q settings, for configurations specific to GPT models.
autogptq:
model_base_name: "" # Base name of the model.
device: "" # Device to run the model on.
triton: false # Whether to use Triton Inference Server.
use_fast_tokenizer: false # Whether to use a fast tokenizer for quicker processing.

# configuration for diffusers model
diffusers:
cuda: false # Whether to use CUDA
Expand Down
Loading
Loading