Skip to content

Commit 44cca1c

Browse files
authored
build(docker): switch from pip to uv for package management (#3229)
- Upgrade base image from Python 3.10 to Python 3.11 slim on Debian Bullseye. - Replace manual PyTorch and dependencies installation with uv environment management and installation. - Add installation of large dependencies (torch, triton, tensorflow, onnxruntime-gpu) explicitly to optimize image layer caching. - Switch from pip to uv for package management and dependency synchronization. - Replace pillow with pillow-simd on amd64 architecture using uv-managed pip for performance improvements. - Adjust environment variables to use the virtual environment under /venv instead of local user installs. - Create /venv directory with appropriate permissions and copy the virtual environment from build stage. - Modify CMD to add --noverify flag when launching kohya_gui.py. - Remove requirements_linux_docker.txt file, consolidating dependency management into uv and pyproject.toml. Signed-off-by: CHEN, CHUN <[email protected]>
1 parent f28f96b commit 44cca1c

File tree

2 files changed

+88
-54
lines changed

2 files changed

+88
-54
lines changed

Dockerfile

Lines changed: 88 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3,81 +3,116 @@ ARG UID=1000
33
ARG VERSION=EDGE
44
ARG RELEASE=0
55

6-
FROM python:3.10-slim as build
6+
########################################
7+
# Base stage
8+
########################################
9+
FROM docker.io/library/python:3.11-slim-bullseye AS base
10+
11+
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
12+
ARG TARGETARCH
13+
ARG TARGETVARIANT
14+
15+
WORKDIR /tmp
16+
17+
ENV NVIDIA_VISIBLE_DEVICES=all
18+
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
19+
20+
# Install CUDA partially
21+
# https://docs.nvidia.com/cuda/cuda-installation-guide-linux/#debian
22+
# Installing the complete CUDA Toolkit system-wide usually adds around 8GB to the image size.
23+
# Since most CUDA packages are already installed through pip, there's no need to download the entire toolkit.
24+
# Therefore, we opt to install only the essential libraries.
25+
# Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64
26+
27+
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb /tmp/cuda-keyring_x86_64.deb
28+
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
29+
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
30+
dpkg -i cuda-keyring_x86_64.deb && \
31+
rm -f cuda-keyring_x86_64.deb && \
32+
apt-get update && \
33+
apt-get install -y --no-install-recommends \
34+
# !If you experience any related issues, replace the following line with `cuda-12-4` to obtain the complete CUDA package.
35+
cuda-nvcc-12-4
36+
37+
ENV PATH="/usr/local/cuda/bin${PATH:+:${PATH}}"
38+
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64
39+
ENV CUDA_VERSION=12.4
40+
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.4
41+
ENV CUDA_HOME=/usr/local/cuda
42+
43+
########################################
44+
# Build stage
45+
########################################
46+
FROM base AS build
747

848
# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
949
ARG TARGETARCH
1050
ARG TARGETVARIANT
1151

1252
WORKDIR /app
1353

14-
# Install under /root/.local
15-
ENV PIP_USER="true"
16-
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
17-
ARG PIP_ROOT_USER_ACTION="ignore"
54+
# Install uv
55+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
56+
57+
ENV UV_PROJECT_ENVIRONMENT=/venv
58+
ENV VIRTUAL_ENV=/venv
59+
ENV UV_LINK_MODE=copy
60+
ENV UV_PYTHON_DOWNLOADS=0
61+
ENV UV_INDEX=https://download.pytorch.org/whl/cu124
1862

1963
# Install build dependencies
2064
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
2165
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
2266
apt-get update && apt-get upgrade -y && \
2367
apt-get install -y --no-install-recommends python3-launchpadlib git curl
2468

25-
# Install PyTorch
26-
# The versions must align and be in sync with the requirements_linux_docker.txt
27-
# hadolint ignore=SC2102
28-
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
29-
pip install --upgrade pip setuptools wheel ninja &&\
30-
pip install -U \
31-
--index-url https://download.pytorch.org/whl/cu124 \
32-
--extra-index-url https://pypi.nvidia.com \
33-
torch==2.5.0+cu124 \
34-
torchvision==0.20.0+cu124 &&\
35-
pip install -U xformers --index-url https://download.pytorch.org/whl/cu124
36-
37-
# Install requirements
38-
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
39-
--mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \
40-
--mount=source=requirements.txt,target=requirements.txt \
41-
--mount=source=setup/docker_setup.py,target=setup.py \
42-
--mount=source=sd-scripts,target=sd-scripts,rw \
43-
pip install -r requirements_linux_docker.txt -r requirements.txt
69+
# Install big dependencies separately for layer caching
70+
# !Please note that the version restrictions should be the same as pyproject.toml
71+
# No packages listed should be removed in the next `uv sync` command
72+
# If this happens, please update the version restrictions or update the uv.lock file
73+
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
74+
uv venv --system-site-packages /venv && \
75+
uv pip install --no-deps \
76+
# torch (866.2MiB)
77+
torch==2.5.1+cu124 \
78+
# triton (199.8MiB)
79+
triton==3.1.0 \
80+
# tensorflow (615.0MiB)
81+
# Quoted on purpose: an unquoted ">" is a shell output redirection, which would drop the version bound and write uv's stdout to a file named "=2.16.1"
"tensorflow>=2.16.1" \
82+
# onnxruntime-gpu (215.7MiB)
83+
onnxruntime-gpu==1.19.2
84+
85+
# Install dependencies
86+
RUN --mount=type=cache,id=uv-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/uv \
87+
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
88+
--mount=type=bind,source=uv.lock,target=uv.lock \
89+
--mount=type=bind,source=sd-scripts,target=sd-scripts,rw \
90+
uv sync --frozen --no-dev --no-install-project --no-editable
4491

4592
# Replace pillow with pillow-simd (Only for x86)
4693
ARG TARGETPLATFORM
4794
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
4895
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
4996
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
5097
apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \
51-
pip uninstall -y pillow && \
52-
CC="cc -mavx2" pip install -U --force-reinstall pillow-simd; \
98+
uv pip uninstall pillow && \
99+
CC="cc -mavx2" uv pip install pillow-simd; \
53100
fi
54101

55-
FROM python:3.10-slim as final
102+
########################################
103+
# Final stage
104+
########################################
105+
FROM base AS final
56106

57107
ARG TARGETARCH
58108
ARG TARGETVARIANT
59109

60-
ENV NVIDIA_VISIBLE_DEVICES all
61-
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
62-
63110
WORKDIR /tmp
64111

65-
66-
# Install CUDA partially
67-
ADD https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb .
68-
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
69-
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
70-
dpkg -i cuda-keyring_1.1-1_all.deb && \
71-
rm cuda-keyring_1.1-1_all.deb && \
72-
sed -i 's/^Components: main$/& contrib/' /etc/apt/sources.list.d/debian.sources && \
73-
apt-get update && \
74-
apt-get install -y --no-install-recommends \
75-
cuda-toolkit-12-4
76-
77112
# Install runtime dependencies
78113
RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \
79114
--mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \
80-
apt-get update && \
115+
apt-get update && apt-get upgrade -y && \
81116
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init
82117

83118
# Fix missing libnvinfer7
@@ -92,20 +127,23 @@ RUN groupadd -g $UID $UID && \
92127
# Create directories with correct permissions
93128
RUN install -d -m 775 -o $UID -g 0 /dataset && \
94129
install -d -m 775 -o $UID -g 0 /licenses && \
95-
install -d -m 775 -o $UID -g 0 /app
130+
install -d -m 775 -o $UID -g 0 /app && \
131+
install -d -m 775 -o $UID -g 0 /venv
96132

97133
# Copy licenses (OpenShift Policy)
98134
COPY --link --chmod=775 LICENSE.md /licenses/LICENSE.md
99135

100136
# Copy dependencies and code (and support arbitrary uid for OpenShift best practice)
101-
COPY --link --chown=$UID:0 --chmod=775 --from=build /root/.local /home/$UID/.local
137+
COPY --link --chown=$UID:0 --chmod=775 --from=build /venv /venv
102138
COPY --link --chown=$UID:0 --chmod=775 . /app
103139

104-
ENV PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:/home/$UID/.local/bin:$PATH"
105-
ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages"
106-
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
140+
ENV PATH="/venv/bin${PATH:+:${PATH}}"
141+
ENV PYTHONPATH="/venv/lib/python3.11/site-packages"
142+
143+
ENV LD_LIBRARY_PATH="/venv/lib/python3.11/site-packages/nvidia/cudnn/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
107144
ENV LD_PRELOAD=libtcmalloc.so
108145
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
146+
109147
# Rich logging
110148
# https://rich.readthedocs.io/en/stable/console.html#interactive-mode
111149
ENV FORCE_COLOR="true"
@@ -124,7 +162,7 @@ STOPSIGNAL SIGINT
124162

125163
# Use dumb-init as PID 1 to handle signals properly
126164
ENTRYPOINT ["dumb-init", "--"]
127-
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless"]
165+
CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless", "--noverify"]
128166

129167
ARG VERSION
130168
ARG RELEASE
@@ -138,4 +176,4 @@ LABEL name="bmaltais/kohya_ss" \
138176
release=${RELEASE} \
139177
io.k8s.display-name="kohya_ss" \
140178
summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \
141-
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."
179+
description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss."

requirements_linux_docker.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

0 commit comments

Comments
 (0)