feat(build,docker): add multi-stage build to compile and bundle SageAttention 2.2; enable via --use-sage-attention

Introduce a two-stage Docker build that compiles SageAttention 2.2/2++ from the upstream repository using Debian's CUDA toolkit (nvcc) and the same Torch stack (cu129) as the runtime, then installs the produced wheel in the final slim image. This ensures the sageattention module is present at launch and makes the existing --use-sage-attention flag functional. The runtime image remains minimal while the builder stage carries heavy toolchains; matching Torch across stages prevents CUDA/ABI mismatch. Also retains the previous launch command so ComfyUI auto-enables SageAttention on startup.
2026-07-13 09:57:20 +08:00 · 2025-09-21 21:45:26 -06:00 · 2025-09-21 21:45:26 -06:00 · f655b2a960
commit f655b2a960
parent 77f35a886c
1 changed files with 35 additions and 3 deletions
--- a/38
+++ b/38
@ -1,4 +1,32 @@
-# Use a recent slim base image
+# --------------------------
+# Stage 1: build SageAttention 2.2 wheel from source
+# --------------------------
+FROM python:3.12.11-slim-trixie AS sage-builder
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1
+
+# Build deps + CUDA toolkit (nvcc). Debian ships the CUDA packages in non-free, which the slim base image does not enable.
+RUN sed -i 's/^Components: .*/Components: main contrib non-free/' /etc/apt/sources.list.d/debian.sources \
+ && apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake git nvidia-cuda-dev nvidia-cuda-toolkit \
+ && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /tmp/sage
+
+# Install the cu129 Torch stack the runtime stage also installs, so the extension compiles against it (both installs are unpinned)
+RUN python -m pip install --upgrade pip setuptools wheel && \
+    python -m pip install --extra-index-url https://download.pytorch.org/whl/cu129 torch torchvision torchaudio
+
+# Shallow-clone the upstream repo tip (unpinned, so a rebuild may pick up newer commits) into the workdir and build a wheel into /dist.
+# NOTE(review): no GPU is visible during `docker build`; confirm upstream setup.py can select target archs without one (e.g. via TORCH_CUDA_ARCH_LIST).
+RUN git clone --depth 1 https://github.com/thu-ml/SageAttention.git . \
+ && python -m pip wheel . --no-deps --no-build-isolation -w /dist
+
+# --------------------------
+# Stage 2: runtime image
+# --------------------------
 FROM python:3.12.11-slim-trixie

 # Environment
@ -28,15 +56,19 @@ RUN groupadd --gid 1000 appuser \
 # Workdir
 WORKDIR /app/ComfyUI

-# Leverage layer caching: install deps before copying full tree
+# Copy requirements first for layer caching
 COPY requirements.txt ./

-# Core Python deps (torch CUDA 12.9, ComfyUI reqs), media/NVML libs
+# Core Python deps (Torch CUDA 12.9, ComfyUI reqs), media/NVML libs
 RUN python -m pip install --upgrade pip setuptools wheel \
 && python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu129 \
 && python -m pip install -r requirements.txt \
 && python -m pip install imageio-ffmpeg "av>=14.2" nvidia-ml-py

+# Install the SageAttention wheel directly from the builder stage; the bind mount keeps the .whl file out of the final image layers
+RUN --mount=type=bind,from=sage-builder,source=/dist,target=/tmp/sage-dist \
+    python -m pip install /tmp/sage-dist/sageattention-*.whl
+
 # Copy the application
 COPY . .