From f655b2a96034dc517da844bf4767da7731367c87 Mon Sep 17 00:00:00 2001
From: clsferguson <48876201+clsferguson@users.noreply.github.com>
Date: Sun, 21 Sep 2025 21:45:26 -0600
Subject: [PATCH] feat(build,docker): add multi-stage build to compile and
 bundle SageAttention 2.2; enable via --use-sage-attention
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a two-stage Docker build that compiles SageAttention 2.2/2++ from the upstream repository using Debian’s CUDA toolkit (nvcc) and the same Torch stack (cu129) as the runtime, then installs the produced wheel in the final slim image. This ensures the sageattention module is present at launch and makes the existing --use-sage-attention flag functional. The runtime image remains minimal while the builder stage carries heavy toolchains; matching Torch across stages prevents CUDA/ABI mismatch. Also retains the previous launch command so ComfyUI auto-enables SageAttention on startup.
---
 Dockerfile | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e8739d1da..f36b5f527 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,32 @@
-# Use a recent slim base image
+# --------------------------
+# Stage 1 (sage-builder): compile the SageAttention 2.2 wheel from source
+# --------------------------
+FROM python:3.12.11-slim-trixie AS sage-builder
+# DEBIAN_FRONTEND is a build-time ARG (visible to RUN steps) so it is not baked into the stage's environment
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1
+
+# Build deps + CUDA toolkit (nvcc); Debian ships CUDA in non-free, which the base image's apt sources leave disabled
+RUN sed -i 's/^Components: main$/& contrib non-free/' /etc/apt/sources.list.d/debian.sources \
+ && apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake git nvidia-cuda-dev nvidia-cuda-toolkit \
+ && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /tmp/sage
+
+# Install the same cu129 Torch stack the runtime stage uses, so the extension is compiled against a matching build
+RUN python -m pip install -U pip setuptools wheel \
+ && python -m pip install --extra-index-url https://download.pytorch.org/whl/cu129 torch torchvision torchaudio
+
+# Fetch only the current upstream tip of SageAttention (no history) into the workdir and build its wheel into /dist.
+# NOTE(review): the ref is unpinned, so the compiled version follows upstream; --no-build-isolation builds against the already-installed Torch.
+RUN git clone --depth=1 https://github.com/thu-ml/SageAttention.git . \
+ && python -m pip wheel --no-deps --no-build-isolation --wheel-dir /dist .
+
+# --------------------------
+# Stage 2: runtime image
+# --------------------------
 FROM python:3.12.11-slim-trixie
 
 # Environment
@@ -28,15 +56,19 @@ RUN groupadd --gid 1000 appuser \
 # Workdir
 WORKDIR /app/ComfyUI
 
-# Leverage layer caching: install deps before copying full tree
+# Copy only requirements.txt first so the dependency-install layer stays cached until that file changes
 COPY requirements.txt ./
 
-# Core Python deps (torch CUDA 12.9, ComfyUI reqs), media/NVML libs
+# Core Python deps: Torch (CUDA 12.9 wheels), ComfyUI requirements, then media (imageio-ffmpeg, av) and NVML (nvidia-ml-py) packages
 RUN python -m pip install --upgrade pip setuptools wheel \
  && python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu129 \
  && python -m pip install -r requirements.txt \
  && python -m pip install imageio-ffmpeg "av>=14.2" nvidia-ml-py
 
+# Install the SageAttention wheel straight from the builder stage via a BuildKit bind mount, so no copy of the .whl is left in an image layer
+RUN --mount=type=bind,from=sage-builder,source=/dist,target=/tmp/sage-dist \
+    python -m pip install /tmp/sage-dist/sageattention-*.whl
+
 # Copy the application
 COPY . .