Add SageAttention build-on-start (enable with FORCE_SAGE_ATTENTION=1)

Touches Dockerfile (default SAGE_ATTENTION_AVAILABLE=0), README.md (usage notes)
and entrypoint.sh (GPU probe, build, launch flag).

Review fixes folded into this revision of the patch:
  * probe_gpu: single-quote TORCH_CUDA_ARCH_LIST in the emitted assignment. The
    value is ';'-separated and the output is eval'd, so unquoted it was cut at
    the first ';' and the remainder was executed as a command.
  * probe_gpu: stop discarding stderr, which carries the per-GPU "[GPU]" lines.
  * probe_gpu: report strategy "fallback" when no Turing or newer GPU exists.
  * decide_build_jobs: accept only a positive-integer SAGE_MAX_JOBS; tolerate
    empty MemTotal output.
  * build_sage_attention: check the initial cd; log a failed Triton install
    instead of swallowing it.
  * setup_sage_attention: run the probe when its results are missing, log an
    unusable build, and return to BASE_DIR after a build attempt.
  * launch: pass optional flags through a bash array; keep the plain
    "Starting ComfyUI..." log for the default path.
  * README: describe what the code does (pip install plus marker file; the
    build is independent of FORCE_SAGE_ATTENTION).

NOTE(review): leading whitespace of context lines was not recoverable from the
collapsed source and has been re-indented conventionally; check it against the
target files before applying. Hunk counts are updated for the lines added
here. Blob ids on the index lines are inherited from the original patch and no
longer describe the post-image of README.md or entrypoint.sh.

diff --git a/Dockerfile b/Dockerfile
index 3b20683b1..d6ce1521d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,6 +12,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
     EXT_PARALLEL=4 \
     NVCC_APPEND_FLAGS="--threads 8" \
     MAX_JOBS=32 \
+    SAGE_ATTENTION_AVAILABLE=0 \
     COMFYUI_PATH=/app/ComfyUI \
     COMFYUI_MODEL_PATH=/app/ComfyUI/models \
     COMFYUI_MODELS_PATH=/app/ComfyUI/models
diff --git a/README.md b/README.md
index 661b6211e..837c6da44 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ I created this repo for myself as a simple way to stay up to date with the lates
 - CUDA-enabled PyTorch + Triton on Debian trixie with CUDA 12.8 dev libs so custom CUDA builds work at runtime.
 - Non-root runtime with PUID/PGID mapping handled by entrypoint for volume permissions.
 - ComfyUI-Manager auto-sync on startup; entrypoint scans custom_nodes and installs requirements when COMFY_AUTO_INSTALL=1.
+- SageAttention build-on-start for compatible NVIDIA GPUs (Turing/SM 7.5+); enabling is opt-in via FORCE_SAGE_ATTENTION=1.
 
 ---
 
@@ -71,13 +72,7 @@ services:
       - TZ=America/Edmonton
       - PUID=1000
       - PGID=1000
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
+    gpus: all
     volumes:
       - comfyui_data:/app/ComfyUI/user/default
       - comfyui_nodes:/app/ComfyUI/custom_nodes
@@ -94,10 +89,19 @@ Run with `docker compose up -d`.
 - Open http://localhost:8188 after the container is up; change the external port via -p HOST:8188.
 - To target specific GPUs, use Docker's GPU device selections or Compose device_ids in reservations.
 
+### SageAttention
+SageAttention is compiled at container startup when a compatible GPU (Turing SM 7.5+) is detected and is installed with pip into the container's Python environment. A marker file under `/app/ComfyUI/.sage_attention` records the build, so later starts skip recompiling unless the detected GPU architectures change or the import fails. It delivers 2-5x faster attention vs FlashAttention for video and high-res image workflows.
+
+- To enable: set `FORCE_SAGE_ATTENTION=1`. If the build or import fails, ComfyUI starts normally without it.
+- The first startup on a compatible GPU will be slower due to compilation; subsequent starts reuse the existing build.
+- Turing GPUs (RTX 20xx) use the v1.0 branch with Triton 3.2.0; Ampere and newer use the latest release.
+
 ### Environment Variables
 - PUID/PGID: map container user to host UID/GID for volume write access.
 - COMFY_AUTO_INSTALL=1: auto-install Python requirements from custom_nodes on startup (default: 1).
 - COMFY_FORCE_INSTALL=1: force reinstall of custom_nodes requirements even after first run.
+- FORCE_SAGE_ATTENTION=0|1: start ComfyUI with `--use-sage-attention` when the SageAttention build is available (default: 0). The build itself runs on any compatible NVIDIA GPU regardless of this setting.
+- SAGE_MAX_JOBS=N: override the number of parallel compile jobs for SageAttention (positive integer; default: derived from RAM and CPU count).
 - CM_*: seed ComfyUI-Manager config.ini keys on first start (e.g. CM_SKIP_UPDATE_CHECK=1).
 
 ---
diff --git a/entrypoint.sh b/entrypoint.sh
index 79f393b77..fb4e26acc 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -8,6 +8,8 @@ PUID=${PUID:-1000}
 PGID=${PGID:-1000}
 BASE_DIR=/app/ComfyUI
 CUSTOM_NODES_DIR="$BASE_DIR/custom_nodes"
+SAGE_ATTENTION_DIR="$BASE_DIR/.sage_attention"
+SAGE_ATTENTION_BUILT_FLAG="$SAGE_ATTENTION_DIR/.built"
 PERMISSIONS_SET_FLAG="$BASE_DIR/.permissions_set"
 FIRST_RUN_FLAG="$BASE_DIR/.first_run_done"
 
@@ -23,6 +25,165 @@ log() { echo "[$(date '+%H:%M:%S')] $1"; }
 # Make newly created files group-writable (helps in shared volumes)
 umask 0002
 
+# --- build parallelism ---
+# Print the number of parallel compile jobs for the SageAttention build.
+# A positive-integer SAGE_MAX_JOBS wins; otherwise the count is tiered on
+# MemTotal from /proc/meminfo and, on larger hosts, bounded by nproc (cap 24).
+decide_build_jobs() {
+  if [[ "${SAGE_MAX_JOBS:-}" =~ ^[1-9][0-9]*$ ]]; then echo "$SAGE_MAX_JOBS"; return; fi
+  if [ -n "${SAGE_MAX_JOBS:-}" ]; then log "WARNING: ignoring invalid SAGE_MAX_JOBS='${SAGE_MAX_JOBS}'" >&2; fi
+  local mem_kb; mem_kb=$(awk '/MemTotal:/ {print $2}' /proc/meminfo 2>/dev/null || echo 0)
+  mem_kb=${mem_kb:-0}  # awk can succeed yet print nothing; treat that as the lowest tier
+  local cpu; cpu=$(nproc); local cap=24; local jobs
+  if [ "$mem_kb" -le $((8*1024*1024)) ]; then jobs=2
+  elif [ "$mem_kb" -le $((12*1024*1024)) ]; then jobs=3
+  elif [ "$mem_kb" -le $((24*1024*1024)) ]; then jobs=4
+  elif [ "$mem_kb" -le $((64*1024*1024)) ]; then jobs=$(( cpu<8 ? cpu : 8 ))
+  else jobs=$cpu; [ "$jobs" -gt "$cap" ] && jobs=$cap
+  fi
+  echo "$jobs"
+}
+
+# --- GPU probe (torch-based) ---
+# Print shell assignments (GPU_COUNT, COMPAT_GE_75, TORCH_CUDA_ARCH_LIST,
+# SAGE_STRATEGY) on stdout for the caller to eval; per-GPU details go to stderr.
+probe_gpu() {
+python - <<'PY'
+import sys
+try:
+    import torch
+except Exception:
+    print("GPU_COUNT=0"); print("COMPAT_GE_75=0"); print("TORCH_CUDA_ARCH_LIST="); print("SAGE_STRATEGY=fallback"); sys.exit(0)
+if not torch.cuda.is_available():
+    print("GPU_COUNT=0"); print("COMPAT_GE_75=0"); print("TORCH_CUDA_ARCH_LIST="); print("SAGE_STRATEGY=fallback"); sys.exit(0)
+n = torch.cuda.device_count()
+ccs = []; compat = False; has_turing = False; has_ampere_plus = False
+for i in range(n):
+    p = torch.cuda.get_device_properties(i)
+    mj, mn = p.major, p.minor
+    ccs.append(f"{mj}.{mn}")
+    if (mj*10+mn) >= 75: compat = True
+    if (mj, mn) == (7, 5): has_turing = True
+    if mj >= 8: has_ampere_plus = True
+ordered = sorted(set(ccs), key=lambda s: tuple(map(int, s.split("."))))
+arch_list = ";".join(ordered)
+if has_turing and has_ampere_plus: strategy = "mixed_with_turing"
+elif has_turing: strategy = "turing_only"
+elif has_ampere_plus: strategy = "ampere_ada_or_newer"
+else: strategy = "fallback"
+print(f"GPU_COUNT={n}")
+print(f"COMPAT_GE_75={1 if compat else 0}")
+# Quoted: the list contains ';', which eval would treat as a command separator.
+print(f"TORCH_CUDA_ARCH_LIST='{arch_list}'")
+print(f"SAGE_STRATEGY={strategy}")
+for i in range(n):
+    p = torch.cuda.get_device_properties(i)
+    print(f"[GPU] cuda:{i} - {p.name} (CC {p.major}.{p.minor})", file=sys.stderr)
+PY
+}
+
+# --- SageAttention ---
+# Return 0 (rebuild needed) when the build marker is missing or was written for
+# a different strategy / arch list than the current probe; return 1 otherwise.
+needs_sage_rebuild() {
+  [ ! -f "$SAGE_ATTENTION_BUILT_FLAG" ] && return 0
+  local stored; stored=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "")
+  local prev_strategy="${stored%%|*}"; local prev_arch="${stored#*|}"
+  [ "$prev_strategy" != "${SAGE_STRATEGY:-fallback}" ] && return 0
+  [ "$prev_arch" != "${TORCH_CUDA_ARCH_LIST:-}" ] && return 0
+  return 1
+}
+
+# Succeed only if the sageattention package imports in the current interpreter.
+test_sage_attention() {
+  python -c "import sageattention; print('[TEST] SageAttention import: OK')" 2>/dev/null
+}
+
+# Clone (or refresh) the SageAttention checkout matching SAGE_STRATEGY, pip-install
+# it into the current Python environment, and record "strategy|arch list" in the
+# build marker. Returns 1 on any git or build failure.
+build_sage_attention() {
+  log "Building SageAttention (strategy=${SAGE_STRATEGY:-fallback})..."
+  mkdir -p "$SAGE_ATTENTION_DIR" && cd "$SAGE_ATTENTION_DIR" || return 1
+  export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.0;8.6;8.9;9.0;10.0;12.0}"
+
+  # Turing (SM 7.5) requires the v1.0 branch; newer GPUs use main
+  case "${SAGE_STRATEGY:-fallback}" in
+    "mixed_with_turing"|"turing_only")
+      log "Cloning SageAttention v1.0 (Turing compatibility)"
+      if [ -d "SageAttention/.git" ]; then
+        cd SageAttention; git fetch --depth 1 origin || return 1
+        git checkout v1.0 2>/dev/null || git checkout -b v1.0 origin/v1.0 || return 1
+        git reset --hard origin/v1.0 || return 1
+      else
+        rm -rf SageAttention
+        git clone --depth 1 https://github.com/thu-ml/SageAttention.git -b v1.0 || return 1
+        cd SageAttention
+      fi
+      # Turing needs Triton 3.2.0
+      local cur_triton; cur_triton=$(python -c "import importlib.metadata as m; print(m.version('triton'))" 2>/dev/null || echo "")
+      if [ "$cur_triton" != "3.2.0" ]; then
+        log "Installing Triton 3.2.0 for Turing (current: ${cur_triton:-none})"
+        python -m pip install --no-cache-dir "triton==3.2.0" || log "WARNING: Triton 3.2.0 install failed; continuing with ${cur_triton:-no Triton}"
+      fi
+      ;;
+    *)
+      log "Cloning SageAttention (latest)"
+      if [ -d "SageAttention/.git" ]; then
+        cd SageAttention; git fetch --depth 1 origin || return 1
+        git reset --hard origin/main || return 1
+      else
+        rm -rf SageAttention
+        git clone --depth 1 https://github.com/thu-ml/SageAttention.git || return 1
+        cd SageAttention
+      fi
+      ;;
+  esac
+
+  local jobs; jobs="$(decide_build_jobs)"
+  log "Compiling with MAX_JOBS=${jobs}"
+  if MAX_JOBS="${jobs}" python -m pip install --no-build-isolation .; then
+    echo "${SAGE_STRATEGY:-fallback}|${TORCH_CUDA_ARCH_LIST:-}" > "$SAGE_ATTENTION_BUILT_FLAG"
+    cd "$SAGE_ATTENTION_DIR"; rm -rf SageAttention || true
+    cd "$BASE_DIR"
+    log "SageAttention built successfully"
+    return 0
+  else
+    cd "$BASE_DIR"
+    log "WARNING: SageAttention build failed"
+    return 1
+  fi
+}
+
+# Decide whether SageAttention is usable and export SAGE_ATTENTION_AVAILABLE
+# (0/1). Builds when needs_sage_rebuild says so or the import test fails.
+setup_sage_attention() {
+  export SAGE_ATTENTION_AVAILABLE=0
+  # The probe otherwise runs only in the root branch below; repeat it here when
+  # its results are missing from the environment.
+  if [ -z "${GPU_COUNT:-}" ]; then
+    eval "$(probe_gpu)"
+    export GPU_COUNT COMPAT_GE_75 TORCH_CUDA_ARCH_LIST SAGE_STRATEGY
+  fi
+  if [ "${GPU_COUNT:-0}" -eq 0 ] || [ "${COMPAT_GE_75:-0}" -ne 1 ]; then
+    log "SageAttention: skipped (no compatible GPU)"
+    return 0
+  fi
+  if needs_sage_rebuild || ! test_sage_attention; then
+    if build_sage_attention && test_sage_attention; then
+      export SAGE_ATTENTION_AVAILABLE=1
+      log "SageAttention ready; set FORCE_SAGE_ATTENTION=1 to enable"
+    else
+      log "WARNING: SageAttention unavailable; continuing without it"
+    fi
+    # build_sage_attention can return early from inside its checkout directory
+    cd "$BASE_DIR"
+  else
+    export SAGE_ATTENTION_AVAILABLE=1
+    log "SageAttention already built and importable"
+  fi
+}
+
 # --- ComfyUI-Manager config from CM_* env ---
 configure_manager_config() {
 python - "$CM_CFG" "$CM_SEEDED_FLAG" <<'PY'
@@ -48,10 +209,8 @@ cfg = configparser.ConfigParser()
 first_seed = not seed_flag.exists()
 if cfg_path.exists():
     cfg.read(cfg_path)
-
 if "default" not in cfg:
     cfg["default"] = {}
-
 if first_seed:
     cfg["default"].clear()
     for k,v in sorted(env_items.items()):
@@ -75,6 +234,11 @@ PY
 
 # --- root: set up permissions then drop to appuser ---
 if [ "$(id -u)" = "0" ]; then
+  # GPU probe (needed for SageAttention strategy)
+  eval "$(probe_gpu)"
+  export GPU_COUNT COMPAT_GE_75 TORCH_CUDA_ARCH_LIST SAGE_STRATEGY
+  log "GPU probe: ${GPU_COUNT:-0} device(s); arch=${TORCH_CUDA_ARCH_LIST:-none}; strategy=${SAGE_STRATEGY:-fallback}"
+
   if [ ! -f "$PERMISSIONS_SET_FLAG" ]; then
     log "Setting up user permissions..."
     if getent group "${PGID}" >/dev/null; then
@@ -85,26 +249,18 @@ if [ "$(id -u)" = "0" ]; then
     for d in "$BASE_DIR" "/home/$APP_USER"; do [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true; done
 
     readarray -t PY_PATHS < <(python - <<'PY'
-import sys, sysconfig, os, site, datetime
-def log(m): print(f"[bootstrap:python {datetime.datetime.now().strftime('%H:%M:%S')}] {m}", file=sys.stderr, flush=True)
-log("Determining writable Python install targets via sysconfig.get_paths(), site.getsitepackages(), and site.getusersitepackages()")
+import site, sysconfig
 seen=set()
 for k in ("purelib","platlib","scripts","include","platinclude","data"):
     v = sysconfig.get_paths().get(k)
     if v and v.startswith("/usr/local") and v not in seen:
-        print(v); seen.add(v); log(f"emit {k} -> {v}")
+        print(v); seen.add(v)
 for v in (site.getusersitepackages(),):
     if v and v not in seen:
-        print(v); seen.add(v); log(f"emit usersite -> {v}")
+        print(v); seen.add(v)
 for v in site.getsitepackages():
     if v and v.startswith("/usr/local") and v not in seen:
-        print(v); seen.add(v); log(f"emit sitepkg -> {v}")
-d = sysconfig.get_paths().get("data")
-if d:
-    share=os.path.join(d,"share"); man1=os.path.join(share,"man","man1")
-    for v in (share, man1):
-        if v and v.startswith("/usr/local") and v not in seen:
-            print(v); seen.add(v); log(f"emit wheel data -> {v}")
+        print(v); seen.add(v)
 PY
 )
     for d in "${PY_PATHS[@]}"; do
@@ -130,6 +286,9 @@ fi
 
 # --- From here on, running as $APP_USER ---
 
+# --- SageAttention setup ---
+setup_sage_attention
+
 # --- ComfyUI-Manager sync ---
 if [ -d "$CUSTOM_NODES_DIR/ComfyUI-Manager/.git" ]; then
   log "Updating ComfyUI-Manager"
@@ -173,13 +332,23 @@ fi
 configure_manager_config
 
 # --- launch ComfyUI ---
-log "Starting ComfyUI..."
+# Optional CLI flags live in an array so they expand to zero or more intact words.
+COMFYUI_ARGS=()
+if [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ] && [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then
+  COMFYUI_ARGS+=(--use-sage-attention)
+  log "Starting ComfyUI with SageAttention enabled"
+elif [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ]; then
+  log "WARNING: FORCE_SAGE_ATTENTION=1 but SageAttention is not available; starting without it"
+else
+  log "Starting ComfyUI..."
+fi
+
 cd "$BASE_DIR"
 if [ $# -eq 0 ]; then
-  exec python main.py --listen 0.0.0.0
+  exec python main.py --listen 0.0.0.0 "${COMFYUI_ARGS[@]}"
 else
   if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then
-    shift 2; exec python main.py "$@"
+    shift 2; exec python main.py "${COMFYUI_ARGS[@]}" "$@"
   else
     exec "$@"
   fi