ComfyUI/entrypoint.sh
clsferguson b0b95e5cc5
feat(entrypoint): fail-fast when no compatible NVIDIA GPU, mirror Manager’s dependency install steps, and harden permissions for Manager operations
- Add an early runtime check that exits cleanly when no compatible NVIDIA GPU is detected, preventing unnecessary installs and builds on hosts without GPUs, which matches the repo’s requirement to target recent-gen NVIDIA GPUs and avoids work on GitHub runners. 
- Mirror ComfyUI-Manager’s dependency behavior for custom nodes by: installing requirements*.txt and requirements/*.txt, building nodes with pyproject.toml using pip, and invoking node-provided install.py scripts when present, aligning with documented custom-node install flows. 
- Enforce user-level pip installs (PIP_USER=1) and ensure /usr/local site-packages trees are owned and writable by the runtime user; this resolves permission-denied errors seen when Manager updates or removes packages (e.g., numpy __pycache__), improving reliability of Manager-driven installs and uninstalls.
2025-09-29 22:36:35 -06:00

394 lines
18 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
set -euo pipefail
# --- config ---
APP_USER=${APP_USER:-appuser}
APP_GROUP=${APP_GROUP:-appuser}
PUID=${PUID:-1000}
PGID=${PGID:-1000}
BASE_DIR=/app/ComfyUI
CUSTOM_NODES_DIR="$BASE_DIR/custom_nodes"
SAGE_ATTENTION_DIR="$BASE_DIR/.sage_attention"
SAGE_ATTENTION_BUILT_FLAG="$SAGE_ATTENTION_DIR/.built"
PERMISSIONS_SET_FLAG="$BASE_DIR/.permissions_set"
FIRST_RUN_FLAG="$BASE_DIR/.first_run_done"
# --- logging ---
# log MESSAGE... — print a timestamped line to stdout.
# Uses "$*" so a caller that forgets to quote a multi-word message still
# logs the whole message (the old `echo "$1"` silently dropped everything
# after the first word); printf avoids echo's flag/backslash pitfalls.
log() { printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*"; }
# Make newly created files group-writable (helps in shared volumes)
umask 0002
# --- build parallelism (single knob) ---
# Public knob: SAGE_MAX_JOBS. If unset, pick RAM/CPU heuristic.
# Prints the job count on stdout. Compiling SageAttention with nvcc is
# memory-hungry, so RAM is the primary constraint and CPU count the cap.
decide_build_jobs() {
  # Explicit override always wins.
  if [ -n "${SAGE_MAX_JOBS:-}" ]; then echo "$SAGE_MAX_JOBS"; return; fi
  local mem_kb cpu cap=24 jobs
  # Declare and assign separately so a failing probe isn't masked (SC2155);
  # each probe has a safe fallback for minimal/non-Linux environments.
  mem_kb=$(awk '/MemTotal:/ {print $2}' /proc/meminfo 2>/dev/null || echo 0)
  cpu=$(nproc 2>/dev/null || echo 1)
  if [ "$mem_kb" -le $((8*1024*1024)) ]; then jobs=2
  elif [ "$mem_kb" -le $((12*1024*1024)) ]; then jobs=3
  elif [ "$mem_kb" -le $((24*1024*1024)) ]; then jobs=4
  elif [ "$mem_kb" -le $((64*1024*1024)) ]; then jobs=$(( cpu<8 ? cpu : 8 ))
  else jobs=$cpu; [ "$jobs" -gt "$cap" ] && jobs=$cap
  fi
  echo "$jobs"
}
# --- CUDA/Torch checks ---
# Probe PyTorch for CUDA support; prints one [TEST] line per visible GPU.
# Exits non-zero (with stderr suppressed) when torch is missing or CUDA
# is unavailable, so callers can use it directly in conditionals.
test_pytorch_cuda() {
python - <<'PY' 2>/dev/null
import sys
import torch
if not torch.cuda.is_available():
    print('[ERROR] PyTorch CUDA not available')
    sys.exit(1)
count = torch.cuda.device_count()
print(f'[TEST] PyTorch CUDA available with {count} devices')
for idx in range(count):
    props = torch.cuda.get_device_properties(idx)
    print(f'[TEST] GPU {idx}: {props.name} (Compute {props.major}.{props.minor})')
PY
}
# Determine if there is a compatible NVIDIA GPU (>= sm_75, i.e., 16-series/Turing and newer)
# Exit codes: 0 = at least one compatible GPU; 2 = CUDA unavailable;
# 3 = GPUs present but all below sm_75; 4 = torch missing or probe crashed.
gpu_is_compatible() {
python - <<'PY' 2>/dev/null
import sys
try:
    import torch
    if not torch.cuda.is_available():
        sys.exit(2)
    compatible = False
    for idx in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(idx)
        # sys.exit raises SystemExit (a BaseException), so it is not
        # swallowed by the `except Exception` below.
        if float(f"{props.major}.{props.minor}") >= 7.5:
            compatible = True
    sys.exit(0 if compatible else 3)
except Exception:
    sys.exit(4)
PY
}
# Derive arch list directly from Torch; optional +PTX via SAGE_PTX_FALLBACK=1
# Prints a ";"-joined, ascending list of unique compute capabilities
# (e.g. "7.5;8.6"), or an empty line when torch/CUDA is unavailable.
compute_arch_list_from_torch() {
python - <<'PY' 2>/dev/null
import os
import sys
try:
    import torch
    if not torch.cuda.is_available():
        print("")
        sys.exit(0)
    caps = set()
    for idx in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(idx)
        caps.add(f"{props.major}.{props.minor}")
    # Numeric sort: "10.0" must come after "9.0", which string sort gets wrong.
    ordered = sorted(caps, key=lambda cc: tuple(int(part) for part in cc.split(".")))
    if not ordered:
        print("")
        sys.exit(0)
    if os.environ.get("SAGE_PTX_FALLBACK", "0") == "1":
        # Add a +PTX variant of the highest arch for forward compatibility.
        print(";".join(ordered + [ordered[-1] + "+PTX"]))
    else:
        print(";".join(ordered))
except Exception:
    print("")
PY
}
# Fallback name-based mapping across Turing→Blackwell
# Classify each GPU that nvidia-smi reports by its marketing name and export
# one DET_* boolean per architecture plus GPU_COUNT; used when Torch cannot
# report compute capabilities directly. Returns 1 (after logging) when
# nvidia-smi lists no GPUs or is not installed.
detect_gpu_generations() {
  local info=$(nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null || echo "")
  local has_turing=false has_amp_ga100=false has_amp_ga10x=false has_amp_ga10b=false
  local has_ada=false has_hopper=false has_bw_cons=false has_bw_dc=false
  local n=0
  [ -z "$info" ] && { log "No NVIDIA GPUs detected"; return 1; }
  log "Detecting GPU generations:"
  # One GPU name per line from nvidia-smi; first matching pattern wins.
  # The arch comments reflect the arch-list mapping used elsewhere in this
  # script (7.5/8.0/8.6/8.7/8.9/9.0/10.0/12.0).
  while IFS= read -r g; do
    n=$((n+1)); log " GPU $n: $g"
    case "$g" in
      *"RTX 20"*|*"T4"*) has_turing=true ;;                                          # 7.5
      *"A100"*|*"A30"*|*"A40"*) has_amp_ga100=true ;;                                # 8.0
      *"RTX 30"*|*"RTX 3090"*|*"RTX 3080"*|*"RTX 3070"*|*"RTX 3060"*) has_amp_ga10x=true ;;  # 8.6
      *"Orin"*|*"Jetson"*) has_amp_ga10b=true ;;                                     # 8.7
      *"RTX 40"*|*"4090"*|*"L40"*|*"L4"*) has_ada=true ;;                            # 8.9
      *"H100"*|*"H200"*|*"GH200"*) has_hopper=true ;;                                # 9.0
      *"RTX 50"*|*"5090"*|*"5080"*|*"5070"*|*"5060"*|*"PRO "*Blackwell*|*"PRO 4000 Blackwell"*) has_bw_cons=true ;;  # 12.0
      *"B200"*|*"B100"*|*"GB200"*|*"B40"*|*"RTX 6000 Blackwell"*|*"RTX 5000 Blackwell"*) has_bw_dc=true ;;           # 10.0
    esac
  done <<< "$info"
  # Export the flags consumed by determine_sage_strategy and the arch-list
  # fallbacks in setup_sage_attention / build_sage_attention_mixed.
  export DET_TURING=$has_turing DET_AMP80=$has_amp_ga100 DET_AMP86=$has_amp_ga10x DET_AMP87=$has_amp_ga10b
  export DET_ADA=$has_ada DET_HOPPER=$has_hopper DET_BW12=$has_bw_cons DET_BW10=$has_bw_dc
  export GPU_COUNT=$n
  log "Summary: Turing=$has_turing Amp(8.0)=$has_amp_ga100 Amp(8.6)=$has_amp_ga10x Amp(8.7)=$has_amp_ga10b Ada=$has_ada Hopper=$has_hopper Blackwell(12.x)=$has_bw_cons Blackwell(10.0)=$has_bw_dc"
  # Best-effort sanity check; detection results are kept either way.
  test_pytorch_cuda && log "PyTorch CUDA compatibility confirmed" || log "WARNING: PyTorch CUDA compatibility issues detected"
}
# Pick the SageAttention build strategy from the DET_* flags exported by
# detect_gpu_generations, log the decision, and export it as SAGE_STRATEGY.
determine_sage_strategy() {
  local strategy
  local modern=false
  # "modern" = any non-Turing generation is present (Ampere or newer).
  if [ "${DET_AMP80:-false}" = "true" ] || [ "${DET_AMP86:-false}" = "true" ] \
    || [ "${DET_AMP87:-false}" = "true" ] || [ "${DET_ADA:-false}" = "true" ] \
    || [ "${DET_HOPPER:-false}" = "true" ] || [ "${DET_BW12:-false}" = "true" ] \
    || [ "${DET_BW10:-false}" = "true" ]; then
    modern=true
  fi
  if [ "${DET_TURING:-false}" = "true" ]; then
    # Turing in the mix forces compatibility handling.
    if [ "$modern" = "true" ]; then
      strategy="mixed_with_turing"; log "Mixed rig including Turing - using compatibility mode"
    else
      strategy="turing_only"; log "Turing-only rig detected"
    fi
  elif [ "${DET_BW12:-false}" = "true" ] || [ "${DET_BW10:-false}" = "true" ]; then
    strategy="blackwell_capable"; log "Blackwell detected - using latest optimizations"
  elif [ "${DET_HOPPER:-false}" = "true" ]; then
    strategy="hopper_capable"; log "Hopper detected - using modern optimizations"
  elif [ "$modern" = "true" ]; then
    # Reaching here means no Hopper/Blackwell, so "modern" == Ampere/Ada.
    strategy="ampere_ada_optimized"; log "Ampere/Ada detected - using standard optimizations"
  else
    strategy="fallback"; log "Unknown configuration - using fallback"
  fi
  export SAGE_STRATEGY=$strategy
}
# Install the Triton release matching SAGE_STRATEGY.
# Turing rigs are pinned to 3.2.0; Hopper/Blackwell try latest, then a
# pre-release, then >=3.2.0. The strategy-specific paths are best-effort
# (|| true); only the generic path propagates failure to the caller.
install_triton_version() {
  case "$SAGE_STRATEGY" in
    mixed_with_turing|turing_only)
      log "Installing Triton 3.2.0 for Turing compatibility"
      python -m pip install --user --force-reinstall "triton==3.2.0" \
        || python -m pip install --user --force-reinstall triton \
        || true
      ;;
    blackwell_capable|hopper_capable)
      log "Installing latest Triton for Hopper/Blackwell"
      python -m pip install --user --force-reinstall triton \
        || python -m pip install --user --force-reinstall --pre triton \
        || python -m pip install --user --force-reinstall "triton>=3.2.0" \
        || true
      ;;
    *)
      log "Installing latest stable Triton"
      python -m pip install --user --force-reinstall triton \
        || { log "WARNING: Triton installation failed"; return 1; }
      ;;
  esac
}
# Clone (or update) the SageAttention source matching SAGE_STRATEGY, build it
# with pip against TORCH_CUDA_ARCH_LIST, and record a success marker so
# needs_rebuild can detect configuration drift. Returns non-zero on any
# git or build failure; always ends back in $BASE_DIR.
build_sage_attention_mixed() {
  log "Building Sage Attention..."
  mkdir -p "$SAGE_ATTENTION_DIR"; cd "$SAGE_ATTENTION_DIR"
  # Arch list priority: explicit override > Torch-derived > DET_* name-based fallback.
  local arch_list="${SAGE_ARCH_LIST_OVERRIDE:-$(compute_arch_list_from_torch)}"
  if [ -z "$arch_list" ]; then
    local tmp=""
    [ "${DET_TURING:-false}" = "true" ] && tmp="${tmp}7.5;"
    [ "${DET_AMP80:-false}" = "true" ] && tmp="${tmp}8.0;"
    [ "${DET_AMP86:-false}" = "true" ] && tmp="${tmp}8.6;"
    [ "${DET_AMP87:-false}" = "true" ] && tmp="${tmp}8.7;"
    [ "${DET_ADA:-false}" = "true" ] && tmp="${tmp}8.9;"
    [ "${DET_HOPPER:-false}" = "true" ] && tmp="${tmp}9.0;"
    [ "${DET_BW10:-false}" = "true" ] && tmp="${tmp}10.0;"
    [ "${DET_BW12:-false}" = "true" ] && tmp="${tmp}12.0;"
    arch_list="${tmp%;}"   # strip trailing separator
  fi
  export TORCH_CUDA_ARCH_LIST="$arch_list"
  log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
  case "$SAGE_STRATEGY" in
    "mixed_with_turing"|"turing_only")
      # Turing rigs use the pinned v1.0 branch; reuse an existing checkout when possible.
      log "Cloning SageAttention v1.0 for Turing"
      if [ -d "SageAttention/.git" ]; then cd SageAttention; git fetch --depth 1 origin || return 1; git checkout v1.0 2>/dev/null || git checkout -b v1.0 origin/v1.0 || return 1; git reset --hard origin/v1.0 || return 1
      else rm -rf SageAttention; git clone --depth 1 https://github.com/thu-ml/SageAttention.git -b v1.0 || return 1; cd SageAttention; fi
      ;;
    *)
      # Everything else tracks upstream main.
      log "Cloning latest SageAttention"
      if [ -d "SageAttention/.git" ]; then cd SageAttention; git fetch --depth 1 origin || return 1; git reset --hard origin/main || return 1
      else rm -rf SageAttention; git clone --depth 1 https://github.com/thu-ml/SageAttention.git || return 1; cd SageAttention; fi
      ;;
  esac
  [ "${SAGE_VERBOSE_BUILD:-0}" = "1" ] && export TORCH_CPP_BUILD_VERBOSE=1
  local jobs; jobs="$(decide_build_jobs)"
  log "Using MAX_JOBS=${jobs} for SageAttention build"
  # --no-build-isolation so the build sees the already-installed torch/triton.
  if MAX_JOBS="${jobs}" python -m pip install --user --no-build-isolation .; then
    # Flag format "<strategy>|<arch list>" is what needs_rebuild parses.
    echo "$SAGE_STRATEGY|$TORCH_CUDA_ARCH_LIST" > "$SAGE_ATTENTION_BUILT_FLAG"
    log "SageAttention built successfully"
    cd "$BASE_DIR"; return 0
  else
    log "ERROR: SageAttention build failed"
    cd "$BASE_DIR"; return 1
  fi
}
# Decide whether SageAttention must be (re)built: returns 0 (rebuild) when
# no build marker exists or the recorded "<strategy>|<arch list>" no longer
# matches the current SAGE_STRATEGY / TORCH_CUDA_ARCH_LIST; returns 1 when
# the previous build is still valid.
needs_rebuild() {
  [ -f "$SAGE_ATTENTION_BUILT_FLAG" ] || return 0
  local recorded
  recorded=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "")
  if [ "${recorded%%|*}" = "$SAGE_STRATEGY" ] && [ "${recorded#*|}" = "$TORCH_CUDA_ARCH_LIST" ]; then
    return 1
  fi
  return 0
}
# Verify that the sageattention package imports in the current interpreter,
# printing its version when exposed. Exits non-zero (with stderr suppressed)
# on import failure so callers can use it directly in conditionals.
test_sage_attention() {
python - <<'PY' 2>/dev/null
import sys
try:
    import sageattention
    print('[TEST] SageAttention import: SUCCESS')
    version = getattr(sageattention, '__version__', None)
    if version:
        print(f'[TEST] Version: {version}')
        sys.exit(0)
except ImportError as exc:
    print(f'[TEST] SageAttention import: FAILED - {exc}')
    sys.exit(1)
except Exception as exc:
    print(f'[TEST] SageAttention test: ERROR - {exc}')
    sys.exit(1)
PY
}
# Orchestrate the whole SageAttention pipeline: GPU detection → strategy
# selection → arch-list resolution → (re)build if needed → import test.
# Always returns 0; the outcome is communicated via SAGE_ATTENTION_BUILT /
# SAGE_ATTENTION_AVAILABLE, which the launch logic at the bottom reads.
setup_sage_attention() {
  export SAGE_ATTENTION_BUILT=0 SAGE_ATTENTION_AVAILABLE=0
  if ! detect_gpu_generations; then log "No GPUs detected, skipping SageAttention setup"; return 0; fi
  determine_sage_strategy
  # Resolve the arch list here so needs_rebuild compares against the same
  # value a build would use: override > Torch-derived > DET_* fallback.
  export TORCH_CUDA_ARCH_LIST="${SAGE_ARCH_LIST_OVERRIDE:-$(compute_arch_list_from_torch)}"
  if [ -z "$TORCH_CUDA_ARCH_LIST" ]; then
    local tmp=""
    [ "${DET_TURING:-false}" = "true" ] && tmp="${tmp}7.5;"
    [ "${DET_AMP80:-false}" = "true" ] && tmp="${tmp}8.0;"
    [ "${DET_AMP86:-false}" = "true" ] && tmp="${tmp}8.6;"
    [ "${DET_AMP87:-false}" = "true" ] && tmp="${tmp}8.7;"
    [ "${DET_ADA:-false}" = "true" ] && tmp="${tmp}8.9;"
    [ "${DET_HOPPER:-false}" = "true" ] && tmp="${tmp}9.0;"
    [ "${DET_BW10:-false}" = "true" ] && tmp="${tmp}10.0;"
    [ "${DET_BW12:-false}" = "true" ] && tmp="${tmp}12.0;"
    export TORCH_CUDA_ARCH_LIST="${tmp%;}"   # strip trailing separator
  fi
  log "Resolved TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
  # Rebuild when the recorded config drifted OR the module no longer imports.
  if needs_rebuild || ! test_sage_attention; then
    log "Building SageAttention..."
    if install_triton_version && build_sage_attention_mixed && test_sage_attention; then
      export SAGE_ATTENTION_BUILT=1 SAGE_ATTENTION_AVAILABLE=1
      log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable it at startup"
    else
      export SAGE_ATTENTION_BUILT=0 SAGE_ATTENTION_AVAILABLE=0
      log "WARNING: SageAttention is not available after build attempt"
    fi
  else
    export SAGE_ATTENTION_BUILT=1 SAGE_ATTENTION_AVAILABLE=1
    log "SageAttention already built and importable"
  fi
}
# --- root to runtime user ---
# One-time (flag-guarded) bootstrap executed only when started as root:
# align the runtime user/group with PUID/PGID, hand ownership of the app
# tree and every Python install target to that user (so Manager-driven pip
# installs/uninstalls don't hit permission errors), then re-exec this same
# script unprivileged via runuser.
if [ "$(id -u)" = "0" ]; then
  if [ ! -f "$PERMISSIONS_SET_FLAG" ]; then
    log "Setting up user permissions..."
    # If a group already owns the requested GID, join it instead of colliding.
    if getent group "${PGID}" >/dev/null; then
      EXISTING_GRP="$(getent group "${PGID}" | cut -d: -f1)"; usermod -g "${EXISTING_GRP}" "${APP_USER}" || true; APP_GROUP="${EXISTING_GRP}"
    else groupmod -o -g "${PGID}" "${APP_GROUP}" || true; fi
    usermod -o -u "${PUID}" "${APP_USER}" || true
    mkdir -p "/home/${APP_USER}"
    for d in "$BASE_DIR" "/home/$APP_USER"; do [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true; done
    # Discover both system and user site dirs and make them writable by the runtime user
    readarray -t PY_PATHS < <(python - <<'PY'
import sys, sysconfig, os, site, datetime
def log(m): print(f"[bootstrap:python {datetime.datetime.now().strftime('%H:%M:%S')}] {m}", file=sys.stderr, flush=True)
log("Determining writable Python install targets via sysconfig.get_paths(), site.getsitepackages(), and site.getusersitepackages()")
seen=set()
# Interpreter install paths; only those under /usr/local are ours to chown.
for k in ("purelib","platlib","scripts","include","platinclude","data"):
    v = sysconfig.get_paths().get(k)
    if v and v.startswith("/usr/local") and v not in seen:
        print(v); seen.add(v); log(f"emit {k} -> {v}")
# Per-user site-packages: no /usr/local filter, it lives under $HOME.
for v in (site.getusersitepackages(),):
    if v and v not in seen:
        print(v); seen.add(v); log(f"emit usersite -> {v}")
for v in site.getsitepackages():
    if v and v.startswith("/usr/local") and v not in seen:
        print(v); seen.add(v); log(f"emit sitepkg -> {v}")
# Wheel "data" dirs (share/, man pages) that some packages install into.
d = sysconfig.get_paths().get("data")
if d:
    share=os.path.join(d,"share"); man1=os.path.join(share,"man","man1")
    for v in (share, man1):
        if v and v.startswith("/usr/local") and v not in seen:
            print(v); seen.add(v); log(f"emit wheel data -> {v}")
PY
)
    for d in "${PY_PATHS[@]}"; do
      [ -n "$d" ] || continue
      mkdir -p "$d" || true
      chown -R "${APP_USER}:${APP_GROUP}" "$d" || true
      chmod -R u+rwX,g+rwX "$d" || true
    done
    # Also ensure the main site-packages tree is writable if present (guards numpy uninstall/upgrade)
    # NOTE(review): hard-codes python3.12 — confirm this matches the base image's interpreter.
    if [ -d "/usr/local/lib/python3.12/site-packages" ]; then
      chown -R "${APP_USER}:${APP_GROUP}" /usr/local/lib/python3.12/site-packages || true
      chmod -R u+rwX,g+rwX /usr/local/lib/python3.12/site-packages || true
    fi
    touch "$PERMISSIONS_SET_FLAG"; chown "${APP_USER}:${APP_GROUP}" "$PERMISSIONS_SET_FLAG"
    log "User permissions configured"
  else
    log "User permissions already configured, skipping..."
  fi
  # Drop privileges: restart this script as the runtime user with the same args.
  exec runuser -u "${APP_USER}" -- "$0" "$@"
fi
# From here on, running as $APP_USER
# Favor user installs everywhere to avoid touching system packages
export PATH="$HOME/.local/bin:$PATH"
# Put the current interpreter's per-user site-packages on PYTHONPATH so
# user-installed modules win over (or supplement) system ones.
pyver="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}"
# PIP_USER=1: every pip install below (including Manager's) defaults to --user.
export PIP_USER=1
# Prefer prebuilt wheels over source builds.
export PIP_PREFER_BINARY=1
# Abort early if no compatible NVIDIA GPU (>= sm_75) is present
if ! gpu_is_compatible; then
  log "No compatible NVIDIA GPU detected (compute capability 7.5+ required). Shutting down container."
  # Exit 0 to avoid restart loops in some runtimes
  exit 0
fi
# --- SageAttention setup (runs only if compatible GPU is present) ---
setup_sage_attention
# --- ComfyUI-Manager sync ---
# Pin an existing checkout to upstream HEAD (fetch + hard reset + clean) or
# clone fresh when absent; every step is best-effort so a network failure
# never blocks container startup.
# NOTE(review): a ComfyUI-Manager directory that is not a git checkout is
# deliberately left untouched by both branches.
if [ -d "$CUSTOM_NODES_DIR/ComfyUI-Manager/.git" ]; then
  log "Updating ComfyUI-Manager"
  git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" fetch --depth 1 origin || true
  git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" reset --hard origin/HEAD || true
  git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" clean -fdx || true
elif [ ! -d "$CUSTOM_NODES_DIR/ComfyUI-Manager" ]; then
  log "Installing ComfyUI-Manager"
  git clone --depth 1 https://github.com/ltdrdata/ComfyUI-Manager.git "$CUSTOM_NODES_DIR/ComfyUI-Manager" || true
fi
# --- first-run install of custom_nodes ---
# Runs once (guarded by FIRST_RUN_FLAG) unless COMFY_FORCE_INSTALL=1;
# COMFY_AUTO_INSTALL=0 skips the installs but still marks first run done.
# All installs are best-effort (|| true) so one broken node can't block startup.
if [ ! -f "$FIRST_RUN_FLAG" ] || [ "${COMFY_FORCE_INSTALL:-0}" = "1" ]; then
  if [ "${COMFY_AUTO_INSTALL:-1}" = "1" ]; then
    log "First run or forced; installing custom node dependencies..."
    # 1) Install requirements files (Manager-like behavior): requirements.txt,
    #    requirements-*.txt, and requirements/*.txt up to 3 levels deep.
    while IFS= read -r -d '' req; do
      log "python -m pip install --user --upgrade --upgrade-strategy only-if-needed -r $req"
      python -m pip install --no-cache-dir --user --upgrade --upgrade-strategy only-if-needed -r "$req" || true
    done < <(find "$CUSTOM_NODES_DIR" -maxdepth 3 -type f \( -iname 'requirements.txt' -o -iname 'requirements-*.txt' -o -path '*/requirements/*.txt' \) -print0)
    # 2) Install from pyproject (editable build avoided to mimic Manager's typical install)
    while IFS= read -r -d '' pjt; do
      d="$(dirname "$pjt")"
      log "python -m pip install --user . in $d"
      (cd "$d" && python -m pip install --no-cache-dir --user .) || true
    done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'pyproject.toml' -not -path '*/ComfyUI-Manager/*' -print0)
    # 3) Run node-provided install.py if present (Manager runs install scripts; mirror that)
    while IFS= read -r -d '' inst; do
      d="$(dirname "$inst")"
      log "Running node install script: $inst"
      (cd "$d" && python "$inst") || true
    done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'install.py' -not -path '*/ComfyUI-Manager/*' -print0)
    # Surface (but do not fail on) dependency conflicts.
    python -m pip check || true
  else
    log "COMFY_AUTO_INSTALL=0; skipping dependency install"
  fi
  touch "$FIRST_RUN_FLAG"
else
  log "Not first run; skipping custom_nodes dependency install"
fi
# --- launch ComfyUI ---
COMFYUI_ARGS=""
if [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ]; then
if test_sage_attention; then COMFYUI_ARGS="--use-sage-attention"; log "Starting ComfyUI with SageAttention (FORCE_SAGE_ATTENTION=1)"
else log "WARNING: FORCE_SAGE_ATTENTION=1 but import failed; starting without"; fi
else
if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable"
else log "SageAttention not available; starting without it"; fi
fi
cd "$BASE_DIR"
if [ $# -eq 0 ]; then
exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS
else
if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then
shift 2; exec python main.py $COMFYUI_ARGS "$@"
else
exec "$@"
fi
fi