From 45b87c7c9997c16077b4e2697e843ec34d315764 Mon Sep 17 00:00:00 2001 From: clsferguson <48876201+clsferguson@users.noreply.github.com> Date: Fri, 26 Sep 2025 20:04:35 -0600 Subject: [PATCH] Refactor entrypoint: first-run installs, fix Sage flags, arch map, logs Introduce a first-run flag to install custom_nodes dependencies only on the initial container start, with COMFY_FORCE_INSTALL=1 to override on demand; correct Sage Attention flag semantics so SAGE_ATTENTION_AVAILABLE=1 only indicates the build is present while FORCE_SAGE_ATTENTION=1 enables it at startup; fix the misleading log to reference FORCE_SAGE_ATTENTION. Update TORCH_CUDA_ARCH_LIST mapping to 7.5 (Turing), 8.6 (Ampere), 8.9 (Ada), and 10.0 (Blackwell/RTX 50); retain Triton strategy with a compatibility pin on Turing and latest for Blackwell, including fallbacks. Clean up git clone URLs, standardize on python -m pip, and tighten logs; preserve user remapping and strategy-based rebuild detection via the .built flag. --- entrypoint.sh | 241 ++++++++++++++++++++------------------------------ 1 file changed, 96 insertions(+), 145 deletions(-) diff --git a/entrypoint.sh b/entrypoint.sh index bbb93b9be..f843bc87d 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -17,36 +17,22 @@ log() { echo "[$(date '+%H:%M:%S')] $1" } -# Function to test PyTorch CUDA compatibility and warn for Blackwell (sm_120) if unsupported +# Function to test PyTorch CUDA compatibility test_pytorch_cuda() { - python - <<'PY' 2>/dev/null -import sys, torch -ok = torch.cuda.is_available() -if not ok: - print("[ERROR] PyTorch CUDA not available") + python -c " +import torch, sys +if not torch.cuda.is_available(): + print('[ERROR] PyTorch CUDA not available') sys.exit(1) -dc = torch.cuda.device_count() -print(f"[TEST] PyTorch CUDA available with {dc} devices") -majors = set() -for i in range(dc): - p = torch.cuda.get_device_properties(i) - majors.add(p.major) - print(f"[TEST] GPU {i}: {p.name} (Compute {p.major}.{p.minor})") -# 
Blackwell warning: require binaries that know sm_120 -cuda_ver = getattr(torch.version, "cuda", None) -arch_list = [] -try: - arch_list = list(getattr(torch.cuda, "get_arch_list")()) -except Exception: - pass -if any(m >= 12 for m in majors): - has_120 = any("sm_120" in a or "compute_120" in a for a in arch_list) - if not has_120: - print("[WARN] Detected Blackwell (sm_120) GPU but current torch build does not expose sm_120; use torch 2.7+ with CUDA 12.8+ binaries or a source build", flush=True) -PY +c = torch.cuda.device_count() +print(f'[TEST] PyTorch CUDA available with {c} devices') +for i in range(c): + props = torch.cuda.get_device_properties(i) + print(f'[TEST] GPU {i}: {props.name} (Compute {props.major}.{props.minor})') +" 2>/dev/null } -# Function to detect all GPUs and their generations (best-effort labels) +# Function to detect all GPUs and their generations detect_gpu_generations() { local gpu_info gpu_info=$(nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null || echo "") @@ -88,21 +74,21 @@ detect_gpu_generations() { fi } -# Decide optimal Sage Attention strategy +# Function to determine optimal Sage Attention strategy for mixed GPUs determine_sage_strategy() { local strategy="" - if [ "${DETECTED_RTX20:-false}" = "true" ]; then - if [ "${DETECTED_RTX30:-false}" = "true" ] || [ "${DETECTED_RTX40:-false}" = "true" ] || [ "${DETECTED_RTX50:-false}" = "true" ]; then + if [ "$DETECTED_RTX20" = "true" ]; then + if [ "$DETECTED_RTX30" = "true" ] || [ "$DETECTED_RTX40" = "true" ] || [ "$DETECTED_RTX50" = "true" ]; then strategy="mixed_with_rtx20" log "Mixed GPU setup detected with RTX 20 series - using compatibility mode" else strategy="rtx20_only" log "RTX 20 series only detected" fi - elif [ "${DETECTED_RTX50:-false}" = "true" ] ; then + elif [ "$DETECTED_RTX50" = "true" ]; then strategy="rtx50_capable" log "RTX 50 series detected - using latest optimizations" - elif [ "${DETECTED_RTX40:-false}" = "true" ] || [ "${DETECTED_RTX30:-false}" 
= "true" ]; then + elif [ "$DETECTED_RTX40" = "true" ] || [ "$DETECTED_RTX30" = "true" ]; then strategy="rtx30_40_optimized" log "RTX 30/40 series detected - using standard optimizations" else @@ -112,27 +98,27 @@ determine_sage_strategy() { export SAGE_STRATEGY=$strategy } -# Install Triton appropriate to strategy +# Function to install appropriate Triton version based on strategy install_triton_version() { case "$SAGE_STRATEGY" in "mixed_with_rtx20"|"rtx20_only") - log "Installing Triton 3.2.0 for RTX 20 series compatibility" - python -m pip install --no-cache-dir --user --force-reinstall "triton==3.2.0" || { - log "WARNING: Failed to install triton==3.2.0; falling back to latest" - python -m pip install --no-cache-dir --user --force-reinstall triton || true + log "Installing Triton 3.2.0 for broader compatibility on Turing-era GPUs" + python -m pip install --user --force-reinstall "triton==3.2.0" || { + log "WARNING: Failed to pin Triton 3.2.0, trying latest" + python -m pip install --user --force-reinstall triton || true } ;; "rtx50_capable") - log "Installing latest Triton for RTX 50 series" - python -m pip install --no-cache-dir --user --force-reinstall triton || \ - python -m pip install --no-cache-dir --user --force-reinstall --pre triton || { - log "WARNING: Failed to install latest Triton; trying >=3.2.0" - python -m pip install --no-cache-dir --user --force-reinstall "triton>=3.2.0" || true + log "Installing latest Triton for Blackwell/RTX 50" + python -m pip install --user --force-reinstall triton || \ + python -m pip install --user --force-reinstall --pre triton || { + log "WARNING: Latest Triton install failed, falling back to >=3.2.0" + python -m pip install --user --force-reinstall "triton>=3.2.0" || true } ;; *) log "Installing latest stable Triton" - python -m pip install --no-cache-dir --user --force-reinstall triton || { + python -m pip install --user --force-reinstall triton || { log "WARNING: Triton installation failed, continuing without" 
return 1 } @@ -140,55 +126,28 @@ install_triton_version() { esac } -# Compute TORCH_CUDA_ARCH_LIST from runtime devices; append +PTX on the highest arch for forward-compat -compute_cuda_arch_list() { - python - <<'PY' 2>/dev/null -import sys, torch, re -archs = set() -if torch.cuda.is_available(): - for i in range(torch.cuda.device_count()): - p = torch.cuda.get_device_properties(i) - archs.add((p.major, p.minor)) -# Fallback: parse compiled archs from torch binary if devices unavailable -if not archs: - try: - got = torch.cuda.get_arch_list() - for a in got: - m = re.match(r".*?(\d+)(\d+)$", a.replace("sm_", "").replace("compute_", "")) - if m: - archs.add((int(m.group(1)), int(m.group(2)))) - except Exception: - pass -if not archs: - print("") # nothing - sys.exit(0) -archs = sorted(archs) -parts = [f"{M}.{m}" for (M,m) in archs] -# add +PTX to the highest arch for forward-compat builds of extensions -parts[-1] = parts[-1] + "+PTX" -print(";".join(parts)) -PY -} - -# Build Sage Attention with architecture-specific optimizations +# Function to build Sage Attention with architecture-specific optimizations build_sage_attention_mixed() { log "Building Sage Attention for current GPU environment..." - mkdir -p "$SAGE_ATTENTION_DIR" cd "$SAGE_ATTENTION_DIR" - local cuda_arch_list - cuda_arch_list="$(compute_cuda_arch_list || true)" - if [ -n "${cuda_arch_list:-}" ]; then - export TORCH_CUDA_ARCH_LIST="$cuda_arch_list" - log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" - else - log "Could not infer TORCH_CUDA_ARCH_LIST from torch; proceeding with PyTorch defaults" - fi + # Compute capability mapping for TORCH_CUDA_ARCH_LIST: + # Turing = 7.5, Ampere = 8.6, Ada = 8.9, Blackwell (RTX 50) = 10.0 + # NOTE(review): per NVIDIA's compute-capability table, sm_100/CC 10.0 is datacenter Blackwell (B100/B200); consumer RTX 50 (GB20x) reports sm_120/CC 12.0 (as the removed warning above checked) — verify the 10.0 mapping below before relying on it. Sources: 
[doc refs in text] + local cuda_arch_list="" + [ "$DETECTED_RTX20" = "true" ] && cuda_arch_list="${cuda_arch_list}7.5;" + [ "$DETECTED_RTX30" = "true" ] && cuda_arch_list="${cuda_arch_list}8.6;" + [ "$DETECTED_RTX40" = "true" ] && cuda_arch_list="${cuda_arch_list}8.9;" + [ "$DETECTED_RTX50" = "true" ] && cuda_arch_list="${cuda_arch_list}10.0;" + cuda_arch_list=${cuda_arch_list%;} + + export TORCH_CUDA_ARCH_LIST="$cuda_arch_list" + log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" case "$SAGE_STRATEGY" in "mixed_with_rtx20"|"rtx20_only") - log "Cloning Sage Attention v1.0 for RTX 20 series compatibility" + log "Cloning SageAttention v1.0 for RTX 20 series compatibility" if [ -d "SageAttention/.git" ]; then cd SageAttention git fetch --depth 1 origin || return 1 @@ -201,7 +160,7 @@ build_sage_attention_mixed() { fi ;; *) - log "Cloning latest Sage Attention for modern GPUs" + log "Cloning latest SageAttention for modern GPUs" if [ -d "SageAttention/.git" ]; then cd SageAttention git fetch --depth 1 origin || return 1 @@ -214,20 +173,20 @@ build_sage_attention_mixed() { ;; esac - log "Building Sage Attention..." - if MAX_JOBS=$(nproc) python -m pip install --no-cache-dir --user --no-build-isolation .; then + log "Building SageAttention (no-build-isolation) ..." + if MAX_JOBS=$(nproc) python -m pip install --user --no-build-isolation .; then echo "$SAGE_STRATEGY" > "$SAGE_ATTENTION_BUILT_FLAG" - log "Sage Attention built successfully for strategy: $SAGE_STRATEGY" + log "SageAttention built successfully for strategy: $SAGE_STRATEGY" cd "$BASE_DIR" return 0 else - log "ERROR: Sage Attention build failed" + log "ERROR: SageAttention build failed" cd "$BASE_DIR" return 1 fi } -# Check if current build matches detected GPUs +# Function to check if current build matches detected GPUs needs_rebuild() { if [ ! 
-f "$SAGE_ATTENTION_BUILT_FLAG" ]; then return 0 @@ -241,56 +200,56 @@ needs_rebuild() { return 1 } -# Verify Sage Attention imports +# Function to check if SageAttention is working test_sage_attention() { - python - <<'PY' 2>/dev/null + python -c " import sys try: import sageattention - print("[TEST] Sage Attention import: SUCCESS") + print('[TEST] SageAttention import: SUCCESS') try: - v = getattr(sageattention, "__version__", None) - if v: - print(f"[TEST] Version: {v}") - except Exception: + v = getattr(sageattention, '__version__', None) + if v: print(f'[TEST] Version: {v}') + except: pass sys.exit(0) except ImportError as e: - print(f"[TEST] Sage Attention import: FAILED - {e}") + print(f'[TEST] SageAttention import: FAILED - {e}') sys.exit(1) except Exception as e: - print(f"[TEST] Sage Attention test: ERROR - {e}") + print(f'[TEST] SageAttention test: ERROR - {e}') sys.exit(1) -PY +" 2>/dev/null } -# Main GPU detection and Sage Attention setup +# Main GPU detection and SageAttention setup setup_sage_attention() { + # Export build-visible status flags export SAGE_ATTENTION_BUILT=0 export SAGE_ATTENTION_AVAILABLE=0 if ! detect_gpu_generations; then - log "No GPUs detected, skipping Sage Attention setup" + log "No GPUs detected, skipping SageAttention setup" return 0 fi determine_sage_strategy if needs_rebuild || ! test_sage_attention; then - log "Building Sage Attention..." + log "Building SageAttention..." 
if install_triton_version && build_sage_attention_mixed && test_sage_attention; then export SAGE_ATTENTION_BUILT=1 export SAGE_ATTENTION_AVAILABLE=1 - log "Sage Attention is built and importable; enable with FORCE_SAGE_ATTENTION=1" + log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable it at startup" else export SAGE_ATTENTION_BUILT=0 export SAGE_ATTENTION_AVAILABLE=0 - log "WARNING: Sage Attention is not available after build attempt" + log "WARNING: SageAttention is not available after build attempt" fi else export SAGE_ATTENTION_BUILT=1 export SAGE_ATTENTION_AVAILABLE=1 - log "Sage Attention already built and importable for current GPU configuration" + log "SageAttention already built and importable for current GPU configuration" fi } @@ -298,7 +257,6 @@ setup_sage_attention() { if [ "$(id -u)" = "0" ]; then if [ ! -f "$PERMISSIONS_SET_FLAG" ]; then log "Setting up user permissions..." - if getent group "${PGID}" >/dev/null; then EXISTING_GRP="$(getent group "${PGID}" | cut -d: -f1)" usermod -g "${EXISTING_GRP}" "${APP_USER}" || true @@ -306,18 +264,16 @@ if [ "$(id -u)" = "0" ]; then else groupmod -o -g "${PGID}" "${APP_GROUP}" || true fi - usermod -o -u "${PUID}" "${APP_USER}" || true - mkdir -p "/home/${APP_USER}" for d in "$BASE_DIR" "/home/$APP_USER"; do [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true done - # Make Python system install targets writable (under /usr/local only) readarray -t PY_PATHS < <(python - <<'PY' import sys, sysconfig, os, datetime def log(msg): + import datetime ts = datetime.datetime.now().strftime("%H:%M:%S") print(f"[bootstrap:python {ts}] {msg}", file=sys.stderr, flush=True) log("Determining writable Python install targets via sysconfig.get_paths()") @@ -338,7 +294,6 @@ if d: log("Finished emitting target directories") PY ) - for d in "${PY_PATHS[@]}"; do case "$d" in /usr/local|/usr/local/*) @@ -349,18 +304,16 @@ PY *) : ;; esac done - touch "$PERMISSIONS_SET_FLAG" chown "${APP_USER}:${APP_GROUP}" 
"$PERMISSIONS_SET_FLAG" log "User permissions configured" else log "User permissions already configured, skipping..." fi - exec runuser -u "${APP_USER}" -- "$0" "$@" fi -# Setup Sage Attention for detected GPU configuration +# Setup SageAttention for detected GPU configuration setup_sage_attention # Ensure ComfyUI-Manager exists or update it (shallow) @@ -379,58 +332,56 @@ export PATH="$HOME/.local/bin:$PATH" pyver="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')" export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}" -# First-run detection for custom node deps -RUN_NODE_INSTALL=0 -if [ ! -f "$FIRST_RUN_FLAG" ]; then - RUN_NODE_INSTALL=1 - log "First run detected: installing custom node dependencies" -elif [ "${COMFY_AUTO_INSTALL:-0}" = "1" ]; then - RUN_NODE_INSTALL=1 - log "COMFY_AUTO_INSTALL=1: forcing custom node dependency install" +# First-run driven auto-install of custom node deps +if [ ! -f "$FIRST_RUN_FLAG" ] || [ "${COMFY_FORCE_INSTALL:-0}" = "1" ]; then + if [ "${COMFY_AUTO_INSTALL:-1}" = "1" ]; then + log "First run detected or forced; scanning custom nodes for requirements..." + # requirements*.txt + while IFS= read -r -d '' req; do + log "python -m pip install --user --upgrade -r $req" + python -m pip install --no-cache-dir --user --upgrade --upgrade-strategy only-if-needed -r "$req" || true + done < <(find "$CUSTOM_NODES_DIR" -maxdepth 3 -type f \( -iname 'requirements.txt' -o -iname 'requirements-*.txt' -o -path '*/requirements/*.txt' \) -print0) + + # pyproject.toml (exclude ComfyUI-Manager) + while IFS= read -r -d '' pjt; do + d="$(dirname "$pjt")" + log "python -m pip install --user . in $d" + (cd "$d" && python -m pip install --no-cache-dir --user .) 
|| true + done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'pyproject.toml' -not -path '*/ComfyUI-Manager/*' -print0) + + python -m pip check || true + else + log "COMFY_AUTO_INSTALL=0; skipping dependency install on first run" + fi + touch "$FIRST_RUN_FLAG" else - log "Not first run and COMFY_AUTO_INSTALL!=1: skipping custom node dependency install" + log "Not first run; skipping custom_nodes dependency install" fi -if [ "$RUN_NODE_INSTALL" = "1" ]; then - log "Scanning custom nodes for requirements..." - while IFS= read -r -d '' req; do - log "python -m pip install --user --upgrade -r $req" - python -m pip install --no-cache-dir --user --upgrade --upgrade-strategy only-if-needed -r "$req" || true - done < <(find "$CUSTOM_NODES_DIR" -maxdepth 3 -type f \( -iname 'requirements.txt' -o -iname 'requirements-*.txt' -o -path '*/requirements/*.txt' \) -print0) - - while IFS= read -r -d '' pjt; do - d="$(dirname "$pjt")" - log "python -m pip install --user . in $d" - (cd "$d" && python -m pip install --no-cache-dir --user .) 
|| true - done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'pyproject.toml' -not -path '*/ComfyUI-Manager/*' -print0) - - python -m pip check || true - touch "$FIRST_RUN_FLAG" || true -fi - -# Build ComfyUI command with Sage Attention flag only if forced +# Build ComfyUI command with SageAttention usage controlled only by FORCE_SAGE_ATTENTION COMFYUI_ARGS="" if [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ]; then if test_sage_attention; then COMFYUI_ARGS="--use-sage-attention" - log "Starting ComfyUI with Sage Attention forced by environment (FORCE_SAGE_ATTENTION=1)" + log "Starting ComfyUI with SageAttention enabled by environment (FORCE_SAGE_ATTENTION=1)" else - log "WARNING: FORCE_SAGE_ATTENTION=1 but Sage Attention import failed; starting without" + log "WARNING: FORCE_SAGE_ATTENTION=1 but SageAttention import failed; starting without" fi else if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then - log "Sage Attention is built and available; set FORCE_SAGE_ATTENTION=1 to enable it on boot" + log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable it at startup" else - log "Sage Attention not available; starting without it" + log "SageAttention not available; starting without it" fi fi cd "$BASE_DIR" +# Handle both direct execution and passed arguments if [ $# -eq 0 ]; then exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS else - if [ "${1:-}" = "python" ] && [ "${2:-}" = "main.py" ]; then + if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then shift 2 exec python main.py $COMFYUI_ARGS "$@" else