From f6d49f33b74a344c92998d42c19743322dff8753 Mon Sep 17 00:00:00 2001 From: clsferguson <48876201+clsferguson@users.noreply.github.com> Date: Fri, 26 Sep 2025 22:37:24 -0600 Subject: [PATCH] entrypoint: derive correct arch list; add user-tunable build parallelism; fix Sage flags; first-run installs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Auto-derive TORCH_CUDA_ARCH_LIST from torch device capabilities (unique, sorted, optional +PTX) to cover all supported GPUs: Turing 7.5, Ampere 8.0/8.6/8.7, Ada 8.9, Hopper 9.0, and Blackwell 10.0 & 12.0/12.1; add name-based fallbacks for mixed or torch-less scenarios. - Add user-tunable build parallelism with SAGE_MAX_JOBS (exported as MAX_JOBS to the PyTorch cpp_extension/ninja build) to cap -j; fall back to a RAM/CPU heuristic to prevent OOM “Killed” during CUDA/C++ builds. - Correct Sage flags: SAGE_ATTENTION_AVAILABLE only signals “built/installed,” while FORCE_SAGE_ATTENTION=1 enables Sage at startup; fix logs to reference FORCE_SAGE_ATTENTION. - Maintain Triton install strategy by GPU generation for compatibility and performance. - Add first-run dependency installation with COMFY_FORCE_INSTALL override; keep permissions bootstrap and minor logging/URL cleanups. 
--- entrypoint.sh | 435 +++++++++++++++++++++----------------------------- 1 file changed, 184 insertions(+), 251 deletions(-) diff --git a/entrypoint.sh b/entrypoint.sh index f843bc87d..0af58905f 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,6 +1,7 @@ #!/bin/bash set -euo pipefail +# --- config --- APP_USER=${APP_USER:-appuser} APP_GROUP=${APP_GROUP:-appuser} PUID=${PUID:-1000} @@ -12,379 +13,311 @@ SAGE_ATTENTION_BUILT_FLAG="$SAGE_ATTENTION_DIR/.built" PERMISSIONS_SET_FLAG="$BASE_DIR/.permissions_set" FIRST_RUN_FLAG="$BASE_DIR/.first_run_done" -# Function to log with timestamp -log() { - echo "[$(date '+%H:%M:%S')] $1" +# Sage flags: +# - SAGE_ATTENTION_AVAILABLE: set by script to indicate built/importable +# - FORCE_SAGE_ATTENTION=1: force app to enable at startup (default 0) + +# --- logging --- +log() { echo "[$(date '+%H:%M:%S')] $1"; } + +# --- build parallelism (single knob) --- +# Public knob: SAGE_MAX_JOBS. If unset, pick RAM/CPU heuristic. +decide_build_jobs() { + if [ -n "${SAGE_MAX_JOBS:-}" ]; then echo "$SAGE_MAX_JOBS"; return; fi + local mem_kb=$(awk '/MemTotal:/ {print $2}' /proc/meminfo 2>/dev/null || echo 0) + local cpu=$(nproc) cap=24 jobs + if [ "$mem_kb" -le $((8*1024*1024)) ]; then jobs=2 + elif [ "$mem_kb" -le $((12*1024*1024)) ]; then jobs=3 + elif [ "$mem_kb" -le $((24*1024*1024)) ]; then jobs=4 + elif [ "$mem_kb" -le $((64*1024*1024)) ]; then jobs=$(( cpu<8 ? 
cpu : 8 )) + else jobs=$cpu; [ "$jobs" -gt "$cap" ] && jobs=$cap + fi + echo "$jobs" } -# Function to test PyTorch CUDA compatibility +# --- CUDA/Torch checks --- test_pytorch_cuda() { python -c " import torch, sys if not torch.cuda.is_available(): - print('[ERROR] PyTorch CUDA not available') - sys.exit(1) -c = torch.cuda.device_count() -print(f'[TEST] PyTorch CUDA available with {c} devices') + print('[ERROR] PyTorch CUDA not available'); sys.exit(1) +c=torch.cuda.device_count(); print(f'[TEST] PyTorch CUDA available with {c} devices') for i in range(c): - props = torch.cuda.get_device_properties(i) - print(f'[TEST] GPU {i}: {props.name} (Compute {props.major}.{props.minor})') + p=torch.cuda.get_device_properties(i) + print(f'[TEST] GPU {i}: {p.name} (Compute {p.major}.{p.minor})') " 2>/dev/null } -# Function to detect all GPUs and their generations +# Derive arch list directly from Torch; optional +PTX via SAGE_PTX_FALLBACK=1 +compute_arch_list_from_torch() { + python - <<'PY' 2>/dev/null +import os, sys +try: + import torch + if not torch.cuda.is_available(): + print(""); sys.exit(0) + caps = {f"{torch.cuda.get_device_properties(i).major}.{torch.cuda.get_device_properties(i).minor}" + for i in range(torch.cuda.device_count())} + ordered = sorted(caps, key=lambda s: tuple(int(x) for x in s.split("."))) + if not ordered: print(""); sys.exit(0) + if os.environ.get("SAGE_PTX_FALLBACK","0")=="1": + highest = ordered[-1]; print(";".join(ordered+[highest + "+PTX"])) + else: + print(";".join(ordered)) +except Exception: + print("") +PY +} + +# Fallback name-based mapping across Turing→Blackwell detect_gpu_generations() { - local gpu_info - gpu_info=$(nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null || echo "") - local has_rtx20=false - local has_rtx30=false - local has_rtx40=false - local has_rtx50=false - local gpu_count=0 - - if [ -z "$gpu_info" ]; then - log "No NVIDIA GPUs detected" - return 1 - fi - + local info=$(nvidia-smi --query-gpu=name 
--format=csv,noheader,nounits 2>/dev/null || echo "") + local has_turing=false has_amp_ga100=false has_amp_ga10x=false has_amp_ga10b=false + local has_ada=false has_hopper=false has_bw_cons=false has_bw_dc=false + local n=0 + [ -z "$info" ] && { log "No NVIDIA GPUs detected"; return 1; } log "Detecting GPU generations:" - while IFS= read -r gpu; do - gpu_count=$((gpu_count + 1)) - log " GPU $gpu_count: $gpu" - case "$gpu" in - *"RTX 20"*|*"2060"*|*"2070"*|*"2080"*|*"2090"*) has_rtx20=true ;; - *"RTX 30"*|*"3060"*|*"3070"*|*"3080"*|*"3090"*) has_rtx30=true ;; - *"RTX 40"*|*"4060"*|*"4070"*|*"4080"*|*"4090"*) has_rtx40=true ;; - *"RTX 50"*|*"5060"*|*"5070"*|*"5080"*|*"5090"*) has_rtx50=true ;; + while IFS= read -r g; do + n=$((n+1)); log " GPU $n: $g" + case "$g" in + *"RTX 20"*|*"T4"*) has_turing=true ;; + *"A100"*|*"A30"*|*"A40"*) has_amp_ga100=true ;; + *"RTX 30"*|*"RTX 3090"*|*"RTX 3080"*|*"RTX 3070"*|*"RTX 3060"*) has_amp_ga10x=true ;; + *"Orin"*|*"Jetson"*) has_amp_ga10b=true ;; + *"RTX 40"*|*"4090"*|*"L40"*|*"L4"*) has_ada=true ;; + *"H100"*|*"H200"*|*"GH200"*) has_hopper=true ;; + *"RTX 50"*|*"5090"*|*"5080"*|*"5070"*|*"5060"*|*"PRO "*Blackwell*|*"PRO 4000 Blackwell"*) has_bw_cons=true ;; + *"B200"*|*"B100"*|*"GB200"*|*"B40"*|*"RTX 6000 Blackwell"*|*"RTX 5000 Blackwell"*) has_bw_dc=true ;; esac - done <<< "$gpu_info" - - export DETECTED_RTX20=$has_rtx20 - export DETECTED_RTX30=$has_rtx30 - export DETECTED_RTX40=$has_rtx40 - export DETECTED_RTX50=$has_rtx50 - export GPU_COUNT=$gpu_count - - log "Detection summary: RTX20=$has_rtx20, RTX30=$has_rtx30, RTX40=$has_rtx40, RTX50=$has_rtx50" - - if test_pytorch_cuda; then - log "PyTorch CUDA compatibility confirmed" - else - log "WARNING: PyTorch CUDA compatibility issues detected" - fi + done <<< "$info" + export DET_TURING=$has_turing DET_AMP80=$has_amp_ga100 DET_AMP86=$has_amp_ga10x DET_AMP87=$has_amp_ga10b + export DET_ADA=$has_ada DET_HOPPER=$has_hopper DET_BW12=$has_bw_cons DET_BW10=$has_bw_dc + export 
GPU_COUNT=$n + log "Summary: Turing=$has_turing Amp(8.0)=$has_amp_ga100 Amp(8.6)=$has_amp_ga10x Amp(8.7)=$has_amp_ga10b Ada=$has_ada Hopper=$has_hopper Blackwell(12.x)=$has_bw_cons Blackwell(10.0)=$has_bw_dc" + test_pytorch_cuda && log "PyTorch CUDA compatibility confirmed" || log "WARNING: PyTorch CUDA compatibility issues detected" } -# Function to determine optimal Sage Attention strategy for mixed GPUs determine_sage_strategy() { - local strategy="" - if [ "$DETECTED_RTX20" = "true" ]; then - if [ "$DETECTED_RTX30" = "true" ] || [ "$DETECTED_RTX40" = "true" ] || [ "$DETECTED_RTX50" = "true" ]; then - strategy="mixed_with_rtx20" - log "Mixed GPU setup detected with RTX 20 series - using compatibility mode" - else - strategy="rtx20_only" - log "RTX 20 series only detected" - fi - elif [ "$DETECTED_RTX50" = "true" ]; then - strategy="rtx50_capable" - log "RTX 50 series detected - using latest optimizations" - elif [ "$DETECTED_RTX40" = "true" ] || [ "$DETECTED_RTX30" = "true" ]; then - strategy="rtx30_40_optimized" - log "RTX 30/40 series detected - using standard optimizations" - else - strategy="fallback" - log "Unknown or unsupported GPU configuration - using fallback" - fi - export SAGE_STRATEGY=$strategy + local s="" + if [ "${DET_TURING:-false}" = "true" ]; then + if [ "${DET_AMP80:-false}" = "true" ] || [ "${DET_AMP86:-false}" = "true" ] || [ "${DET_AMP87:-false}" = "true" ] || [ "${DET_ADA:-false}" = "true" ] || [ "${DET_HOPPER:-false}" = "true" ] || [ "${DET_BW12:-false}" = "true" ] || [ "${DET_BW10:-false}" = "true" ]; then + s="mixed_with_turing"; log "Mixed rig including Turing - using compatibility mode" + else s="turing_only"; log "Turing-only rig detected"; fi + elif [ "${DET_BW12:-false}" = "true" ] || [ "${DET_BW10:-false}" = "true" ]; then s="blackwell_capable"; log "Blackwell detected - using latest optimizations" + elif [ "${DET_HOPPER:-false}" = "true" ]; then s="hopper_capable"; log "Hopper detected - using modern optimizations" + elif [ 
"${DET_ADA:-false}" = "true" ] || [ "${DET_AMP86:-false}" = "true" ] || [ "${DET_AMP87:-false}" = "true" ] || [ "${DET_AMP80:-false}" = "true" ]; then + s="ampere_ada_optimized"; log "Ampere/Ada detected - using standard optimizations" + else s="fallback"; log "Unknown configuration - using fallback"; fi + export SAGE_STRATEGY=$s } -# Function to install appropriate Triton version based on strategy install_triton_version() { case "$SAGE_STRATEGY" in - "mixed_with_rtx20"|"rtx20_only") - log "Installing Triton 3.2.0 for broader compatibility on Turing-era GPUs" - python -m pip install --user --force-reinstall "triton==3.2.0" || { - log "WARNING: Failed to pin Triton 3.2.0, trying latest" - python -m pip install --user --force-reinstall triton || true - } + "mixed_with_turing"|"turing_only") + log "Installing Triton 3.2.0 for Turing compatibility" + python -m pip install --user --force-reinstall "triton==3.2.0" || python -m pip install --user --force-reinstall triton || true ;; - "rtx50_capable") - log "Installing latest Triton for Blackwell/RTX 50" - python -m pip install --user --force-reinstall triton || \ - python -m pip install --user --force-reinstall --pre triton || { - log "WARNING: Latest Triton install failed, falling back to >=3.2.0" - python -m pip install --user --force-reinstall "triton>=3.2.0" || true - } + "blackwell_capable"|"hopper_capable") + log "Installing latest Triton for Hopper/Blackwell" + python -m pip install --user --force-reinstall triton || python -m pip install --user --force-reinstall --pre triton || python -m pip install --user --force-reinstall "triton>=3.2.0" || true ;; *) log "Installing latest stable Triton" - python -m pip install --user --force-reinstall triton || { - log "WARNING: Triton installation failed, continuing without" - return 1 - } + python -m pip install --user --force-reinstall triton || { log "WARNING: Triton installation failed"; return 1; } ;; esac } -# Function to build Sage Attention with architecture-specific 
optimizations build_sage_attention_mixed() { - log "Building Sage Attention for current GPU environment..." - mkdir -p "$SAGE_ATTENTION_DIR" - cd "$SAGE_ATTENTION_DIR" + log "Building Sage Attention..." + mkdir -p "$SAGE_ATTENTION_DIR"; cd "$SAGE_ATTENTION_DIR" - # Compute capability mapping for TORCH_CUDA_ARCH_LIST: - # Turing = 7.5, Ampere = 8.6, Ada = 8.9, Blackwell (RTX 50) = 10.0 - # See NVIDIA Blackwell guide (sm_100/compute_100 ~ 10.0) and PyTorch arch list semantics. [doc refs in text] - local cuda_arch_list="" - [ "$DETECTED_RTX20" = "true" ] && cuda_arch_list="${cuda_arch_list}7.5;" - [ "$DETECTED_RTX30" = "true" ] && cuda_arch_list="${cuda_arch_list}8.6;" - [ "$DETECTED_RTX40" = "true" ] && cuda_arch_list="${cuda_arch_list}8.9;" - [ "$DETECTED_RTX50" = "true" ] && cuda_arch_list="${cuda_arch_list}10.0;" - cuda_arch_list=${cuda_arch_list%;} - - export TORCH_CUDA_ARCH_LIST="$cuda_arch_list" + local arch_list="${SAGE_ARCH_LIST_OVERRIDE:-$(compute_arch_list_from_torch)}" + if [ -z "$arch_list" ]; then + local tmp="" + [ "${DET_TURING:-false}" = "true" ] && tmp="${tmp}7.5;" + [ "${DET_AMP80:-false}" = "true" ] && tmp="${tmp}8.0;" + [ "${DET_AMP86:-false}" = "true" ] && tmp="${tmp}8.6;" + [ "${DET_AMP87:-false}" = "true" ] && tmp="${tmp}8.7;" + [ "${DET_ADA:-false}" = "true" ] && tmp="${tmp}8.9;" + [ "${DET_HOPPER:-false}" = "true" ] && tmp="${tmp}9.0;" + [ "${DET_BW10:-false}" = "true" ] && tmp="${tmp}10.0;" + [ "${DET_BW12:-false}" = "true" ] && tmp="${tmp}12.0;" + arch_list="${tmp%;}" + fi + export TORCH_CUDA_ARCH_LIST="$arch_list" log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" case "$SAGE_STRATEGY" in - "mixed_with_rtx20"|"rtx20_only") - log "Cloning SageAttention v1.0 for RTX 20 series compatibility" - if [ -d "SageAttention/.git" ]; then - cd SageAttention - git fetch --depth 1 origin || return 1 - git checkout v1.0 2>/dev/null || git checkout -b v1.0 origin/v1.0 || return 1 - git reset --hard origin/v1.0 || return 1 - else - rm -rf SageAttention - 
git clone --depth 1 https://github.com/thu-ml/SageAttention.git -b v1.0 || return 1 - cd SageAttention - fi + "mixed_with_turing"|"turing_only") + log "Cloning SageAttention v1.0 for Turing" + if [ -d "SageAttention/.git" ]; then cd SageAttention; git fetch --depth 1 origin || return 1; git checkout v1.0 2>/dev/null || git checkout -b v1.0 origin/v1.0 || return 1; git reset --hard origin/v1.0 || return 1 + else rm -rf SageAttention; git clone --depth 1 https://github.com/thu-ml/SageAttention.git -b v1.0 || return 1; cd SageAttention; fi ;; *) - log "Cloning latest SageAttention for modern GPUs" - if [ -d "SageAttention/.git" ]; then - cd SageAttention - git fetch --depth 1 origin || return 1 - git reset --hard origin/main || return 1 - else - rm -rf SageAttention - git clone --depth 1 https://github.com/thu-ml/SageAttention.git || return 1 - cd SageAttention - fi + log "Cloning latest SageAttention" + if [ -d "SageAttention/.git" ]; then cd SageAttention; git fetch --depth 1 origin || return 1; git reset --hard origin/main || return 1 + else rm -rf SageAttention; git clone --depth 1 https://github.com/thu-ml/SageAttention.git || return 1; cd SageAttention; fi ;; esac - log "Building SageAttention (no-build-isolation) ..." 
- if MAX_JOBS=$(nproc) python -m pip install --user --no-build-isolation .; then - echo "$SAGE_STRATEGY" > "$SAGE_ATTENTION_BUILT_FLAG" - log "SageAttention built successfully for strategy: $SAGE_STRATEGY" - cd "$BASE_DIR" - return 0 + [ "${SAGE_VERBOSE_BUILD:-0}" = "1" ] && export TORCH_CPP_BUILD_VERBOSE=1 + local jobs; jobs="$(decide_build_jobs)" + log "Using MAX_JOBS=${jobs} for SageAttention build" + + # Set MAX_JOBS only for this build call to avoid leaking globally + if MAX_JOBS="${jobs}" python -m pip install --user --no-build-isolation .; then + echo "$SAGE_STRATEGY|$TORCH_CUDA_ARCH_LIST" > "$SAGE_ATTENTION_BUILT_FLAG" + log "SageAttention built successfully" + cd "$BASE_DIR"; return 0 else log "ERROR: SageAttention build failed" - cd "$BASE_DIR" - return 1 + cd "$BASE_DIR"; return 1 fi } -# Function to check if current build matches detected GPUs needs_rebuild() { - if [ ! -f "$SAGE_ATTENTION_BUILT_FLAG" ]; then - return 0 - fi - local built_strategy - built_strategy=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "unknown") - if [ "$built_strategy" != "$SAGE_STRATEGY" ]; then - log "GPU configuration changed (was: $built_strategy, now: $SAGE_STRATEGY) - rebuild needed" - return 0 - fi + if [ ! 
-f "$SAGE_ATTENTION_BUILT_FLAG" ]; then return 0; fi + local x; x=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "") + local prev_strategy="${x%%|*}"; local prev_arch="${x#*|}" + if [ "$prev_strategy" != "$SAGE_STRATEGY" ] || [ "$prev_arch" != "$TORCH_CUDA_ARCH_LIST" ]; then return 0; fi return 1 } -# Function to check if SageAttention is working test_sage_attention() { python -c " import sys try: - import sageattention - print('[TEST] SageAttention import: SUCCESS') - try: - v = getattr(sageattention, '__version__', None) - if v: print(f'[TEST] Version: {v}') - except: - pass - sys.exit(0) + import sageattention; print('[TEST] SageAttention import: SUCCESS') + v=getattr(sageattention,'__version__',None) + if v: print(f'[TEST] Version: {v}'); sys.exit(0) except ImportError as e: - print(f'[TEST] SageAttention import: FAILED - {e}') - sys.exit(1) + print(f'[TEST] SageAttention import: FAILED - {e}'); sys.exit(1) except Exception as e: - print(f'[TEST] SageAttention test: ERROR - {e}') - sys.exit(1) + print(f'[TEST] SageAttention test: ERROR - {e}'); sys.exit(1) " 2>/dev/null } -# Main GPU detection and SageAttention setup setup_sage_attention() { - # Export build-visible status flags - export SAGE_ATTENTION_BUILT=0 - export SAGE_ATTENTION_AVAILABLE=0 - - if ! detect_gpu_generations; then - log "No GPUs detected, skipping SageAttention setup" - return 0 - fi - + export SAGE_ATTENTION_BUILT=0 SAGE_ATTENTION_AVAILABLE=0 + if ! 
detect_gpu_generations; then log "No GPUs detected, skipping SageAttention setup"; return 0; fi determine_sage_strategy + # Resolve arch list early + export TORCH_CUDA_ARCH_LIST="${SAGE_ARCH_LIST_OVERRIDE:-$(compute_arch_list_from_torch)}" + if [ -z "$TORCH_CUDA_ARCH_LIST" ]; then + local tmp="" + [ "${DET_TURING:-false}" = "true" ] && tmp="${tmp}7.5;" + [ "${DET_AMP80:-false}" = "true" ] && tmp="${tmp}8.0;" + [ "${DET_AMP86:-false}" = "true" ] && tmp="${tmp}8.6;" + [ "${DET_AMP87:-false}" = "true" ] && tmp="${tmp}8.7;" + [ "${DET_ADA:-false}" = "true" ] && tmp="${tmp}8.9;" + [ "${DET_HOPPER:-false}" = "true" ] && tmp="${tmp}9.0;" + [ "${DET_BW10:-false}" = "true" ] && tmp="${tmp}10.0;" + [ "${DET_BW12:-false}" = "true" ] && tmp="${tmp}12.0;" + export TORCH_CUDA_ARCH_LIST="${tmp%;}" + fi + log "Resolved TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" + if needs_rebuild || ! test_sage_attention; then log "Building SageAttention..." if install_triton_version && build_sage_attention_mixed && test_sage_attention; then - export SAGE_ATTENTION_BUILT=1 - export SAGE_ATTENTION_AVAILABLE=1 + export SAGE_ATTENTION_BUILT=1 SAGE_ATTENTION_AVAILABLE=1 log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable it at startup" else - export SAGE_ATTENTION_BUILT=0 - export SAGE_ATTENTION_AVAILABLE=0 + export SAGE_ATTENTION_BUILT=0 SAGE_ATTENTION_AVAILABLE=0 log "WARNING: SageAttention is not available after build attempt" fi else - export SAGE_ATTENTION_BUILT=1 - export SAGE_ATTENTION_AVAILABLE=1 - log "SageAttention already built and importable for current GPU configuration" + export SAGE_ATTENTION_BUILT=1 SAGE_ATTENTION_AVAILABLE=1 + log "SageAttention already built and importable" fi } -# If running as root, handle permissions and user mapping +# --- root to runtime user --- if [ "$(id -u)" = "0" ]; then if [ ! -f "$PERMISSIONS_SET_FLAG" ]; then log "Setting up user permissions..." 
if getent group "${PGID}" >/dev/null; then - EXISTING_GRP="$(getent group "${PGID}" | cut -d: -f1)" - usermod -g "${EXISTING_GRP}" "${APP_USER}" || true - APP_GROUP="${EXISTING_GRP}" - else - groupmod -o -g "${PGID}" "${APP_GROUP}" || true - fi + EXISTING_GRP="$(getent group "${PGID}" | cut -d: -f1)"; usermod -g "${EXISTING_GRP}" "${APP_USER}" || true; APP_GROUP="${EXISTING_GRP}" + else groupmod -o -g "${PGID}" "${APP_GROUP}" || true; fi usermod -o -u "${PUID}" "${APP_USER}" || true mkdir -p "/home/${APP_USER}" - for d in "$BASE_DIR" "/home/$APP_USER"; do - [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true - done + for d in "$BASE_DIR" "/home/$APP_USER"; do [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true; done readarray -t PY_PATHS < <(python - <<'PY' import sys, sysconfig, os, datetime -def log(msg): - import datetime - ts = datetime.datetime.now().strftime("%H:%M:%S") - print(f"[bootstrap:python {ts}] {msg}", file=sys.stderr, flush=True) +def log(m): print(f"[bootstrap:python {datetime.datetime.now().strftime('%H:%M:%S')}] {m}", file=sys.stderr, flush=True) log("Determining writable Python install targets via sysconfig.get_paths()") -keys = ("purelib","platlib","scripts","include","platinclude","data") -paths = sysconfig.get_paths() -for k in keys: - v = paths.get(k) - if v: - print(v) - log(f"emit {k} -> {v}") -d = paths.get("data") +for k in ("purelib","platlib","scripts","include","platinclude","data"): + v = sysconfig.get_paths().get(k) + if v: print(v); log(f"emit {k} -> {v}") +d = sysconfig.get_paths().get("data") if d: - share = os.path.join(d, "share") - man1 = os.path.join(share, "man", "man1") - print(share) - print(man1) - log(f"emit wheel data dirs -> {share}, {man1}") -log("Finished emitting target directories") + share=os.path.join(d,"share"); man1=os.path.join(share,"man","man1") + print(share); print(man1); log(f"emit wheel data dirs -> {share}, {man1}") PY ) for d in "${PY_PATHS[@]}"; do case "$d" in - 
/usr/local|/usr/local/*) - mkdir -p "$d" || true - chown -R "${APP_USER}:${APP_GROUP}" "$d" || true - chmod -R u+rwX,g+rwX "$d" || true - ;; + /usr/local|/usr/local/*) mkdir -p "$d" || true; chown -R "${APP_USER}:${APP_GROUP}" "$d" || true; chmod -R u+rwX,g+rwX "$d" || true ;; *) : ;; esac done - touch "$PERMISSIONS_SET_FLAG" - chown "${APP_USER}:${APP_GROUP}" "$PERMISSIONS_SET_FLAG" + touch "$PERMISSIONS_SET_FLAG"; chown "${APP_USER}:${APP_GROUP}" "$PERMISSIONS_SET_FLAG" log "User permissions configured" - else - log "User permissions already configured, skipping..." - fi + else log "User permissions already configured, skipping..."; fi exec runuser -u "${APP_USER}" -- "$0" "$@" fi -# Setup SageAttention for detected GPU configuration +# --- SageAttention setup --- setup_sage_attention -# Ensure ComfyUI-Manager exists or update it (shallow) +# --- ComfyUI-Manager sync --- if [ -d "$CUSTOM_NODES_DIR/ComfyUI-Manager/.git" ]; then - log "Updating ComfyUI-Manager in $CUSTOM_NODES_DIR/ComfyUI-Manager" - git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" fetch --depth 1 origin || true - git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" reset --hard origin/HEAD || true - git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" clean -fdx || true + log "Updating ComfyUI-Manager"; git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" fetch --depth 1 origin || true + git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" reset --hard origin/HEAD || true; git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" clean -fdx || true elif [ ! 
-d "$CUSTOM_NODES_DIR/ComfyUI-Manager" ]; then - log "Installing ComfyUI-Manager into $CUSTOM_NODES_DIR/ComfyUI-Manager" - git clone --depth 1 https://github.com/ltdrdata/ComfyUI-Manager.git "$CUSTOM_NODES_DIR/ComfyUI-Manager" || true + log "Installing ComfyUI-Manager"; git clone --depth 1 https://github.com/ltdrdata/ComfyUI-Manager.git "$CUSTOM_NODES_DIR/ComfyUI-Manager" || true fi -# User-site PATHs for --user installs (custom nodes) +# --- PATH/PYTHONPATH --- export PATH="$HOME/.local/bin:$PATH" pyver="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')" export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}" -# First-run driven auto-install of custom node deps +# --- first-run install of custom_nodes --- if [ ! -f "$FIRST_RUN_FLAG" ] || [ "${COMFY_FORCE_INSTALL:-0}" = "1" ]; then if [ "${COMFY_AUTO_INSTALL:-1}" = "1" ]; then - log "First run detected or forced; scanning custom nodes for requirements..." - # requirements*.txt + log "First run or forced; installing custom node dependencies..." while IFS= read -r -d '' req; do log "python -m pip install --user --upgrade -r $req" python -m pip install --no-cache-dir --user --upgrade --upgrade-strategy only-if-needed -r "$req" || true done < <(find "$CUSTOM_NODES_DIR" -maxdepth 3 -type f \( -iname 'requirements.txt' -o -iname 'requirements-*.txt' -o -path '*/requirements/*.txt' \) -print0) - - # pyproject.toml (exclude ComfyUI-Manager) while IFS= read -r -d '' pjt; do - d="$(dirname "$pjt")" - log "python -m pip install --user . in $d" + d="$(dirname "$pjt")"; log "python -m pip install --user . in $d" (cd "$d" && python -m pip install --no-cache-dir --user .) 
|| true done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'pyproject.toml' -not -path '*/ComfyUI-Manager/*' -print0) - python -m pip check || true - else - log "COMFY_AUTO_INSTALL=0; skipping dependency install on first run" - fi + else log "COMFY_AUTO_INSTALL=0; skipping dependency install"; fi touch "$FIRST_RUN_FLAG" else log "Not first run; skipping custom_nodes dependency install" fi -# Build ComfyUI command with SageAttention usage controlled only by FORCE_SAGE_ATTENTION +# --- launch ComfyUI --- COMFYUI_ARGS="" if [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ]; then - if test_sage_attention; then - COMFYUI_ARGS="--use-sage-attention" - log "Starting ComfyUI with SageAttention enabled by environment (FORCE_SAGE_ATTENTION=1)" - else - log "WARNING: FORCE_SAGE_ATTENTION=1 but SageAttention import failed; starting without" - fi + if test_sage_attention; then COMFYUI_ARGS="--use-sage-attention"; log "Starting ComfyUI with SageAttention (FORCE_SAGE_ATTENTION=1)" + else log "WARNING: FORCE_SAGE_ATTENTION=1 but import failed; starting without"; fi else - if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then - log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable it at startup" - else - log "SageAttention not available; starting without it" - fi + if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then log "SageAttention is built; set FORCE_SAGE_ATTENTION=1 to enable" + else log "SageAttention not available; starting without it"; fi fi cd "$BASE_DIR" - -# Handle both direct execution and passed arguments -if [ $# -eq 0 ]; then - exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS +if [ $# -eq 0 ]; then exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS else - if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then - shift 2 - exec python main.py $COMFYUI_ARGS "$@" - else - exec "$@" - fi + if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then shift 2; exec python main.py $COMFYUI_ARGS "$@" + else exec "$@"; fi fi