From 03908b9b047dcb6f42fe23690de0dc8ba9502ee1 Mon Sep 17 00:00:00 2001 From: clsferguson <48876201+clsferguson@users.noreply.github.com> Date: Wed, 1 Oct 2025 21:25:12 -0600 Subject: [PATCH] perf(entrypoint): probe GPUs first, log count/CC, exit early; unify installs as system-wide MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move a torch.cuda-based GPU probe to the top of the entrypoint, logging device count and compute capabilities and exiting immediately when no compatible GPU is found. Remove pip --user usage and PIP_USER so all runtime installs are system-wide (enabled by early chown of site-packages), avoiding uv’s lack of --user support while honoring the “no venvs” constraint. Keep Triton re-pin only when Turing strategy is detected; otherwise re-use baked Triton. Preserve SageAttention runtime build and Manager update behavior. --- entrypoint.sh | 91 +++++++++++++++++++++++++-------------------------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/entrypoint.sh b/entrypoint.sh index db3e70dbf..dcf0b2972 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -88,24 +88,34 @@ print(f"TORCH_CUDA_ARCH_LIST='{arch_list}'") for k,v in flags.items(): print(f"{k}={'true' if v else 'false'}") print(f"SAGE_STRATEGY='{strategy}'") -print(f"[GPU] Found {n} CUDA device(s); CC list: {arch_list or 'none'}; strategy={strategy}; compat>={7.5}:{compat}", file=sys.stderr) +print(f"[GPU] Found {n} CUDA device(s); CC list: {arch_list or 'none'}; strategy={strategy}; compat>=7.5:{compat}", file=sys.stderr) PY } -# --- install triton versions based on strategy --- +# --- Triton management (conditional, system-wide) --- install_triton_version() { + # Query existing version; only change if strategy truly requires + local cur="" + cur="$(python - <<'PY' 2>/dev/null || true +try: + import importlib.metadata as md + print(md.version("triton")) +except Exception: + pass +PY +)" case "${SAGE_STRATEGY:-fallback}" in "mixed_with_turing"|"turing_only") - log "Installing Triton 3.2.0 for Turing compatibility" - python -m pip install --user --force-reinstall "triton==3.2.0" || python -m pip install --user --force-reinstall triton || true - ;; - "blackwell_capable"|"hopper_capable") - log "Installing latest Triton for Hopper/Blackwell" - python -m pip install --user --force-reinstall triton || python -m pip install --user --force-reinstall --pre triton || python -m pip install --user --force-reinstall "triton>=3.2.0" || true + if [ "$cur" != "3.2.0" ]; then + log "Installing Triton 3.2.0 for Turing compatibility (current: ${cur:-none})" + python -m pip install --no-cache-dir "triton==3.2.0" || true + else + log "Triton 3.2.0 already present; skipping" + fi ;; *) - log "Installing latest stable Triton" - python -m pip install --user --force-reinstall triton || { log "WARNING: Triton installation failed"; return 1; } + # Image bakes Triton==3.4.0; leave as-is + log "Using baked Triton (${cur:-unknown}); no change" ;; esac } @@ -136,7 +146,7 @@ build_sage_attention_mixed() { local jobs; jobs="$(decide_build_jobs)" log "Using MAX_JOBS=${jobs} for SageAttention build" - if MAX_JOBS="${jobs}" python -m pip install --user --no-build-isolation .; then + if MAX_JOBS="${jobs}" python -m pip install --no-build-isolation .; then echo "${SAGE_STRATEGY:-fallback}|${TORCH_CUDA_ARCH_LIST:-}" > "$SAGE_ATTENTION_BUILT_FLAG" log "SageAttention built successfully" cd "$BASE_DIR"; return 0 @@ -187,6 +197,18 @@ setup_sage_attention() { fi } +# --- early GPU probe and exit (before heavy setup) --- +eval "$(probe_and_prepare_gpu)" +log "GPU probe: ${GPU_COUNT:-0} CUDA device(s); CC list: ${TORCH_CUDA_ARCH_LIST:-none}; strategy=${SAGE_STRATEGY:-fallback}" +if [ "${GPU_COUNT:-0}" -eq 0 ]; then + log "No NVIDIA GPU detected; shutting down." + exit 0 +fi +if [ "${COMPAT_GE_75:-0}" -ne 1 ]; then + log "GPU compute capability < 7.5; shutting down." + exit 0 +fi + # --- root to runtime user --- if [ "$(id -u)" = "0" ]; then if [ ! -f "$PERMISSIONS_SET_FLAG" ]; then @@ -198,6 +220,7 @@ if [ "$(id -u)" = "0" ]; then mkdir -p "/home/${APP_USER}" for d in "$BASE_DIR" "/home/$APP_USER"; do [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true; done + # Make system site-packages writable by the runtime user (no venvs; system-wide installs) readarray -t PY_PATHS < <(python - <<'PY' import sys, sysconfig, os, site, datetime def log(m): print(f"[bootstrap:python {datetime.datetime.now().strftime('%H:%M:%S')}] {m}", file=sys.stderr, flush=True) @@ -246,29 +269,24 @@ fi export PATH="$HOME/.local/bin:$PATH" pyver="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')" export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}" -export PIP_USER=1 export PIP_PREFER_BINARY=1 -# --- single GPU probe + early exit --- +# --- refresh GPU probe after user switch (no exit) --- eval "$(probe_and_prepare_gpu)" -if [ "${GPU_COUNT:-0}" -eq 0 ] || [ "${COMPAT_GE_75:-0}" -ne 1 ]; then - log "No compatible NVIDIA GPU (compute capability >= 7.5) detected; shutting down." - exit 0 -fi +log "GPU probe (post-switch): ${GPU_COUNT:-0} CUDA device(s); CC list: ${TORCH_CUDA_ARCH_LIST:-none}; strategy=${SAGE_STRATEGY:-fallback}" -# --- Ensure package manager and Manager deps are available --- -# Ensure python -m pip works (bootstrap if needed) +# Ensure pip works python -m pip --version >/dev/null 2>&1 || python -m ensurepip --upgrade >/dev/null 2>&1 || true python -m pip --version >/dev/null 2>&1 || log "WARNING: pip still not available after ensurepip" -# Ensure ComfyUI-Manager minimal Python deps -python - <<'PY' || python -m pip install --no-cache-dir --user toml || true +# Ensure minimal Python deps for ComfyUI-Manager (pre-baked, but verify) +python - <<'PY' || python -m pip install --no-cache-dir toml GitPython || true import sys -try: - import toml # noqa - sys.exit(0) -except Exception: - sys.exit(1) +import importlib +for m in ("toml","git"): + try: importlib.import_module(m) + except Exception: sys.exit(1) +sys.exit(0) PY # --- SageAttention setup using probed data --- @@ -296,7 +314,7 @@ if [ ! -f "$FIRST_RUN_FLAG" ] || [ "${COMFY_FORCE_INSTALL:-0}" = "1" ]; then [ "$base" = "ComfyUI-Manager" ] && continue if [ -f "$d/requirements.txt" ]; then log "Installing requirements for node: $base" - python -m pip install --no-cache-dir --user --upgrade --upgrade-strategy only-if-needed -r "$d/requirements.txt" || true + python -m pip install --no-cache-dir --upgrade --upgrade-strategy only-if-needed -r "$d/requirements.txt" || true fi if [ -f "$d/install.py" ]; then log "Running install.py for node: $base" @@ -313,25 +331,6 @@ else log "Not first run; skipping custom_nodes dependency install" fi -# --- Ensure ONNX Runtime has CUDA provider (GPU) --- -python - <<'PY' || -import sys -try: - import onnxruntime as ort - ok = "CUDAExecutionProvider" in ort.get_available_providers() - sys.exit(0 if ok else 1) -except Exception: - sys.exit(1) -PY - log "Installing onnxruntime-gpu for CUDAExecutionProvider..." - python -m pip uninstall -y onnxruntime || true - python -m pip install --no-cache-dir --user "onnxruntime-gpu>=1.19" || true - python - <<'P2' || log "WARNING: ONNX Runtime CUDA provider not available after installation" -import onnxruntime as ort, sys -print("ORT providers:", ort.get_available_providers()) -sys.exit(0 if "CUDAExecutionProvider" in ort.get_available_providers() else 1) -P2 - # --- launch ComfyUI --- COMFYUI_ARGS="" if [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ] && test_sage_attention; then