mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-04-15 13:02:35 +08:00
perf(entrypoint): probe GPUs first, log count/CC, exit early; unify installs as system-wide
Move the torch.cuda-based GPU probe to the top of the entrypoint so that it logs the device count and compute capabilities and exits immediately when no compatible GPU (compute capability >= 7.5) is found. Remove pip --user usage and the PIP_USER export so that all runtime installs are system-wide (made possible by the early chown of site-packages); this sidesteps uv’s lack of --user support while honoring the “no venvs” constraint. Re-pin Triton (to 3.2.0) only when a Turing strategy is detected; otherwise reuse the Triton version baked into the image. Preserve the SageAttention runtime build and the Manager update behavior.
This commit is contained in:
parent
c8d47b2560
commit
03908b9b04
@ -88,24 +88,34 @@ print(f"TORCH_CUDA_ARCH_LIST='{arch_list}'")
|
||||
for k,v in flags.items():
|
||||
print(f"{k}={'true' if v else 'false'}")
|
||||
print(f"SAGE_STRATEGY='{strategy}'")
|
||||
print(f"[GPU] Found {n} CUDA device(s); CC list: {arch_list or 'none'}; strategy={strategy}; compat>={7.5}:{compat}", file=sys.stderr)
|
||||
print(f"[GPU] Found {n} CUDA device(s); CC list: {arch_list or 'none'}; strategy={strategy}; compat>=7.5:{compat}", file=sys.stderr)
|
||||
PY
|
||||
}
|
||||
|
||||
# --- install triton versions based on strategy ---
|
||||
# --- Triton management (conditional, system-wide) ---
|
||||
install_triton_version() {
|
||||
# Query existing version; only change if strategy truly requires
|
||||
local cur=""
|
||||
cur="$(python - <<'PY' 2>/dev/null || true
|
||||
try:
|
||||
import importlib.metadata as md
|
||||
print(md.version("triton"))
|
||||
except Exception:
|
||||
pass
|
||||
PY
|
||||
)"
|
||||
case "${SAGE_STRATEGY:-fallback}" in
|
||||
"mixed_with_turing"|"turing_only")
|
||||
log "Installing Triton 3.2.0 for Turing compatibility"
|
||||
python -m pip install --user --force-reinstall "triton==3.2.0" || python -m pip install --user --force-reinstall triton || true
|
||||
;;
|
||||
"blackwell_capable"|"hopper_capable")
|
||||
log "Installing latest Triton for Hopper/Blackwell"
|
||||
python -m pip install --user --force-reinstall triton || python -m pip install --user --force-reinstall --pre triton || python -m pip install --user --force-reinstall "triton>=3.2.0" || true
|
||||
if [ "$cur" != "3.2.0" ]; then
|
||||
log "Installing Triton 3.2.0 for Turing compatibility (current: ${cur:-none})"
|
||||
python -m pip install --no-cache-dir "triton==3.2.0" || true
|
||||
else
|
||||
log "Triton 3.2.0 already present; skipping"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
log "Installing latest stable Triton"
|
||||
python -m pip install --user --force-reinstall triton || { log "WARNING: Triton installation failed"; return 1; }
|
||||
# Image bakes Triton==3.4.0; leave as-is
|
||||
log "Using baked Triton (${cur:-unknown}); no change"
|
||||
;;
|
||||
esac
|
||||
}
|
||||
@ -136,7 +146,7 @@ build_sage_attention_mixed() {
|
||||
local jobs; jobs="$(decide_build_jobs)"
|
||||
log "Using MAX_JOBS=${jobs} for SageAttention build"
|
||||
|
||||
if MAX_JOBS="${jobs}" python -m pip install --user --no-build-isolation .; then
|
||||
if MAX_JOBS="${jobs}" python -m pip install --no-build-isolation .; then
|
||||
echo "${SAGE_STRATEGY:-fallback}|${TORCH_CUDA_ARCH_LIST:-}" > "$SAGE_ATTENTION_BUILT_FLAG"
|
||||
log "SageAttention built successfully"
|
||||
cd "$BASE_DIR"; return 0
|
||||
@ -187,6 +197,18 @@ setup_sage_attention() {
|
||||
fi
|
||||
}
|
||||
|
||||
# --- early GPU probe and exit (before heavy setup) ---
|
||||
eval "$(probe_and_prepare_gpu)"
|
||||
log "GPU probe: ${GPU_COUNT:-0} CUDA device(s); CC list: ${TORCH_CUDA_ARCH_LIST:-none}; strategy=${SAGE_STRATEGY:-fallback}"
|
||||
if [ "${GPU_COUNT:-0}" -eq 0 ]; then
|
||||
log "No NVIDIA GPU detected; shutting down."
|
||||
exit 0
|
||||
fi
|
||||
if [ "${COMPAT_GE_75:-0}" -ne 1 ]; then
|
||||
log "GPU compute capability < 7.5; shutting down."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- root to runtime user ---
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
if [ ! -f "$PERMISSIONS_SET_FLAG" ]; then
|
||||
@ -198,6 +220,7 @@ if [ "$(id -u)" = "0" ]; then
|
||||
mkdir -p "/home/${APP_USER}"
|
||||
for d in "$BASE_DIR" "/home/$APP_USER"; do [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true; done
|
||||
|
||||
# Make system site-packages writable by the runtime user (no venvs; system-wide installs)
|
||||
readarray -t PY_PATHS < <(python - <<'PY'
|
||||
import sys, sysconfig, os, site, datetime
|
||||
def log(m): print(f"[bootstrap:python {datetime.datetime.now().strftime('%H:%M:%S')}] {m}", file=sys.stderr, flush=True)
|
||||
@ -246,29 +269,24 @@ fi
|
||||
export PATH="$HOME/.local/bin:$PATH"
|
||||
pyver="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
|
||||
export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}"
|
||||
export PIP_USER=1
|
||||
export PIP_PREFER_BINARY=1
|
||||
|
||||
# --- single GPU probe + early exit ---
|
||||
# --- refresh GPU probe after user switch (no exit) ---
|
||||
eval "$(probe_and_prepare_gpu)"
|
||||
if [ "${GPU_COUNT:-0}" -eq 0 ] || [ "${COMPAT_GE_75:-0}" -ne 1 ]; then
|
||||
log "No compatible NVIDIA GPU (compute capability >= 7.5) detected; shutting down."
|
||||
exit 0
|
||||
fi
|
||||
log "GPU probe (post-switch): ${GPU_COUNT:-0} CUDA device(s); CC list: ${TORCH_CUDA_ARCH_LIST:-none}; strategy=${SAGE_STRATEGY:-fallback}"
|
||||
|
||||
# --- Ensure package manager and Manager deps are available ---
|
||||
# Ensure python -m pip works (bootstrap if needed)
|
||||
# Ensure pip works
|
||||
python -m pip --version >/dev/null 2>&1 || python -m ensurepip --upgrade >/dev/null 2>&1 || true
|
||||
python -m pip --version >/dev/null 2>&1 || log "WARNING: pip still not available after ensurepip"
|
||||
|
||||
# Ensure ComfyUI-Manager minimal Python deps
|
||||
python - <<'PY' || python -m pip install --no-cache-dir --user toml || true
|
||||
# Ensure minimal Python deps for ComfyUI-Manager (pre-baked, but verify)
|
||||
python - <<'PY' || python -m pip install --no-cache-dir toml GitPython || true
|
||||
import sys
|
||||
try:
|
||||
import toml # noqa
|
||||
sys.exit(0)
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
import importlib
|
||||
for m in ("toml","git"):
|
||||
try: importlib.import_module(m)
|
||||
except Exception: sys.exit(1)
|
||||
sys.exit(0)
|
||||
PY
|
||||
|
||||
# --- SageAttention setup using probed data ---
|
||||
@ -296,7 +314,7 @@ if [ ! -f "$FIRST_RUN_FLAG" ] || [ "${COMFY_FORCE_INSTALL:-0}" = "1" ]; then
|
||||
[ "$base" = "ComfyUI-Manager" ] && continue
|
||||
if [ -f "$d/requirements.txt" ]; then
|
||||
log "Installing requirements for node: $base"
|
||||
python -m pip install --no-cache-dir --user --upgrade --upgrade-strategy only-if-needed -r "$d/requirements.txt" || true
|
||||
python -m pip install --no-cache-dir --upgrade --upgrade-strategy only-if-needed -r "$d/requirements.txt" || true
|
||||
fi
|
||||
if [ -f "$d/install.py" ]; then
|
||||
log "Running install.py for node: $base"
|
||||
@ -313,25 +331,6 @@ else
|
||||
log "Not first run; skipping custom_nodes dependency install"
|
||||
fi
|
||||
|
||||
# --- Ensure ONNX Runtime has CUDA provider (GPU) ---
|
||||
python - <<'PY' ||
|
||||
import sys
|
||||
try:
|
||||
import onnxruntime as ort
|
||||
ok = "CUDAExecutionProvider" in ort.get_available_providers()
|
||||
sys.exit(0 if ok else 1)
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
PY
|
||||
log "Installing onnxruntime-gpu for CUDAExecutionProvider..."
|
||||
python -m pip uninstall -y onnxruntime || true
|
||||
python -m pip install --no-cache-dir --user "onnxruntime-gpu>=1.19" || true
|
||||
python - <<'P2' || log "WARNING: ONNX Runtime CUDA provider not available after installation"
|
||||
import onnxruntime as ort, sys
|
||||
print("ORT providers:", ort.get_available_providers())
|
||||
sys.exit(0 if "CUDAExecutionProvider" in ort.get_available_providers() else 1)
|
||||
P2
|
||||
|
||||
# --- launch ComfyUI ---
|
||||
COMFYUI_ARGS=""
|
||||
if [ "${FORCE_SAGE_ATTENTION:-0}" = "1" ] && test_sage_attention; then
|
||||
|
||||
Loading…
Reference in New Issue
Block a user