mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-04-15 13:02:35 +08:00
fix(entrypoint): probe GPU once at startup before permissions; exit fast if not compatible
- Add early GPU probe that first tries nvidia-smi, then torch, with compute capability >= 7.5 gating; write a pass flag to avoid reprobe; exit 42 otherwise to prevent unnecessary work. - Move GPU detection before any user/permission operations to stop repeated permission logs on restarts. - Replace bracketed Markdown URLs with plain URLs in git commands.
This commit is contained in:
parent
249ff20e20
commit
db5ae38c11
@ -12,6 +12,7 @@ SAGE_ATTENTION_DIR="$BASE_DIR/.sage_attention"
|
||||
SAGE_ATTENTION_BUILT_FLAG="$SAGE_ATTENTION_DIR/.built"
|
||||
PERMISSIONS_SET_FLAG="$BASE_DIR/.permissions_set"
|
||||
FIRST_RUN_FLAG="$BASE_DIR/.first_run_done"
|
||||
GPU_CHECK_FLAG="$BASE_DIR/.gpu_check_passed"
|
||||
|
||||
# --- logging ---
|
||||
log() { echo "[$(date '+%H:%M:%S')] $1"; }
|
||||
@ -19,12 +20,74 @@ log() { echo "[$(date '+%H:%M:%S')] $1"; }
|
||||
# Make newly created files group-writable (helps in shared volumes)
|
||||
umask 0002
|
||||
|
||||
# --- early GPU probe (single pass; exit if not OK) ---
|
||||
early_gpu_probe() {
|
||||
# Skip re-probe if cached OK
|
||||
if [ -f "$GPU_CHECK_FLAG" ]; then
|
||||
log "GPU check previously passed; skipping reprobe"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# 1) Prefer nvidia-smi if present (fails when driver/device not exposed)
|
||||
if command -v nvidia-smi >/dev/null 2>&1; then
|
||||
if nvidia-smi -L >/dev/null 2>&1; then
|
||||
# Optional: check compute capability via nvidia-smi if available
|
||||
cc_ok=1
|
||||
if nvidia-smi --query-gpu=compute_cap --format=csv,noheader >/dev/null 2>&1; then
|
||||
# compute_cap is like "8.9", "9.0"
|
||||
while IFS= read -r cap; do
|
||||
maj="${cap%%.*}"; min="${cap#*.}"
|
||||
[ -z "$maj" ] && continue
|
||||
val=$((10*${maj:-0}+${min:-0}))
|
||||
if [ "$val" -lt 75 ]; then cc_ok=0; fi
|
||||
done < <(nvidia-smi --query-gpu=compute_cap --format=csv,noheader || true)
|
||||
fi
|
||||
if [ "$cc_ok" -eq 1 ]; then
|
||||
touch "$GPU_CHECK_FLAG"
|
||||
log "GPU check passed via nvidia-smi"
|
||||
return 0
|
||||
else
|
||||
log "Incompatible GPU compute capability detected (< 7.5); exiting"
|
||||
return 2
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# 2) Fallback: torch probe (does not need a GPU to import)
|
||||
if python - <<'PY'
|
||||
import sys
|
||||
try:
|
||||
import torch
|
||||
sys.exit(0 if torch.cuda.is_available() else 3)
|
||||
except Exception:
|
||||
sys.exit(4)
|
||||
PY
|
||||
then
|
||||
touch "$GPU_CHECK_FLAG"
|
||||
log "GPU check passed via torch probe"
|
||||
return 0
|
||||
else
|
||||
rc=$?
|
||||
if [ $rc -eq 3 ]; then
|
||||
log "torch reports CUDA unavailable; exiting"
|
||||
else
|
||||
log "torch probe failed to import or run; exiting"
|
||||
fi
|
||||
return 5
|
||||
fi
|
||||
}
|
||||
|
||||
# Run early GPU probe before any user/permission work
|
||||
if ! early_gpu_probe; then
|
||||
log "No compatible NVIDIA GPU detected or driver not visible to container; exiting."
|
||||
exit 42
|
||||
fi
|
||||
|
||||
# --- build parallelism (single knob) ---
|
||||
decide_build_jobs() {
|
||||
if [ -n "${SAGE_MAX_JOBS:-}" ]; then echo "$SAGE_MAX_JOBS"; return; fi
|
||||
local mem_kb
|
||||
local mem_kb cpu cap jobs
|
||||
mem_kb=$(awk '/MemTotal:/ {print $2}' /proc/meminfo 2>/dev/null || echo 0)
|
||||
local cpu cap jobs
|
||||
cpu=$(nproc) cap=24
|
||||
if [ "$mem_kb" -le $((8*1024*1024)) ]; then jobs=2
|
||||
elif [ "$mem_kb" -le $((12*1024*1024)) ]; then jobs=3
|
||||
@ -35,7 +98,7 @@ decide_build_jobs() {
|
||||
echo "$jobs"
|
||||
}
|
||||
|
||||
# --- unified GPU probe (torch-based) ---
|
||||
# --- unified GPU probe (torch-based, for downstream flags) ---
|
||||
probe_and_prepare_gpu() {
|
||||
python - <<'PY' 2>/dev/null
|
||||
import os, sys
|
||||
@ -251,19 +314,14 @@ export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}
|
||||
export PIP_USER=1
|
||||
export PIP_PREFER_BINARY=1
|
||||
|
||||
# --- single GPU probe + early exit ---
|
||||
# --- single GPU probe for downstream flags ---
|
||||
eval "$(probe_and_prepare_gpu)"
|
||||
if [ "${GPU_COUNT:-0}" -eq 0 ] || [ "${COMPAT_GE_75:-0}" -ne 1 ]; then
|
||||
log "No compatible NVIDIA GPU (compute capability >= 7.5) detected; shutting down."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Ensure package manager and Manager deps are available ---
|
||||
# Ensure python -m pip works (bootstrap if needed)
|
||||
# Ensure pip available
|
||||
python -m pip --version >/dev/null 2>&1 || python -m ensurepip --upgrade >/dev/null 2>&1 || true
|
||||
python -m pip --version >/dev/null 2>&1 || log "WARNING: pip still not available after ensurepip"
|
||||
|
||||
# Ensure ComfyUI-Manager minimal Python deps
|
||||
# Ensure ComfyUI-Manager minimal dep
|
||||
python - <<'PY'
|
||||
import sys
|
||||
try:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user