diff --git a/entrypoint.sh b/entrypoint.sh
index 00f41f625..a522e7965 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -12,6 +12,7 @@ SAGE_ATTENTION_DIR="$BASE_DIR/.sage_attention"
 SAGE_ATTENTION_BUILT_FLAG="$SAGE_ATTENTION_DIR/.built"
 PERMISSIONS_SET_FLAG="$BASE_DIR/.permissions_set"
 FIRST_RUN_FLAG="$BASE_DIR/.first_run_done"
+GPU_CHECK_FLAG="$BASE_DIR/.gpu_check_passed"
 
 # --- logging ---
 log() { echo "[$(date '+%H:%M:%S')] $1"; }
@@ -19,12 +20,82 @@ log() { echo "[$(date '+%H:%M:%S')] $1"; }
 # Make newly created files group-writable (helps in shared volumes)
 umask 0002
+# --- early GPU probe (single pass; exit if not OK) ---
+# Probes once for a usable NVIDIA GPU; caches success in $GPU_CHECK_FLAG so
+# container restarts skip the slow torch-import fallback path.
+early_gpu_probe() {
+  local cc_ok caps cap maj min val rc
+  # Skip re-probe if cached OK
+  if [ -f "$GPU_CHECK_FLAG" ]; then
+    log "GPU check previously passed; skipping reprobe"
+    return 0
+  fi
+
+  # 1) Prefer nvidia-smi if present (fails when driver/device not exposed)
+  if command -v nvidia-smi >/dev/null 2>&1; then
+    if nvidia-smi -L >/dev/null 2>&1; then
+      # Check compute capability when the driver reports it. Query once and
+      # reuse the output instead of invoking nvidia-smi twice.
+      cc_ok=1
+      caps=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null || true)
+      if [ -n "$caps" ]; then
+        # compute_cap lines look like "8.9" or "9.0"
+        while IFS= read -r cap; do
+          cap=${cap//[[:space:]]/}     # strip CR/spaces from csv output
+          case "$cap" in
+            '')  continue ;;
+            *.*) maj="${cap%%.*}"; min="${cap#*.}" ;;
+            *)   maj="$cap"; min=0 ;;  # integer-only cap (defensive)
+          esac
+          val=$((10*${maj:-0}+${min:-0}))
+          if [ "$val" -lt 75 ]; then cc_ok=0; fi
+        done <<<"$caps"
+      fi
+      if [ "$cc_ok" -eq 1 ]; then
+        touch "$GPU_CHECK_FLAG" || true  # cache is best-effort; dir perms may not be set yet
+        log "GPU check passed via nvidia-smi"
+        return 0
+      else
+        log "Incompatible GPU compute capability detected (< 7.5); exiting"
+        return 2
+      fi
+    fi
+  fi
+
+  # 2) Fallback: torch probe (does not need a GPU to import).
+  # Capture the exit code explicitly and set -e-safely instead of reading $?
+  # inside an else-branch.
+  rc=0
+  python - <<'PY' || rc=$?
+import sys
+try:
+    import torch
+    sys.exit(0 if torch.cuda.is_available() else 3)
+except Exception:
+    sys.exit(4)
+PY
+  if [ "$rc" -eq 0 ]; then
+    touch "$GPU_CHECK_FLAG" || true
+    log "GPU check passed via torch probe"
+    return 0
+  elif [ "$rc" -eq 3 ]; then
+    log "torch reports CUDA unavailable; exiting"
+    return 5
+  else
+    log "torch probe failed to import or run; exiting"
+    return 5
+  fi
+}
+
+# Run early GPU probe before any user/permission work
+if ! early_gpu_probe; then
+  log "No compatible NVIDIA GPU detected or driver not visible to container; exiting."
+  exit 42
+fi
+
 # --- build parallelism (single knob) ---
 decide_build_jobs() {
   if [ -n "${SAGE_MAX_JOBS:-}" ]; then echo "$SAGE_MAX_JOBS"; return; fi
-  local mem_kb
+  local mem_kb cpu cap jobs
   mem_kb=$(awk '/MemTotal:/ {print $2}' /proc/meminfo 2>/dev/null || echo 0)
-  local cpu cap jobs
   cpu=$(nproc)
   cap=24
   if [ "$mem_kb" -le $((8*1024*1024)) ]; then jobs=2
   elif [ "$mem_kb" -le $((12*1024*1024)) ]; then jobs=3
@@ -35,7 +106,7 @@ decide_build_jobs() {
   echo "$jobs"
 }
 
-# --- unified GPU probe (torch-based) ---
+# --- unified GPU probe (torch-based, for downstream flags) ---
 probe_and_prepare_gpu() {
   python - <<'PY' 2>/dev/null
 import os, sys
@@ -251,19 +322,14 @@
 export PYTHONPATH="$HOME/.local/lib/python${pyver}/site-packages:${PYTHONPATH:-}"
 export PIP_USER=1
 export PIP_PREFER_BINARY=1
 
-# --- single GPU probe + early exit ---
+# --- single GPU probe for downstream flags ---
 eval "$(probe_and_prepare_gpu)"
-if [ "${GPU_COUNT:-0}" -eq 0 ] || [ "${COMPAT_GE_75:-0}" -ne 1 ]; then
-  log "No compatible NVIDIA GPU (compute capability >= 7.5) detected; shutting down."
-  exit 0
-fi
 
-# --- Ensure package manager and Manager deps are available ---
-# Ensure python -m pip works (bootstrap if needed)
+# Ensure pip available
 python -m pip --version >/dev/null 2>&1 || python -m ensurepip --upgrade >/dev/null 2>&1 || true
 python -m pip --version >/dev/null 2>&1 || log "WARNING: pip still not available after ensurepip"
-# Ensure ComfyUI-Manager minimal Python deps
+# Ensure ComfyUI-Manager minimal dep
 python - <<'PY'
 import sys
 try: