fix(bootstrap): valid git URLs, dynamic CUDA archs, +PTX fallback

Replace Markdown-style links in git clone with standard HTTPS URLs so the
repository actually clones under bash.
Derive TORCH_CUDA_ARCH_LIST from PyTorch devices and add +PTX to the
highest architecture for forward-compat extension builds.
Warn explicitly on Blackwell (sm_120) when the active torch/CUDA build
lacks support, prompting an upgrade to torch with CUDA 12.8+.
Keep pip --no-cache-dir, preserve Triton pin for Turing, and retain
idempotent ComfyUI-Manager update logic.
This commit is contained in:
clsferguson 2025-09-26 19:11:46 -06:00 committed by GitHub
parent 231082e2a6
commit 7ee4f37971
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -17,20 +17,33 @@ log() {
echo "[$(date '+%H:%M:%S')] $1" echo "[$(date '+%H:%M:%S')] $1"
} }
# Test PyTorch CUDA availability and enumerate devices; additionally warn on
# Blackwell (compute major >= 12) GPUs when the active torch build does not
# ship sm_120 binaries (requires torch 2.7+ with CUDA 12.8+ or a source build).
# Returns: 0 when CUDA is available, non-zero otherwise (python errors are
# suppressed via 2>/dev/null, so a missing interpreter also reads as failure).
test_pytorch_cuda() {
  python - <<'PY' 2>/dev/null
import sys, torch

if not torch.cuda.is_available():
    print("[ERROR] PyTorch CUDA not available")
    sys.exit(1)

dc = torch.cuda.device_count()
print(f"[TEST] PyTorch CUDA available with {dc} devices")

majors = set()
for i in range(dc):
    p = torch.cuda.get_device_properties(i)
    majors.add(p.major)
    print(f"[TEST] GPU {i}: {p.name} (Compute {p.major}.{p.minor})")

# Blackwell warning: require binaries that know sm_120.
arch_list = []
try:
    arch_list = list(torch.cuda.get_arch_list())
except Exception:
    pass  # older torch builds may lack get_arch_list; treat as "unknown"
if any(m >= 12 for m in majors):
    has_120 = any("sm_120" in a or "compute_120" in a for a in arch_list)
    if not has_120:
        print("[WARN] Detected Blackwell (sm_120) GPU but current torch build does not expose sm_120; use torch 2.7+ with CUDA 12.8+ binaries or a source build", flush=True)
PY
}
# Function to detect all GPUs and their generations (best-effort labels) # Function to detect all GPUs and their generations (best-effort labels)
@ -75,10 +88,9 @@ detect_gpu_generations() {
fi fi
} }
# Function to determine optimal Sage Attention strategy for mixed GPUs # Decide optimal Sage Attention strategy
determine_sage_strategy() { determine_sage_strategy() {
local strategy="" local strategy=""
if [ "${DETECTED_RTX20:-false}" = "true" ]; then if [ "${DETECTED_RTX20:-false}" = "true" ]; then
if [ "${DETECTED_RTX30:-false}" = "true" ] || [ "${DETECTED_RTX40:-false}" = "true" ] || [ "${DETECTED_RTX50:-false}" = "true" ]; then if [ "${DETECTED_RTX30:-false}" = "true" ] || [ "${DETECTED_RTX40:-false}" = "true" ] || [ "${DETECTED_RTX50:-false}" = "true" ]; then
strategy="mixed_with_rtx20" strategy="mixed_with_rtx20"
@ -87,7 +99,7 @@ determine_sage_strategy() {
strategy="rtx20_only" strategy="rtx20_only"
log "RTX 20 series only detected" log "RTX 20 series only detected"
fi fi
elif [ "${DETECTED_RTX50:-false}" = "true" ]; then elif [ "${DETECTED_RTX50:-false}" = "true" ] ; then
strategy="rtx50_capable" strategy="rtx50_capable"
log "RTX 50 series detected - using latest optimizations" log "RTX 50 series detected - using latest optimizations"
elif [ "${DETECTED_RTX40:-false}" = "true" ] || [ "${DETECTED_RTX30:-false}" = "true" ]; then elif [ "${DETECTED_RTX40:-false}" = "true" ] || [ "${DETECTED_RTX30:-false}" = "true" ]; then
@ -97,17 +109,16 @@ determine_sage_strategy() {
strategy="fallback" strategy="fallback"
log "Unknown or unsupported GPU configuration - using fallback" log "Unknown or unsupported GPU configuration - using fallback"
fi fi
export SAGE_STRATEGY=$strategy export SAGE_STRATEGY=$strategy
} }
# Function to install appropriate Triton version based on strategy # Install Triton appropriate to strategy
install_triton_version() { install_triton_version() {
case "$SAGE_STRATEGY" in case "$SAGE_STRATEGY" in
"mixed_with_rtx20"|"rtx20_only") "mixed_with_rtx20"|"rtx20_only")
log "Installing Triton 3.2.0 for RTX 20 series compatibility" log "Installing Triton 3.2.0 for RTX 20 series compatibility"
python -m pip install --no-cache-dir --user --force-reinstall "triton==3.2.0" || { python -m pip install --no-cache-dir --user --force-reinstall "triton==3.2.0" || {
log "WARNING: Failed to install specific Triton version, using default" log "WARNING: Failed to install triton==3.2.0; falling back to latest"
python -m pip install --no-cache-dir --user --force-reinstall triton || true python -m pip install --no-cache-dir --user --force-reinstall triton || true
} }
;; ;;
@ -115,7 +126,7 @@ install_triton_version() {
log "Installing latest Triton for RTX 50 series" log "Installing latest Triton for RTX 50 series"
python -m pip install --no-cache-dir --user --force-reinstall triton || \ python -m pip install --no-cache-dir --user --force-reinstall triton || \
python -m pip install --no-cache-dir --user --force-reinstall --pre triton || { python -m pip install --no-cache-dir --user --force-reinstall --pre triton || {
log "WARNING: Failed to install latest Triton, using stable >=3.2.0" log "WARNING: Failed to install latest Triton; trying >=3.2.0"
python -m pip install --no-cache-dir --user --force-reinstall "triton>=3.2.0" || true python -m pip install --no-cache-dir --user --force-reinstall "triton>=3.2.0" || true
} }
;; ;;
@ -129,20 +140,37 @@ install_triton_version() {
esac esac
} }
# Compute TORCH_CUDA_ARCH_LIST from the runtime devices, appending +PTX to the
# highest architecture so extension builds stay forward-compatible with newer
# GPUs. Falls back to the archs compiled into the torch binary when no device
# is visible. Outputs: semicolon-joined list (e.g. "7.5;8.6+PTX") on stdout,
# or an empty line when nothing could be determined.
compute_cuda_arch_list() {
  python - <<'PY' 2>/dev/null
import re, sys
import torch

archs = set()
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        p = torch.cuda.get_device_properties(i)
        archs.add((p.major, p.minor))

# Fallback: parse compiled archs from the torch binary if devices unavailable.
if not archs:
    try:
        for a in torch.cuda.get_arch_list():
            # Accept "sm_86", "compute_90", and suffixed forms like "sm_90a";
            # the last digit is the minor version, anything before it the major.
            m = re.match(r"^(?:sm_|compute_)?(\d+)(\d)[a-z]*$", a)
            if m:
                archs.add((int(m.group(1)), int(m.group(2))))
    except Exception:
        pass

if not archs:
    print("")  # nothing detected; caller treats empty output as "unset"
    sys.exit(0)

# Sort numerically as (major, minor) tuples so e.g. 12.0 ranks above 8.6.
parts = [f"{M}.{m}" for (M, m) in sorted(archs)]
# Add +PTX to the highest arch for forward-compat builds of extensions.
parts[-1] = parts[-1] + "+PTX"
print(";".join(parts))
PY
}
# Function to build Sage Attention with architecture-specific optimizations # Build Sage Attention with architecture-specific optimizations
build_sage_attention_mixed() { build_sage_attention_mixed() {
log "Building Sage Attention for current GPU environment..." log "Building Sage Attention for current GPU environment..."
@ -168,7 +196,7 @@ build_sage_attention_mixed() {
git reset --hard origin/v1.0 || return 1 git reset --hard origin/v1.0 || return 1
else else
rm -rf SageAttention rm -rf SageAttention
git clone --depth 1 [https://github.com/thu-ml/SageAttention.git](https://github.com/thu-ml/SageAttention.git) -b v1.0 || return 1 git clone --depth 1 https://github.com/thu-ml/SageAttention.git -b v1.0 || return 1
cd SageAttention cd SageAttention
fi fi
;; ;;
@ -180,7 +208,7 @@ build_sage_attention_mixed() {
git reset --hard origin/main || return 1 git reset --hard origin/main || return 1
else else
rm -rf SageAttention rm -rf SageAttention
git clone --depth 1 [https://github.com/thu-ml/SageAttention.git](https://github.com/thu-ml/SageAttention.git) || return 1 git clone --depth 1 https://github.com/thu-ml/SageAttention.git || return 1
cd SageAttention cd SageAttention
fi fi
;; ;;
@ -199,7 +227,7 @@ build_sage_attention_mixed() {
fi fi
} }
# Function to check if current build matches detected GPUs # Check if current build matches detected GPUs
needs_rebuild() { needs_rebuild() {
if [ ! -f "$SAGE_ATTENTION_BUILT_FLAG" ]; then if [ ! -f "$SAGE_ATTENTION_BUILT_FLAG" ]; then
return 0 return 0
@ -213,50 +241,47 @@ needs_rebuild() {
return 1 return 1
} }
# Verify that the sageattention module imports cleanly and report its version
# when one is exposed. Returns 0 on a successful import, 1 on any failure
# (python diagnostics are suppressed via 2>/dev/null).
test_sage_attention() {
  python - <<'PY' 2>/dev/null
import sys

try:
    import sageattention
except ImportError as e:
    print(f"[TEST] Sage Attention import: FAILED - {e}")
    sys.exit(1)
except Exception as e:
    print(f"[TEST] Sage Attention test: ERROR - {e}")
    sys.exit(1)

print("[TEST] Sage Attention import: SUCCESS")
try:
    v = getattr(sageattention, "__version__", None)
    if v:
        print(f"[TEST] Version: {v}")
except Exception:
    pass  # version is informational only
sys.exit(0)
PY
}
# Main GPU detection and Sage Attention setup # Main GPU detection and Sage Attention setup
setup_sage_attention() { setup_sage_attention() {
# Internal tracking and exported availability flag
export SAGE_ATTENTION_BUILT=0 export SAGE_ATTENTION_BUILT=0
export SAGE_ATTENTION_AVAILABLE=0 export SAGE_ATTENTION_AVAILABLE=0
# Detect GPU generations
if ! detect_gpu_generations; then if ! detect_gpu_generations; then
log "No GPUs detected, skipping Sage Attention setup" log "No GPUs detected, skipping Sage Attention setup"
return 0 return 0
fi fi
# Determine optimal strategy
determine_sage_strategy determine_sage_strategy
# Build/install if needed
if needs_rebuild || ! test_sage_attention; then if needs_rebuild || ! test_sage_attention; then
log "Building Sage Attention..." log "Building Sage Attention..."
if install_triton_version && build_sage_attention_mixed && test_sage_attention; then if install_triton_version && build_sage_attention_mixed && test_sage_attention; then
export SAGE_ATTENTION_BUILT=1 export SAGE_ATTENTION_BUILT=1
export SAGE_ATTENTION_AVAILABLE=1 export SAGE_ATTENTION_AVAILABLE=1
log "Sage Attention is built and importable; it will be used only if FORCE_SAGE_ATTENTION=1 on boot" log "Sage Attention is built and importable; enable with FORCE_SAGE_ATTENTION=1"
else else
export SAGE_ATTENTION_BUILT=0 export SAGE_ATTENTION_BUILT=0
export SAGE_ATTENTION_AVAILABLE=0 export SAGE_ATTENTION_AVAILABLE=0
@ -289,6 +314,7 @@ if [ "$(id -u)" = "0" ]; then
[ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true [ -e "$d" ] && chown -R "${APP_USER}:${APP_GROUP}" "$d" || true
done done
# Make Python system install targets writable (under /usr/local only)
readarray -t PY_PATHS < <(python - <<'PY' readarray -t PY_PATHS < <(python - <<'PY'
import sys, sysconfig, os, datetime import sys, sysconfig, os, datetime
def log(msg): def log(msg):
@ -345,7 +371,7 @@ if [ -d "$CUSTOM_NODES_DIR/ComfyUI-Manager/.git" ]; then
git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" clean -fdx || true git -C "$CUSTOM_NODES_DIR/ComfyUI-Manager" clean -fdx || true
elif [ ! -d "$CUSTOM_NODES_DIR/ComfyUI-Manager" ]; then elif [ ! -d "$CUSTOM_NODES_DIR/ComfyUI-Manager" ]; then
log "Installing ComfyUI-Manager into $CUSTOM_NODES_DIR/ComfyUI-Manager" log "Installing ComfyUI-Manager into $CUSTOM_NODES_DIR/ComfyUI-Manager"
git clone --depth 1 [https://github.com/ltdrdata/ComfyUI-Manager.git](https://github.com/ltdrdata/ComfyUI-Manager.git) "$CUSTOM_NODES_DIR/ComfyUI-Manager" || true git clone --depth 1 https://github.com/ltdrdata/ComfyUI-Manager.git "$CUSTOM_NODES_DIR/ComfyUI-Manager" || true
fi fi
# User-site PATHs for --user installs (custom nodes) # User-site PATHs for --user installs (custom nodes)
@ -379,8 +405,6 @@ if [ "$RUN_NODE_INSTALL" = "1" ]; then
done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'pyproject.toml' -not -path '*/ComfyUI-Manager/*' -print0) done < <(find "$CUSTOM_NODES_DIR" -maxdepth 2 -type f -iname 'pyproject.toml' -not -path '*/ComfyUI-Manager/*' -print0)
python -m pip check || true python -m pip check || true
# Mark first run complete
touch "$FIRST_RUN_FLAG" || true touch "$FIRST_RUN_FLAG" || true
fi fi