diff --git a/entrypoint.sh b/entrypoint.sh index a948997db..427a6d5cf 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -71,7 +71,7 @@ detect_gpu_generations() { # Store detection results globally export DETECTED_RTX20=$has_rtx20 - export DETECTED_RTX30=$has_rtx30 + export DETECTED_RTX30=$has_rtx30 export DETECTED_RTX40=$has_rtx40 export DETECTED_RTX50=$has_rtx50 export GPU_COUNT=$gpu_count @@ -125,7 +125,6 @@ install_triton_version() { ;; "rtx50_capable") log "Installing latest Triton for RTX 50 series" - # Try latest first, fallback to pre-release if needed python -m pip install --user --force-reinstall triton || \ python -m pip install --user --force-reinstall --pre triton || { log "WARNING: Failed to install latest Triton, using stable" @@ -153,13 +152,14 @@ build_sage_attention_mixed() { # Set CUDA architecture list based on detected GPUs local cuda_arch_list="" [ "$DETECTED_RTX20" = "true" ] && cuda_arch_list="${cuda_arch_list}7.5;" - [ "$DETECTED_RTX30" = "true" ] && cuda_arch_list="${cuda_arch_list}8.6;" + [ "$DETECTED_RTX30" = "true" ] && cuda_arch_list="${cuda_arch_list}8.6;" [ "$DETECTED_RTX40" = "true" ] && cuda_arch_list="${cuda_arch_list}8.9;" [ "$DETECTED_RTX50" = "true" ] && cuda_arch_list="${cuda_arch_list}12.0;" # Remove trailing semicolon cuda_arch_list=${cuda_arch_list%;} - + + # Export for PyTorch build export TORCH_CUDA_ARCH_LIST="$cuda_arch_list" log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" @@ -181,7 +181,7 @@ build_sage_attention_mixed() { *) log "Cloning latest Sage Attention for modern GPUs" if [ -d "SageAttention/.git" ]; then - cd SageAttention + cd SageAttention git fetch --depth 1 origin || return 1 git reset --hard origin/main || return 1 else @@ -213,7 +213,8 @@ needs_rebuild() { return 0 # Needs build fi - local built_strategy=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "unknown") + local built_strategy + built_strategy=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "unknown") if [ "$built_strategy" != "$SAGE_STRATEGY" ]; then log "GPU configuration changed (was: $built_strategy, now: $SAGE_STRATEGY) - rebuild needed" return 0 # Needs rebuild @@ -229,29 +230,28 @@ import sys try: import sageattention print('[TEST] Sage Attention import: SUCCESS') - # Try to get version info try: if hasattr(sageattention, '__version__'): print(f'[TEST] Version: {sageattention.__version__}') except: pass - sys.exit(0) except ImportError as e: print(f'[TEST] Sage Attention import: FAILED - {e}') sys.exit(1) except Exception as e: - print(f'[TEST] Sage Attention test: ERROR - {e}') + print(f'[TEST] Sage Attention test: ERROR - {e}') sys.exit(1) " 2>/dev/null } # Main GPU detection and Sage Attention setup setup_sage_attention() { - # Initialize Sage Attention availability flag - export SAGE_ATTENTION_AVAILABLE=0 - + # DO NOT set SAGE_ATTENTION_AVAILABLE here; respect any user-provided env choice + # Track build status separately for logging/visibility + export SAGE_ATTENTION_BUILT=0 + # Detect GPU generations if ! detect_gpu_generations; then log "No GPUs detected, skipping Sage Attention setup" @@ -261,35 +261,19 @@ setup_sage_attention() { # Determine optimal strategy determine_sage_strategy - # Check if rebuild is needed + # Build/install if needed if needs_rebuild || ! test_sage_attention; then log "Building Sage Attention..." - - # Install appropriate Triton version first - if install_triton_version; then - # Build Sage Attention - if build_sage_attention_mixed; then - # Test installation - if test_sage_attention; then - export SAGE_ATTENTION_AVAILABLE=1 - log "Sage Attention setup completed successfully" - log "SAGE_ATTENTION_AVAILABLE=1 (will use --use-sage-attention flag)" - else - log "WARNING: Sage Attention build succeeded but import test failed" - export SAGE_ATTENTION_AVAILABLE=0 - fi - else - log "ERROR: Sage Attention build failed" - export SAGE_ATTENTION_AVAILABLE=0 - fi + if install_triton_version && build_sage_attention_mixed && test_sage_attention; then + export SAGE_ATTENTION_BUILT=1 + log "Sage Attention is built and available; enable by setting SAGE_ATTENTION_AVAILABLE=1 or using --use-sage-attention explicitly" else - log "ERROR: Triton installation failed, skipping Sage Attention build" - export SAGE_ATTENTION_AVAILABLE=0 + export SAGE_ATTENTION_BUILT=0 + log "WARNING: Sage Attention is not available after build attempt" fi else - export SAGE_ATTENTION_AVAILABLE=1 - log "Sage Attention already built and working for current GPU configuration" - log "SAGE_ATTENTION_AVAILABLE=1 (will use --use-sage-attention flag)" + export SAGE_ATTENTION_BUILT=1 + log "Sage Attention already built and importable for current GPU configuration" fi } @@ -404,13 +388,21 @@ if [ "${COMFY_AUTO_INSTALL:-1}" = "1" ]; then pip check || true fi -# Build ComfyUI command with Sage Attention flag if available +# Build ComfyUI command with Sage Attention flag only if user explicitly enabled it via env COMFYUI_ARGS="" if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then - COMFYUI_ARGS="--use-sage-attention" - log "Starting ComfyUI with Sage Attention enabled" + if test_sage_attention; then + COMFYUI_ARGS="--use-sage-attention" + log "Starting ComfyUI with Sage Attention enabled by environment (SAGE_ATTENTION_AVAILABLE=1)" + else + log "WARNING: SAGE_ATTENTION_AVAILABLE=1 but Sage Attention import failed; starting without" + fi else - log "Starting ComfyUI without Sage Attention (not available or build failed)" + if [ "${SAGE_ATTENTION_BUILT:-0}" = "1" ]; then + log "Sage Attention is built and available; set SAGE_ATTENTION_AVAILABLE=1 to enable it" + else + log "Sage Attention not available; starting without it" + fi fi cd "$BASE_DIR" @@ -421,7 +413,7 @@ if [ $# -eq 0 ]; then exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS else # Arguments were passed, check if it's the default command - if [ "$1" = "python" ] && [ "$2" = "main.py" ]; then + if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then # Default python command, add our args shift 2 # Remove 'python main.py' exec python main.py $COMFYUI_ARGS "$@"