entrypoint: build SageAttention but don’t auto‑enable; honor SAGE_ATTENTION_AVAILABLE env

The entrypoint no longer exports SAGE_ATTENTION_AVAILABLE=1 on successful builds, so global attention patching is no longer forced. Instead, it builds and tests SageAttention, sets SAGE_ATTENTION_BUILT=1 for visibility, and appends --use-sage-attention only when SAGE_ATTENTION_AVAILABLE=1 is supplied by the environment. This preserves user control through `docker run -e` or Compose environment settings while keeping the feature available.
This commit is contained in:
clsferguson 2025-09-23 10:28:12 -06:00 committed by GitHub
parent 360a2c4ec7
commit 7af5a79577
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -125,7 +125,6 @@ install_triton_version() {
;; ;;
"rtx50_capable") "rtx50_capable")
log "Installing latest Triton for RTX 50 series" log "Installing latest Triton for RTX 50 series"
# Try latest first, fallback to pre-release if needed
python -m pip install --user --force-reinstall triton || \ python -m pip install --user --force-reinstall triton || \
python -m pip install --user --force-reinstall --pre triton || { python -m pip install --user --force-reinstall --pre triton || {
log "WARNING: Failed to install latest Triton, using stable" log "WARNING: Failed to install latest Triton, using stable"
@ -160,6 +159,7 @@ build_sage_attention_mixed() {
# Remove trailing semicolon # Remove trailing semicolon
cuda_arch_list=${cuda_arch_list%;} cuda_arch_list=${cuda_arch_list%;}
# Export for PyTorch build
export TORCH_CUDA_ARCH_LIST="$cuda_arch_list" export TORCH_CUDA_ARCH_LIST="$cuda_arch_list"
log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" log "Set TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
@ -213,7 +213,8 @@ needs_rebuild() {
return 0 # Needs build return 0 # Needs build
fi fi
local built_strategy=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "unknown") local built_strategy
built_strategy=$(cat "$SAGE_ATTENTION_BUILT_FLAG" 2>/dev/null || echo "unknown")
if [ "$built_strategy" != "$SAGE_STRATEGY" ]; then if [ "$built_strategy" != "$SAGE_STRATEGY" ]; then
log "GPU configuration changed (was: $built_strategy, now: $SAGE_STRATEGY) - rebuild needed" log "GPU configuration changed (was: $built_strategy, now: $SAGE_STRATEGY) - rebuild needed"
return 0 # Needs rebuild return 0 # Needs rebuild
@ -229,14 +230,12 @@ import sys
try: try:
import sageattention import sageattention
print('[TEST] Sage Attention import: SUCCESS') print('[TEST] Sage Attention import: SUCCESS')
# Try to get version info # Try to get version info
try: try:
if hasattr(sageattention, '__version__'): if hasattr(sageattention, '__version__'):
print(f'[TEST] Version: {sageattention.__version__}') print(f'[TEST] Version: {sageattention.__version__}')
except: except:
pass pass
sys.exit(0) sys.exit(0)
except ImportError as e: except ImportError as e:
print(f'[TEST] Sage Attention import: FAILED - {e}') print(f'[TEST] Sage Attention import: FAILED - {e}')
@ -249,8 +248,9 @@ except Exception as e:
# Main GPU detection and Sage Attention setup # Main GPU detection and Sage Attention setup
setup_sage_attention() { setup_sage_attention() {
# Initialize Sage Attention availability flag # DO NOT set SAGE_ATTENTION_AVAILABLE here; respect any user-provided env choice
export SAGE_ATTENTION_AVAILABLE=0 # Track build status separately for logging/visibility
export SAGE_ATTENTION_BUILT=0
# Detect GPU generations # Detect GPU generations
if ! detect_gpu_generations; then if ! detect_gpu_generations; then
@ -261,35 +261,19 @@ setup_sage_attention() {
# Determine optimal strategy # Determine optimal strategy
determine_sage_strategy determine_sage_strategy
# Check if rebuild is needed # Build/install if needed
if needs_rebuild || ! test_sage_attention; then if needs_rebuild || ! test_sage_attention; then
log "Building Sage Attention..." log "Building Sage Attention..."
if install_triton_version && build_sage_attention_mixed && test_sage_attention; then
# Install appropriate Triton version first export SAGE_ATTENTION_BUILT=1
if install_triton_version; then log "Sage Attention is built and available; enable by setting SAGE_ATTENTION_AVAILABLE=1 or using --use-sage-attention explicitly"
# Build Sage Attention
if build_sage_attention_mixed; then
# Test installation
if test_sage_attention; then
export SAGE_ATTENTION_AVAILABLE=1
log "Sage Attention setup completed successfully"
log "SAGE_ATTENTION_AVAILABLE=1 (will use --use-sage-attention flag)"
else
log "WARNING: Sage Attention build succeeded but import test failed"
export SAGE_ATTENTION_AVAILABLE=0
fi
else
log "ERROR: Sage Attention build failed"
export SAGE_ATTENTION_AVAILABLE=0
fi
else else
log "ERROR: Triton installation failed, skipping Sage Attention build" export SAGE_ATTENTION_BUILT=0
export SAGE_ATTENTION_AVAILABLE=0 log "WARNING: Sage Attention is not available after build attempt"
fi fi
else else
export SAGE_ATTENTION_AVAILABLE=1 export SAGE_ATTENTION_BUILT=1
log "Sage Attention already built and working for current GPU configuration" log "Sage Attention already built and importable for current GPU configuration"
log "SAGE_ATTENTION_AVAILABLE=1 (will use --use-sage-attention flag)"
fi fi
} }
@ -404,13 +388,21 @@ if [ "${COMFY_AUTO_INSTALL:-1}" = "1" ]; then
pip check || true pip check || true
fi fi
# Build ComfyUI command with Sage Attention flag if available # Build ComfyUI command with Sage Attention flag only if user explicitly enabled it via env
COMFYUI_ARGS="" COMFYUI_ARGS=""
if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then if [ "${SAGE_ATTENTION_AVAILABLE:-0}" = "1" ]; then
COMFYUI_ARGS="--use-sage-attention" if test_sage_attention; then
log "Starting ComfyUI with Sage Attention enabled" COMFYUI_ARGS="--use-sage-attention"
log "Starting ComfyUI with Sage Attention enabled by environment (SAGE_ATTENTION_AVAILABLE=1)"
else
log "WARNING: SAGE_ATTENTION_AVAILABLE=1 but Sage Attention import failed; starting without"
fi
else else
log "Starting ComfyUI without Sage Attention (not available or build failed)" if [ "${SAGE_ATTENTION_BUILT:-0}" = "1" ]; then
log "Sage Attention is built and available; set SAGE_ATTENTION_AVAILABLE=1 to enable it"
else
log "Sage Attention not available; starting without it"
fi
fi fi
cd "$BASE_DIR" cd "$BASE_DIR"
@ -421,7 +413,7 @@ if [ $# -eq 0 ]; then
exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS exec python main.py --listen 0.0.0.0 $COMFYUI_ARGS
else else
# Arguments were passed, check if it's the default command # Arguments were passed, check if it's the default command
if [ "$1" = "python" ] && [ "$2" = "main.py" ]; then if [ "$1" = "python" ] && [ "${2:-}" = "main.py" ]; then
# Default python command, add our args # Default python command, add our args
shift 2 # Remove 'python main.py' shift 2 # Remove 'python main.py'
exec python main.py $COMFYUI_ARGS "$@" exec python main.py $COMFYUI_ARGS "$@"