# Mirror of https://github.com/comfyanonymous/ComfyUI.git
# Synced 2026-01-14 00:00:57 +08:00
# 700 lines, 28 KiB, Python
# ------------------- Hide ROCm/HIP -------------------
|
|
import sys
|
|
import os
|
|
|
|
# Remove the ROCm/HIP location hints from this process' environment.
os.environ.pop("ROCM_HOME", None)
os.environ.pop("HIP_HOME", None)
os.environ.pop("ROCM_VERSION", None)

# Triton fix (tentative): switches presumably read by the flash-attention
# Triton AMD backend and by Triton itself -- TODO confirm they are still needed.
os.environ["FLASH_ATTENTION_TRITON_AMD_ENABLE"] = "TRUE"
os.environ["FLASH_ATTENTION_TRITON_AMD_AUTOTUNE"] = "TRUE"
os.environ["TRITON_DEBUG"] = "1"  # Verbose logging

# Drop every PATH entry that mentions ROCm (case-insensitive).
# os.pathsep is ";" on Windows, so behavior there is unchanged, and a missing
# PATH no longer raises KeyError (it is simply left unset).
paths = os.environ.get("PATH", "").split(os.pathsep)
paths_no_rocm = [p for p in paths if "rocm" not in p.lower()]
if "PATH" in os.environ:
    os.environ["PATH"] = os.pathsep.join(paths_no_rocm)
# ------------------- End ROCm/HIP Hiding -------------

# Fix for cublasLt errors on newer ZLUDA (if no hipblaslt)
os.environ['DISABLE_ADDMM_CUDA_LT'] = '1'
|
|
|
|
# ------------------- main imports -------------------
|
|
# main imports
|
|
import torch
|
|
|
|
torch._dynamo.config.suppress_errors = True # Skip compilation errors
|
|
torch._dynamo.config.optimize_ddp = False # Disable distributed optimizations
|
|
|
|
import ctypes
|
|
import shutil
|
|
import subprocess
|
|
import importlib.metadata
|
|
from functools import wraps
|
|
from typing import Union, List
|
|
from enum import Enum
|
|
# ------------------- main imports -------------------
|
|
|
|
# ------------------- gfx detection -------------------
|
|
import os
|
|
import re
|
|
|
|
def detect_amd_gpu_architecture():
    """
    Detect AMD GPU architecture on Windows and return the appropriate gfx code for TRITON_OVERRIDE_ARCH

    The display-adapter class key of the Windows registry is scanned first;
    if that yields nothing (or is unavailable) the ``wmic`` command line tool
    is queried. The first adapter whose description contains "AMD" or
    "Radeon" is passed to ``gpu_name_to_gfx``.

    Returns:
        The gfx code produced by ``gpu_name_to_gfx`` for the detected adapter,
        or ``None`` when no AMD adapter is found or detection fails.
    """
    try:
        # Method 1: Try Windows registry
        desc = _amd_adapter_from_registry()
        if desc is not None:
            print(f" :: Detected GPU via Windows registry: {desc}")
            return gpu_name_to_gfx(desc)

        # Method 2: Try WMIC command
        name = _amd_adapter_from_wmic()
        if name is not None:
            print(f" :: Detected GPU via WMIC: {name}")
            return gpu_name_to_gfx(name)

        print(" :: Could not detect AMD GPU architecture automatically")
        return None

    except Exception as e:
        # Detection is best effort; the caller falls back to a default arch.
        print(f" :: GPU detection failed: {str(e)}")
        return None


def _looks_like_amd_adapter(description):
    """Return True when ``description`` is a string mentioning AMD or Radeon."""
    return isinstance(description, str) and ("AMD" in description or "Radeon" in description)


def _amd_adapter_from_registry():
    """Return the DriverDesc of the first AMD display adapter in the registry, else None."""
    try:
        import winreg
    except ImportError:
        return None  # winreg only exists on Windows

    key_path = r"SYSTEM\CurrentControlSet\Control\Class\{4d36e968-e325-11ce-bfc1-08002be10318}"
    try:
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, key_path) as key:
            index = 0
            while True:
                try:
                    subkey_name = winreg.EnumKey(key, index)
                except OSError:
                    return None  # EnumKey raises once the subkeys are exhausted
                index += 1
                try:
                    with winreg.OpenKey(key, subkey_name) as subkey:
                        desc = winreg.QueryValueEx(subkey, "DriverDesc")[0]
                except OSError:
                    # Subkey unreadable or without a DriverDesc value:
                    # skip it instead of ending the whole scan.
                    continue
                if _looks_like_amd_adapter(desc):
                    return desc
    except OSError:
        # Class key missing or not accessible; let the caller try WMIC.
        return None


def _amd_adapter_from_wmic():
    """Return the first AMD adapter name reported by ``wmic``, else None."""
    import subprocess

    try:
        result = subprocess.run(['wmic', 'path', 'win32_VideoController', 'get', 'name'],
                                capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.TimeoutExpired):
        return None  # wmic not available or not answering
    if result.returncode != 0:
        return None
    for line in result.stdout.split('\n'):
        line = line.strip()
        if line and _looks_like_amd_adapter(line):
            return line
    return None
|
|
|
|
def gpu_name_to_gfx(gpu_name):
    """
    Map GPU names to their corresponding gfx architecture codes

    Matching is a case-insensitive substring search against an ordered rule
    table; the first rule containing a matching marker wins.

    Args:
        gpu_name: adapter name, e.g. "AMD Radeon RX 7900 XTX".

    Returns:
        The gfx code as a string. Unknown names print a notice and yield
        'gfx1030'.
    """
    gpu_name_lower = gpu_name.lower()

    # Order matters: longer markers must precede markers they contain
    # ('rx 5700' before 'rx 570', 'rx 5500' before 'rx 550', ...), and the
    # generic per-generation fallbacks come last.
    rules = (
        # RDNA4 (gfx12xx)
        (('rx 9070',), 'gfx1201'),  # Navi 48
        (('rx 9060',), 'gfx1200'),  # Navi 44
        # RDNA3 (gfx11xx)
        (('rx 7900',), 'gfx1100'),  # Navi 31
        (('rx 7800', 'rx 7700'), 'gfx1101'),  # Navi 32
        (('rx 7650', 'rx 7600', 'rx 7500', 'rx 7400'), 'gfx1102'),  # Navi 33
        # RDNA2 (gfx10xx)
        (('rx 6950', 'rx 6900', 'rx 6800', 'rx 6750', 'rx 6700'), 'gfx1030'),  # Navi 21/22
        (('rx 6650', 'rx 6600', 'rx 6500', 'rx 6400'), 'gfx1032'),  # Navi 23/24
        # RDNA1 (gfx10xx)
        (('rx 5700', 'rx 5600', 'rx 5500'), 'gfx1010'),  # Navi 10
        # Vega (gfx9xx)
        (('radeon vii',), 'gfx906'),  # Vega 20 die
        (('vega 64', 'vega 56', 'vega 20'), 'gfx900'),  # Vega 10
        (('vega 11', 'vega 8'), 'gfx902'),  # Raven Ridge APU
        # Polaris (gfx8xx)
        (('rx 580', 'rx 570', 'rx 480', 'rx 470',
          'rx 560', 'rx 550', 'rx 460'), 'gfx803'),  # Polaris 10/11/12/20
        # Default fallback - guess from the series digit alone
        (('rx 9',), 'gfx1200'),  # Default RDNA4
        (('rx 8',), 'gfx1150'),
        (('rx 7',), 'gfx1100'),  # Default RDNA3
        (('rx 6',), 'gfx1030'),  # Default RDNA2
        (('rx 5',), 'gfx1010'),  # Default RDNA1
    )

    for markers, gfx in rules:
        if any(marker in gpu_name_lower for marker in markers):
            return gfx

    print(f" :: Unknown GPU model: {gpu_name}, using default gfx1030")
    return 'gfx1030'  # Safe default for most modern AMD GPUs
|
|
|
|
def set_triton_arch_override():
    """
    Make sure the TRITON_OVERRIDE_ARCH environment variable is populated.

    A value that is already present is reported and left untouched. Otherwise
    the result of detect_amd_gpu_architecture() is stored, or 'gfx1030' when
    detection returns nothing.
    """
    preset_arch = os.environ.get('TRITON_OVERRIDE_ARCH')
    if preset_arch is not None:
        # An explicit user choice always wins
        print(f" :: TRITON_OVERRIDE_ARCH already set to: {preset_arch}")
        return

    print(" :: Auto-detecting AMD GPU architecture for Triton...")
    detected_arch = detect_amd_gpu_architecture()

    if not detected_arch:
        # Nothing detected: fall back to a common architecture
        default_arch = 'gfx1030'
        os.environ['TRITON_OVERRIDE_ARCH'] = default_arch
        print(f" :: Using fallback TRITON_OVERRIDE_ARCH={default_arch}")
        print(" :: If Triton fails, you may need to manually set TRITON_OVERRIDE_ARCH in your environment")
        return

    os.environ['TRITON_OVERRIDE_ARCH'] = detected_arch
    print(f" :: Set TRITON_OVERRIDE_ARCH={detected_arch}")
|
|
# ------------------- gfx detection -------------------
|
|
|
|
# ------------------- ComfyUI Package Version Check -------------------
|
|
def get_package_version(package_name):
    """Return the installed version string of ``package_name``.

    ``importlib.metadata`` is used when available; the ``importlib_metadata``
    backport is imported only if the stdlib module itself is missing.

    Raises:
        PackageNotFoundError: when no distribution with that name is installed.
    """
    # Guard only the import. PackageNotFoundError derives from ImportError, so
    # wrapping the lookup too would treat "package not installed" as
    # "importlib.metadata unavailable" and end in a misleading import error.
    try:
        from importlib.metadata import version
    except ImportError:
        from importlib_metadata import version
    return version(package_name)
|
|
|
|
def parse_requirements_file(requirements_path):
    """Parse requirements.txt file and extract package versions.

    Only requirements whose first specifier uses ``==``, ``>=`` or ``~=`` are
    recorded. Surrounding whitespace, extras (``pkg[extra]``), additional
    specifiers (``,<2.0``), environment markers (``; python_version ...``) and
    trailing comments are not part of the stored name or version.

    Args:
        requirements_path: path of the requirements file.

    Returns:
        Dict mapping package name to the version string of its first
        specifier. Empty when the file does not exist (a warning is printed).
    """
    # name, optional extras, operator, version up to the next delimiter
    requirement_re = re.compile(
        r'^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*(==|>=|~=)\s*([^\s,;#]+)'
    )
    requirements = {}
    try:
        with open(requirements_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                match = requirement_re.match(line)
                if match:
                    pkg, _operator, version = match.groups()
                    requirements[pkg] = version
                # Other operators (>, <, <=, !=) are intentionally ignored
    except FileNotFoundError:
        print(f" :: Warning: requirements.txt not found at {requirements_path}")
    return requirements
|
|
|
|
def is_compatible_version(installed_version, required_version, operator='>='):
    """Check if installed version meets requirement based on operator.

    Args:
        installed_version: version string of the installed package.
        required_version: version string taken from the requirement.
        operator: '>=', '==' or '~='; anything else is treated as '>='.

    Returns:
        True when the requirement is met. False when it is not, or when the
        comparison itself fails (the error is printed).
    """
    try:
        from packaging import version

        installed_v = version.parse(installed_version)
        required_v = version.parse(required_version)

        if operator == '==':
            return installed_v == required_v

        if operator == '~=':
            # Compatible release (PEP 440): ~=2.1 means >=2.1, ==2.*
            #                               ~=2.4.1 means >=2.4.1, ==2.4.*
            # A single-segment requirement (~=2) is treated as >=2, ==2.*
            required_parts = required_v.release
            prefix = required_parts[:-1] if len(required_parts) > 1 else required_parts
            # Pad with zeros so "2" compares like "2.0" instead of raising
            installed_parts = installed_v.release
            installed_parts += (0,) * (len(prefix) - len(installed_parts))
            return installed_v >= required_v and installed_parts[:len(prefix)] == prefix

        # '>=' and, by default, any unknown operator
        return installed_v >= required_v
    except Exception as e:
        print(f" :: Version comparison error for {installed_version} vs {required_version}: {e}")
        return False
|
|
|
|
def uninstall_package(package_name):
    """Uninstall a package quietly

    Runs ``pip uninstall -y --quiet`` with the current interpreter.

    Returns:
        True when pip exits successfully, False when it exits with an error.
    """
    import subprocess
    import sys

    pip_command = [sys.executable, '-m', 'pip', 'uninstall', package_name, '-y', '--quiet']
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError:
        return False
    return True
|
|
|
|
def check_pydantic_compatibility():
    """Check if current pydantic packages are compatible, return True if they need reinstalling"""
    try:
        # Importing this class is the probe: it is the import that fails when
        # the installed pydantic packages do not fit together.
        from pydantic_settings import TomlConfigSettingsSource  # noqa: F401
    except Exception:
        # ImportError or any other failure: treat the packages as incompatible
        return True
    # The import worked, so nothing needs reinstalling
    return False
|
|
|
|
def handle_pydantic_packages(required_packages):
    """Special handling for pydantic packages to ensure compatibility

    Does nothing unless 'pydantic' or 'pydantic-settings' appears in
    ``required_packages``. When both are installed and the compatibility probe
    passes they are left alone; otherwise any installed copies are uninstalled
    and both are reinstalled together as ``~=2.0``.

    Args:
        required_packages: mapping of package name to required version, as
            returned by parse_requirements_file().
    """
    import subprocess
    import sys

    def installed_version_or_none(name):
        """Return the installed version of ``name`` or None if it cannot be determined."""
        try:
            return get_package_version(name)
        except Exception:
            # Not installed (or metadata unreadable). Exception, not a bare
            # except, so KeyboardInterrupt/SystemExit still propagate.
            return None

    pydantic_packages = ['pydantic', 'pydantic-settings']
    if not any(pkg in required_packages for pkg in pydantic_packages):
        return  # No pydantic packages to handle

    # Check if both packages are available and what versions
    pydantic_installed = installed_version_or_none('pydantic')
    pydantic_settings_installed = installed_version_or_none('pydantic-settings')

    # If both are installed, check compatibility
    if pydantic_installed and pydantic_settings_installed:
        print(f"Found pydantic: {pydantic_installed}, pydantic-settings: {pydantic_settings_installed}")

        # Check if they're compatible by testing the import
        if not check_pydantic_compatibility():
            print(" :: Pydantic packages are compatible, skipping reinstall")
            return
        print(" :: Pydantic packages are incompatible, need to reinstall")

    # If we get here, we need to install/reinstall pydantic packages
    print(" :: Setting up pydantic packages for compatibility...")

    # Uninstall existing versions to avoid conflicts
    if pydantic_installed:
        print(f" :: Uninstalling existing pydantic {pydantic_installed}")
        uninstall_package('pydantic')

    if pydantic_settings_installed:
        print(f" :: Uninstalling existing pydantic-settings {pydantic_settings_installed}")
        uninstall_package('pydantic-settings')

    # Install both packages together so pip resolves them as a pair
    try:
        print(" :: Installing compatible pydantic packages...")
        combined_args = [sys.executable, '-m', 'pip', 'install',
                         'pydantic~=2.0',
                         'pydantic-settings~=2.0',
                         '--quiet',
                         '--disable-pip-version-check']
        subprocess.check_call(combined_args)
    except subprocess.CalledProcessError as e:
        print(f" :: Failed to install pydantic packages: {e}")
        return

    # Verify installation without letting a failed lookup abort the caller
    new_pydantic = installed_version_or_none('pydantic')
    new_pydantic_settings = installed_version_or_none('pydantic-settings')
    if new_pydantic and new_pydantic_settings:
        print(f" :: Successfully installed pydantic: {new_pydantic}, pydantic-settings: {new_pydantic_settings}")
    else:
        print(" :: Warning: pydantic packages could not be verified after installation")
|
|
|
|
def install_package(package_name, version_spec, upgrade=False):
    """Install ``package_name`` with pip, constrained by ``version_spec``.

    Args:
        package_name: distribution name to install.
        version_spec: either a bare version ("1.2.3"), which is pinned with
            ``==``, or a specifier that already starts with an operator
            ("~=2.0", ">=1.4"), which is used as given.
        upgrade: add ``--upgrade`` to the pip command.

    Installation failures are printed, not raised. When an upgrade with a
    ``~=`` constraint fails, one retry without any version constraint is made.
    """
    import subprocess
    import sys

    version_spec = version_spec.strip()
    # A spec that already carries its operator must not get a second one
    # (previously "~=2.0" became the invalid "pkg~=~=2.0").
    if version_spec.startswith(('~=', '==', '>=', '<=', '!=', '>', '<')):
        package_spec = f'{package_name}{version_spec}'
    else:
        package_spec = f'{package_name}=={version_spec}'

    args = [sys.executable, '-m', 'pip', 'install',
            package_spec,
            '--quiet',
            '--disable-pip-version-check']
    if upgrade:
        args.append('--upgrade')

    try:
        subprocess.check_call(args)
    except subprocess.CalledProcessError as e:
        print(f" :: Failed to install {package_name}: {e}")
        # Try installing without version constraint as fallback
        if upgrade and '~=' in package_spec:
            try:
                print(f" :: Retrying {package_name} installation without version constraint...")
                fallback_args = [sys.executable, '-m', 'pip', 'install',
                                 package_name,
                                 '--upgrade',
                                 '--quiet',
                                 '--disable-pip-version-check']
                subprocess.check_call(fallback_args)
                print(f" :: {package_name} installed successfully without version constraint")
            except subprocess.CalledProcessError as e2:
                print(f" :: Fallback installation also failed: {e2}")
|
|
|
|
def ensure_package(package_name, required_version, operator='>='):
    """Install or upgrade ``package_name`` when it does not satisfy the requirement.

    The two pydantic packages are skipped here because they are handled as a
    pair elsewhere. Any error while checking the installed version leads to a
    plain install of ``required_version``.
    """
    # Skip individual pydantic package handling - they're handled together
    if package_name in ('pydantic', 'pydantic-settings'):
        return

    try:
        current_version = get_package_version(package_name)
        print(f"Installed version of {package_name}: {current_version}")

        satisfied = is_compatible_version(current_version, required_version, operator)
        if not satisfied:
            install_package(package_name, required_version, upgrade=True)
            print(f"\n{package_name} outdated. Upgraded to {required_version}.")
    except Exception as e:
        print(f" :: {package_name} not found or error checking version: {e}")
        install_package(package_name, required_version)
        print(f"\n{package_name} was missing. Installed it.")
|
|
|
|
# Determine operator from requirements.txt
|
|
def get_version_operator(requirements_path, package_name):
    """Extract the version operator used for a package in requirements.txt

    The package name must match exactly (optionally followed by extras), so
    asking for "pydantic" no longer picks up the "pydantic-settings" line.

    Args:
        requirements_path: path of the requirements file.
        package_name: name as written in the requirements file.

    Returns:
        '~=', '==' or '>=' as written directly after the package name, or
        '>=' when the file or a matching line cannot be found.
    """
    line_re = re.compile(
        r'^' + re.escape(package_name) + r'\s*(?:\[[^\]]*\])?\s*(~=|==|>=)'
    )
    try:
        with open(requirements_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                match = line_re.match(line.strip())
                if match:
                    return match.group(1)
    except FileNotFoundError:
        pass
    return '>='  # Default
|
|
|
|
import os
|
|
# requirements.txt is expected one directory above the directory of this file.
_module_dir = os.path.dirname(__file__)
requirements_path = os.path.join(os.path.dirname(_module_dir), 'requirements.txt')
required_packages = parse_requirements_file(requirements_path)

# Packages whose installed versions are checked at startup.
packages_to_monitor = [
    "comfyui-frontend-package",
    "comfyui-workflow-templates",
    "av",
    "comfyui-embedded-docs",
    "pydantic",
    "pydantic-settings",
]

print("\n :: Checking package versions...")

# Handle pydantic packages first with special logic
handle_pydantic_packages(required_packages)

# Handle other packages
for package_name in packages_to_monitor:
    if package_name in ('pydantic', 'pydantic-settings'):
        continue  # already dealt with as a pair above
    if package_name not in required_packages:
        print(f" :: Warning: {package_name} not found in requirements.txt")
        continue
    operator = get_version_operator(requirements_path, package_name)
    ensure_package(package_name, required_packages[package_name], operator)

print(" :: Package version check complete.")
|
|
# ------------------- End Version Check -------------------
|
|
|
|
# ------------------- Triton Setup -------------------
|
|
print("\n :: ------------------------ ZLUDA ----------------------- :: ")

# identify device and set triton arch override
if torch.cuda.is_available():
    zluda_device_name = torch.cuda.get_device_name()
else:
    zluda_device_name = ""

# ZLUDA is recognized by the "[ZLUDA]" suffix of the reported device name
is_zluda = zluda_device_name.endswith("[ZLUDA]")
if is_zluda:
    set_triton_arch_override()
|
|
|
|
# This needs to be up here, so it can disable cudnn before anything can even think about using it.
# It is deliberately outside the Triton try block: the TORCH_BACKENDS_CUDNN_ENABLED
# switch must be honored even when Triton is not installed.
torch.backends.cudnn.enabled = os.environ.get("TORCH_BACKENDS_CUDNN_ENABLED", "1").strip().lower() not in {"0", "off", "false", "disable", "disabled", "no"}
if torch.backends.cudnn.enabled:
    print(" :: Enabled cuDNN")
else:
    print(" :: Disabled cuDNN")

try:
    import triton
    import triton.language as tl
    print(" :: Triton core imported successfully")

    @triton.jit
    def _zluda_kernel_test(x_ptr, y_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
        # Minimal kernel used only as a smoke test: y = x + 1
        pid = tl.program_id(axis=0)
        offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
        mask = offsets < n_elements
        x = tl.load(x_ptr + offsets, mask=mask)
        tl.store(y_ptr + offsets, x + 1, mask=mask)

    def _verify_triton() -> bool:
        """Launch the smoke-test kernel on 64 ones and check that the result is x + 1."""
        try:
            print(" :: Running Triton kernel test...")
            x = torch.ones(64, device='cuda')
            y = torch.empty_like(x)
            _zluda_kernel_test[(1,)](x, y, x.numel(), BLOCK_SIZE=64)
            if torch.allclose(y, x + 1):
                print(" :: Triton kernel test passed successfully")
                return True
            print(" :: Triton kernel test failed (incorrect output)")
            return False
        except Exception as e:
            print(f" :: Triton test failed: {str(e)}")
            return False

    # triton_available is True only if the kernel compiled, ran and produced
    # the expected output.
    triton_available = _verify_triton()
    if triton_available:
        print(" :: Triton initialized successfully")
        os.environ['FLASH_ATTENTION_TRITON_AMD_AUTOTUNE'] = 'TRUE'
    else:
        print(" :: Triton available but failed verification")

except ImportError:
    print(" :: Triton not installed")
    triton_available = False
except Exception as e:
    print(f" :: Triton initialization failed: {str(e)}")
    triton_available = False
|
|
# ------------------- End Triton Verification -------------------
|
|
|
|
# # ------------------- ZLUDA Core Implementation -------------------
|
|
# Memory bus width per GPU, keyed by the device name as reported by torch
# with the trailing " [ZLUDA]" marker removed. do_hijack() looks names up here
# to fill Triton's "mem_bus_width" device property and falls back to 128 for
# names that are not listed. Values are presumably in bits -- TODO confirm.
MEM_BUS_WIDTH = {
    # Radeon RX 9000 series
    "AMD Radeon RX 9070 XT": 256,
    "AMD Radeon RX 9070": 256,
    "AMD Radeon RX 9070 GRE": 192,
    "AMD Radeon RX 9060 XT": 128,
    "AMD Radeon RX 9060": 128,
    # Radeon RX 7000 series
    "AMD Radeon RX 7900 XTX": 384,
    "AMD Radeon RX 7900 XT": 320,
    "AMD Radeon RX 7900 GRE": 256,
    "AMD Radeon RX 7800 XT": 256,
    "AMD Radeon RX 7700 XT": 192,
    "AMD Radeon RX 7700": 192,
    "AMD Radeon RX 7650 GRE": 128,
    "AMD Radeon RX 7600 XT": 128,
    "AMD Radeon RX 7600": 128,
    "AMD Radeon RX 7400": 128,
    # Radeon RX 6000 series
    "AMD Radeon RX 6950 XT": 256,
    "AMD Radeon RX 6900 XT": 256,
    "AMD Radeon RX 6800 XT": 256,
    "AMD Radeon RX 6800": 256,
    "AMD Radeon RX 6750 XT": 192,
    "AMD Radeon RX 6750 GRE 12GB": 192,
    "AMD Radeon RX 6750 GRE 10GB": 160,
    "AMD Radeon RX 6700 XT": 192,
    "AMD Radeon RX 6700": 160,
    "AMD Radeon RX 6650 XT": 128,
    "AMD Radeon RX 6600 XT": 128,
    "AMD Radeon RX 6600": 128,
    "AMD Radeon RX 6500 XT": 64,
    "AMD Radeon RX 6400": 64,
    # Radeon RX 5000 series
    "AMD Radeon RX 5700 XT": 256,
    "AMD Radeon RX 5700": 256,
    "AMD Radeon RX 5600 XT": 192,
    "AMD Radeon RX 5500 XT": 128,
    "AMD Radeon RX 5500": 128,
    "AMD Radeon RX 5300": 96,
    # AMD Radeon Pro R9000/W7000/W6000/W5000 series, Apple exclusive WX series not listed
    "AMD Radeon AI PRO R9700": 256,
    "AMD Radeon PRO W7900": 384,
    "AMD Radeon PRO W7800 48GB": 384,
    "AMD Radeon PRO W7800": 256,
    "AMD Radeon PRO W7700": 256,
    "AMD Radeon PRO W7600": 128,
    "AMD Radeon PRO W7500": 128,
    "AMD Radeon PRO W7400": 128,
    "AMD Radeon PRO W6800": 256,
    "AMD Radeon PRO W6600": 128,
    "AMD Radeon PRO W6400": 64,
    "AMD Radeon PRO W5700": 256,
    "AMD Radeon PRO W5500": 128,
}
|
|
|
|
# ------------------- Device Properties Implementation -------------------
|
|
class DeviceProperties:
    """Read-only proxy around a device-properties object with fixed overrides.

    Attribute reads return the value from PROPERTIES_OVERRIDE when the name is
    listed there and are otherwise forwarded to the wrapped object.
    """

    # Values returned instead of whatever the wrapped object reports
    PROPERTIES_OVERRIDE = {"regs_per_multiprocessor": 65535, "gcnArchName": "UNKNOWN ARCHITECTURE"}
    # The wrapped properties object
    internal: torch._C._CudaDeviceProperties

    def __init__(self, props: torch._C._CudaDeviceProperties):
        self.internal = props

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails
        if name in DeviceProperties.PROPERTIES_OVERRIDE:
            return DeviceProperties.PROPERTIES_OVERRIDE[name]
        if name == "internal":
            # `internal` is not set yet (instance created without __init__,
            # e.g. while copying/unpickling). Raising here avoids infinite
            # recursion through `self.internal` below.
            raise AttributeError(name)
        return getattr(self.internal, name)
|
|
|
|
# # ------------------- Audio Ops Patch -------------------
|
|
# if is_zluda:
|
|
# _torch_stft = torch.stft
|
|
# _torch_istft = torch.istft
|
|
|
|
# def z_stft(input: torch.Tensor, window: torch.Tensor, *args, **kwargs):
|
|
# return _torch_stft(input=input.cpu(), window=window.cpu(), *args, **kwargs).to(input.device)
|
|
|
|
# def z_istft(input: torch.Tensor, window: torch.Tensor, *args, **kwargs):
|
|
# return _torch_istft(input=input.cpu(), window=window.cpu(), *args, **kwargs).to(input.device)
|
|
|
|
# def z_jit(f, *_, **__):
|
|
# f.graph = torch._C.Graph()
|
|
# return f
|
|
|
|
# torch._dynamo.config.suppress_errors = True
|
|
# torch.stft = z_stft
|
|
# torch.istft = z_istft
|
|
# torch.jit.script = z_jit
|
|
# # ------------------- End Audio Patch -------------------
|
|
|
|
# ------------------- Top-K Fallback Patch -------------------
|
|
if is_zluda:
    # Keep a handle on the real implementation before torch.topk is replaced
    _topk = torch.topk

    def safe_topk(input: torch.Tensor, *args, **kwargs):
        """Run topk on a CPU copy of ``input`` and return the result on the input's device."""
        origin_device = input.device
        cpu_result = _topk(input.cpu(), *args, **kwargs)
        top_values, top_indices = cpu_result
        return torch.return_types.topk((top_values.to(origin_device), top_indices.to(origin_device),))

    torch.topk = safe_topk
|
|
# ------------------- End Top-K Patch -------------------
|
|
|
|
# ------------------- ONNX Runtime Patch -------------------
|
|
try:
    import onnxruntime as ort

    if is_zluda:
        print(" :: Patching ONNX Runtime for ZLUDA — disabling CUDA EP.")

        # Store original get_available_providers
        original_get_available_providers = ort.get_available_providers

        def filtered_providers():
            """Return the available execution providers without the CUDA one."""
            return [ep for ep in original_get_available_providers() if ep != "CUDAExecutionProvider"]

        # Patch ONLY the _pybind_state version (used during session creation)
        ort.capi._pybind_state.get_available_providers = filtered_providers

        # Wrap InferenceSession to force CPU provider when CUDA is explicitly requested
        OriginalSession = ort.InferenceSession

        def _requests_cuda(providers):
            """Return True if ``providers`` names the CUDA EP, as a string or a (name, options) pair."""
            for entry in providers:
                provider_name = entry[0] if isinstance(entry, (tuple, list)) and entry else entry
                if provider_name == "CUDAExecutionProvider":
                    return True
            return False

        class SafeInferenceSession(OriginalSession):
            """InferenceSession that replaces an explicit CUDA request with the CPU provider."""

            def __init__(self, *args, providers=None, **kwargs):
                if providers and _requests_cuda(providers):
                    print(" :: Forcing ONNX to use CPUExecutionProvider instead of CUDA.")
                    providers = ["CPUExecutionProvider"]
                    # Options were written for the replaced provider list and
                    # would no longer line up with it.
                    kwargs.pop("provider_options", None)
                super().__init__(*args, providers=providers, **kwargs)

        ort.InferenceSession = SafeInferenceSession
except ImportError:
    print(" :: ONNX Runtime not installed — skipping patch.")
except Exception as e:
    print(" :: Failed to patch ONNX Runtime:", e)
|
|
# ------------------- End ONNX Patch -------------------
|
|
|
|
# ------------------- ZLUDA hijack ---------------------
|
|
def do_nothing(_):
    """Accept one argument, ignore it and return None.

    Used as a no-op replacement for a torch backend toggle so later calls to
    that toggle have no effect.
    """
    return None
|
|
def do_hijack():
    """Apply the ZLUDA-specific runtime patches; does nothing unless ``is_zluda`` is set.

    When Triton passed verification, Triton's device-properties lookup is
    wrapped to add ``mem_bus_width`` and, if the AMD flash-attention interface
    can be imported, ``torch.nn.functional.scaled_dot_product_attention`` is
    replaced by a wrapper that uses it where possible. Afterwards the torch
    SDP backend switches are configured.
    """
    if not is_zluda:
        return
    print(f" :: Using ZLUDA with device: {zluda_device_name}")
    print(" :: Applying core ZLUDA patches...")

    # 2. Triton optimizations
    if triton_available:
        print(" :: Initializing Triton optimizations")
        try:
            # General Triton config
            print(" :: Configuring Triton device properties...")
            _get_props = triton.runtime.driver.active.utils.get_device_properties

            def patched_props(device):
                """Return Triton's device properties extended with mem_bus_width."""
                props = _get_props(device)
                name = torch.cuda.get_device_name()[:-8]  # Remove [ZLUDA]
                props["mem_bus_width"] = MEM_BUS_WIDTH.get(name, 128)
                if name not in MEM_BUS_WIDTH:
                    print(f' :: Using default mem_bus_width=128 for {name}')
                return props

            triton.runtime.driver.active.utils.get_device_properties = patched_props
            print(" :: Triton device properties configured")

            # Flash Attention
            flash_enabled = False
            try:
                from comfy.flash_attn_triton_amd import interface_fa
                print(" :: Flash attention components found")

                original_sdpa = torch.nn.functional.scaled_dot_product_attention

                def amd_flash_wrapper(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False, scale=None, **kwargs):
                    """SDPA replacement: use AMD flash attention when eligible, else the original SDPA.

                    Extra keyword arguments accepted by newer torch versions
                    (e.g. ``enable_gqa``) are forwarded to the original
                    implementation instead of raising TypeError.
                    """
                    try:
                        # The flash path has no grouped-query support, so it is
                        # only taken when no such option is requested.
                        plain_call = set(kwargs) <= {"enable_gqa"} and not kwargs.get("enable_gqa", False)
                        if (plain_call and
                                query.shape[-1] <= 128 and
                                attn_mask is None and  # fix flash-attention error : "Flash attention error: Boolean value of Tensor with more than one value is ambiguous"
                                query.dtype != torch.float32):
                            flash_scale = query.shape[-1] ** -0.5 if scale is None else scale
                            return interface_fa.fwd(
                                query.transpose(1, 2),
                                key.transpose(1, 2),
                                value.transpose(1, 2),
                                None, None, dropout_p, flash_scale,
                                is_causal, -1, -1, 0.0, False, None
                            )[0].transpose(1, 2)
                    except Exception as e:
                        print(f' :: Flash attention error: {str(e)}')
                    return original_sdpa(query=query, key=key, value=value, attn_mask=attn_mask, dropout_p=dropout_p, is_causal=is_causal, scale=scale, **kwargs)

                torch.nn.functional.scaled_dot_product_attention = amd_flash_wrapper
                flash_enabled = True
                print(" :: AMD flash attention enabled successfully")

            except ImportError:
                print(" :: Flash attention components not installed")
            except Exception as e:
                print(f" :: Flash attention setup failed: {str(e)}")

            # Other Triton optimizations
            if not flash_enabled:
                print(" :: Applying basic Triton optimizations")
                # Add other Triton optimizations here
                # ...

        except Exception as e:
            print(f" :: Triton optimization failed: {str(e)}")
    else:
        print(" :: Triton optimizations skipped (not available)")

    # 3. Common configurations
    print(" :: Configuring PyTorch backends...")
    torch.backends.cuda.enable_mem_efficient_sdp(False)
    # Neutralize the toggle so later callers cannot switch it back on
    torch.backends.cuda.enable_mem_efficient_sdp = do_nothing
    if hasattr(torch.backends.cuda, "enable_flash_sdp"):
        # NOTE(review): the call passes True while the message says "Disabled";
        # one of the two is wrong -- confirm the intended setting.
        torch.backends.cuda.enable_flash_sdp(True)
        print(" :: Disabled CUDA flash attention")
    if hasattr(torch.backends.cuda, "enable_math_sdp"):
        torch.backends.cuda.enable_math_sdp(True)
        print(" :: Enabled math attention fallback")

    print(" :: ZLUDA initialization complete")
    print(" :: ------------------------ ZLUDA ----------------------- :: \n")
|
|
|
|
# Apply the ZLUDA patches, or just report which device was found.
if not is_zluda:
    print(f" :: CUDA device detected: {zluda_device_name or 'None'}")
else:
    do_hijack()
|