Merge pull request #14068 from Comfy-Org/fix/single-gpu-non-cuda

Fix single-GPU non-CUDA regressions on worksplit-multigpu (AMD/ROCm unload, DynamicVRAM crash)
This commit is contained in:
Jedrzej Kosinski 2026-05-22 17:30:57 -07:00 committed by GitHub
commit e6c65fa7ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 20 additions and 2 deletions

View File

@@ -214,7 +214,10 @@ def get_all_torch_devices(exclude_current=False):
global cpu_state global cpu_state
devices = [] devices = []
if cpu_state == CPUState.GPU: if cpu_state == CPUState.GPU:
if is_nvidia(): # NVIDIA + AMD/ROCm both expose their GPUs through torch.cuda.*;
# without the AMD arm, single-GPU ROCm users get an empty list
# which silently turns unload_all_models() into a no-op.
if is_nvidia() or is_amd():
for i in range(torch.cuda.device_count()): for i in range(torch.cuda.device_count()):
devices.append(torch.device("cuda", i)) devices.append(torch.device("cuda", i))
elif is_intel_xpu(): elif is_intel_xpu():
@@ -223,6 +226,14 @@ def get_all_torch_devices(exclude_current=False):
elif is_ascend_npu(): elif is_ascend_npu():
for i in range(torch.npu.device_count()): for i in range(torch.npu.device_count()):
devices.append(torch.device("npu", i)) devices.append(torch.device("npu", i))
elif is_mlu():
for i in range(torch.mlu.device_count()):
devices.append(torch.device("mlu", i))
else:
# Fallback for unhandled GPU backends (e.g. DirectML): at least
# report the current device so callers like unload_all_models()
# do not silently no-op.
devices.append(get_torch_device())
else: else:
devices.append(get_torch_device()) devices.append(get_torch_device())
if exclude_current: if exclude_current:

View File

@@ -216,7 +216,14 @@ import comfy.memory_management
import comfy.model_patcher import comfy.model_patcher
if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()): if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)): if not comfy.model_management.is_nvidia():
# The implicit auto-enable path is already gated by is_nvidia();
# this guard handles users who pass --enable-dynamic-vram explicitly
# on a non-NVIDIA system, where torch.cuda.device_count() below would
# either return 0 (silently disabling) or crash on backends that
# raise without CUDA. Be explicit and disable cleanly.
logging.warning("DynamicVRAM was requested but no NVIDIA GPU was detected. Falling back to legacy ModelPatcher.")
elif (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows") logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
elif comfy_aimdo.control.init_devices(range(torch.cuda.device_count())): elif comfy_aimdo.control.init_devices(range(torch.cuda.device_count())):
if args.verbose == 'DEBUG': if args.verbose == 'DEBUG':