From 5ffea26de7a4fe046b0c95dcb85195e56f9677d6 Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski <kosinkadink1@gmail.com>
Date: Fri, 22 May 2026 17:13:55 -0700
Subject: [PATCH] Fix single-GPU non-CUDA regressions on worksplit-multigpu

Two fixes for single-GPU users on non-NVIDIA backends; multi-GPU
non-CUDA support is intentionally out of scope here (tracked separately).

1. get_all_torch_devices: add AMD/ROCm, MLU, and a generic fallback arm.

   Previously the function only enumerated NVIDIA, Intel XPU, and Ascend
   NPU when cpu_state==GPU; on AMD/ROCm (which exposes its GPU through
   torch.cuda.*) and DirectML it fell through to an empty list. The
   biggest user-visible regression: unload_all_models() iterates this
   list, so it became a silent no-op on AMD/ROCm. As a result, /free,
   manager unloads, and shutdown all stopped releasing VRAM.

   - is_amd() now shares the torch.cuda.* arm with is_nvidia(), since
     ROCm reuses the CUDA API surface.
   - is_mlu() gets its own arm using torch.mlu.device_count().
   - A final fallback appends get_torch_device() for any GPU backend
     the explicit arms miss (notably DirectML), so callers see at
     least the current device and unload_all_models works.

   MPS users are unaffected: cpu_state==MPS already routes to the
   else branch, which appends get_torch_device() (the mps device).

2. main.py DynamicVRAM init: guard the comfy_aimdo branch with an
   explicit is_nvidia() check.

   The outer condition allows entering the DynamicVRAM init block when
   the user passes --enable-dynamic-vram explicitly, bypassing the
   implicit is_nvidia() gate. On non-NVIDIA backends this then runs
   comfy_aimdo.control.init_devices(range(torch.cuda.device_count())),
   which comfy-aimdo is only expected to handle on NVIDIA and which may
   crash at startup. Add a leading is_nvidia() check that logs a clean
   warning and falls back to the legacy ModelPatcher path.
---
 comfy/model_management.py | 13 ++++++++++++-
 main.py                   |  9 ++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 051062a90..c146eee11 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -214,7 +214,10 @@ def get_all_torch_devices(exclude_current=False):
     global cpu_state
     devices = []
     if cpu_state == CPUState.GPU:
-        if is_nvidia():
+        # NVIDIA + AMD/ROCm both expose their GPUs through torch.cuda.*;
+        # without the AMD arm, single-GPU ROCm users get an empty list
+        # which silently turns unload_all_models() into a no-op.
+        if is_nvidia() or is_amd():
             for i in range(torch.cuda.device_count()):
                 devices.append(torch.device("cuda", i))
         elif is_intel_xpu():
@@ -223,6 +226,14 @@ def get_all_torch_devices(exclude_current=False):
         elif is_ascend_npu():
             for i in range(torch.npu.device_count()):
                 devices.append(torch.device("npu", i))
+        elif is_mlu():
+            for i in range(torch.mlu.device_count()):
+                devices.append(torch.device("mlu", i))
+        else:
+            # Fallback for unhandled GPU backends (e.g. DirectML): at least
+            # report the current device so callers like unload_all_models()
+            # do not silently no-op.
+            devices.append(get_torch_device())
     else:
         devices.append(get_torch_device())
     if exclude_current:
diff --git a/main.py b/main.py
index 9933d11ee..9b22d1304 100644
--- a/main.py
+++ b/main.py
@@ -216,7 +216,14 @@ import comfy.memory_management
 import comfy.model_patcher
 
 if args.enable_dynamic_vram or (enables_dynamic_vram() and comfy.model_management.is_nvidia() and not comfy.model_management.is_wsl()):
-    if (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
+    if not comfy.model_management.is_nvidia():
+        # The implicit auto-enable path is already gated by is_nvidia();
+        # this guard handles users who pass --enable-dynamic-vram explicitly
+        # on a non-NVIDIA system, where the comfy_aimdo init below would get
+        # either zero devices (silently disabling) or, on ROCm, non-NVIDIA
+        # torch.cuda devices and may crash. Be explicit and disable cleanly.
+        logging.warning("DynamicVRAM was requested but no NVIDIA GPU was detected. Falling back to legacy ModelPatcher.")
+    elif (not args.enable_dynamic_vram) and (comfy.model_management.torch_version_numeric < (2, 8)):
         logging.warning("Unsupported Pytorch detected. DynamicVRAM support requires Pytorch version 2.8 or later. Falling back to legacy ModelPatcher. VRAM estimates may be unreliable especially on Windows")
     elif comfy_aimdo.control.init_devices(range(torch.cuda.device_count())):
         if args.verbose == 'DEBUG':