From b319c8088b8f125332071d3fbfadcf5c26e830c3 Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski <kosinkadink1@gmail.com>
Date: Fri, 22 May 2026 22:29:45 -0700
Subject: [PATCH] SelectXDevice: address code-review follow-ups

True reset semantics for "default":
- On first selector application, cache the loader's original
  load_device / offload_device on the underlying model object (which
  is shared across patcher clones) and restore those base values when
  the user picks "default". Previously "default" meant "passthrough"
  so SelectXDevice(gpu:1) -> SelectXDevice(default) silently kept the
  gpu:1 routing.

CPU + dynamic VRAM:
- When SelectModelDevice / SelectCLIPDevice resolves to CPU on a
  ModelPatcherDynamic, also call clone(disable_dynamic=True) so the
  result is a plain ModelPatcher, matching ModelPatcherDynamic.__new__'s
  intent that CPU loads never run through the dynamic path. Fallback to
  the regular dynamic clone if disable_dynamic is unsupported on that
  patcher.

MultiGPU collision pruning:
- After SelectModelDevice retargets the primary patcher, drop any
  multigpu clone (from a prior MultiGPU CFG Split) whose load_device
  now matches the primary; otherwise two patchers would be bound to
  the same device. Logs the prune at info level.

SelectVAEDevice: reject CPU at runtime:
- The UI uses get_gpu_device_options_no_cpu(), but a workflow opened
  from another machine could still pass "cpu" through validate_inputs.
  Detect that case explicitly, log a "CPU is not a supported choice"
  passthrough message, and leave the VAE unchanged.

Cosmetic:
- Update VAE node docstring to accurately reflect the runtime CPU
  rejection rather than the older "intentionally not offered" claim.
- Demote the fallback warnings inside resolve_gpu_device_option to no
  log at all; the Select*Device nodes now own a single context-rich
  info-level message per failed lookup, so there is no double logging.

Amp-Thread-ID: https://ampcode.com/threads/T-019e52b4-31ee-72cd-996b-64ecd9420e13
Co-authored-by: Amp <amp@ampcode.com>
---
 comfy/model_management.py      |  19 ++---
 comfy_extras/nodes_multigpu.py | 142 ++++++++++++++++++++++++++-------
 2 files changed, 119 insertions(+), 42 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index d744f1745..3bce128b2 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -268,8 +268,10 @@ def resolve_gpu_device_option(option: str):
 
     Returns None for "default" (let the caller use its normal default).
     Returns torch.device("cpu") for "cpu".
-    For "gpu:N", returns the Nth torch device. Falls back to None if
-    the index is out of range (caller should use default).
+    For "gpu:N", returns the Nth torch device. Returns None if the
+    index is out of range, the option string is malformed, or
+    unrecognized (callers are expected to log their own context-rich
+    message before falling back to the default device).
     """
     if option is None or option == "default":
         return None
@@ -278,16 +280,11 @@ def resolve_gpu_device_option(option: str):
     if option.startswith("gpu:"):
         try:
             idx = int(option[4:])
-            devices = get_all_torch_devices()
-            if 0 <= idx < len(devices):
-                return devices[idx]
-            else:
-                logging.warning(f"Device '{option}' not available (only {len(devices)} GPU(s)), using default.")
-                return None
-        except (ValueError, IndexError):
-            logging.warning(f"Invalid device option '{option}', using default.")
+        except ValueError:
             return None
-    logging.warning(f"Unrecognized device option '{option}', using default.")
+        devices = get_all_torch_devices()
+        if 0 <= idx < len(devices):
+            return devices[idx]
     return None
 
 @contextmanager
diff --git a/comfy_extras/nodes_multigpu.py b/comfy_extras/nodes_multigpu.py
index 1fb134eca..0e109f426 100644
--- a/comfy_extras/nodes_multigpu.py
+++ b/comfy_extras/nodes_multigpu.py
@@ -46,15 +46,90 @@ class MultiGPUCFGSplitNode(io.ComfyNode):
         return io.NodeOutput(model)
 
 
+def _remember_base_devices(patcher: ModelPatcher):
+    """Stash the original load/offload device on the underlying model.
+
+    Stored on patcher.model (which is shared across patcher clones), so
+    repeated selector applications can recover the loader's original
+    routing when the user picks "default".
+    """
+    if not hasattr(patcher.model, "_select_base_load_device"):
+        patcher.model._select_base_load_device = patcher.load_device
+        patcher.model._select_base_offload_device = patcher.offload_device
+
+
+def _apply_patcher_device(patcher: ModelPatcher, resolved, base_offload_override=None):
+    """Apply *resolved* to a freshly-cloned patcher; respect base devices on default.
+
+    Returns the (possibly newly-replaced) patcher. For CPU on a dynamic
+    patcher, also tries to downgrade to a plain ModelPatcher so the
+    dynamic-only code paths are bypassed (best-effort: silently keeps
+    the dynamic patcher if downgrade is not supported).
+    """
+    _remember_base_devices(patcher)
+    base_load = patcher.model._select_base_load_device
+    base_offload = base_offload_override if base_offload_override is not None else patcher.model._select_base_offload_device
+
+    if resolved is None:
+        # "default" -> reset routing to whatever the loader produced
+        patcher.load_device = base_load
+        patcher.offload_device = base_offload
+    elif resolved.type == "cpu":
+        if patcher.is_dynamic():
+            try:
+                patcher = patcher.clone(disable_dynamic=True)
+            except Exception:
+                # Downgrade unavailable (no cached_patcher_init); fall
+                # back to the existing dynamic patcher.
+                pass
+        patcher.load_device = resolved
+        patcher.offload_device = resolved
+    else:
+        patcher.load_device = resolved
+        patcher.offload_device = base_offload
+
+    if hasattr(patcher, "register_load_device"):
+        patcher.register_load_device(patcher.load_device)
+    return patcher
+
+
+def _prune_multigpu_collision(model: ModelPatcher, primary_device):
+    """Drop any multigpu clone whose load_device matches *primary_device*.
+
+    Without pruning, MultiGPU CFG Split would have stacked a clone on
+    the same device the primary now occupies (i.e. the workflow places
+    MultiGPU CFG Split before Select Model Device). Keeps the clone set
+    consistent with the new primary placement.
+    """
+    multigpu_models = model.get_additional_models_with_key("multigpu")
+    if not multigpu_models:
+        return
+    filtered = [m for m in multigpu_models if m.load_device != primary_device]
+    if len(filtered) != len(multigpu_models):
+        logging.info(f"Select Model Device: pruning MultiGPU clone on {primary_device} that now collides with the primary model.")
+        model.set_additional_models("multigpu", filtered)
+        if hasattr(model, "match_multigpu_clones"):
+            model.match_multigpu_clones()
+
+
 class SelectModelDeviceNode(io.ComfyNode):
     """
     Place the diffusion model on a specific device (default / cpu / gpu:N).
 
+    - "default" restores the device assigned by the loader (even after a
+      prior Select Model Device call).
+    - "cpu" pins both the load and offload device to CPU.
+    - "gpu:N" pins the load device to the Nth available GPU; the offload
+      device is restored to the loader's original choice.
+
+    If the workflow already has MultiGPU CFG Split applied and the chosen
+    GPU collides with one of the existing multigpu clones, that clone is
+    dropped so two patchers don't end up bound to the same device.
+
     When the selected device does not exist on the current machine
     (e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
     the node passes the model through unchanged and logs a message
-    instead of failing. This keeps workflows portable across machines
-    with different GPU counts.
+    instead of failing.
     """
 
     @classmethod
@@ -83,15 +158,12 @@ class SelectModelDeviceNode(io.ComfyNode):
     def execute(cls, model: ModelPatcher, device: str = "default") -> io.NodeOutput:
         model = model.clone()
         resolved = comfy.model_management.resolve_gpu_device_option(device)
-        if resolved is None:
-            if device not in (None, "default"):
-                logging.info(f"Select Model Device: requested device '{device}' not available, passing through unchanged.")
+        if resolved is None and device not in (None, "default"):
+            logging.info(f"Select Model Device: requested device '{device}' not available, passing through unchanged.")
             return io.NodeOutput(model)
-        model.load_device = resolved
-        if resolved.type == "cpu":
-            model.offload_device = resolved
-        if hasattr(model, "register_load_device"):
-            model.register_load_device(resolved)
+        model = _apply_patcher_device(model, resolved)
+        if resolved is not None:
+            _prune_multigpu_collision(model, model.load_device)
         return io.NodeOutput(model)
 
 
@@ -99,11 +171,14 @@ class SelectCLIPDeviceNode(io.ComfyNode):
     """
     Place the CLIP text encoder on a specific device (default / cpu / gpu:N).
 
+    - "default" restores the device assigned by the loader.
+    - "cpu" pins both the load and offload device to CPU.
+    - "gpu:N" pins the load device to the Nth available GPU.
+
     When the selected device does not exist on the current machine
     (e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
     the node passes the CLIP through unchanged and logs a message
-    instead of failing. This keeps workflows portable across machines
-    with different GPU counts.
+    instead of failing.
     """
 
     @classmethod
@@ -130,15 +205,10 @@ class SelectCLIPDeviceNode(io.ComfyNode):
     def execute(cls, clip: CLIP, device: str = "default") -> io.NodeOutput:
         clip = clip.clone()
         resolved = comfy.model_management.resolve_gpu_device_option(device)
-        if resolved is None:
-            if device not in (None, "default"):
-                logging.info(f"Select CLIP Device: requested device '{device}' not available, passing through unchanged.")
+        if resolved is None and device not in (None, "default"):
+            logging.info(f"Select CLIP Device: requested device '{device}' not available, passing through unchanged.")
             return io.NodeOutput(clip)
-        clip.patcher.load_device = resolved
-        if resolved.type == "cpu":
-            clip.patcher.offload_device = resolved
-        if hasattr(clip.patcher, "register_load_device"):
-            clip.patcher.register_load_device(resolved)
+        clip.patcher = _apply_patcher_device(clip.patcher, resolved)
         return io.NodeOutput(clip)
 
 
@@ -146,13 +216,18 @@ class SelectVAEDeviceNode(io.ComfyNode):
     """
     Place the VAE on a specific device (default / gpu:N).
 
-    CPU is intentionally not offered as a choice; VAE on CPU is impractical.
+    - "default" restores the device assigned by the loader.
+    - "gpu:N" pins the load device to the Nth available GPU; the offload
+      device is set to the standard VAE offload device.
+
+    CPU is intentionally not exposed in the UI for the VAE; if a workflow
+    supplies "cpu" anyway (e.g. opened from another machine), the request
+    is dropped with a log message and the VAE is passed through unchanged.
 
     When the selected device does not exist on the current machine
     (e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
     the node passes the VAE through unchanged and logs a message
-    instead of failing. This keeps workflows portable across machines
-    with different GPU counts.
+    instead of failing.
     """
 
     @classmethod
@@ -182,15 +257,20 @@ class SelectVAEDeviceNode(io.ComfyNode):
         vae = copy.copy(vae)
         vae.patcher = vae.patcher.clone()
         resolved = comfy.model_management.resolve_gpu_device_option(device)
-        if resolved is None:
-            if device not in (None, "default"):
-                logging.info(f"Select VAE Device: requested device '{device}' not available, passing through unchanged.")
+        if resolved is None and device not in (None, "default"):
+            logging.info(f"Select VAE Device: requested device '{device}' not available, passing through unchanged.")
             return io.NodeOutput(vae)
-        vae.device = resolved
-        vae.patcher.load_device = resolved
-        vae.patcher.offload_device = comfy.model_management.vae_offload_device()
-        if hasattr(vae.patcher, "register_load_device"):
-            vae.patcher.register_load_device(resolved)
+        if resolved is not None and resolved.type == "cpu":
+            logging.info("Select VAE Device: CPU is not a supported choice, passing through unchanged.")
+            return io.NodeOutput(vae)
+        vae.patcher = _apply_patcher_device(
+            vae.patcher, resolved,
+            base_offload_override=comfy.model_management.vae_offload_device(),
+        )
+        # VAE caches the working device separately from its patcher.
+        if not hasattr(vae, "_select_base_device"):
+            vae._select_base_device = vae.device
+        vae.device = vae._select_base_device if resolved is None else resolved
         return io.NodeOutput(vae)