mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-24 07:57:29 +08:00
SelectXDevice: address code-review follow-ups
True reset semantics for "default": - On first selector application, cache the loader's original load_device / offload_device on the underlying model object (which is shared across patcher clones) and restore those base values when the user picks "default". Previously "default" meant "passthrough" so SelectXDevice(gpu:1) -> SelectXDevice(default) silently kept the gpu:1 routing. CPU + dynamic VRAM: - When SelectModelDevice / SelectCLIPDevice resolves to CPU on a ModelPatcherDynamic, also call clone(disable_dynamic=True) so the result is a plain ModelPatcher, matching ModelPatcherDynamic.__new__'s intent that CPU loads never run through the dynamic path. Fallback to the regular dynamic clone if disable_dynamic is unsupported on that patcher. MultiGPU collision pruning: - After SelectModelDevice retargets the primary patcher, drop any multigpu clone (from a prior MultiGPU CFG Split) whose load_device now matches the primary; otherwise two patchers would be bound to the same device. Logs the prune at info level. SelectVAEDevice: reject CPU at runtime: - The UI uses get_gpu_device_options_no_cpu(), but a workflow opened from another machine could still pass "cpu" through validate_inputs. Detect that case explicitly, log a "CPU is not a supported choice" passthrough message, and leave the VAE unchanged. Cosmetic: - Update VAE node docstring to accurately reflect the runtime CPU rejection rather than the older "intentionally not offered" claim. - Demote the fallback warnings inside resolve_gpu_device_option to no log at all; the Select*Device nodes now own a single context-rich info-level message per failed lookup, so there is no double logging. Amp-Thread-ID: https://ampcode.com/threads/T-019e52b4-31ee-72cd-996b-64ecd9420e13 Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
parent
9ee1540882
commit
b319c8088b
@ -268,8 +268,10 @@ def resolve_gpu_device_option(option: str):
|
||||
|
||||
Returns None for "default" (let the caller use its normal default).
|
||||
Returns torch.device("cpu") for "cpu".
|
||||
For "gpu:N", returns the Nth torch device. Falls back to None if
|
||||
the index is out of range (caller should use default).
|
||||
For "gpu:N", returns the Nth torch device. Returns None if the
|
||||
index is out of range, the option string is malformed, or
|
||||
unrecognized (callers are expected to log their own context-rich
|
||||
message before falling back to the default device).
|
||||
"""
|
||||
if option is None or option == "default":
|
||||
return None
|
||||
@ -278,16 +280,11 @@ def resolve_gpu_device_option(option: str):
|
||||
if option.startswith("gpu:"):
|
||||
try:
|
||||
idx = int(option[4:])
|
||||
devices = get_all_torch_devices()
|
||||
if 0 <= idx < len(devices):
|
||||
return devices[idx]
|
||||
else:
|
||||
logging.warning(f"Device '{option}' not available (only {len(devices)} GPU(s)), using default.")
|
||||
return None
|
||||
except (ValueError, IndexError):
|
||||
logging.warning(f"Invalid device option '{option}', using default.")
|
||||
except ValueError:
|
||||
return None
|
||||
logging.warning(f"Unrecognized device option '{option}', using default.")
|
||||
devices = get_all_torch_devices()
|
||||
if 0 <= idx < len(devices):
|
||||
return devices[idx]
|
||||
return None
|
||||
|
||||
@contextmanager
|
||||
|
||||
@ -46,15 +46,90 @@ class MultiGPUCFGSplitNode(io.ComfyNode):
|
||||
return io.NodeOutput(model)
|
||||
|
||||
|
||||
def _remember_base_devices(patcher: ModelPatcher):
|
||||
"""Stash the original load/offload device on the underlying model.
|
||||
|
||||
Stored on patcher.model (which is shared across patcher clones), so
|
||||
repeated selector applications can recover the loader's original
|
||||
routing when the user picks "default".
|
||||
"""
|
||||
if not hasattr(patcher.model, "_select_base_load_device"):
|
||||
patcher.model._select_base_load_device = patcher.load_device
|
||||
patcher.model._select_base_offload_device = patcher.offload_device
|
||||
|
||||
|
||||
def _apply_patcher_device(patcher: ModelPatcher, resolved, base_offload_override=None):
|
||||
"""Apply *resolved* to a freshly-cloned patcher; respect base devices on default.
|
||||
|
||||
Returns the (possibly newly-replaced) patcher. For CPU on a dynamic
|
||||
patcher, also tries to downgrade to a plain ModelPatcher so the
|
||||
dynamic-only code paths are bypassed (best-effort: silently keeps
|
||||
the dynamic patcher if downgrade is not supported).
|
||||
"""
|
||||
_remember_base_devices(patcher)
|
||||
base_load = patcher.model._select_base_load_device
|
||||
base_offload = base_offload_override if base_offload_override is not None else patcher.model._select_base_offload_device
|
||||
|
||||
if resolved is None:
|
||||
# "default" -> reset routing to whatever the loader produced
|
||||
patcher.load_device = base_load
|
||||
patcher.offload_device = base_offload
|
||||
elif resolved.type == "cpu":
|
||||
if patcher.is_dynamic():
|
||||
try:
|
||||
patcher = patcher.clone(disable_dynamic=True)
|
||||
except Exception:
|
||||
# Downgrade unavailable (no cached_patcher_init); fall
|
||||
# back to the existing dynamic patcher.
|
||||
pass
|
||||
patcher.load_device = resolved
|
||||
patcher.offload_device = resolved
|
||||
else:
|
||||
patcher.load_device = resolved
|
||||
patcher.offload_device = base_offload
|
||||
|
||||
if hasattr(patcher, "register_load_device"):
|
||||
patcher.register_load_device(patcher.load_device)
|
||||
return patcher
|
||||
|
||||
|
||||
def _prune_multigpu_collision(model: ModelPatcher, primary_device):
|
||||
"""Drop any multigpu clone whose load_device matches *primary_device*.
|
||||
|
||||
Without pruning, MultiGPU CFG Split would have stacked a clone on
|
||||
the same device the primary now occupies (i.e. the workflow places
|
||||
MultiGPU CFG Split before Select Model Device). Keeps the clone set
|
||||
consistent with the new primary placement.
|
||||
"""
|
||||
multigpu_models = model.get_additional_models_with_key("multigpu")
|
||||
if not multigpu_models:
|
||||
return
|
||||
filtered = [m for m in multigpu_models if m.load_device != primary_device]
|
||||
if len(filtered) != len(multigpu_models):
|
||||
logging.info(f"Select Model Device: pruning MultiGPU clone on {primary_device} that now collides with the primary model.")
|
||||
model.set_additional_models("multigpu", filtered)
|
||||
if hasattr(model, "match_multigpu_clones"):
|
||||
model.match_multigpu_clones()
|
||||
|
||||
|
||||
class SelectModelDeviceNode(io.ComfyNode):
|
||||
"""
|
||||
Place the diffusion model on a specific device (default / cpu / gpu:N).
|
||||
|
||||
- "default" restores the device assigned by the loader (even after a
|
||||
prior Select Model Device call).
|
||||
- "cpu" pins both the load and offload device to CPU.
|
||||
- "gpu:N" pins the load device to the Nth available GPU; the offload
|
||||
device is restored to the loader's original choice.
|
||||
|
||||
If the workflow already has MultiGPU CFG Split applied and the chosen
|
||||
GPU collides with one of the existing multigpu clones, that clone is
|
||||
dropped so two patchers don't end up bound to the same device.
|
||||
|
||||
When the selected device does not exist on the current machine
|
||||
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
||||
the node passes the model through unchanged and logs a message
|
||||
instead of failing. This keeps workflows portable across machines
|
||||
with different GPU counts.
|
||||
instead of failing.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
@ -83,15 +158,12 @@ class SelectModelDeviceNode(io.ComfyNode):
|
||||
def execute(cls, model: ModelPatcher, device: str = "default") -> io.NodeOutput:
|
||||
model = model.clone()
|
||||
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
||||
if resolved is None:
|
||||
if device not in (None, "default"):
|
||||
logging.info(f"Select Model Device: requested device '{device}' not available, passing through unchanged.")
|
||||
if resolved is None and device not in (None, "default"):
|
||||
logging.info(f"Select Model Device: requested device '{device}' not available, passing through unchanged.")
|
||||
return io.NodeOutput(model)
|
||||
model.load_device = resolved
|
||||
if resolved.type == "cpu":
|
||||
model.offload_device = resolved
|
||||
if hasattr(model, "register_load_device"):
|
||||
model.register_load_device(resolved)
|
||||
model = _apply_patcher_device(model, resolved)
|
||||
if resolved is not None:
|
||||
_prune_multigpu_collision(model, model.load_device)
|
||||
return io.NodeOutput(model)
|
||||
|
||||
|
||||
@ -99,11 +171,14 @@ class SelectCLIPDeviceNode(io.ComfyNode):
|
||||
"""
|
||||
Place the CLIP text encoder on a specific device (default / cpu / gpu:N).
|
||||
|
||||
- "default" restores the device assigned by the loader.
|
||||
- "cpu" pins both the load and offload device to CPU.
|
||||
- "gpu:N" pins the load device to the Nth available GPU.
|
||||
|
||||
When the selected device does not exist on the current machine
|
||||
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
||||
the node passes the CLIP through unchanged and logs a message
|
||||
instead of failing. This keeps workflows portable across machines
|
||||
with different GPU counts.
|
||||
instead of failing.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
@ -130,15 +205,10 @@ class SelectCLIPDeviceNode(io.ComfyNode):
|
||||
def execute(cls, clip: CLIP, device: str = "default") -> io.NodeOutput:
|
||||
clip = clip.clone()
|
||||
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
||||
if resolved is None:
|
||||
if device not in (None, "default"):
|
||||
logging.info(f"Select CLIP Device: requested device '{device}' not available, passing through unchanged.")
|
||||
if resolved is None and device not in (None, "default"):
|
||||
logging.info(f"Select CLIP Device: requested device '{device}' not available, passing through unchanged.")
|
||||
return io.NodeOutput(clip)
|
||||
clip.patcher.load_device = resolved
|
||||
if resolved.type == "cpu":
|
||||
clip.patcher.offload_device = resolved
|
||||
if hasattr(clip.patcher, "register_load_device"):
|
||||
clip.patcher.register_load_device(resolved)
|
||||
clip.patcher = _apply_patcher_device(clip.patcher, resolved)
|
||||
return io.NodeOutput(clip)
|
||||
|
||||
|
||||
@ -146,13 +216,18 @@ class SelectVAEDeviceNode(io.ComfyNode):
|
||||
"""
|
||||
Place the VAE on a specific device (default / gpu:N).
|
||||
|
||||
CPU is intentionally not offered as a choice; VAE on CPU is impractical.
|
||||
- "default" restores the device assigned by the loader.
|
||||
- "gpu:N" pins the load device to the Nth available GPU; the offload
|
||||
device is set to the standard VAE offload device.
|
||||
|
||||
CPU is intentionally not exposed in the UI for the VAE; if a workflow
|
||||
supplies "cpu" anyway (e.g. opened from another machine), the request
|
||||
is dropped with a log message and the VAE is passed through unchanged.
|
||||
|
||||
When the selected device does not exist on the current machine
|
||||
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
||||
the node passes the VAE through unchanged and logs a message
|
||||
instead of failing. This keeps workflows portable across machines
|
||||
with different GPU counts.
|
||||
instead of failing.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
@ -182,15 +257,20 @@ class SelectVAEDeviceNode(io.ComfyNode):
|
||||
vae = copy.copy(vae)
|
||||
vae.patcher = vae.patcher.clone()
|
||||
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
||||
if resolved is None:
|
||||
if device not in (None, "default"):
|
||||
logging.info(f"Select VAE Device: requested device '{device}' not available, passing through unchanged.")
|
||||
if resolved is None and device not in (None, "default"):
|
||||
logging.info(f"Select VAE Device: requested device '{device}' not available, passing through unchanged.")
|
||||
return io.NodeOutput(vae)
|
||||
vae.device = resolved
|
||||
vae.patcher.load_device = resolved
|
||||
vae.patcher.offload_device = comfy.model_management.vae_offload_device()
|
||||
if hasattr(vae.patcher, "register_load_device"):
|
||||
vae.patcher.register_load_device(resolved)
|
||||
if resolved is not None and resolved.type == "cpu":
|
||||
logging.info("Select VAE Device: CPU is not a supported choice, passing through unchanged.")
|
||||
return io.NodeOutput(vae)
|
||||
vae.patcher = _apply_patcher_device(
|
||||
vae.patcher, resolved,
|
||||
base_offload_override=comfy.model_management.vae_offload_device(),
|
||||
)
|
||||
# VAE caches the working device separately from its patcher.
|
||||
if not hasattr(vae, "_select_base_device"):
|
||||
vae._select_base_device = vae.device
|
||||
vae.device = vae._select_base_device if resolved is None else resolved
|
||||
return io.NodeOutput(vae)
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user