mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-26 00:47:24 +08:00
SelectXDevice: address code-review follow-ups
True reset semantics for "default":
- On first selector application, cache the loader's original load_device / offload_device on the underlying model object (which is shared across patcher clones) and restore those base values when the user picks "default". Previously "default" meant "passthrough", so SelectXDevice(gpu:1) -> SelectXDevice(default) silently kept the gpu:1 routing.

CPU + dynamic VRAM:
- When SelectModelDevice / SelectCLIPDevice resolves to CPU on a ModelPatcherDynamic, also call clone(disable_dynamic=True) so the result is a plain ModelPatcher, matching ModelPatcherDynamic.__new__'s intent that CPU loads never run through the dynamic path. Fall back to the regular dynamic clone if disable_dynamic is unsupported on that patcher.

MultiGPU collision pruning:
- After SelectModelDevice retargets the primary patcher, drop any multigpu clone (from a prior MultiGPU CFG Split) whose load_device now matches the primary; otherwise two patchers would be bound to the same device. Logs the prune at info level.

SelectVAEDevice: reject CPU at runtime:
- The UI uses get_gpu_device_options_no_cpu(), but a workflow opened from another machine could still pass "cpu" through validate_inputs. Detect that case explicitly, log a "CPU is not a supported choice" passthrough message, and leave the VAE unchanged.

Cosmetic:
- Update VAE node docstring to accurately reflect the runtime CPU rejection rather than the older "intentionally not offered" claim.
- Demote the fallback warnings inside resolve_gpu_device_option to no log at all; the Select*Device nodes now own a single context-rich info-level message per failed lookup, so there is no double logging.

Amp-Thread-ID: https://ampcode.com/threads/T-019e52b4-31ee-72cd-996b-64ecd9420e13
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
parent
9ee1540882
commit
b319c8088b
@ -268,8 +268,10 @@ def resolve_gpu_device_option(option: str):
|
|||||||
|
|
||||||
Returns None for "default" (let the caller use its normal default).
|
Returns None for "default" (let the caller use its normal default).
|
||||||
Returns torch.device("cpu") for "cpu".
|
Returns torch.device("cpu") for "cpu".
|
||||||
For "gpu:N", returns the Nth torch device. Falls back to None if
|
For "gpu:N", returns the Nth torch device. Returns None if the
|
||||||
the index is out of range (caller should use default).
|
index is out of range, the option string is malformed, or
|
||||||
|
unrecognized (callers are expected to log their own context-rich
|
||||||
|
message before falling back to the default device).
|
||||||
"""
|
"""
|
||||||
if option is None or option == "default":
|
if option is None or option == "default":
|
||||||
return None
|
return None
|
||||||
@ -278,16 +280,11 @@ def resolve_gpu_device_option(option: str):
|
|||||||
if option.startswith("gpu:"):
|
if option.startswith("gpu:"):
|
||||||
try:
|
try:
|
||||||
idx = int(option[4:])
|
idx = int(option[4:])
|
||||||
devices = get_all_torch_devices()
|
except ValueError:
|
||||||
if 0 <= idx < len(devices):
|
|
||||||
return devices[idx]
|
|
||||||
else:
|
|
||||||
logging.warning(f"Device '{option}' not available (only {len(devices)} GPU(s)), using default.")
|
|
||||||
return None
|
|
||||||
except (ValueError, IndexError):
|
|
||||||
logging.warning(f"Invalid device option '{option}', using default.")
|
|
||||||
return None
|
return None
|
||||||
logging.warning(f"Unrecognized device option '{option}', using default.")
|
devices = get_all_torch_devices()
|
||||||
|
if 0 <= idx < len(devices):
|
||||||
|
return devices[idx]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
|
|||||||
@ -46,15 +46,90 @@ class MultiGPUCFGSplitNode(io.ComfyNode):
|
|||||||
return io.NodeOutput(model)
|
return io.NodeOutput(model)
|
||||||
|
|
||||||
|
|
||||||
|
def _remember_base_devices(patcher: ModelPatcher):
    """Record the patcher's current load/offload devices on its model, once.

    The values are written to ``patcher.model`` (shared across patcher
    clones), so a later selector application can look up the routing the
    loader originally produced and restore it when the user picks
    "default".
    """
    shared_model = patcher.model
    if hasattr(shared_model, "_select_base_load_device"):
        # An earlier selector already captured the base devices; keep them
        # so a retargeted patcher never overwrites the loader's values.
        return
    shared_model._select_base_load_device = patcher.load_device
    shared_model._select_base_offload_device = patcher.offload_device
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_patcher_device(patcher: ModelPatcher, resolved, base_offload_override=None):
    """Route a freshly-cloned patcher to *resolved*, honoring the base devices.

    Args:
        patcher: The cloned patcher to retarget. It is mutated in place
            unless the CPU downgrade below replaces it.
        resolved: The target device, or None for the "default" choice, in
            which case the cached base load/offload devices are restored.
        base_offload_override: When not None, used as the offload device
            instead of the base offload device cached on the model.

    Returns:
        The patcher to use from here on. For a CPU target on a dynamic
        patcher this is the result of ``clone(disable_dynamic=True)`` when
        that call succeeds; otherwise it is the patcher that was passed in.
    """
    _remember_base_devices(patcher)
    base_load = patcher.model._select_base_load_device
    if base_offload_override is not None:
        base_offload = base_offload_override
    else:
        base_offload = patcher.model._select_base_offload_device

    if resolved is None:
        # "default" -> reset routing to whatever the loader produced.
        patcher.load_device = base_load
        patcher.offload_device = base_offload
    elif resolved.type == "cpu":
        if patcher.is_dynamic():
            try:
                patcher = patcher.clone(disable_dynamic=True)
            except Exception:
                # Best-effort downgrade: the original note cites a missing
                # cached_patcher_init as one cause. Keep the dynamic patcher,
                # but leave a trace instead of swallowing the failure.
                logging.debug(
                    "Select Device: could not downgrade dynamic patcher for CPU, keeping dynamic patcher.",
                    exc_info=True,
                )
        # CPU pins both the load and the offload device.
        patcher.load_device = resolved
        patcher.offload_device = resolved
    else:
        patcher.load_device = resolved
        patcher.offload_device = base_offload

    # Not every patcher exposes register_load_device; notify those that do.
    if hasattr(patcher, "register_load_device"):
        patcher.register_load_device(patcher.load_device)
    return patcher
|
||||||
|
|
||||||
|
|
||||||
|
def _prune_multigpu_collision(model: ModelPatcher, primary_device):
    """Remove multigpu clones whose load_device equals *primary_device*.

    Applies when MultiGPU CFG Split ran before Select Model Device: a clone
    may already sit on the device the primary patcher has just been moved
    to. Dropping it keeps two patchers from being bound to the same device.
    Does nothing when there are no multigpu clones or none of them collide.
    """
    clones = model.get_additional_models_with_key("multigpu")
    if not clones:
        return

    survivors = []
    for clone in clones:
        if clone.load_device != primary_device:
            survivors.append(clone)

    if len(survivors) == len(clones):
        # No clone shares the primary's device; leave the clone set alone.
        return

    logging.info(f"Select Model Device: pruning MultiGPU clone on {primary_device} that now collides with the primary model.")
    model.set_additional_models("multigpu", survivors)
    # NOTE(review): assumes the re-sync below belongs to the pruning branch
    # only -- confirm against the upstream file's indentation.
    if hasattr(model, "match_multigpu_clones"):
        model.match_multigpu_clones()
|
||||||
|
|
||||||
|
|
||||||
class SelectModelDeviceNode(io.ComfyNode):
|
class SelectModelDeviceNode(io.ComfyNode):
|
||||||
"""
|
"""
|
||||||
Place the diffusion model on a specific device (default / cpu / gpu:N).
|
Place the diffusion model on a specific device (default / cpu / gpu:N).
|
||||||
|
|
||||||
|
- "default" restores the device assigned by the loader (even after a
|
||||||
|
prior Select Model Device call).
|
||||||
|
- "cpu" pins both the load and offload device to CPU.
|
||||||
|
- "gpu:N" pins the load device to the Nth available GPU; the offload
|
||||||
|
device is restored to the loader's original choice.
|
||||||
|
|
||||||
|
If the workflow already has MultiGPU CFG Split applied and the chosen
|
||||||
|
GPU collides with one of the existing multigpu clones, that clone is
|
||||||
|
dropped so two patchers don't end up bound to the same device.
|
||||||
|
|
||||||
When the selected device does not exist on the current machine
|
When the selected device does not exist on the current machine
|
||||||
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
||||||
the node passes the model through unchanged and logs a message
|
the node passes the model through unchanged and logs a message
|
||||||
instead of failing. This keeps workflows portable across machines
|
instead of failing.
|
||||||
with different GPU counts.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -83,15 +158,12 @@ class SelectModelDeviceNode(io.ComfyNode):
|
|||||||
def execute(cls, model: ModelPatcher, device: str = "default") -> io.NodeOutput:
|
def execute(cls, model: ModelPatcher, device: str = "default") -> io.NodeOutput:
|
||||||
model = model.clone()
|
model = model.clone()
|
||||||
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
||||||
if resolved is None:
|
if resolved is None and device not in (None, "default"):
|
||||||
if device not in (None, "default"):
|
logging.info(f"Select Model Device: requested device '{device}' not available, passing through unchanged.")
|
||||||
logging.info(f"Select Model Device: requested device '{device}' not available, passing through unchanged.")
|
|
||||||
return io.NodeOutput(model)
|
return io.NodeOutput(model)
|
||||||
model.load_device = resolved
|
model = _apply_patcher_device(model, resolved)
|
||||||
if resolved.type == "cpu":
|
if resolved is not None:
|
||||||
model.offload_device = resolved
|
_prune_multigpu_collision(model, model.load_device)
|
||||||
if hasattr(model, "register_load_device"):
|
|
||||||
model.register_load_device(resolved)
|
|
||||||
return io.NodeOutput(model)
|
return io.NodeOutput(model)
|
||||||
|
|
||||||
|
|
||||||
@ -99,11 +171,14 @@ class SelectCLIPDeviceNode(io.ComfyNode):
|
|||||||
"""
|
"""
|
||||||
Place the CLIP text encoder on a specific device (default / cpu / gpu:N).
|
Place the CLIP text encoder on a specific device (default / cpu / gpu:N).
|
||||||
|
|
||||||
|
- "default" restores the device assigned by the loader.
|
||||||
|
- "cpu" pins both the load and offload device to CPU.
|
||||||
|
- "gpu:N" pins the load device to the Nth available GPU.
|
||||||
|
|
||||||
When the selected device does not exist on the current machine
|
When the selected device does not exist on the current machine
|
||||||
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
||||||
the node passes the CLIP through unchanged and logs a message
|
the node passes the CLIP through unchanged and logs a message
|
||||||
instead of failing. This keeps workflows portable across machines
|
instead of failing.
|
||||||
with different GPU counts.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -130,15 +205,10 @@ class SelectCLIPDeviceNode(io.ComfyNode):
|
|||||||
def execute(cls, clip: CLIP, device: str = "default") -> io.NodeOutput:
|
def execute(cls, clip: CLIP, device: str = "default") -> io.NodeOutput:
|
||||||
clip = clip.clone()
|
clip = clip.clone()
|
||||||
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
||||||
if resolved is None:
|
if resolved is None and device not in (None, "default"):
|
||||||
if device not in (None, "default"):
|
logging.info(f"Select CLIP Device: requested device '{device}' not available, passing through unchanged.")
|
||||||
logging.info(f"Select CLIP Device: requested device '{device}' not available, passing through unchanged.")
|
|
||||||
return io.NodeOutput(clip)
|
return io.NodeOutput(clip)
|
||||||
clip.patcher.load_device = resolved
|
clip.patcher = _apply_patcher_device(clip.patcher, resolved)
|
||||||
if resolved.type == "cpu":
|
|
||||||
clip.patcher.offload_device = resolved
|
|
||||||
if hasattr(clip.patcher, "register_load_device"):
|
|
||||||
clip.patcher.register_load_device(resolved)
|
|
||||||
return io.NodeOutput(clip)
|
return io.NodeOutput(clip)
|
||||||
|
|
||||||
|
|
||||||
@ -146,13 +216,18 @@ class SelectVAEDeviceNode(io.ComfyNode):
|
|||||||
"""
|
"""
|
||||||
Place the VAE on a specific device (default / gpu:N).
|
Place the VAE on a specific device (default / gpu:N).
|
||||||
|
|
||||||
CPU is intentionally not offered as a choice; VAE on CPU is impractical.
|
- "default" restores the device assigned by the loader.
|
||||||
|
- "gpu:N" pins the load device to the Nth available GPU; the offload
|
||||||
|
device is set to the standard VAE offload device.
|
||||||
|
|
||||||
|
CPU is intentionally not exposed in the UI for the VAE; if a workflow
|
||||||
|
supplies "cpu" anyway (e.g. opened from another machine), the request
|
||||||
|
is dropped with a log message and the VAE is passed through unchanged.
|
||||||
|
|
||||||
When the selected device does not exist on the current machine
|
When the selected device does not exist on the current machine
|
||||||
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
(e.g. a workflow built on a 2-GPU box opened on a 1-GPU box),
|
||||||
the node passes the VAE through unchanged and logs a message
|
the node passes the VAE through unchanged and logs a message
|
||||||
instead of failing. This keeps workflows portable across machines
|
instead of failing.
|
||||||
with different GPU counts.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -182,15 +257,20 @@ class SelectVAEDeviceNode(io.ComfyNode):
|
|||||||
vae = copy.copy(vae)
|
vae = copy.copy(vae)
|
||||||
vae.patcher = vae.patcher.clone()
|
vae.patcher = vae.patcher.clone()
|
||||||
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
resolved = comfy.model_management.resolve_gpu_device_option(device)
|
||||||
if resolved is None:
|
if resolved is None and device not in (None, "default"):
|
||||||
if device not in (None, "default"):
|
logging.info(f"Select VAE Device: requested device '{device}' not available, passing through unchanged.")
|
||||||
logging.info(f"Select VAE Device: requested device '{device}' not available, passing through unchanged.")
|
|
||||||
return io.NodeOutput(vae)
|
return io.NodeOutput(vae)
|
||||||
vae.device = resolved
|
if resolved is not None and resolved.type == "cpu":
|
||||||
vae.patcher.load_device = resolved
|
logging.info("Select VAE Device: CPU is not a supported choice, passing through unchanged.")
|
||||||
vae.patcher.offload_device = comfy.model_management.vae_offload_device()
|
return io.NodeOutput(vae)
|
||||||
if hasattr(vae.patcher, "register_load_device"):
|
vae.patcher = _apply_patcher_device(
|
||||||
vae.patcher.register_load_device(resolved)
|
vae.patcher, resolved,
|
||||||
|
base_offload_override=comfy.model_management.vae_offload_device(),
|
||||||
|
)
|
||||||
|
# VAE caches the working device separately from its patcher.
|
||||||
|
if not hasattr(vae, "_select_base_device"):
|
||||||
|
vae._select_base_device = vae.device
|
||||||
|
vae.device = vae._select_base_device if resolved is None else resolved
|
||||||
return io.NodeOutput(vae)
|
return io.NodeOutput(vae)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user