From 4ae6f77064a14115d9c477817b9b38991a35153a Mon Sep 17 00:00:00 2001
From: John Pollock
Date: Wed, 4 Mar 2026 12:26:26 -0600
Subject: [PATCH] refactor: enhance isolation handling in notify_execution_graph
 and process_latent_in methods

---
 comfy/isolation/__init__.py                   | 31 ++++++++++------
 .../isolation/model_patcher_proxy_registry.py | 35 ++++++++++++++-----
 comfy/model_management.py                     |  6 +++-
 3 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/comfy/isolation/__init__.py b/comfy/isolation/__init__.py
index ed6d4265c..34ccb34dc 100644
--- a/comfy/isolation/__init__.py
+++ b/comfy/isolation/__init__.py
@@ -188,22 +188,33 @@ async def notify_execution_graph(needed_class_types: Set[str]) -> None:
     scan_shm_forensics("ISO:stop_extension", refresh_model_context=True)
     scan_shm_forensics("ISO:notify_graph_start", refresh_model_context=True)
+    isolated_class_types_in_graph = needed_class_types.intersection(
+        {spec.node_name for spec in _ISOLATED_NODE_SPECS}
+    )
+    graph_uses_isolation = bool(isolated_class_types_in_graph)
     logger.debug(
         "%s ISO:notify_graph_start running=%d needed=%d",
         LOG_PREFIX,
         len(_RUNNING_EXTENSIONS),
         len(needed_class_types),
     )
 
-    for ext_name, extension in list(_RUNNING_EXTENSIONS.items()):
-        ext_class_types = _get_class_types_for_extension(ext_name)
+    if graph_uses_isolation:
+        for ext_name, extension in list(_RUNNING_EXTENSIONS.items()):
+            ext_class_types = _get_class_types_for_extension(ext_name)
 
-        # If NONE of this extension's nodes are in the execution graph → evict
-        if not ext_class_types.intersection(needed_class_types):
-            await _stop_extension(
-                ext_name,
-                extension,
-                "isolated custom_node not in execution graph, evicting",
-            )
+            # If NONE of this extension's nodes are in the execution graph -> evict.
+            if not ext_class_types.intersection(needed_class_types):
+                await _stop_extension(
+                    ext_name,
+                    extension,
+                    "isolated custom_node not in execution graph, evicting",
+                )
+    else:
+        logger.debug(
+            "%s ISO:notify_graph_skip_evict running=%d reason=no isolated nodes in graph",
+            LOG_PREFIX,
+            len(_RUNNING_EXTENSIONS),
+        )
 
     # Isolated child processes add steady VRAM pressure; reclaim host-side models
     # at workflow boundaries so subsequent host nodes (e.g. CLIP encode) keep headroom.
@@ -217,7 +228,7 @@ async def notify_execution_graph(needed_class_types: Set[str]) -> None:
         _WORKFLOW_BOUNDARY_MIN_FREE_VRAM_BYTES,
     )
     free_before = model_management.get_free_memory(device)
-    if free_before < required and _RUNNING_EXTENSIONS:
+    if free_before < required and _RUNNING_EXTENSIONS and graph_uses_isolation:
         for ext_name, extension in list(_RUNNING_EXTENSIONS.items()):
             await _stop_extension(
                 ext_name,
                 extension,
diff --git a/comfy/isolation/model_patcher_proxy_registry.py b/comfy/isolation/model_patcher_proxy_registry.py
index cb4558d2f..468bfb663 100644
--- a/comfy/isolation/model_patcher_proxy_registry.py
+++ b/comfy/isolation/model_patcher_proxy_registry.py
@@ -1082,15 +1082,32 @@ class ModelPatcherRegistry(BaseRegistry[Any]):
     async def process_latent_in(
         self, instance_id: str, args: tuple, kwargs: dict
     ) -> Any:
-        return self._run_operation_with_lease(
-            instance_id,
-            "process_latent_in",
-            lambda: detach_if_grad(
-                self._get_instance(instance_id).model.process_latent_in(
-                    *args, **kwargs
-                )
-            ),
-        )
+        import torch
+
+        def _invoke() -> Any:
+            instance = self._get_instance(instance_id)
+            result = detach_if_grad(instance.model.process_latent_in(*args, **kwargs))
+
+            # DynamicVRAM + isolation: returning CUDA tensors across RPC can stall
+            # at the transport boundary. Marshal dynamic-path results as CPU and let
+            # the proxy restore placement when needed.
+            is_dynamic_fn = getattr(instance, "is_dynamic", None)
+            if callable(is_dynamic_fn) and is_dynamic_fn():
+                def _to_cpu(obj: Any) -> Any:
+                    if torch.is_tensor(obj):
+                        return obj.detach().cpu() if obj.device.type != "cpu" else obj
+                    if isinstance(obj, dict):
+                        return {k: _to_cpu(v) for k, v in obj.items()}
+                    if isinstance(obj, list):
+                        return [_to_cpu(v) for v in obj]
+                    if isinstance(obj, tuple):
+                        return tuple(_to_cpu(v) for v in obj)
+                    return obj
+
+                return _to_cpu(result)
+            return result
+
+        return self._run_operation_with_lease(instance_id, "process_latent_in", _invoke)
 
     async def process_latent_out(
         self, instance_id: str, args: tuple, kwargs: dict
diff --git a/comfy/model_management.py b/comfy/model_management.py
index a762b4134..f472f7fe9 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -831,7 +831,11 @@ def archive_model_dtypes(model):
 def cleanup_models():
     to_delete = []
     for i in range(len(current_loaded_models)):
-        if current_loaded_models[i].real_model() is None:
+        real_model_ref = current_loaded_models[i].real_model
+        if real_model_ref is None:
+            to_delete = [i] + to_delete
+            continue
+        if callable(real_model_ref) and real_model_ref() is None:
             to_delete = [i] + to_delete
 
     for i in to_delete: