From 362d1d845efe1408ebddc82a1849c882dd66a61c Mon Sep 17 00:00:00 2001 From: Rattus Date: Mon, 26 Jan 2026 01:06:17 +1000 Subject: [PATCH] fix syncs Fix these syncs to conditionalize properly for CPU and always run in exception flows. --- comfy/model_management.py | 3 ++- execution.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 527197447..888cea5c3 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1120,7 +1120,8 @@ def get_cast_buffer(offload_stream, device, size, ref): def reset_cast_buffers(): global LARGEST_CASTED_WEIGHT LARGEST_CASTED_WEIGHT = (None, 0) - torch.cuda.synchronize() + for offload_stream in STREAM_CAST_BUFFERS: + offload_stream.synchronize() STREAM_CAST_BUFFERS.clear() torch.cuda.empty_cache() diff --git a/execution.py b/execution.py index a25bd36cd..30bf50b9d 100644 --- a/execution.py +++ b/execution.py @@ -523,8 +523,11 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed, #that we just want to cull out each model run. allocator = comfy.memory_management.aimdo_allocator with nullcontext() if allocator is None else torch.cuda.use_mem_pool(torch.cuda.MemPool(allocator.allocator())): - output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data) - torch.cuda.synchronize() + try: + output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data) + finally: + if allocator is not None: + torch.cuda.synchronize() if allocator is not None: #FIXME: this is probably a little zealous # Torch code comments says some stuff about not actually freeing tensors on mempool