fix syncs

Fix these syncs so that they are conditionalized properly for CPU and
always run in exception flows.
This commit is contained in:
Rattus 2026-01-26 01:06:17 +10:00
parent 878c5156d6
commit 362d1d845e
2 changed files with 7 additions and 3 deletions

View File

@ -1120,7 +1120,8 @@ def get_cast_buffer(offload_stream, device, size, ref):
def reset_cast_buffers():
global LARGEST_CASTED_WEIGHT
LARGEST_CASTED_WEIGHT = (None, 0)
torch.cuda.synchronize()
for offload_stream in STREAM_CAST_BUFFERS:
offload_stream.synchronize()
STREAM_CAST_BUFFERS.clear()
torch.cuda.empty_cache()

View File

@ -523,8 +523,11 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,
#that we just want to cull out each model run.
allocator = comfy.memory_management.aimdo_allocator
with nullcontext() if allocator is None else torch.cuda.use_mem_pool(torch.cuda.MemPool(allocator.allocator())):
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
torch.cuda.synchronize()
try:
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
finally:
if allocator is not None:
torch.cuda.synchronize()
if allocator is not None:
#FIXME: this is probably a little zealous
# Torch code comments says some stuff about not actually freeing tensors on mempool