From ee927aafa8770a4bea8cfc2fcbedba5f86656097 Mon Sep 17 00:00:00 2001 From: Rattus Date: Sat, 9 May 2026 18:41:57 +1000 Subject: [PATCH] ops: sync the CPU with only the offload stream activity This was syncing with the offload stream which itself is synced with the compute stream, so this was syncing CPU with compute transitively. Define the event to sync it more gently. --- comfy/model_management.py | 5 ++++- comfy/ops.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 145a32080..c1d0901fc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1196,7 +1196,10 @@ def get_pin_buffer(offload_stream): pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0) STREAM_PIN_BUFFERS[offload_stream] = pin_buffer elif offload_stream is not None: - offload_stream.synchronize() + event = getattr(pin_buffer, "_comfy_event", None) + if event is not None: + event.synchronize() + delattr(pin_buffer, "_comfy_event") return pin_buffer def resize_pin_buffer(pin_buffer, size): diff --git a/comfy/ops.py b/comfy/ops.py index d425ea7eb..be744a030 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -241,6 +241,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin pin = stream_pin_tensor[pin_offset:pin_offset + pin_size] cast_maybe_lowvram_patch(xfer_source, pin, None) comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream) + stream_pin_hostbuf._comfy_event = offload_stream.record_event() return offload_stream