ops: sync the CPU with only the offload stream activity

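Previously this synchronized the CPU with the entire offload stream, which is itself synchronized with the compute stream, so the CPU was transitively waiting on compute work. Instead, record an event on the offload stream once the transfers have been queued, and have the CPU synchronize only on that event.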
2026-05-15 03:27:24 +08:00 · 2026-05-09 18:41:57 +10:00 · 2026-05-09 18:41:57 +10:00 · ee927aafa8
commit ee927aafa8
parent 44c0a0602b
2 changed files with 5 additions and 1 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1196,7 +1196,10 @@ def get_pin_buffer(offload_stream):
        pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
        STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
    elif offload_stream is not None:
-        offload_stream.synchronize()
+        event = getattr(pin_buffer, "_comfy_event", None)
+        if event is not None:
+            event.synchronize()
+            delattr(pin_buffer, "_comfy_event")
    return pin_buffer
 def resize_pin_buffer(pin_buffer, size):
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -241,6 +241,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
            pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
            cast_maybe_lowvram_patch(xfer_source, pin, None)
            comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
+        stream_pin_hostbuf._comfy_event = offload_stream.record_event()
    return offload_stream