ops: sync the CPU with only the offload stream activity

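Previously the CPU synchronized with the whole offload stream, which is
itself synced with the compute stream, so the CPU was transitively syncing
with compute as well. Record an event on the offload stream after the
transfer and synchronize on that event instead, to sync more gently.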
This commit is contained in:
Rattus 2026-05-09 18:41:57 +10:00
parent 44c0a0602b
commit ee927aafa8
2 changed files with 5 additions and 1 deletions

View File

@@ -1196,7 +1196,10 @@ def get_pin_buffer(offload_stream):
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
elif offload_stream is not None:
offload_stream.synchronize()
event = getattr(pin_buffer, "_comfy_event", None)
if event is not None:
event.synchronize()
delattr(pin_buffer, "_comfy_event")
return pin_buffer
def resize_pin_buffer(pin_buffer, size):

View File

@@ -241,6 +241,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
cast_maybe_lowvram_patch(xfer_source, pin, None)
comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
stream_pin_hostbuf._comfy_event = offload_stream.record_event()
return offload_stream