diff --git a/comfy/model_management.py b/comfy/model_management.py index 145a32080..c1d0901fc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1196,7 +1196,10 @@ def get_pin_buffer(offload_stream): pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0) STREAM_PIN_BUFFERS[offload_stream] = pin_buffer elif offload_stream is not None: - offload_stream.synchronize() + event = getattr(pin_buffer, "_comfy_event", None) + if event is not None: + event.synchronize() + delattr(pin_buffer, "_comfy_event") return pin_buffer def resize_pin_buffer(pin_buffer, size): diff --git a/comfy/ops.py b/comfy/ops.py index d425ea7eb..be744a030 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -241,6 +241,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin pin = stream_pin_tensor[pin_offset:pin_offset + pin_size] cast_maybe_lowvram_patch(xfer_source, pin, None) comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream) + stream_pin_hostbuf._comfy_event = offload_stream.record_event() return offload_stream