mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-15 03:27:24 +08:00
ops: sync the CPU with only the offload stream activity
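Previously the CPU synchronized with the entire offload stream, which is itself synchronized with the compute stream, so the CPU was transitively waiting on compute work as well. Instead, record an event on the offload stream right after the pinned-buffer transfer is queued and have the CPU synchronize on that event only, so it waits for the offload activity and nothing more.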
This commit is contained in:
parent
44c0a0602b
commit
ee927aafa8
@ -1196,7 +1196,10 @@ def get_pin_buffer(offload_stream):
|
|||||||
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
|
pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
|
||||||
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
|
STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
|
||||||
elif offload_stream is not None:
|
elif offload_stream is not None:
|
||||||
offload_stream.synchronize()
|
event = getattr(pin_buffer, "_comfy_event", None)
|
||||||
|
if event is not None:
|
||||||
|
event.synchronize()
|
||||||
|
delattr(pin_buffer, "_comfy_event")
|
||||||
return pin_buffer
|
return pin_buffer
|
||||||
|
|
||||||
def resize_pin_buffer(pin_buffer, size):
|
def resize_pin_buffer(pin_buffer, size):
|
||||||
|
|||||||
@ -241,6 +241,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
|
|||||||
pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
|
pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
|
||||||
cast_maybe_lowvram_patch(xfer_source, pin, None)
|
cast_maybe_lowvram_patch(xfer_source, pin, None)
|
||||||
comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
|
comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
|
||||||
|
stream_pin_hostbuf._comfy_event = offload_stream.record_event()
|
||||||
|
|
||||||
return offload_stream
|
return offload_stream
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user