From ee927aafa8770a4bea8cfc2fcbedba5f86656097 Mon Sep 17 00:00:00 2001
From: Rattus <rattus128@gmail.com>
Date: Sat, 9 May 2026 18:41:57 +1000
Subject: [PATCH] ops: sync the CPU with only the offload stream activity

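get_pin_buffer() was synchronizing the CPU with the whole offload stream.
The offload stream is itself synced with the compute stream, so this was
transitively syncing the CPU with compute as well. Instead, record an
event on the offload stream once the pin buffer transfers have been
queued, store it on the pin buffer, and have get_pin_buffer() synchronize
on that event only (clearing it afterwards), which is a gentler sync.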
---
 comfy/model_management.py | 5 ++++-
 comfy/ops.py              | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 145a32080..c1d0901fc 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1196,7 +1196,10 @@ def get_pin_buffer(offload_stream):
         pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0)
         STREAM_PIN_BUFFERS[offload_stream] = pin_buffer
     elif offload_stream is not None:
-        offload_stream.synchronize()
+        event = getattr(pin_buffer, "_comfy_event", None)
+        if event is not None:
+            event.synchronize()
+            delattr(pin_buffer, "_comfy_event")
     return pin_buffer
 
 def resize_pin_buffer(pin_buffer, size):
diff --git a/comfy/ops.py b/comfy/ops.py
index d425ea7eb..be744a030 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -241,6 +241,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin
             pin = stream_pin_tensor[pin_offset:pin_offset + pin_size]
             cast_maybe_lowvram_patch(xfer_source, pin, None)
             comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream)
+        stream_pin_hostbuf._comfy_event = offload_stream.record_event()
 
     return offload_stream