From e2d29926a903540dcff59ac7173dd32cf22151cd Mon Sep 17 00:00:00 2001
From: Rattus <rattus128@gmail.com>
Date: Tue, 13 Jan 2026 19:38:36 +1000
Subject: [PATCH] write better tx commentary

---
 comfy/ops.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/comfy/ops.py b/comfy/ops.py
index 825f9ae82..24d275216 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -170,8 +170,11 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
 
     if pin is not None:
         xfer_dest = comfy.memory_management.interpret_gathered_like([ pin ], xfer_dest)[0]
+        #FIXME: This might be the wrong thing to do. Some reading suggests the DMA engine
+        #uses posted writes, so the compute stream could just fire and forget here. That
+        #would save this sync and avoid some stalling on the offload stream, which is
+        #better off
         if offload_stream is not None:
-            #FIXME: if post cast didnt do anything this sync is un-needed
             offload_stream.wait_stream(comfy.model_management.current_stream(device))
         comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=offload_stream, r=pin)