diff --git a/comfy/ops.py b/comfy/ops.py
index 9710b2de2..107d59cd0 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -187,13 +187,11 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
 
     if pin is not None:
         xfer_dest = comfy.memory_management.interpret_gathered_like([ pin ], xfer_dest)[0]
-        #FIXME: This might be the wrong thing to do. Some reading suggests the DMA engine
-        #is posted writes and the compute stream could just fire and forget here. That
-        #would save this sync and some stalling on the offload stream that is better off
-        #running ahead to the next layer to read.
-        if offload_stream is not None:
-            offload_stream.wait_stream(comfy.model_management.current_stream(device))
-        comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=offload_stream, r=pin)
+        #FIXME: profile this with Nsight and see if it's worth offloading to the pin with
+        #the offload stream. This adds extra sync requirements on xfer_dest in addition to:
+        #if offload_stream is not None:
+        #    offload_stream.wait_stream(comfy.model_management.current_stream(device))
+        comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=None, r=pin)
 
     #FIXME: weird offload return protocol
     return weight, bias, (offload_stream, device if signature is not None else None, None)