disable async pin population

2026-02-06 19:42:34 +08:00 · 2026-01-26 00:53:07 +10:00 · 2026-01-26 00:53:07 +10:00 · 878c5156d6
commit 878c5156d6
parent 12e1560dcc
1 changed files with 5 additions and 7 deletions
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -187,13 +187,11 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
    if pin is not None:
        xfer_dest = comfy.memory_management.interpret_gathered_like([ pin ], xfer_dest)[0]
-        #FIXME: This might be the wrong thing to do. Some reading suggests the DMA engine
+        #FIXME: put this on nsight and see if its worth offloading to the pin with
-        #is posted writes and the compute stream could just fire and forget here. That
+        #the offload stream. This adds extra sync requirements on xfer_dest in addition to:
-        #would save this sync and some stalling on the offload stream that is better off
+        #if offload_stream is not None:
-        #running ahead to the next layer to read.
+        #    offload_stream.wait_stream(comfy.model_management.current_stream(device))
-        if offload_stream is not None:
+        comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=None, r=pin)
-            offload_stream.wait_stream(comfy.model_management.current_stream(device))
-        comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=offload_stream, r=pin)
    #FIXME: weird offload return protocol
    return weight, bias, (offload_stream, device if signature is not None else None, None)