From 878c5156d64e7ce9668807cdc0098dada49d5783 Mon Sep 17 00:00:00 2001 From: Rattus Date: Mon, 26 Jan 2026 00:53:07 +1000 Subject: [PATCH] disable async pin population --- comfy/ops.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/comfy/ops.py b/comfy/ops.py index 9710b2de2..107d59cd0 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -187,13 +187,11 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu if pin is not None: xfer_dest = comfy.memory_management.interpret_gathered_like([ pin ], xfer_dest)[0] - #FIXME: This might be the wrong thing to do. Some reading suggests the DMA engine - #is posted writes and the compute stream could just fire and forget here. That - #would save this sync and some stalling on the offload stream that is better off - #running ahead to the next layer to read. - if offload_stream is not None: - offload_stream.wait_stream(comfy.model_management.current_stream(device)) - comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=offload_stream, r=pin) + #FIXME: put this on nsight and see if it's worth offloading to the pin with + #the offload stream. This adds extra sync requirements on xfer_dest in addition to: + #if offload_stream is not None: + # offload_stream.wait_stream(comfy.model_management.current_stream(device)) + comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=None, r=pin) #FIXME: weird offload return protocol return weight, bias, (offload_stream, device if signature is not None else None, None)