Go back to pre-pins

Post pins don't really work for low-spec users, and you are more likely to recycle your model with a different LoRA than to really care about the tiny bit of perf gained from a pre-computed LoRA. Do it the old way.
2026-06-14 11:59:21 +08:00 · 2026-01-29 23:48:27 +10:00 · 2026-01-29 23:48:27 +10:00 · b1eb25b5c1
commit b1eb25b5c1
parent bc80f784d8
2 changed files with 10 additions and 18 deletions
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -1494,8 +1494,6 @@ class ModelPatcherDynamic(ModelPatcher):
                def set_dirty(item, dirty):
                    if dirty or not hasattr(item, "_v_signature"):
                        item._v_signature = None
-                    if dirty:
-                        comfy.pinned_memory.unpin_memory(item)

                def setup_param(self, m, n, param_key):
                    nonlocal num_patches
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -119,6 +119,15 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
            xfer_dest = torch.empty((dest_size,), dtype=torch.uint8, device=device)
            offload_stream = None

+        if signature is None and pin is None:
+            comfy.pinned_memory.pin_memory(s)
+            pin = comfy.pinned_memory.get_pin(s)
+        else:
+            pin = None
+
+        if pin is not None:
+            comfy.model_management.cast_to_gathered(xfer_source, pin)
+            xfer_srouce = [ pin ]
        #send it over
        comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=offload_stream)
        comfy.model_management.sync_stream(device, offload_stream)
@ -130,13 +139,6 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
                    post_cast.copy_(pre_cast)
            xfer_dest = cast_dest

-    pin = None
-    if signature is None and not resident:
-        #prepare a new pin
-        assert comfy.pinned_memory.get_pin(s) is None
-        comfy.pinned_memory.pin_memory(s)
-        pin = comfy.pinned_memory.get_pin(s)
-
    params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest)
    weight = params[0]
    bias = params[1]
@ -174,21 +176,13 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
            x = f(x)
        return x

-    update_weight = signature is not None or pin is not None
+    update_weight = signature is not None

    weight = post_cast(s, "weight", weight, dtype, resident, update_weight)
    if s.bias is not None:
        bias = post_cast(s, "bias", bias, bias_dtype, resident, update_weight)
    s._v_signature=signature

-    if pin is not None:
-        xfer_dest = comfy.memory_management.interpret_gathered_like([ pin ], xfer_dest)[0]
-        #FIXME: put this on nsight and see if its worth offloading to the pin with
-        #the offload stream. This adds extra sync requirements on xfer_dest in addition to:
-        #if offload_stream is not None:
-        #    offload_stream.wait_stream(comfy.model_management.current_stream(device))
-        comfy.model_management.cast_to(xfer_dest, device=pin.device, non_blocking=non_blocking, stream=None, r=pin)
-
    #FIXME: weird offload return protocol
    return weight, bias, (offload_stream, device if signature is not None else None, None)