pinned_memory: add Python module

Add a Python module for managing pinned memory at the weight/bias module level. It allocates, pins, and attaches a tensor to a module to serve as the pin for that module. It does not set the weight; it only allocates a single RAM buffer for population and bulk DMA transfer.
2026-02-06 03:22:33 +08:00 · 2026-01-13 15:16:41 +10:00 · 2026-01-13 15:16:41 +10:00 · 439c178c2c
commit 439c178c2c
parent f1e8ccae5c
2 changed files with 35 additions and 1 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1185,7 +1185,7 @@ if not args.disable_pinned_memory:
            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
        logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

-PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])
+PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])

 def discard_cuda_async_error():
    try:
--- a/comfy/pinned_memory.py
+++ b/comfy/pinned_memory.py
@ -0,0 +1,34 @@
+import torch
+import logging
+import comfy.model_management
+import comfy.memory_management
+
+from comfy.cli_args import args
+
+def get_pin(module):
+    return getattr(module, "_pin", None)
+
+def pin_memory(module):
+    if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
+        return
+    #FIXME: This is a RAM cache trigger event
+    params = [ module.weight, module.bias ]
+    size = comfy.memory_management.vram_aligned_size(params)
+    try:
+        pin = torch.empty((size,), dtype=torch.uint8)
+        if comfy.model_management.pin_memory(pin):
+            module._pin = pin
+        else:
+            module.pin_failed = True
+            return False
+    except:
+        module.pin_failed = True
+        return False
+    return True
+
+def unpin_memory(module):
+    if get_pin(module) is None:
+        return 0
+    size = module._pin.numel() * module._pin.element_size()
+    comfy.model_management.unpin_memory(module._pin)
+    return size