pinned_memory: add python
Add a Python module for managing pinned memory at the weight/bias module level. It allocates, pins, and attaches a tensor to a module to serve as that module's pin. It does not set the weight; it only allocates a single RAM buffer to be populated and used for a bulk DMA transfer.
parent f1e8ccae5c
commit 439c178c2c
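For context, the pattern the commit message describes can be sketched in plain PyTorch: stage a module's weight and bias in one pinned (page-locked) CPU byte buffer, then move the whole buffer to the GPU in a single non-blocking copy. The helper below is an illustration only, not ComfyUI code; it skips the alignment padding that vram_aligned_size handles and assumes a Linear-like module whose tensors keep element-aligned offsets when packed back to back.

import torch

def stage_and_upload(module, device="cuda"):
    # Collect the tensors to stage; bias may be absent.
    tensors = [t for t in (module.weight, module.bias) if t is not None]
    nbytes = [t.numel() * t.element_size() for t in tensors]

    # One pinned byte buffer for the whole module (no alignment padding here).
    pin = torch.empty(sum(nbytes), dtype=torch.uint8, pin_memory=True)

    # Populate the buffer: pack each tensor's raw bytes back to back.
    offset = 0
    for t, n in zip(tensors, nbytes):
        pin[offset:offset + n].copy_(t.detach().contiguous().flatten().view(torch.uint8))
        offset += n

    # Single bulk host-to-device copy; pinned memory makes non_blocking effective.
    gpu = pin.to(device, non_blocking=True)

    # Reinterpret slices of the uploaded buffer as the original dtypes/shapes.
    views, offset = [], 0
    for t, n in zip(tensors, nbytes):
        views.append(gpu[offset:offset + n].view(t.dtype).view(t.shape))
        offset += n
    return views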
@@ -1185,7 +1185,7 @@ if not args.disable_pinned_memory:
     MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
     logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

-PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])
+PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])

 def discard_cuda_async_error():
     try:
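The hunk above only widens the allowlist of type names; the code that consumes it is not part of this diff. A plausible consumer (an assumption, not taken from ComfyUI) keys on the class name, which is why adding "Tensor" lets plain torch.Tensor buffers qualify for pinning alongside Parameter and QuantizedTensor:

import torch

PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])

def can_pin(t):
    # Gate pinning on the object's class name (hypothetical check).
    return type(t).__name__ in PINNING_ALLOWED_TYPES

print(can_pin(torch.empty(4)))                      # True: class name is "Tensor"
print(can_pin(torch.nn.Parameter(torch.empty(4))))  # True: "Parameter"
print(can_pin([1, 2, 3]))                           # False: "list" is not allowed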
comfy/pinned_memory.py  Normal file  (34 lines)
@@ -0,0 +1,34 @@
+import torch
+import logging
+import comfy.model_management
+import comfy.memory_management
+
+from comfy.cli_args import args
+
+def get_pin(module):
+    return getattr(module, "_pin", None)
+
+def pin_memory(module):
+    if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
+        return
+    #FIXME: This is a RAM cache trigger event
+    params = [ module.weight, module.bias ]
+    size = comfy.memory_management.vram_aligned_size(params)
+    try:
+        pin = torch.empty((size,), dtype=torch.uint8)
+        if comfy.model_management.pin_memory(pin):
+            module._pin = pin
+        else:
+            module.pin_failed = True
+            return False
+    except:
+        module.pin_failed = True
+        return False
+    return True
+
+def unpin_memory(module):
+    if get_pin(module) is None:
+        return 0
+    size = module._pin.numel() * module._pin.element_size()
+    comfy.model_management.unpin_memory(module._pin)
+    return size
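A possible caller-side sketch, not part of this commit: walk a model, pin the staging buffer of each Linear module via comfy.pinned_memory, and track how many bytes were pinned (for example, to compare against MAX_PINNED_MEMORY). Initializing pin_failed and filtering on Linear modules are assumptions about how callers prepare modules; the commit itself only provides get_pin, pin_memory, and unpin_memory.

import torch
import comfy.pinned_memory as pinned_memory

def pin_model_weights(model):
    pinned_bytes = 0
    for m in model.modules():
        if not isinstance(m, torch.nn.Linear):
            continue
        if not hasattr(m, "pin_failed"):
            m.pin_failed = False  # pin_memory() reads this flag before attempting to pin
        if pinned_memory.pin_memory(m):
            pin = pinned_memory.get_pin(m)
            pinned_bytes += pin.numel() * pin.element_size()
    return pinned_bytes

def unpin_model_weights(model):
    # unpin_memory() returns 0 for modules that were never pinned.
    return sum(pinned_memory.unpin_memory(m) for m in model.modules())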