pinned_memory: add python

Add a Python module for managing pinned memory at the weight/bias module level.
This allocates, pins and attaches a tensor to a module as the pin for that
module. It does not set the weight; it just allocates a single RAM buffer
for population and bulk DMA transfer.
This commit is contained in:
Rattus 2026-01-13 15:16:41 +10:00
parent f1e8ccae5c
commit 439c178c2c
2 changed files with 35 additions and 1 deletions

View File

@ -1185,7 +1185,7 @@ if not args.disable_pinned_memory:
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])
PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
def discard_cuda_async_error():
try:

34
comfy/pinned_memory.py Normal file
View File

@ -0,0 +1,34 @@
import torch
import logging
import comfy.model_management
import comfy.memory_management
from comfy.cli_args import args
def get_pin(module):
    """Return the pin buffer stored on ``module`` as ``_pin``, or ``None``.

    ``None`` is returned when the module has no ``_pin`` attribute.
    """
    try:
        return module._pin
    except AttributeError:
        return None
def pin_memory(module):
    """Allocate a host buffer for ``module``'s weight/bias and try to pin it.

    A flat ``uint8`` tensor of ``vram_aligned_size([weight, bias])`` elements
    is allocated and handed to ``comfy.model_management.pin_memory``. On
    success the tensor is attached to the module as ``module._pin``. The
    weight and bias data are not copied here; only the buffer is created.

    Args:
        module: object exposing ``weight``, ``bias`` and ``pin_failed``.

    Returns:
        ``None`` when no attempt is made (a previous attempt failed, pinned
        memory is disabled via ``args``, or the module already has a pin),
        ``True`` when a new buffer was pinned and attached, and ``False``
        when allocation or pinning failed (``module.pin_failed`` is then set
        so the attempt is not repeated).
    """
    if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
        return
    #FIXME: This is a RAM cache trigger event
    params = [module.weight, module.bias]
    size = comfy.memory_management.vram_aligned_size(params)
    try:
        pin = torch.empty((size,), dtype=torch.uint8)
        if comfy.model_management.pin_memory(pin):
            module._pin = pin
            return True
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit still propagate; pinning is best-effort.
        logging.debug("Failed to allocate/pin memory for module", exc_info=True)
    # Either the pin request was refused or it raised: remember the failure.
    module.pin_failed = True
    return False
def unpin_memory(module):
    """Unpin and detach the pin buffer attached to ``module``, if any.

    Args:
        module: object that may carry a ``_pin`` tensor set by ``pin_memory``.

    Returns:
        int: size in bytes (``numel() * element_size()``) of the buffer that
        was passed to ``comfy.model_management.unpin_memory``, or ``0`` when
        the module has no pin.
    """
    pin = get_pin(module)
    if pin is None:
        return 0
    size = pin.numel() * pin.element_size()
    comfy.model_management.unpin_memory(pin)
    # Drop the reference so get_pin() reports no pin afterwards: otherwise
    # pin_memory() would refuse to re-pin and a second unpin_memory() call
    # would unpin the same buffer again and report its size twice.
    del module._pin
    return size