From b6805429b9c2f3aa919035bea849ecd1de3ac8e4 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 24 Nov 2025 23:48:20 -0800
Subject: [PATCH] Allow pinning quantized tensors. (#10873)

---
 comfy/model_management.py | 6 +++++-
 comfy/quant_ops.py        | 8 ++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index a21df54b3..a9327ac80 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1098,13 +1098,14 @@ if not args.disable_pinned_memory:
     MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
     logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
 
+PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])
 
 def pin_memory(tensor):
     global TOTAL_PINNED_MEMORY
     if MAX_PINNED_MEMORY <= 0:
         return False
 
-    if type(tensor) is not torch.nn.parameter.Parameter:
+    if type(tensor).__name__ not in PINNING_ALLOWED_TYPES:
         return False
 
     if not is_device_cpu(tensor.device):
@@ -1124,6 +1125,9 @@ def pin_memory(tensor):
         return False
 
     ptr = tensor.data_ptr()
+    if ptr == 0:
+        return False
+
     if torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0:
         PINNED_MEMORY[ptr] = size
         TOTAL_PINNED_MEMORY += size
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 905b4729e..e938144a7 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -228,6 +228,14 @@ class QuantizedTensor(torch.Tensor):
             new_kwargs = dequant_arg(kwargs)
         return func(*new_args, **new_kwargs)
 
+    def data_ptr(self):
+        return self._qdata.data_ptr()
+
+    def is_pinned(self):
+        return self._qdata.is_pinned()
+
+    def is_contiguous(self):
+        return self._qdata.is_contiguous()
 
 # ==============================================================================
 # Generic Utilities (Layout-Agnostic Operations)
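
Note (commentary, not part of the patch): the change works because pin_memory() only ever touches a candidate tensor through data_ptr(), is_pinned(), and is_contiguous(), so forwarding those three calls from QuantizedTensor to its packed byte buffer (_qdata) is enough to route quantized weights through the existing cudaHostRegister path; the new ptr == 0 guard skips empty tensors, whose null data pointer would make cudaHostRegister fail. Below is a minimal, self-contained sketch of that pattern. PinnableWrapper and try_pin are hypothetical names used for illustration only; the real QuantizedTensor subclasses torch.Tensor rather than wrapping it, and the real pin_memory() performs additional bookkeeping (type allowlist, pinned-memory budget) omitted here.

    import torch

    class PinnableWrapper:
        # Hypothetical stand-in for QuantizedTensor: holds packed quantized
        # bytes in self._qdata and forwards the three methods the pinning
        # path inspects, mirroring the quant_ops.py hunk above.
        def __init__(self, qdata: torch.Tensor):
            self._qdata = qdata

        def data_ptr(self):
            return self._qdata.data_ptr()

        def is_pinned(self):
            return self._qdata.is_pinned()

        def is_contiguous(self):
            return self._qdata.is_contiguous()

    def try_pin(wrapper: PinnableWrapper) -> bool:
        # Simplified echo of the patched pin_memory(): bail out on anything
        # cudaHostRegister cannot handle, then register the host buffer.
        if not wrapper.is_contiguous() or wrapper.is_pinned():
            return False
        ptr = wrapper.data_ptr()
        if ptr == 0:  # empty tensor -> null pointer; registering would fail
            return False
        size = wrapper._qdata.numel() * wrapper._qdata.element_size()
        # flag 1 == cudaHostRegisterPortable; a 0 return code means success
        return torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0

    if torch.cuda.is_available():
        w = PinnableWrapper(torch.randint(0, 256, (1024,), dtype=torch.uint8))
        print(try_pin(w))     # expected: True
        print(w.is_pinned())  # the registered buffer should now report as pinned

Registering the already-allocated buffer in place (rather than copying into a freshly allocated pinned buffer) is what lets the quantized weights become eligible for fast non-blocking host-to-device transfers without doubling their host memory footprint.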