From b6805429b9c2f3aa919035bea849ecd1de3ac8e4 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 24 Nov 2025 23:48:20 -0800
Subject: [PATCH] Allow pinning quantized tensors. (#10873)

---
 comfy/model_management.py | 6 +++++-
 comfy/quant_ops.py        | 8 ++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index a21df54b3..a9327ac80 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1098,13 +1098,14 @@ if not args.disable_pinned_memory:
     MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
     logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
 
+PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])
 
 def pin_memory(tensor):
     global TOTAL_PINNED_MEMORY
     if MAX_PINNED_MEMORY <= 0:
         return False
 
-    if type(tensor) is not torch.nn.parameter.Parameter:
+    if type(tensor).__name__ not in PINNING_ALLOWED_TYPES:
         return False
 
     if not is_device_cpu(tensor.device):
@@ -1124,6 +1125,9 @@ def pin_memory(tensor):
         return False
 
     ptr = tensor.data_ptr()
+    if ptr == 0:
+        return False
+
     if torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0:
         PINNED_MEMORY[ptr] = size
         TOTAL_PINNED_MEMORY += size
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 905b4729e..e938144a7 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -228,6 +228,14 @@ class QuantizedTensor(torch.Tensor):
             new_kwargs = dequant_arg(kwargs)
         return func(*new_args, **new_kwargs)
 
+    def data_ptr(self):
+        return self._qdata.data_ptr()
+
+    def is_pinned(self):
+        return self._qdata.is_pinned()
+
+    def is_contiguous(self):
+        return self._qdata.is_contiguous()
 
 # ==============================================================================
 # Generic Utilities (Layout-Agnostic Operations)
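
Note (commentary, not part of the patch): the change works because pin_memory() only ever touches a candidate tensor through data_ptr(), is_pinned(), and is_contiguous(), so forwarding those three calls from QuantizedTensor to its packed byte buffer (_qdata) is enough to route quantized weights through the existing cudaHostRegister path; the new ptr == 0 guard skips empty tensors, whose null data pointer would make cudaHostRegister fail. Below is a minimal, self-contained sketch of that pattern. PinnableWrapper and try_pin are hypothetical names used for illustration only; the real QuantizedTensor subclasses torch.Tensor rather than wrapping it, and the real pin_memory() performs additional bookkeeping (type allowlist, pinned-memory budget) omitted here.

    import torch

    class PinnableWrapper:
        # Hypothetical stand-in for QuantizedTensor: holds packed quantized
        # bytes in self._qdata and forwards the three methods the pinning
        # path inspects, mirroring the quant_ops.py hunk above.
        def __init__(self, qdata: torch.Tensor):
            self._qdata = qdata

        def data_ptr(self):
            return self._qdata.data_ptr()

        def is_pinned(self):
            return self._qdata.is_pinned()

        def is_contiguous(self):
            return self._qdata.is_contiguous()

    def try_pin(wrapper: PinnableWrapper) -> bool:
        # Simplified echo of the patched pin_memory(): bail out on anything
        # cudaHostRegister cannot handle, then register the host buffer.
        if not wrapper.is_contiguous() or wrapper.is_pinned():
            return False
        ptr = wrapper.data_ptr()
        if ptr == 0:  # empty tensor -> null pointer; registering would fail
            return False
        size = wrapper._qdata.numel() * wrapper._qdata.element_size()
        # flag 1 == cudaHostRegisterPortable; a 0 return code means success
        return torch.cuda.cudart().cudaHostRegister(ptr, size, 1) == 0

    if torch.cuda.is_available():
        w = PinnableWrapper(torch.randint(0, 256, (1024,), dtype=torch.uint8))
        print(try_pin(w))     # expected: True
        print(w.is_pinned())  # the registered buffer should now report as pinned

Registering the already-allocated buffer in place (rather than copying into a freshly allocated pinned buffer) is what lets the quantized weights become eligible for fast non-blocking host-to-device transfers without doubling their host memory footprint.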