Fix/tweak pinned memory accounting (#13221)

* mm: Lower windows pin threshold Some workflows have more extranous use of shared GPU memory than is accounted for in the 5% pin headroom. Lower this for safety. * mm: Remove pin count clearing threshold. TOTAL_PINNED_MEMORY is shared between the legacy and aimdo pinning systems, however this catch-all assumes only the legacy system exists. Remove the catch-all as the PINNED_MEMORY buffer is coherent already.
2026-06-06 22:37:43 +08:00 · 2026-03-29 16:43:24 -07:00 · 2026-03-29 16:43:24 -07:00 · 8d723d2caa
commit 8d723d2caa
parent d113d1cc32
1 changed files with 2 additions and 4 deletions
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@ -1326,9 +1326,9 @@ MAX_PINNED_MEMORY = -1
 if not args.disable_pinned_memory:
    if is_nvidia() or is_amd():
        if WINDOWS:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.45  # Windows limit is apparently 50%
+            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.40  # Windows limit is apparently 50%
        else:
-            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
+            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.90
        logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

 PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
@ -1403,8 +1403,6 @@ def unpin_memory(tensor):

    if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
        TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
-        if len(PINNED_MEMORY) == 0:
-            TOTAL_PINNED_MEMORY = 0
        return True
    else:
        logging.warning("Unpin error.")