Fix/tweak pinned memory accounting (#13221)

* mm: Lower windows pin threshold

Some workflows have more extranous use of shared GPU memory than is
accounted for in the 5% pin headroom. Lower this for safety.

* mm: Remove pin count clearing threshold.

TOTAL_PINNED_MEMORY is shared between the legacy and aimdo pinning
systems, however this catch-all assumes only the legacy system exists.
Remove the catch-all as the PINNED_MEMORY buffer is coherent already.
This commit is contained in:
rattus 2026-03-29 16:43:24 -07:00 committed by GitHub
parent d113d1cc32
commit 8d723d2caa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1326,9 +1326,9 @@ MAX_PINNED_MEMORY = -1
if not args.disable_pinned_memory:
if is_nvidia() or is_amd():
if WINDOWS:
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.45 # Windows limit is apparently 50%
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.40 # Windows limit is apparently 50%
else:
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.90
logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
PINNING_ALLOWED_TYPES = set(["Tensor", "Parameter", "QuantizedTensor"])
@ -1403,8 +1403,6 @@ def unpin_memory(tensor):
if torch.cuda.cudart().cudaHostUnregister(ptr) == 0:
TOTAL_PINNED_MEMORY -= PINNED_MEMORY.pop(ptr)
if len(PINNED_MEMORY) == 0:
TOTAL_PINNED_MEMORY = 0
return True
else:
logging.warning("Unpin error.")