mm: discard async errors from pinning failures

Pretty much every error cudaHostRegister can return also queues the same
error on the async GPU queue. This was fixed for the repinning error case,
but the bad-mmap and plain ENOMEM cases are harder to detect.

Do some dummy GPU work to clear the error state.
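
For context, a minimal sketch of the failure mode being worked around, assuming the pinning path registers host memory through torch.cuda.cudart(); the try_pin helper, the flag value, and the error-code handling here are illustrative, not taken from this commit:

    import torch

    def try_pin(tensor):
        # Ask the CUDA runtime to page-lock the tensor's host storage.
        size = tensor.numel() * tensor.element_size()
        err = torch.cuda.cudart().cudaHostRegister(tensor.data_ptr(), size, 0)
        return int(err) == 0  # non-zero on bad mmap, ENOMEM, etc.

    cpu_tensor = torch.empty(64, dtype=torch.uint8)
    if not try_pin(cpu_tensor):
        # The failure is already known from the synchronous return code,
        # but per this commit the same error stays queued on the async
        # side, so a later, unrelated torch.cuda.synchronize() would
        # raise it as torch.AcceleratorError unless dummy GPU work
        # flushes it first.
        pass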
Rattus 2025-11-13 21:38:04 +10:00
parent 3b3ef9a77a
commit e148a7f1bb

@@ -1098,6 +1098,15 @@ if not args.disable_pinned_memory:
    MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
    logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))

def discard_cuda_async_error():
    try:
        a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
        _ = a + b
        torch.cuda.synchronize()
    except torch.AcceleratorError:
        # Dump it! We already know about it from the synchronous return
        pass

def pin_memory(tensor):
    global TOTAL_PINNED_MEMORY
@@ -1128,6 +1137,8 @@ def pin_memory(tensor):
        PINNED_MEMORY[ptr] = size
        TOTAL_PINNED_MEMORY += size
        return True
    else:
        discard_cuda_async_error()
        return False
@@ -1156,6 +1167,8 @@ def unpin_memory(tensor):
        if len(PINNED_MEMORY) == 0:
            TOTAL_PINNED_MEMORY = 0
        return True
    else:
        discard_cuda_async_error()
        return False
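
A hedged sketch of the net effect on callers (the load_weights_pinned helper and its tensors are hypothetical): after this change, a False return from pin_memory or unpin_memory is a clean soft failure, because the queued async error has already been flushed:

    import logging
    import torch

    def load_weights_pinned(tensors):
        # Try to pin each staging buffer; fall back to pageable memory.
        for t in tensors:
            if not pin_memory(t):
                logging.debug("pinning failed; using pageable memory")
        # Safe after this change: discard_cuda_async_error() already
        # drained the stale error, so this synchronize no longer raises
        # on behalf of an earlier failed pin.
        torch.cuda.synchronize()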