From 8fd07170f1b0a7eaaf5a62020cd1926dd3b5092c Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sun, 28 Dec 2025 19:07:25 -0800
Subject: [PATCH 1/3] Comment out unused norm_final in lumina/z image model.
 (#11545)

---
 comfy/ldm/lumina/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py
index e80b1c138..afbab2ac7 100644
--- a/comfy/ldm/lumina/model.py
+++ b/comfy/ldm/lumina/model.py
@@ -491,7 +491,8 @@ class NextDiT(nn.Module):
                 for layer_id in range(n_layers)
             ]
         )
-        self.norm_final = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
+        # This norm final is in the lumina 2.0 code but isn't actually used for anything.
+        # self.norm_final = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
         self.final_layer = FinalLayer(dim, patch_size, self.out_channels, z_image_modulation=z_image_modulation, operation_settings=operation_settings)
 
         if self.pad_tokens_multiple is not None:

From 9ca7e143afe6f09734c9aefcc85f491c5c0dc6e0 Mon Sep 17 00:00:00 2001
From: rattus <46076784+rattus128@users.noreply.github.com>
Date: Mon, 29 Dec 2025 15:19:34 -0800
Subject: [PATCH 2/3] mm: discard async errors from pinning failures (#10738)

Pretty much every error cudaHostRegister can throw also queues the same
error on the async GPU queue. This was fixed for the repinning error
case, but the bad-mmap and plain ENOMEM cases are harder to detect.

Do some dummy GPU work to clear the error state.
---
 comfy/model_management.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index e5554e225..9fcb699bc 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1126,6 +1126,16 @@ if not args.disable_pinned_memory:
 
 PINNING_ALLOWED_TYPES = set(["Parameter", "QuantizedTensor"])
 
+def discard_cuda_async_error():
+    try:
+        a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
+        b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
+        _ = a + b
+        torch.cuda.synchronize()
+    except torch.AcceleratorError:
+        # Discard the error: it was already reported by the synchronous return.
+        pass
+
 def pin_memory(tensor):
     global TOTAL_PINNED_MEMORY
     if MAX_PINNED_MEMORY <= 0:
@@ -1158,6 +1168,8 @@ def pin_memory(tensor):
         PINNED_MEMORY[ptr] = size
         TOTAL_PINNED_MEMORY += size
         return True
+    else:
+        discard_cuda_async_error()
 
     return False
 
@@ -1186,6 +1198,8 @@ def unpin_memory(tensor):
         if len(PINNED_MEMORY) == 0:
             TOTAL_PINNED_MEMORY = 0
         return True
+    else:
+        discard_cuda_async_error()
 
     return False
 

From 0e6221cc79a3f3cbf0e15a8321bfe75fcffbe667 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 29 Dec 2025 15:26:42 -0800
Subject: [PATCH 3/3] Add some warnings for pin and unpin errors. (#11561)

---
 comfy/model_management.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 9fcb699bc..87baedd73 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1169,6 +1169,7 @@ def pin_memory(tensor):
         TOTAL_PINNED_MEMORY += size
         return True
     else:
+        logging.warning("Pin error.")
         discard_cuda_async_error()
 
     return False
@@ -1199,6 +1200,7 @@ def unpin_memory(tensor):
             TOTAL_PINNED_MEMORY = 0
         return True
     else:
+        logging.warning("Unpin error.")
         discard_cuda_async_error()
 
     return False