From 92d97380bd02d9883295aeb2d29365cecd9a765e Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Fri, 17 Oct 2025 15:22:59 -0700
Subject: [PATCH 1/6] Update Python 3.14 installation instructions (#10385)

Removed mention of installing pytorch nightly for Python 3.14.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b0731db33..c9a0644e3 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,7 @@ comfy install
 
 ## Manual Install (Windows, Linux)
 
-Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (breaks the canny node) and install pytorch nightly but it is not recommended.
+Python 3.14 will work if you comment out the `kornia` dependency in the requirements.txt file (breaks the canny node) but it is not recommended.
 
 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12

From 9da397ea2f271080406f0c14cf4f0db7221ddf70 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Fri, 17 Oct 2025 17:03:28 -0700
Subject: [PATCH 2/6] Disable torch compiler for cast_bias_weight function
 (#10384)

* Disable torch compiler for cast_bias_weight function

* Fix torch compile.
---
 comfy/ops.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/comfy/ops.py b/comfy/ops.py
index 56b07b44c..5feeb3571 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -25,6 +25,9 @@ import comfy.rmsnorm
 import contextlib
 
 def run_every_op():
+    if torch.compiler.is_compiling():
+        return
+
     comfy.model_management.throw_exception_if_processing_interrupted()
 
 def scaled_dot_product_attention(q, k, v, *args, **kwargs):
@@ -70,6 +73,7 @@ if torch.cuda.is_available() and torch.backends.cudnn.is_available() and Perform
 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
 
+@torch.compiler.disable()
 def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None):
     if input is not None:
         if dtype is None:
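A note on the two guards in PATCH 2/6: torch.compiler.is_compiling() returns True while Dynamo is tracing, and the @torch.compiler.disable() decorator keeps a function out of the compiled graph entirely, so its dynamic device/dtype branching runs eagerly instead of causing graph breaks or recompiles. A minimal standalone sketch of that behavior (not part of the patch; helper names are illustrative, and it assumes PyTorch >= 2.4 where both APIs are public):

import torch

def run_every_op_sketch():
    # Mirrors the early return added in the patch: skip host-side
    # bookkeeping (e.g. interrupt checks) while Dynamo is tracing.
    if torch.compiler.is_compiling():
        return
    print("eager-only bookkeeping")

@torch.compiler.disable()
def cast_weight_sketch(w, dtype):
    # Excluded from compilation: Dynamo graph-breaks around the call and
    # runs this body eagerly, so the .to() here can branch on runtime
    # dtype/device without invalidating or recompiling the graph.
    return w.to(dtype)

@torch.compile
def forward(x, w):
    run_every_op_sketch()
    return x @ cast_weight_sketch(w, x.dtype)

print(forward(torch.randn(2, 3), torch.randn(3, 3, dtype=torch.float64)).shape)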
From 5b80addafd24bda5b2f9f7a35e32dbd40823c3fd Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sat, 18 Oct 2025 19:35:46 -0700
Subject: [PATCH 3/6] Turn off cuda malloc by default when --fast autotune is
 turned on. (#10393)

---
 comfy/model_management.py | 3 +++
 comfy/ops.py              | 3 ---
 cuda_malloc.py            | 7 ++++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index d82d5b8b0..7467391cd 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -371,6 +371,9 @@ try:
 except:
     pass
 
+if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
+    torch.backends.cudnn.benchmark = True
+
 try:
     if torch_version_numeric >= (2, 5):
         torch.backends.cuda.allow_fp16_bf16_reduction_math_sdp(True)
diff --git a/comfy/ops.py b/comfy/ops.py
index 5feeb3571..967134f05 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -67,9 +67,6 @@ except:
     cast_to = comfy.model_management.cast_to #TODO: remove once no more references
 
-if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
-    torch.backends.cudnn.benchmark = True
-
 def cast_to_input(weight, input, non_blocking=False, copy=True):
     return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy)
 
diff --git a/cuda_malloc.py b/cuda_malloc.py
index c1d9ae3ca..6520d5123 100644
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@@ -1,6 +1,6 @@
 import os
 import importlib.util
-from comfy.cli_args import args
+from comfy.cli_args import args, PerformanceFeature
 import subprocess
 
 #Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
@@ -75,8 +75,9 @@ if not args.cuda_malloc:
                 spec.loader.exec_module(module)
                 version = module.__version__
 
-        if int(version[0]) >= 2 and "+cu" in version: #enable by default for torch version 2.0 and up only on cuda torch
-            args.cuda_malloc = cuda_malloc_supported()
+        if int(version[0]) >= 2 and "+cu" in version: # enable by default for torch version 2.0 and up only on cuda torch
+            if PerformanceFeature.AutoTune not in args.fast: # Autotune has issues with cuda malloc
+                args.cuda_malloc = cuda_malloc_supported()
     except:
         pass

From 0cf33953a7c951d163088cbfe36c55d1cdf8a718 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sat, 18 Oct 2025 20:15:34 -0700
Subject: [PATCH 4/6] Fix batch size above 1 giving bad output in chroma
 radiance. (#10394)

---
 comfy/ldm/chroma_radiance/model.py | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/comfy/ldm/chroma_radiance/model.py b/comfy/ldm/chroma_radiance/model.py
index 47aa11b04..7d7be80f5 100644
--- a/comfy/ldm/chroma_radiance/model.py
+++ b/comfy/ldm/chroma_radiance/model.py
@@ -189,15 +189,15 @@ class ChromaRadiance(Chroma):
         nerf_pixels = nn.functional.unfold(img_orig, kernel_size=patch_size, stride=patch_size)
         nerf_pixels = nerf_pixels.transpose(1, 2) # -> [B, NumPatches, C * P * P]
 
+        # Reshape for per-patch processing
+        nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
+        nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
+
         if params.nerf_tile_size > 0 and num_patches > params.nerf_tile_size:
             # Enable tiling if nerf_tile_size isn't 0 and we actually have more patches than
             # the tile size.
-            img_dct = self.forward_tiled_nerf(img_out, nerf_pixels, B, C, num_patches, patch_size, params)
+            img_dct = self.forward_tiled_nerf(nerf_hidden, nerf_pixels, B, C, num_patches, patch_size, params)
         else:
-            # Reshape for per-patch processing
-            nerf_hidden = img_out.reshape(B * num_patches, params.hidden_size)
-            nerf_pixels = nerf_pixels.reshape(B * num_patches, C, patch_size**2).transpose(1, 2)
-
             # Get DCT-encoded pixel embeddings [pixel-dct]
             img_dct = self.nerf_image_embedder(nerf_pixels)
@@ -240,17 +240,8 @@ class ChromaRadiance(Chroma):
             end = min(i + tile_size, num_patches)
 
             # Slice the current tile from the input tensors
-            nerf_hidden_tile = nerf_hidden[:, i:end, :]
-            nerf_pixels_tile = nerf_pixels[:, i:end, :]
-
-            # Get the actual number of patches in this tile (can be smaller for the last tile)
-            num_patches_tile = nerf_hidden_tile.shape[1]
-
-            # Reshape the tile for per-patch processing
-            # [B, NumPatches_tile, D] -> [B * NumPatches_tile, D]
-            nerf_hidden_tile = nerf_hidden_tile.reshape(batch * num_patches_tile, params.hidden_size)
-            # [B, NumPatches_tile, C*P*P] -> [B*NumPatches_tile, C, P*P] -> [B*NumPatches_tile, P*P, C]
-            nerf_pixels_tile = nerf_pixels_tile.reshape(batch * num_patches_tile, channels, patch_size**2).transpose(1, 2)
+            nerf_hidden_tile = nerf_hidden[i * batch:end * batch]
+            nerf_pixels_tile = nerf_pixels[i * batch:end * batch]
 
             # get DCT-encoded pixel embeddings [pixel-dct]
             img_dct_tile = self.nerf_image_embedder(nerf_pixels_tile)
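Why PATCH 4/6 fixes batches: before the change, the tiled path sliced patches out of the [B, NumPatches, ...] tensors and re-flattened each tile separately, so with batch size > 1 the concatenated per-tile outputs no longer matched the [B * NumPatches, ...] row order the final unpatchify step expects. Flattening once, up front, means the tiled and untiled paths walk the same flat rows, and concatenating tile outputs reproduces the untiled result exactly. A toy sketch of that invariant (hypothetical shapes and a stand-in Linear layer, not the real ChromaRadiance modules):

import torch

B, N, D = 2, 10, 8                       # batch, patches per image, hidden dim
per_patch_head = torch.nn.Linear(D, D)   # stand-in for the per-patch NeRF head

hidden = torch.randn(B, N, D)
flat = hidden.reshape(B * N, D)          # row b * N + n holds patch n of image b

reference = per_patch_head(flat)         # untiled path

tile = 6                                 # rows per tile; need not divide B * N
tiles = [per_patch_head(flat[i:i + tile]) for i in range(0, B * N, tile)]
tiled = torch.cat(tiles, dim=0)          # same rows, same order as the reference

print(torch.allclose(reference, tiled))  # True for any B, N and tile size

The patch slices in units of patches times batch rather than raw rows, but the idea is the same: because the head is applied row-wise, any contiguous partition of the flattened rows gives identical output once the chunks are concatenated back in order.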
From dad076aee68ab676fb390d9663ab9e343824a080 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sat, 18 Oct 2025 20:19:52 -0700
Subject: [PATCH 5/6] Speed up chroma radiance. (#10395)

---
 comfy/model_detection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index 7677617c0..141f1e164 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -213,7 +213,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["nerf_mlp_ratio"] = 4
         dit_config["nerf_depth"] = 4
         dit_config["nerf_max_freqs"] = 8
-        dit_config["nerf_tile_size"] = 32
+        dit_config["nerf_tile_size"] = 512
         dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
         dit_config["nerf_embedder_dtype"] = torch.float32
     else:

From b4f30bd4087a79b4c4fc89bb67b9889adb866294 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sat, 18 Oct 2025 22:25:35 -0700
Subject: [PATCH 6/6] Pytorch is stupid. (#10398)

---
 comfy/ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/ops.py b/comfy/ops.py
index 967134f05..934e21261 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -58,7 +58,7 @@ except (ModuleNotFoundError, TypeError):
 NVIDIA_MEMORY_CONV_BUG_WORKAROUND = False
 try:
     if comfy.model_management.is_nvidia():
-        if torch.backends.cudnn.version() >= 91200 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10):
+        if torch.backends.cudnn.version() >= 91002 and comfy.model_management.torch_version_numeric >= (2, 9) and comfy.model_management.torch_version_numeric <= (2, 10): #TODO: change upper bound version once it's fixed
             NVIDIA_MEMORY_CONV_BUG_WORKAROUND = True
             logging.info("working around nvidia conv3d memory bug.")
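PATCH 5/6 simply raises the default tile size for the per-patch head from 32 to 512 patches per tile, presumably trading a little peak memory for far fewer kernel launches. The magic numbers in PATCH 6/6 are cuDNN's packed version encoding: for cuDNN 9.x, torch.backends.cudnn.version() returns MAJOR * 10000 + MINOR * 100 + PATCHLEVEL, so the gate now triggers from cuDNN 9.10.2 (91002) instead of 9.12.0 (91200), presumably to cover the cuDNN builds that PyTorch 2.9 wheels actually ship with. A small sketch of the decoding (the helper name is mine, not a ComfyUI or PyTorch function):

def decode_cudnn_version(v: int) -> str:
    # cuDNN 9.x packs versions as MAJOR * 10000 + MINOR * 100 + PATCHLEVEL.
    major, rest = divmod(v, 10000)
    minor, patch = divmod(rest, 100)
    return f"{major}.{minor}.{patch}"

assert decode_cudnn_version(91002) == "9.10.2"  # new lower bound of the workaround
assert decode_cudnn_version(91200) == "9.12.0"  # old lower bound of the workaround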