From fc247150fec502b1834390516b556a87003f1d79 Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski
Date: Fri, 22 Aug 2025 19:41:08 -0700
Subject: [PATCH 01/12] Implement EasyCache and Invent LazyCache (#9496)

* Attempting a universal implementation of EasyCache, starting with flux as a test; I screwed up the math a bit, but when I set it just right it works.

* Fixed math to make threshold work as expected, refactored code to use EasyCacheHolder instead of a dict wrapped by object

* Use sigmas from transformer_options instead of timesteps to be compatible with a greater number of models, make end_percent work

* Make log statement when not skipping useful, preparing for per-cond caching

* Added DIFFUSION_MODEL wrapper around forward function for wan model

* Add subsampling for heuristic inputs

* Add subsampling to output_prev (output_prev_subsampled now)

* Properly consider conds in EasyCache logic

* Created SuperEasyCache to test what happens if caching and reuse is moved outside the scope of conds, added PREDICT_NOISE wrapper to facilitate this test

* Change max reuse_threshold to 3.0

* Mark EasyCache/SuperEasyCache as experimental (beta)

* Make Lumina2 compatible with EasyCache

* Add EasyCache support for Qwen Image

* Fix missing comma, curse you Cursor

* Add EasyCache support to AceStep

* Add EasyCache support to Chroma

* Added EasyCache support to Cosmos Predict t2i

* Make EasyCache not crash with Cosmos Predict ImageToVideo latents, but does not work well at all

* Add EasyCache support to hidream

* Added EasyCache support to hunyuan video

* Added EasyCache support to hunyuan3d

* Added EasyCache support to LTXV (not very good, but does not crash)

* Implemented EasyCache for aura_flow

* Renamed SuperEasyCache to LazyCache, hardcoded subsample_factor to 8 on nodes

* Extra logging when verbose is true for EasyCache
---
 comfy/ldm/ace/model.py           |  24 +-
 comfy/ldm/aura/mmdit.py          |   8 +
 comfy/ldm/chroma/model.py        |   8 +
 comfy/ldm/cosmos/model.py        |  38 +++
 comfy/ldm/cosmos/predict2.py     |  17 +-
 comfy/ldm/flux/model.py          |   8 +
 comfy/ldm/hidream/model.py       |  19 +-
 comfy/ldm/hunyuan3d/model.py     |   8 +
 comfy/ldm/hunyuan_video/model.py |   8 +
 comfy/ldm/lightricks/model.py    |   8 +
 comfy/ldm/lumina/model.py        |  10 +-
 comfy/ldm/qwen_image/model.py    |  10 +-
 comfy/ldm/wan/model.py           |   8 +
 comfy/patcher_extension.py       |   1 +
 comfy/samplers.py                |   9 +-
 comfy_extras/nodes_easycache.py  | 459 +++++++++++++++++++++++++++++++
 nodes.py                         |   3 +-
 17 files changed, 639 insertions(+), 7 deletions(-)
 create mode 100644 comfy_extras/nodes_easycache.py

diff --git a/comfy/ldm/ace/model.py b/comfy/ldm/ace/model.py
index 12c524701..41d85eeb5 100644
--- a/comfy/ldm/ace/model.py
+++ b/comfy/ldm/ace/model.py
@@ -19,6 +19,7 @@ import torch
 from torch import nn
 
 import comfy.model_management
+import comfy.patcher_extension
 from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps
 from .attention import LinearTransformerBlock, t2i_modulate
 
@@ -343,7 +344,28 @@ class ACEStepTransformer2DModel(nn.Module):
         output = self.final_layer(hidden_states, embedded_timestep, output_length)
         return output
 
-    def forward(
+    def forward(self,
+        x,
+        timestep,
+        attention_mask=None,
+        context: Optional[torch.Tensor] = None,
+        text_attention_mask: Optional[torch.LongTensor] = None,
+        speaker_embeds: Optional[torch.FloatTensor] = None,
+        lyric_token_idx: Optional[torch.LongTensor] = None,
+        lyric_mask: Optional[torch.LongTensor] = None,
+        block_controlnet_hidden_states: Optional[Union[List[torch.Tensor], torch.Tensor]] = None,
+        controlnet_scale: Union[float, torch.Tensor] = 
1.0, + lyrics_strength=1.0, + **kwargs + ): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {})) + ).execute(x, timestep, attention_mask, context, text_attention_mask, speaker_embeds, lyric_token_idx, lyric_mask, block_controlnet_hidden_states, + controlnet_scale, lyrics_strength, **kwargs) + + def _forward( self, x, timestep, diff --git a/comfy/ldm/aura/mmdit.py b/comfy/ldm/aura/mmdit.py index 1258ae11f..d7f32b5e8 100644 --- a/comfy/ldm/aura/mmdit.py +++ b/comfy/ldm/aura/mmdit.py @@ -9,6 +9,7 @@ import torch.nn.functional as F from comfy.ldm.modules.attention import optimized_attention import comfy.ops +import comfy.patcher_extension import comfy.ldm.common_dit def modulate(x, shift, scale): @@ -436,6 +437,13 @@ class MMDiT(nn.Module): return x + pos_encoding.reshape(1, -1, self.positional_encoding.shape[-1]) def forward(self, x, timestep, context, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, transformer_options={}, **kwargs): patches_replace = transformer_options.get("patches_replace", {}) # patchify x, add PE b, c, h, w = x.shape diff --git a/comfy/ldm/chroma/model.py b/comfy/ldm/chroma/model.py index 06021d4f2..5cff44dc8 100644 --- a/comfy/ldm/chroma/model.py +++ b/comfy/ldm/chroma/model.py @@ -5,6 +5,7 @@ from dataclasses import dataclass import torch from torch import Tensor, nn from einops import rearrange, repeat +import comfy.patcher_extension import comfy.ldm.common_dit from comfy.ldm.flux.layers import ( @@ -253,6 +254,13 @@ class Chroma(nn.Module): return img def forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, guidance, control, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs): bs, c, h, w = x.shape x = comfy.ldm.common_dit.pad_to_patch_size(x, (self.patch_size, self.patch_size)) diff --git a/comfy/ldm/cosmos/model.py b/comfy/ldm/cosmos/model.py index 4836e0b69..53698b758 100644 --- a/comfy/ldm/cosmos/model.py +++ b/comfy/ldm/cosmos/model.py @@ -27,6 +27,8 @@ from torchvision import transforms from enum import Enum import logging +import comfy.patcher_extension + from .blocks import ( FinalLayer, GeneralDITTransformerBlock, @@ -435,6 +437,42 @@ class GeneralDIT(nn.Module): latent_condition_sigma: Optional[torch.Tensor] = None, condition_video_augment_sigma: Optional[torch.Tensor] = None, **kwargs, + ): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {})) + ).execute(x, + timesteps, + context, + attention_mask, + fps, + image_size, + padding_mask, + scalar_feature, + data_type, + latent_condition, + latent_condition_sigma, + condition_video_augment_sigma, + 
**kwargs) + + def _forward( + self, + x: torch.Tensor, + timesteps: torch.Tensor, + context: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + # crossattn_emb: torch.Tensor, + # crossattn_mask: Optional[torch.Tensor] = None, + fps: Optional[torch.Tensor] = None, + image_size: Optional[torch.Tensor] = None, + padding_mask: Optional[torch.Tensor] = None, + scalar_feature: Optional[torch.Tensor] = None, + data_type: Optional[DataType] = DataType.VIDEO, + latent_condition: Optional[torch.Tensor] = None, + latent_condition_sigma: Optional[torch.Tensor] = None, + condition_video_augment_sigma: Optional[torch.Tensor] = None, + **kwargs, ): """ Args: diff --git a/comfy/ldm/cosmos/predict2.py b/comfy/ldm/cosmos/predict2.py index 316117f77..fcc83ba76 100644 --- a/comfy/ldm/cosmos/predict2.py +++ b/comfy/ldm/cosmos/predict2.py @@ -11,6 +11,7 @@ import math from .position_embedding import VideoRopePosition3DEmb, LearnablePosEmbAxis from torchvision import transforms +import comfy.patcher_extension from comfy.ldm.modules.attention import optimized_attention def apply_rotary_pos_emb( @@ -805,7 +806,21 @@ class MiniTrainDIT(nn.Module): ) return x_B_C_Tt_Hp_Wp - def forward( + def forward(self, + x: torch.Tensor, + timesteps: torch.Tensor, + context: torch.Tensor, + fps: Optional[torch.Tensor] = None, + padding_mask: Optional[torch.Tensor] = None, + **kwargs, + ): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {})) + ).execute(x, timesteps, context, fps, padding_mask, **kwargs) + + def _forward( self, x: torch.Tensor, timesteps: torch.Tensor, diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py index c4de82795..0a77fa097 100644 --- a/comfy/ldm/flux/model.py +++ b/comfy/ldm/flux/model.py @@ -6,6 +6,7 @@ import torch from torch import Tensor, nn from einops import rearrange, repeat import comfy.ldm.common_dit +import comfy.patcher_extension from .layers import ( DoubleStreamBlock, @@ -214,6 +215,13 @@ class Flux(nn.Module): return img, repeat(img_ids, "h w c -> b (h w) c", b=bs) def forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, y, guidance, ref_latents, control, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, y=None, guidance=None, ref_latents=None, control=None, transformer_options={}, **kwargs): bs, c, h_orig, w_orig = x.shape patch_size = self.patch_size diff --git a/comfy/ldm/hidream/model.py b/comfy/ldm/hidream/model.py index 0305747bf..ae49cf945 100644 --- a/comfy/ldm/hidream/model.py +++ b/comfy/ldm/hidream/model.py @@ -13,6 +13,7 @@ from comfy.ldm.flux.layers import LastLayer from comfy.ldm.modules.attention import optimized_attention import comfy.model_management +import comfy.patcher_extension import comfy.ldm.common_dit @@ -692,7 +693,23 @@ class HiDreamImageTransformer2DModel(nn.Module): raise NotImplementedError return x, x_masks, img_sizes - def forward( + def forward(self, + x: torch.Tensor, + t: torch.Tensor, + y: Optional[torch.Tensor] = None, + context: Optional[torch.Tensor] = None, + encoder_hidden_states_llama3=None, + image_cond=None, + 
control = None, + transformer_options = {}, + ): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, t, y, context, encoder_hidden_states_llama3, image_cond, control, transformer_options) + + def _forward( self, x: torch.Tensor, t: torch.Tensor, diff --git a/comfy/ldm/hunyuan3d/model.py b/comfy/ldm/hunyuan3d/model.py index 4e18358f0..0fa5e78c1 100644 --- a/comfy/ldm/hunyuan3d/model.py +++ b/comfy/ldm/hunyuan3d/model.py @@ -7,6 +7,7 @@ from comfy.ldm.flux.layers import ( SingleStreamBlock, timestep_embedding, ) +import comfy.patcher_extension class Hunyuan3Dv2(nn.Module): @@ -67,6 +68,13 @@ class Hunyuan3Dv2(nn.Module): self.final_layer = LastLayer(hidden_size, 1, in_channels, dtype=dtype, device=device, operations=operations) def forward(self, x, timestep, context, guidance=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, guidance, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, guidance=None, transformer_options={}, **kwargs): x = x.movedim(-1, -2) timestep = 1.0 - timestep txt = context diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py index fbd8d4196..da1011596 100644 --- a/comfy/ldm/hunyuan_video/model.py +++ b/comfy/ldm/hunyuan_video/model.py @@ -1,6 +1,7 @@ #Based on Flux code because of weird hunyuan video code license. import torch +import comfy.patcher_extension import comfy.ldm.flux.layers import comfy.ldm.modules.diffusionmodules.mmdit from comfy.ldm.modules.attention import optimized_attention @@ -348,6 +349,13 @@ class HunyuanVideo(nn.Module): return repeat(img_ids, "t h w c -> b (t h w) c", b=bs) def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, control=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, y, guidance, attention_mask, guiding_frame_index, ref_latent, control, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, y, guidance=None, attention_mask=None, guiding_frame_index=None, ref_latent=None, control=None, transformer_options={}, **kwargs): bs, c, t, h, w = x.shape img_ids = self.img_ids(x) txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype) diff --git a/comfy/ldm/lightricks/model.py b/comfy/ldm/lightricks/model.py index ad9a7daea..aa2ea62b1 100644 --- a/comfy/ldm/lightricks/model.py +++ b/comfy/ldm/lightricks/model.py @@ -1,5 +1,6 @@ import torch from torch import nn +import comfy.patcher_extension import comfy.ldm.modules.attention import comfy.ldm.common_dit from einops import rearrange @@ -420,6 +421,13 @@ class LTXVModel(torch.nn.Module): self.patchifier = SymmetricPatchifier(1) def forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + 
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, attention_mask, frame_rate, transformer_options, keyframe_idxs, **kwargs) + + def _forward(self, x, timestep, context, attention_mask, frame_rate=25, transformer_options={}, keyframe_idxs=None, **kwargs): patches_replace = transformer_options.get("patches_replace", {}) orig_shape = list(x.shape) diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py index f8dc4d7db..e08ed817d 100644 --- a/comfy/ldm/lumina/model.py +++ b/comfy/ldm/lumina/model.py @@ -11,6 +11,7 @@ import comfy.ldm.common_dit from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder from comfy.ldm.modules.attention import optimized_attention_masked from comfy.ldm.flux.layers import EmbedND +import comfy.patcher_extension def modulate(x, scale): @@ -590,8 +591,15 @@ class NextDiT(nn.Module): return padded_full_embed, mask, img_sizes, l_effective_cap_len, freqs_cis - # def forward(self, x, t, cap_feats, cap_mask): def forward(self, x, timesteps, context, num_tokens, attention_mask=None, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, kwargs.get("transformer_options", {})) + ).execute(x, timesteps, context, num_tokens, attention_mask, **kwargs) + + # def forward(self, x, t, cap_feats, cap_mask): + def _forward(self, x, timesteps, context, num_tokens, attention_mask=None, **kwargs): t = 1.0 - timesteps cap_feats = context cap_mask = attention_mask diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py index af00ff119..57a458210 100644 --- a/comfy/ldm/qwen_image/model.py +++ b/comfy/ldm/qwen_image/model.py @@ -9,6 +9,7 @@ from comfy.ldm.lightricks.model import TimestepEmbedding, Timesteps from comfy.ldm.modules.attention import optimized_attention_masked from comfy.ldm.flux.layers import EmbedND import comfy.ldm.common_dit +import comfy.patcher_extension class GELU(nn.Module): def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True, dtype=None, device=None, operations=None): @@ -355,7 +356,14 @@ class QwenImageTransformer2DModel(nn.Module): img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(w_offset, w_len - 1 + w_offset, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) - (w_len // 2) return hidden_states, repeat(img_ids, "h w c -> b (h w) c", b=bs), orig_shape - def forward( + def forward(self, x, timestep, context, attention_mask=None, guidance=None, ref_latents=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, attention_mask, guidance, ref_latents, transformer_options, **kwargs) + + def _forward( self, x, timesteps, diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index 0726b8e1b..1885d9730 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -11,6 +11,7 @@ from comfy.ldm.flux.layers import EmbedND from comfy.ldm.flux.math import apply_rope import comfy.ldm.common_dit import comfy.model_management +import comfy.patcher_extension def sinusoidal_embedding_1d(dim, position): @@ -573,6 +574,13 @@ class WanModel(torch.nn.Module): return x def forward(self, x, timestep, context, clip_fea=None, 
time_dim_concat=None, transformer_options={}, **kwargs): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self._forward, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options) + ).execute(x, timestep, context, clip_fea, time_dim_concat, transformer_options, **kwargs) + + def _forward(self, x, timestep, context, clip_fea=None, time_dim_concat=None, transformer_options={}, **kwargs): bs, c, t, h, w = x.shape x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size) diff --git a/comfy/patcher_extension.py b/comfy/patcher_extension.py index 965958f4c..46cc7b2a8 100644 --- a/comfy/patcher_extension.py +++ b/comfy/patcher_extension.py @@ -50,6 +50,7 @@ class WrappersMP: OUTER_SAMPLE = "outer_sample" PREPARE_SAMPLING = "prepare_sampling" SAMPLER_SAMPLE = "sampler_sample" + PREDICT_NOISE = "predict_noise" CALC_COND_BATCH = "calc_cond_batch" APPLY_MODEL = "apply_model" DIFFUSION_MODEL = "diffusion_model" diff --git a/comfy/samplers.py b/comfy/samplers.py index d5390d64e..ec7e0b350 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -953,7 +953,14 @@ class CFGGuider: self.original_conds[k] = comfy.sampler_helpers.convert_cond(conds[k]) def __call__(self, *args, **kwargs): - return self.predict_noise(*args, **kwargs) + return self.outer_predict_noise(*args, **kwargs) + + def outer_predict_noise(self, x, timestep, model_options={}, seed=None): + return comfy.patcher_extension.WrapperExecutor.new_class_executor( + self.predict_noise, + self, + comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREDICT_NOISE, self.model_options, is_model_options=True) + ).execute(x, timestep, model_options, seed) def predict_noise(self, x, timestep, model_options={}, seed=None): return sampling_function(self.inner_model, x, timestep, self.conds.get("negative", None), self.conds.get("positive", None), self.cfg, model_options=model_options, seed=seed) diff --git a/comfy_extras/nodes_easycache.py b/comfy_extras/nodes_easycache.py new file mode 100644 index 000000000..e2b2efcd9 --- /dev/null +++ b/comfy_extras/nodes_easycache.py @@ -0,0 +1,459 @@ +from __future__ import annotations +from typing import TYPE_CHECKING, Union +from comfy_api.latest import io, ComfyExtension +import comfy.patcher_extension +import logging +import torch +import comfy.model_patcher +if TYPE_CHECKING: + from uuid import UUID + + +def easycache_forward_wrapper(executor, *args, **kwargs): + # get values from args + x: torch.Tensor = args[0] + transformer_options: dict[str] = args[-1] + if not isinstance(transformer_options, dict): + transformer_options = kwargs.get("transformer_options") + if not transformer_options: + transformer_options = args[-2] + easycache: EasyCacheHolder = transformer_options["easycache"] + sigmas = transformer_options["sigmas"] + uuids = transformer_options["uuids"] + if sigmas is not None and easycache.is_past_end_timestep(sigmas): + return executor(*args, **kwargs) + # prepare next x_prev + has_first_cond_uuid = easycache.has_first_cond_uuid(uuids) + next_x_prev = x + input_change = None + do_easycache = easycache.should_do_easycache(sigmas) + if do_easycache: + # if first cond marked this step for skipping, skip it and use appropriate cached values + if easycache.skip_current_step: + if easycache.verbose: + logging.info(f"EasyCache [verbose] - was marked to skip this step by {easycache.first_cond_uuid}. 
Present uuids: {uuids}") + return easycache.apply_cache_diff(x, uuids) + if easycache.initial_step: + easycache.first_cond_uuid = uuids[0] + has_first_cond_uuid = easycache.has_first_cond_uuid(uuids) + easycache.initial_step = False + if has_first_cond_uuid: + if easycache.has_x_prev_subsampled(): + input_change = (easycache.subsample(x, uuids, clone=False) - easycache.x_prev_subsampled).flatten().abs().mean() + if easycache.has_output_prev_norm() and easycache.has_relative_transformation_rate(): + approx_output_change_rate = (easycache.relative_transformation_rate * input_change) / easycache.output_prev_norm + easycache.cumulative_change_rate += approx_output_change_rate + if easycache.cumulative_change_rate < easycache.reuse_threshold: + if easycache.verbose: + logging.info(f"EasyCache [verbose] - skipping step; cumulative_change_rate: {easycache.cumulative_change_rate}, reuse_threshold: {easycache.reuse_threshold}") + # other conds should also skip this step, and instead use their cached values + easycache.skip_current_step = True + return easycache.apply_cache_diff(x, uuids) + else: + if easycache.verbose: + logging.info(f"EasyCache [verbose] - NOT skipping step; cumulative_change_rate: {easycache.cumulative_change_rate}, reuse_threshold: {easycache.reuse_threshold}") + easycache.cumulative_change_rate = 0.0 + + output: torch.Tensor = executor(*args, **kwargs) + if has_first_cond_uuid and easycache.has_output_prev_norm(): + output_change = (easycache.subsample(output, uuids, clone=False) - easycache.output_prev_subsampled).flatten().abs().mean() + if easycache.verbose: + output_change_rate = output_change / easycache.output_prev_norm + easycache.output_change_rates.append(output_change_rate.item()) + if easycache.has_relative_transformation_rate(): + approx_output_change_rate = (easycache.relative_transformation_rate * input_change) / easycache.output_prev_norm + easycache.approx_output_change_rates.append(approx_output_change_rate.item()) + if easycache.verbose: + logging.info(f"EasyCache [verbose] - approx_output_change_rate: {approx_output_change_rate}") + if input_change is not None: + easycache.relative_transformation_rate = output_change / input_change + if easycache.verbose: + logging.info(f"EasyCache [verbose] - output_change_rate: {output_change_rate}") + # TODO: allow cache_diff to be offloaded + easycache.update_cache_diff(output, next_x_prev, uuids) + if has_first_cond_uuid: + easycache.x_prev_subsampled = easycache.subsample(next_x_prev, uuids) + easycache.output_prev_subsampled = easycache.subsample(output, uuids) + easycache.output_prev_norm = output.flatten().abs().mean() + if easycache.verbose: + logging.info(f"EasyCache [verbose] - x_prev_subsampled: {easycache.x_prev_subsampled.shape}") + return output + +def lazycache_predict_noise_wrapper(executor, *args, **kwargs): + # get values from args + x: torch.Tensor = args[0] + timestep: float = args[1] + model_options: dict[str] = args[2] + easycache: LazyCacheHolder = model_options["transformer_options"]["easycache"] + if easycache.is_past_end_timestep(timestep): + return executor(*args, **kwargs) + # prepare next x_prev + next_x_prev = x + input_change = None + do_easycache = easycache.should_do_easycache(timestep) + if do_easycache: + if easycache.has_x_prev_subsampled(): + if easycache.has_x_prev_subsampled(): + input_change = (easycache.subsample(x, clone=False) - easycache.x_prev_subsampled).flatten().abs().mean() + if easycache.has_output_prev_norm() and easycache.has_relative_transformation_rate(): + 
approx_output_change_rate = (easycache.relative_transformation_rate * input_change) / easycache.output_prev_norm + easycache.cumulative_change_rate += approx_output_change_rate + if easycache.cumulative_change_rate < easycache.reuse_threshold: + if easycache.verbose: + logging.info(f"LazyCache [verbose] - skipping step; cumulative_change_rate: {easycache.cumulative_change_rate}, reuse_threshold: {easycache.reuse_threshold}") + # other conds should also skip this step, and instead use their cached values + easycache.skip_current_step = True + return easycache.apply_cache_diff(x) + else: + if easycache.verbose: + logging.info(f"LazyCache [verbose] - NOT skipping step; cumulative_change_rate: {easycache.cumulative_change_rate}, reuse_threshold: {easycache.reuse_threshold}") + easycache.cumulative_change_rate = 0.0 + output: torch.Tensor = executor(*args, **kwargs) + if easycache.has_output_prev_norm(): + output_change = (easycache.subsample(output, clone=False) - easycache.output_prev_subsampled).flatten().abs().mean() + if easycache.verbose: + output_change_rate = output_change / easycache.output_prev_norm + easycache.output_change_rates.append(output_change_rate.item()) + if easycache.has_relative_transformation_rate(): + approx_output_change_rate = (easycache.relative_transformation_rate * input_change) / easycache.output_prev_norm + easycache.approx_output_change_rates.append(approx_output_change_rate.item()) + if easycache.verbose: + logging.info(f"LazyCache [verbose] - approx_output_change_rate: {approx_output_change_rate}") + if input_change is not None: + easycache.relative_transformation_rate = output_change / input_change + if easycache.verbose: + logging.info(f"LazyCache [verbose] - output_change_rate: {output_change_rate}") + # TODO: allow cache_diff to be offloaded + easycache.update_cache_diff(output, next_x_prev) + easycache.x_prev_subsampled = easycache.subsample(next_x_prev) + easycache.output_prev_subsampled = easycache.subsample(output) + easycache.output_prev_norm = output.flatten().abs().mean() + if easycache.verbose: + logging.info(f"LazyCache [verbose] - x_prev_subsampled: {easycache.x_prev_subsampled.shape}") + return output + +def easycache_calc_cond_batch_wrapper(executor, *args, **kwargs): + model_options = args[-1] + easycache: EasyCacheHolder = model_options["transformer_options"]["easycache"] + easycache.skip_current_step = False + # TODO: check if first_cond_uuid is active at this timestep; otherwise, EasyCache needs to be partially reset + return executor(*args, **kwargs) + +def easycache_sample_wrapper(executor, *args, **kwargs): + """ + This OUTER_SAMPLE wrapper makes sure easycache is prepped for current run, and all memory usage is cleared at the end. 
+ """ + try: + guider = executor.class_obj + orig_model_options = guider.model_options + guider.model_options = comfy.model_patcher.create_model_options_clone(orig_model_options) + # clone and prepare timesteps + guider.model_options["transformer_options"]["easycache"] = guider.model_options["transformer_options"]["easycache"].clone().prepare_timesteps(guider.model_patcher.model.model_sampling) + easycache: Union[EasyCacheHolder, LazyCacheHolder] = guider.model_options['transformer_options']['easycache'] + logging.info(f"{easycache.name} enabled - threshold: {easycache.reuse_threshold}, start_percent: {easycache.start_percent}, end_percent: {easycache.end_percent}") + return executor(*args, **kwargs) + finally: + easycache = guider.model_options['transformer_options']['easycache'] + output_change_rates = easycache.output_change_rates + approx_output_change_rates = easycache.approx_output_change_rates + if easycache.verbose: + logging.info(f"{easycache.name} [verbose] - output_change_rates {len(output_change_rates)}: {output_change_rates}") + logging.info(f"{easycache.name} [verbose] - approx_output_change_rates {len(approx_output_change_rates)}: {approx_output_change_rates}") + total_steps = len(args[3])-1 + logging.info(f"{easycache.name} - skipped {easycache.total_steps_skipped}/{total_steps} steps ({total_steps/(total_steps-easycache.total_steps_skipped):.2f}x speedup).") + easycache.reset() + guider.model_options = orig_model_options + + +class EasyCacheHolder: + def __init__(self, reuse_threshold: float, start_percent: float, end_percent: float, subsample_factor: int, offload_cache_diff: bool, verbose: bool=False): + self.name = "EasyCache" + self.reuse_threshold = reuse_threshold + self.start_percent = start_percent + self.end_percent = end_percent + self.subsample_factor = subsample_factor + self.offload_cache_diff = offload_cache_diff + self.verbose = verbose + # timestep values + self.start_t = 0.0 + self.end_t = 0.0 + # control values + self.relative_transformation_rate: float = None + self.cumulative_change_rate = 0.0 + self.initial_step = True + self.skip_current_step = False + # cache values + self.first_cond_uuid = None + self.x_prev_subsampled: torch.Tensor = None + self.output_prev_subsampled: torch.Tensor = None + self.output_prev_norm: torch.Tensor = None + self.uuid_cache_diffs: dict[UUID, torch.Tensor] = {} + self.output_change_rates = [] + self.approx_output_change_rates = [] + self.total_steps_skipped = 0 + # how to deal with mismatched dims + self.allow_mismatch = True + self.cut_from_start = True + + def is_past_end_timestep(self, timestep: float) -> bool: + return not (timestep[0] > self.end_t).item() + + def should_do_easycache(self, timestep: float) -> bool: + return (timestep[0] <= self.start_t).item() + + def has_x_prev_subsampled(self) -> bool: + return self.x_prev_subsampled is not None + + def has_output_prev_subsampled(self) -> bool: + return self.output_prev_subsampled is not None + + def has_output_prev_norm(self) -> bool: + return self.output_prev_norm is not None + + def has_relative_transformation_rate(self) -> bool: + return self.relative_transformation_rate is not None + + def prepare_timesteps(self, model_sampling): + self.start_t = model_sampling.percent_to_sigma(self.start_percent) + self.end_t = model_sampling.percent_to_sigma(self.end_percent) + return self + + def subsample(self, x: torch.Tensor, uuids: list[UUID], clone: bool = True) -> torch.Tensor: + batch_offset = x.shape[0] // len(uuids) + uuid_idx = uuids.index(self.first_cond_uuid) + if 
self.subsample_factor > 1: + to_return = x[uuid_idx*batch_offset:(uuid_idx+1)*batch_offset, ..., ::self.subsample_factor, ::self.subsample_factor] + if clone: + return to_return.clone() + return to_return + to_return = x[uuid_idx*batch_offset:(uuid_idx+1)*batch_offset, ...] + if clone: + return to_return.clone() + return to_return + + def apply_cache_diff(self, x: torch.Tensor, uuids: list[UUID]): + if self.first_cond_uuid in uuids: + self.total_steps_skipped += 1 + batch_offset = x.shape[0] // len(uuids) + for i, uuid in enumerate(uuids): + # if cached dims don't match x dims, cut off excess and hope for the best (cosmos world2video) + if x.shape[1:] != self.uuid_cache_diffs[uuid].shape[1:]: + if not self.allow_mismatch: + raise ValueError(f"Cached dims {self.uuid_cache_diffs[uuid].shape} don't match x dims {x.shape} - this is no good") + slicing = [] + skip_this_dim = True + for dim_u, dim_x in zip(self.uuid_cache_diffs[uuid].shape, x.shape): + if skip_this_dim: + skip_this_dim = False + continue + if dim_u != dim_x: + if self.cut_from_start: + slicing.append(slice(dim_x-dim_u, None)) + else: + slicing.append(slice(None, dim_u)) + else: + slicing.append(slice(None)) + slicing = [slice(i*batch_offset,(i+1)*batch_offset)] + slicing + x = x[slicing] + x += self.uuid_cache_diffs[uuid].to(x.device) + return x + + def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID]): + # if output dims don't match x dims, cut off excess and hope for the best (cosmos world2video) + if output.shape[1:] != x.shape[1:]: + if not self.allow_mismatch: + raise ValueError(f"Output dims {output.shape} don't match x dims {x.shape} - this is no good") + slicing = [] + skip_dim = True + for dim_o, dim_x in zip(output.shape, x.shape): + if not skip_dim and dim_o != dim_x: + if self.cut_from_start: + slicing.append(slice(dim_x-dim_o, None)) + else: + slicing.append(slice(None, dim_o)) + else: + slicing.append(slice(None)) + skip_dim = False + x = x[slicing] + diff = output - x + batch_offset = diff.shape[0] // len(uuids) + for i, uuid in enumerate(uuids): + self.uuid_cache_diffs[uuid] = diff[i*batch_offset:(i+1)*batch_offset, ...] 
+ + def has_first_cond_uuid(self, uuids: list[UUID]) -> bool: + return self.first_cond_uuid in uuids + + def reset(self): + self.relative_transformation_rate = 0.0 + self.cumulative_change_rate = 0.0 + self.initial_step = True + self.skip_current_step = False + self.output_change_rates = [] + self.first_cond_uuid = None + del self.x_prev_subsampled + self.x_prev_subsampled = None + del self.output_prev_subsampled + self.output_prev_subsampled = None + del self.output_prev_norm + self.output_prev_norm = None + del self.uuid_cache_diffs + self.uuid_cache_diffs = {} + self.total_steps_skipped = 0 + return self + + def clone(self): + return EasyCacheHolder(self.reuse_threshold, self.start_percent, self.end_percent, self.subsample_factor, self.offload_cache_diff, self.verbose) + + +class EasyCacheNode(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="EasyCache", + display_name="EasyCache", + description="Native EasyCache implementation.", + category="advanced/debug/model", + is_experimental=True, + inputs=[ + io.Model.Input("model", tooltip="The model to add EasyCache to."), + io.Float.Input("reuse_threshold", min=0.0, default=0.2, max=3.0, step=0.01, tooltip="The threshold for reusing cached steps."), + io.Float.Input("start_percent", min=0.0, default=0.15, max=1.0, step=0.01, tooltip="The relative sampling step to begin use of EasyCache."), + io.Float.Input("end_percent", min=0.0, default=0.95, max=1.0, step=0.01, tooltip="The relative sampling step to end use of EasyCache."), + io.Boolean.Input("verbose", default=False, tooltip="Whether to log verbose information."), + ], + outputs=[ + io.Model.Output(tooltip="The model with EasyCache."), + ], + ) + + @classmethod + def execute(cls, model: io.Model.Type, reuse_threshold: float, start_percent: float, end_percent: float, verbose: bool) -> io.NodeOutput: + model = model.clone() + model.model_options["transformer_options"]["easycache"] = EasyCacheHolder(reuse_threshold, start_percent, end_percent, subsample_factor=8, offload_cache_diff=False, verbose=verbose) + model.add_wrapper_with_key(comfy.patcher_extension.WrappersMP.OUTER_SAMPLE, "easycache", easycache_sample_wrapper) + model.add_wrapper_with_key(comfy.patcher_extension.WrappersMP.CALC_COND_BATCH, "easycache", easycache_calc_cond_batch_wrapper) + model.add_wrapper_with_key(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, "easycache", easycache_forward_wrapper) + return io.NodeOutput(model) + + +class LazyCacheHolder: + def __init__(self, reuse_threshold: float, start_percent: float, end_percent: float, subsample_factor: int, offload_cache_diff: bool, verbose: bool=False): + self.name = "LazyCache" + self.reuse_threshold = reuse_threshold + self.start_percent = start_percent + self.end_percent = end_percent + self.subsample_factor = subsample_factor + self.offload_cache_diff = offload_cache_diff + self.verbose = verbose + # timestep values + self.start_t = 0.0 + self.end_t = 0.0 + # control values + self.relative_transformation_rate: float = None + self.cumulative_change_rate = 0.0 + self.initial_step = True + # cache values + self.x_prev_subsampled: torch.Tensor = None + self.output_prev_subsampled: torch.Tensor = None + self.output_prev_norm: torch.Tensor = None + self.cache_diff: torch.Tensor = None + self.output_change_rates = [] + self.approx_output_change_rates = [] + self.total_steps_skipped = 0 + + def has_cache_diff(self) -> bool: + return self.cache_diff is not None + + def is_past_end_timestep(self, timestep: float) -> bool: + 
return not (timestep[0] > self.end_t).item() + + def should_do_easycache(self, timestep: float) -> bool: + return (timestep[0] <= self.start_t).item() + + def has_x_prev_subsampled(self) -> bool: + return self.x_prev_subsampled is not None + + def has_output_prev_subsampled(self) -> bool: + return self.output_prev_subsampled is not None + + def has_output_prev_norm(self) -> bool: + return self.output_prev_norm is not None + + def has_relative_transformation_rate(self) -> bool: + return self.relative_transformation_rate is not None + + def prepare_timesteps(self, model_sampling): + self.start_t = model_sampling.percent_to_sigma(self.start_percent) + self.end_t = model_sampling.percent_to_sigma(self.end_percent) + return self + + def subsample(self, x: torch.Tensor, clone: bool = True) -> torch.Tensor: + if self.subsample_factor > 1: + to_return = x[..., ::self.subsample_factor, ::self.subsample_factor] + if clone: + return to_return.clone() + return to_return + if clone: + return x.clone() + return x + + def apply_cache_diff(self, x: torch.Tensor): + self.total_steps_skipped += 1 + return x + self.cache_diff.to(x.device) + + def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor): + self.cache_diff = output - x + + def reset(self): + self.relative_transformation_rate = 0.0 + self.cumulative_change_rate = 0.0 + self.initial_step = True + self.output_change_rates = [] + self.approx_output_change_rates = [] + del self.cache_diff + self.cache_diff = None + self.total_steps_skipped = 0 + return self + + def clone(self): + return LazyCacheHolder(self.reuse_threshold, self.start_percent, self.end_percent, self.subsample_factor, self.offload_cache_diff, self.verbose) + +class LazyCacheNode(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="LazyCache", + display_name="LazyCache", + description="A homebrew version of EasyCache - even 'easier' version of EasyCache to implement. 
Overall works worse than EasyCache, but better in some rare cases AND universal compatibility with everything in ComfyUI.", + category="advanced/debug/model", + is_experimental=True, + inputs=[ + io.Model.Input("model", tooltip="The model to add LazyCache to."), + io.Float.Input("reuse_threshold", min=0.0, default=0.2, max=3.0, step=0.01, tooltip="The threshold for reusing cached steps."), + io.Float.Input("start_percent", min=0.0, default=0.15, max=1.0, step=0.01, tooltip="The relative sampling step to begin use of LazyCache."), + io.Float.Input("end_percent", min=0.0, default=0.95, max=1.0, step=0.01, tooltip="The relative sampling step to end use of LazyCache."), + io.Boolean.Input("verbose", default=False, tooltip="Whether to log verbose information."), + ], + outputs=[ + io.Model.Output(tooltip="The model with LazyCache."), + ], + ) + + @classmethod + def execute(cls, model: io.Model.Type, reuse_threshold: float, start_percent: float, end_percent: float, verbose: bool) -> io.NodeOutput: + model = model.clone() + model.model_options["transformer_options"]["easycache"] = LazyCacheHolder(reuse_threshold, start_percent, end_percent, subsample_factor=8, offload_cache_diff=False, verbose=verbose) + model.add_wrapper_with_key(comfy.patcher_extension.WrappersMP.OUTER_SAMPLE, "lazycache", easycache_sample_wrapper) + model.add_wrapper_with_key(comfy.patcher_extension.WrappersMP.PREDICT_NOISE, "lazycache", lazycache_predict_noise_wrapper) + return io.NodeOutput(model) + + +class EasyCacheExtension(ComfyExtension): + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + EasyCacheNode, + LazyCacheNode, + ] + +def comfy_entrypoint(): + return EasyCacheExtension() diff --git a/nodes.py b/nodes.py index 9681750d3..723ce3384 100644 --- a/nodes.py +++ b/nodes.py @@ -2322,7 +2322,8 @@ async def init_builtin_extra_nodes(): "nodes_tcfg.py", "nodes_context_windows.py", "nodes_qwen.py", - "nodes_model_patch.py" + "nodes_model_patch.py", + "nodes_easycache.py", ] import_failed = [] From 41048c69b4ccf63f876213a95a51cdde1cb0ab84 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 22 Aug 2025 20:15:44 -0700 Subject: [PATCH 02/12] Fix Conditioning masks on 3d latents. 
(#9506) --- comfy/samplers.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index ec7e0b350..c7dfef4ea 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -17,6 +17,7 @@ import comfy.model_patcher import comfy.patcher_extension import comfy.hooks import comfy.context_windows +import comfy.utils import scipy.stats import numpy @@ -61,7 +62,7 @@ def get_area_and_mult(conds, x_in, timestep_in): if "mask_strength" in conds: mask_strength = conds["mask_strength"] mask = conds['mask'] - assert (mask.shape[1:] == x_in.shape[2:]) + # assert (mask.shape[1:] == x_in.shape[2:]) mask = mask[:input_x.shape[0]] if area is not None: @@ -69,7 +70,7 @@ def get_area_and_mult(conds, x_in, timestep_in): mask = mask.narrow(i + 1, area[len(dims) + i], area[i]) mask = mask * mask_strength - mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1) + mask = mask.unsqueeze(1).repeat((input_x.shape[0] // mask.shape[0], input_x.shape[1]) + (1, ) * (mask.ndim - 1)) else: mask = torch.ones_like(input_x) mult = mask * strength @@ -553,7 +554,10 @@ def resolve_areas_and_cond_masks_multidim(conditions, dims, device): if len(mask.shape) == len(dims): mask = mask.unsqueeze(0) if mask.shape[1:] != dims: - mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=dims, mode='bilinear', align_corners=False).squeeze(1) + if mask.ndim < 4: + mask = comfy.utils.common_upscale(mask.unsqueeze(1), dims[-1], dims[-2], 'bilinear', 'none').squeeze(1) + else: + mask = comfy.utils.common_upscale(mask, dims[-1], dims[-2], 'bilinear', 'none') if modified.get("set_area_to_bounds", False): #TODO: handle dim != 2 bounds = torch.max(torch.abs(mask),dim=0).values.unsqueeze(0) From 59eddda90030b61f172e155bc1e2526a51a27dff Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 22 Aug 2025 22:36:44 -0700 Subject: [PATCH 03/12] Python 3.13 is well supported. (#9511) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 79a8a8c79..99a50571b 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,7 @@ comfy install ## Manual Install (Windows, Linux) -python 3.13 is supported but using 3.12 is recommended because some custom nodes and their dependencies might not support it yet. +Python 3.13 is very well supported. If you have trouble with some custom node dependencies you can try 3.12 Git clone this repo. From 8be0d22ab76a3d548c9c376fd816b39d4c028c12 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sat, 23 Aug 2025 10:56:17 -0700 Subject: [PATCH 04/12] Don't use the annoying new navigation mode by default. 
(#9518) --- app/app_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/app_settings.py b/app/app_settings.py index c7ac73bf6..eb69133a3 100644 --- a/app/app_settings.py +++ b/app/app_settings.py @@ -25,7 +25,7 @@ class AppSettings(): logging.error(f"The user settings file is corrupted: {file}") return {} else: - return {} + return {"Comfy.Canvas.NavigationMode": "legacy"} def save_settings(self, request, settings): file = self.user_manager.get_request_user_filepath( From 3e316c6338503a535801db3ddac9572a38a607ef Mon Sep 17 00:00:00 2001 From: Christian Byrne Date: Sat, 23 Aug 2025 14:54:01 -0700 Subject: [PATCH 05/12] Update frontend to v1.25.10 and revert navigation mode override (#9522) - Update comfyui-frontend-package from 1.25.9 to 1.25.10 - Revert forced legacy navigation mode from PR #9518 - Frontend v1.25.10 includes proper navigation mode fixes and improved display text --- app/app_settings.py | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/app_settings.py b/app/app_settings.py index eb69133a3..c7ac73bf6 100644 --- a/app/app_settings.py +++ b/app/app_settings.py @@ -25,7 +25,7 @@ class AppSettings(): logging.error(f"The user settings file is corrupted: {file}") return {} else: - return {"Comfy.Canvas.NavigationMode": "legacy"} + return {} def save_settings(self, request, settings): file = self.user_manager.get_request_user_filepath( diff --git a/requirements.txt b/requirements.txt index 6b53fabc1..131484ce8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.25.9 +comfyui-frontend-package==1.25.10 comfyui-workflow-templates==0.1.65 comfyui-embedded-docs==0.2.6 torch From 71ed4a399ec76a75aa2870b772d2022e4b9a69a3 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 23 Aug 2025 18:57:09 -0400 Subject: [PATCH 06/12] ComfyUI version 0.3.52 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 65f06cf37..834c3e8c2 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. 
-__version__ = "0.3.51" +__version__ = "0.3.52" diff --git a/pyproject.toml b/pyproject.toml index ecbf04303..f6e765a81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.51" +version = "0.3.52" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From 95ac7794b7c735de8e5426442507d08edd29bec5 Mon Sep 17 00:00:00 2001 From: blepping <157360029+blepping@users.noreply.github.com> Date: Sun, 24 Aug 2025 13:29:49 -0600 Subject: [PATCH 07/12] Fix EasyCache/LazyCache crash when tensor shape/dtype/device changes during sampling (#9528) * Fix EasyCache/LazyCache crash when tensor shape/dtype/device changes during sampling * Fix missing LazyCache check_metadata method Ensure LazyCache reset method resets all the tensor state values --- comfy_extras/nodes_easycache.py | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/comfy_extras/nodes_easycache.py b/comfy_extras/nodes_easycache.py index e2b2efcd9..9d2988f5f 100644 --- a/comfy_extras/nodes_easycache.py +++ b/comfy_extras/nodes_easycache.py @@ -28,6 +28,7 @@ def easycache_forward_wrapper(executor, *args, **kwargs): input_change = None do_easycache = easycache.should_do_easycache(sigmas) if do_easycache: + easycache.check_metadata(x) # if first cond marked this step for skipping, skip it and use appropriate cached values if easycache.skip_current_step: if easycache.verbose: @@ -92,6 +93,7 @@ def lazycache_predict_noise_wrapper(executor, *args, **kwargs): input_change = None do_easycache = easycache.should_do_easycache(timestep) if do_easycache: + easycache.check_metadata(x) if easycache.has_x_prev_subsampled(): if easycache.has_x_prev_subsampled(): input_change = (easycache.subsample(x, clone=False) - easycache.x_prev_subsampled).flatten().abs().mean() @@ -194,6 +196,7 @@ class EasyCacheHolder: # how to deal with mismatched dims self.allow_mismatch = True self.cut_from_start = True + self.state_metadata = None def is_past_end_timestep(self, timestep: float) -> bool: return not (timestep[0] > self.end_t).item() @@ -283,6 +286,17 @@ class EasyCacheHolder: def has_first_cond_uuid(self, uuids: list[UUID]) -> bool: return self.first_cond_uuid in uuids + def check_metadata(self, x: torch.Tensor) -> bool: + metadata = (x.device, x.dtype, x.shape[1:]) + if self.state_metadata is None: + self.state_metadata = metadata + return True + if metadata == self.state_metadata: + return True + logging.warn(f"{self.name} - Tensor shape, dtype or device changed, resetting state") + self.reset() + return False + def reset(self): self.relative_transformation_rate = 0.0 self.cumulative_change_rate = 0.0 @@ -299,6 +313,7 @@ class EasyCacheHolder: del self.uuid_cache_diffs self.uuid_cache_diffs = {} self.total_steps_skipped = 0 + self.state_metadata = None return self def clone(self): @@ -360,6 +375,7 @@ class LazyCacheHolder: self.output_change_rates = [] self.approx_output_change_rates = [] self.total_steps_skipped = 0 + self.state_metadata = None def has_cache_diff(self) -> bool: return self.cache_diff is not None @@ -404,6 +420,17 @@ class LazyCacheHolder: def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor): self.cache_diff = output - x + def check_metadata(self, x: torch.Tensor) -> bool: + metadata = (x.device, x.dtype, x.shape) + if self.state_metadata is None: + self.state_metadata = metadata + return True + if metadata == self.state_metadata: + return True + logging.warn(f"{self.name} - Tensor shape, dtype or device changed, resetting 
state") + self.reset() + return False + def reset(self): self.relative_transformation_rate = 0.0 self.cumulative_change_rate = 0.0 @@ -412,7 +439,14 @@ class LazyCacheHolder: self.approx_output_change_rates = [] del self.cache_diff self.cache_diff = None + del self.x_prev_subsampled + self.x_prev_subsampled = None + del self.output_prev_subsampled + self.output_prev_subsampled = None + del self.output_prev_norm + self.output_prev_norm = None self.total_steps_skipped = 0 + self.state_metadata = None return self def clone(self): From f6b93d41a03081fad3c1a01221eac9c42d6790df Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sun, 24 Aug 2025 12:40:32 -0700 Subject: [PATCH 08/12] Remove models from readme that are not fully implemented. (#9535) Cosmos model implementations are currently missing the safety part so it is technically not fully implemented and should not be advertised as such. --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 99a50571b..8024870c2 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,6 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith - [Flux](https://comfyanonymous.github.io/ComfyUI_examples/flux/) - [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/) - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/) - - [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/) - [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/) - Image Editing Models - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/) @@ -77,7 +76,6 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith - [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/) - [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/) - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/) - - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/) and [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/) - [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/) - [Wan 2.2](https://comfyanonymous.github.io/ComfyUI_examples/wan22/) - Audio Models From e633a47ad1b875e52758be27ec34cb8907ebe1fb Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 25 Aug 2025 17:13:54 -0700 Subject: [PATCH 09/12] Add models/audio_encoders directory. 
(#9548) --- folder_paths.py | 2 ++ models/audio_encoders/put_audio_encoder_models_here | 0 2 files changed, 2 insertions(+) create mode 100644 models/audio_encoders/put_audio_encoder_models_here diff --git a/folder_paths.py b/folder_paths.py index b34af39e8..f110d832b 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -48,6 +48,8 @@ folder_names_and_paths["classifiers"] = ([os.path.join(models_dir, "classifiers" folder_names_and_paths["model_patches"] = ([os.path.join(models_dir, "model_patches")], supported_pt_extensions) +folder_names_and_paths["audio_encoders"] = ([os.path.join(models_dir, "audio_encoders")], supported_pt_extensions) + output_directory = os.path.join(base_path, "output") temp_directory = os.path.join(base_path, "temp") input_directory = os.path.join(base_path, "input") diff --git a/models/audio_encoders/put_audio_encoder_models_here b/models/audio_encoders/put_audio_encoder_models_here new file mode 100644 index 000000000..e69de29bb From 914c2a29731be9c082f773c4b95892f553ac5ae8 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 25 Aug 2025 20:26:47 -0700 Subject: [PATCH 10/12] Implement wav2vec2 as an audio encoder model. (#9549) This is useless on its own but there are multiple models that use it. --- comfy/audio_encoders/audio_encoders.py | 42 +++++ comfy/audio_encoders/wav2vec2.py | 207 +++++++++++++++++++++++++ comfy_api/latest/_io.py | 8 + comfy_extras/nodes_audio_encoder.py | 44 ++++++ nodes.py | 1 + 5 files changed, 302 insertions(+) create mode 100644 comfy/audio_encoders/audio_encoders.py create mode 100644 comfy/audio_encoders/wav2vec2.py create mode 100644 comfy_extras/nodes_audio_encoder.py diff --git a/comfy/audio_encoders/audio_encoders.py b/comfy/audio_encoders/audio_encoders.py new file mode 100644 index 000000000..538c21bd5 --- /dev/null +++ b/comfy/audio_encoders/audio_encoders.py @@ -0,0 +1,42 @@ +from .wav2vec2 import Wav2Vec2Model +import comfy.model_management +import comfy.ops +import comfy.utils +import logging +import torchaudio + + +class AudioEncoderModel(): + def __init__(self, config): + self.load_device = comfy.model_management.text_encoder_device() + offload_device = comfy.model_management.text_encoder_offload_device() + self.dtype = comfy.model_management.text_encoder_dtype(self.load_device) + self.model = Wav2Vec2Model(dtype=self.dtype, device=offload_device, operations=comfy.ops.manual_cast) + self.model.eval() + self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + self.model_sample_rate = 16000 + + def load_sd(self, sd): + return self.model.load_state_dict(sd, strict=False) + + def get_sd(self): + return self.model.state_dict() + + def encode_audio(self, audio, sample_rate): + comfy.model_management.load_model_gpu(self.patcher) + audio = torchaudio.functional.resample(audio, sample_rate, self.model_sample_rate) + out, all_layers = self.model(audio.to(self.load_device)) + outputs = {} + outputs["encoded_audio"] = out + outputs["encoded_audio_all_layers"] = all_layers + return outputs + + +def load_audio_encoder_from_sd(sd, prefix=""): + audio_encoder = AudioEncoderModel(None) + sd = comfy.utils.state_dict_prefix_replace(sd, {"wav2vec2.": ""}) + m, u = audio_encoder.load_sd(sd) + if len(m) > 0: + logging.warning("missing audio encoder: {}".format(m)) + + return audio_encoder diff --git a/comfy/audio_encoders/wav2vec2.py b/comfy/audio_encoders/wav2vec2.py new file mode 100644 index 000000000..de906622a --- 
/dev/null +++ b/comfy/audio_encoders/wav2vec2.py @@ -0,0 +1,207 @@ +import torch +import torch.nn as nn +from comfy.ldm.modules.attention import optimized_attention_masked + + +class LayerNormConv(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride, bias=False, dtype=None, device=None, operations=None): + super().__init__() + self.conv = operations.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, bias=bias, device=device, dtype=dtype) + self.layer_norm = operations.LayerNorm(out_channels, elementwise_affine=True, device=device, dtype=dtype) + + def forward(self, x): + x = self.conv(x) + return torch.nn.functional.gelu(self.layer_norm(x.transpose(-2, -1)).transpose(-2, -1)) + + +class ConvFeatureEncoder(nn.Module): + def __init__(self, conv_dim, dtype=None, device=None, operations=None): + super().__init__() + self.conv_layers = nn.ModuleList([ + LayerNormConv(1, conv_dim, kernel_size=10, stride=5, bias=True, device=device, dtype=dtype, operations=operations), + LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=True, device=device, dtype=dtype, operations=operations), + LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=True, device=device, dtype=dtype, operations=operations), + LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=True, device=device, dtype=dtype, operations=operations), + LayerNormConv(conv_dim, conv_dim, kernel_size=3, stride=2, bias=True, device=device, dtype=dtype, operations=operations), + LayerNormConv(conv_dim, conv_dim, kernel_size=2, stride=2, bias=True, device=device, dtype=dtype, operations=operations), + LayerNormConv(conv_dim, conv_dim, kernel_size=2, stride=2, bias=True, device=device, dtype=dtype, operations=operations), + ]) + + def forward(self, x): + x = x.unsqueeze(1) + + for conv in self.conv_layers: + x = conv(x) + + return x.transpose(1, 2) + + +class FeatureProjection(nn.Module): + def __init__(self, conv_dim, embed_dim, dtype=None, device=None, operations=None): + super().__init__() + self.layer_norm = operations.LayerNorm(conv_dim, eps=1e-05, device=device, dtype=dtype) + self.projection = operations.Linear(conv_dim, embed_dim, device=device, dtype=dtype) + + def forward(self, x): + x = self.layer_norm(x) + x = self.projection(x) + return x + + +class PositionalConvEmbedding(nn.Module): + def __init__(self, embed_dim=768, kernel_size=128, groups=16): + super().__init__() + self.conv = nn.Conv1d( + embed_dim, + embed_dim, + kernel_size=kernel_size, + padding=kernel_size // 2, + groups=groups, + ) + self.conv = torch.nn.utils.parametrizations.weight_norm(self.conv, name="weight", dim=2) + self.activation = nn.GELU() + + def forward(self, x): + x = x.transpose(1, 2) + x = self.conv(x)[:, :, :-1] + x = self.activation(x) + x = x.transpose(1, 2) + return x + + +class TransformerEncoder(nn.Module): + def __init__( + self, + embed_dim=768, + num_heads=12, + num_layers=12, + mlp_ratio=4.0, + dtype=None, device=None, operations=None + ): + super().__init__() + + self.pos_conv_embed = PositionalConvEmbedding(embed_dim=embed_dim) + self.layers = nn.ModuleList([ + TransformerEncoderLayer( + embed_dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + device=device, dtype=dtype, operations=operations + ) + for _ in range(num_layers) + ]) + + self.layer_norm = operations.LayerNorm(embed_dim, eps=1e-05, device=device, dtype=dtype) + + def forward(self, x, mask=None): + x = x + self.pos_conv_embed(x) + all_x = () + for layer in self.layers: + all_x += (x,) + x = 
+
+
+class Attention(nn.Module):
+    def __init__(self, embed_dim, num_heads, bias=True, dtype=None, device=None, operations=None):
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.head_dim = embed_dim // num_heads
+
+        self.k_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
+        self.v_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
+        self.q_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
+        self.out_proj = operations.Linear(embed_dim, embed_dim, bias=bias, device=device, dtype=dtype)
+
+    def forward(self, x, mask=None):
+        assert (mask is None)  # TODO?
+        q = self.q_proj(x)
+        k = self.k_proj(x)
+        v = self.v_proj(x)
+
+        out = optimized_attention_masked(q, k, v, self.num_heads)
+        return self.out_proj(out)
+
+
+class FeedForward(nn.Module):
+    def __init__(self, embed_dim, mlp_ratio, dtype=None, device=None, operations=None):
+        super().__init__()
+        self.intermediate_dense = operations.Linear(embed_dim, int(embed_dim * mlp_ratio), device=device, dtype=dtype)
+        self.output_dense = operations.Linear(int(embed_dim * mlp_ratio), embed_dim, device=device, dtype=dtype)
+
+    def forward(self, x):
+        x = self.intermediate_dense(x)
+        x = torch.nn.functional.gelu(x)
+        x = self.output_dense(x)
+        return x
+
+
+class TransformerEncoderLayer(nn.Module):
+    def __init__(
+        self,
+        embed_dim=768,
+        num_heads=12,
+        mlp_ratio=4.0,
+        dtype=None, device=None, operations=None
+    ):
+        super().__init__()
+
+        self.attention = Attention(embed_dim, num_heads, device=device, dtype=dtype, operations=operations)
+
+        self.layer_norm = operations.LayerNorm(embed_dim, device=device, dtype=dtype)
+        self.feed_forward = FeedForward(embed_dim, mlp_ratio, device=device, dtype=dtype, operations=operations)
+        self.final_layer_norm = operations.LayerNorm(embed_dim, device=device, dtype=dtype)
+
+    def forward(self, x, mask=None):
+        residual = x
+        x = self.layer_norm(x)
+        x = self.attention(x, mask=mask)
+        x = residual + x
+
+        x = x + self.feed_forward(self.final_layer_norm(x))
+        return x
+
+
+class Wav2Vec2Model(nn.Module):
+    """Complete Wav2Vec 2.0 model."""
+
+    def __init__(
+        self,
+        embed_dim=1024,
+        final_dim=256,
+        num_heads=16,
+        num_layers=24,
+        dtype=None, device=None, operations=None
+    ):
+        super().__init__()
+
+        conv_dim = 512
+        self.feature_extractor = ConvFeatureEncoder(conv_dim, device=device, dtype=dtype, operations=operations)
+        self.feature_projection = FeatureProjection(conv_dim, embed_dim, device=device, dtype=dtype, operations=operations)
+
+        self.masked_spec_embed = nn.Parameter(torch.empty(embed_dim, device=device, dtype=dtype))
+
+        self.encoder = TransformerEncoder(
+            embed_dim=embed_dim,
+            num_heads=num_heads,
+            num_layers=num_layers,
+            device=device, dtype=dtype, operations=operations
+        )
+
+    def forward(self, x, mask_time_indices=None, return_dict=False):
+        x = torch.mean(x, dim=1)
+
+        x = (x - x.mean()) / torch.sqrt(x.var() + 1e-7)
+
+        features = self.feature_extractor(x)
+        features = self.feature_projection(features)
+
+        batch_size, seq_len, _ = features.shape
+
+        x, all_x = self.encoder(features)
+
+        return x, all_x
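A shape sanity check may help reviewers here: the seven conv strides multiply out to 5 * 2 * 2 * 2 * 2 * 2 * 2 = 320, so one second of 16 kHz mono audio becomes roughly 49 feature frames (at conv_dim=512, projected to embed_dim) before the transformer. A minimal sketch, assuming plain torch.nn modules are an acceptable stand-in for the `operations` factory, with small dimensions for speed:

    import torch
    import torch.nn as nn
    from comfy.audio_encoders.wav2vec2 import Wav2Vec2Model

    # Small dims for a quick CPU check; real checkpoints use 1024/16/24.
    model = Wav2Vec2Model(embed_dim=64, num_heads=4, num_layers=2, operations=nn)
    wav = torch.randn(1, 1, 16000)   # [batch, channels, samples], 1 second of mono 16 kHz
    out, layers = model(wav)
    print(out.shape)     # torch.Size([1, 49, 64]), i.e. ~320x temporal downsampling
    print(len(layers))   # num_layers + 1 hidden states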
diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py
index a3a21facc..5cb474459 100644
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -730,6 +730,14 @@ class AnyType(ComfyTypeIO):
 class MODEL_PATCH(ComfyTypeIO):
     Type = Any
 
+@comfytype(io_type="AUDIO_ENCODER")
+class AUDIO_ENCODER(ComfyTypeIO):
+    Type = Any
+
+@comfytype(io_type="AUDIO_ENCODER_OUTPUT")
+class AUDIO_ENCODER_OUTPUT(ComfyTypeIO):
+    Type = Any
+
 @comfytype(io_type="COMFY_MULTITYPED_V3")
 class MultiType:
     Type = Any
diff --git a/comfy_extras/nodes_audio_encoder.py b/comfy_extras/nodes_audio_encoder.py
new file mode 100644
index 000000000..39a140fef
--- /dev/null
+++ b/comfy_extras/nodes_audio_encoder.py
@@ -0,0 +1,44 @@
+import folder_paths
+import comfy.audio_encoders.audio_encoders
+import comfy.utils
+
+
+class AudioEncoderLoader:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "audio_encoder_name": (folder_paths.get_filename_list("audio_encoders"), ),
+                             }}
+    RETURN_TYPES = ("AUDIO_ENCODER",)
+    FUNCTION = "load_model"
+
+    CATEGORY = "loaders"
+
+    def load_model(self, audio_encoder_name):
+        audio_encoder_name = folder_paths.get_full_path_or_raise("audio_encoders", audio_encoder_name)
+        sd = comfy.utils.load_torch_file(audio_encoder_name, safe_load=True)
+        audio_encoder = comfy.audio_encoders.audio_encoders.load_audio_encoder_from_sd(sd)
+        if audio_encoder is None:
+            raise RuntimeError("ERROR: audio encoder file is invalid and does not contain a valid model.")
+        return (audio_encoder,)
+
+
+class AudioEncoderEncode:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "audio_encoder": ("AUDIO_ENCODER",),
+                              "audio": ("AUDIO",),
+                             }}
+    RETURN_TYPES = ("AUDIO_ENCODER_OUTPUT",)
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning"
+
+    def encode(self, audio_encoder, audio):
+        output = audio_encoder.encode_audio(audio["waveform"], audio["sample_rate"])
+        return (output,)
+
+
+NODE_CLASS_MAPPINGS = {
+    "AudioEncoderLoader": AudioEncoderLoader,
+    "AudioEncoderEncode": AudioEncoderEncode,
+}
diff --git a/nodes.py b/nodes.py
index 723ce3384..0aff6b14a 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2324,6 +2324,7 @@ async def init_builtin_extra_nodes():
         "nodes_qwen.py",
         "nodes_model_patch.py",
         "nodes_easycache.py",
+        "nodes_audio_encoder.py",
     ]
 
     import_failed = []
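Chained together, the two nodes above map onto the usual loader/encode split. A hypothetical direct invocation, bypassing the graph executor (the filename is illustrative and would have to exist under models/audio_encoders/):

    import torch
    from comfy_extras.nodes_audio_encoder import AudioEncoderLoader, AudioEncoderEncode

    (encoder,) = AudioEncoderLoader().load_model("wav2vec2.safetensors")

    # AUDIO inputs are dicts of waveform plus sample rate, matching the AUDIO socket.
    audio = {"waveform": torch.randn(1, 2, 44100), "sample_rate": 44100}
    (encoded,) = AudioEncoderEncode().encode(encoder, audio)
    # `encoded` is the AUDIO_ENCODER_OUTPUT dict handed to downstream model nodes.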
From 39aa06bd5d630e50c88d3be1586d21737c4387c1 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 26 Aug 2025 09:50:46 -0700
Subject: [PATCH 11/12] Make AudioEncoderOutput usable in v3 node schema.
 (#9554)

---
 comfy_api/latest/_io.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py
index 5cb474459..e0ee943a7 100644
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -731,11 +731,11 @@ class MODEL_PATCH(ComfyTypeIO):
     Type = Any
 
 @comfytype(io_type="AUDIO_ENCODER")
-class AUDIO_ENCODER(ComfyTypeIO):
+class AudioEncoder(ComfyTypeIO):
     Type = Any
 
 @comfytype(io_type="AUDIO_ENCODER_OUTPUT")
-class AUDIO_ENCODER_OUTPUT(ComfyTypeIO):
+class AudioEncoderOutput(ComfyTypeIO):
     Type = Any
 
 @comfytype(io_type="COMFY_MULTITYPED_V3")
@@ -1592,6 +1592,7 @@ class _IO:
     Model = Model
     ClipVision = ClipVision
     ClipVisionOutput = ClipVisionOutput
+    AudioEncoderOutput = AudioEncoderOutput
     StyleModel = StyleModel
     Gligen = Gligen
     UpscaleModel = UpscaleModel

From 5352abc6d389570455776c457738db54367cd6cb Mon Sep 17 00:00:00 2001
From: ComfyUI Wiki
Date: Wed, 27 Aug 2025 01:33:54 +0800
Subject: [PATCH 12/12] Update template to 0.1.66 (#9557)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 131484ce8..db59bb38c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.25.10
-comfyui-workflow-templates==0.1.65
+comfyui-workflow-templates==0.1.66
 comfyui-embedded-docs==0.2.6
 torch
 torchsde
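With the rename in PATCH 11, v3 schema definitions can reference the encoder output type directly. A hypothetical consumer, sketched against the v3 node API as this series leaves it; everything here except io.AudioEncoderOutput is illustrative:

    from comfy_api.latest import io

    class AudioFeatureProbe(io.ComfyNode):
        """Hypothetical v3-schema node; only io.AudioEncoderOutput comes from this series."""
        @classmethod
        def define_schema(cls):
            return io.Schema(
                node_id="AudioFeatureProbe",
                category="conditioning",
                inputs=[io.AudioEncoderOutput.Input("audio_features")],
                outputs=[],
            )

        @classmethod
        def execute(cls, audio_features) -> io.NodeOutput:
            # Inspect the dict produced by AudioEncoderEncode upstream.
            print(audio_features["encoded_audio"].shape)
            return io.NodeOutput()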