From d68ece7301c63da11e0b565da0ecc2900c8ea447 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Sun, 12 Oct 2025 20:54:41 -0700
Subject: [PATCH 1/5] Update the extra_model_paths.yaml.example (#10319)

---
 extra_model_paths.yaml.example | 43 +++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/extra_model_paths.yaml.example b/extra_model_paths.yaml.example
index 8d576e51d..34df01681 100644
--- a/extra_model_paths.yaml.example
+++ b/extra_model_paths.yaml.example
@@ -1,25 +1,5 @@
 #Rename this to extra_model_paths.yaml and ComfyUI will load it
-
-#config for a1111 ui
-#all you have to do is change the base_path to where yours is installed
-a111:
-    base_path: path/to/stable-diffusion-webui/
-
-    checkpoints: models/Stable-diffusion
-    configs: models/Stable-diffusion
-    vae: models/VAE
-    loras: |
-         models/Lora
-         models/LyCORIS
-    upscale_models: |
-                  models/ESRGAN
-                  models/RealESRGAN
-                  models/SwinIR
-    embeddings: embeddings
-    hypernetworks: models/hypernetworks
-    controlnet: models/ControlNet
-
 #config for comfyui
 #your base path should be either an existing comfy install or a central folder where you store all of your models, loras, etc.
 
@@ -41,6 +21,29 @@
 # loras: models/loras/
 # upscale_models: models/upscale_models/
 # vae: models/vae/
+# audio_encoders: models/audio_encoders/
+# model_patches: models/model_patches/
+
+
+#config for a1111 ui
+#all you have to do is uncomment this (remove the #) and change the base_path to where yours is installed
+
+#a111:
+#    base_path: path/to/stable-diffusion-webui/
+#    checkpoints: models/Stable-diffusion
+#    configs: models/Stable-diffusion
+#    vae: models/VAE
+#    loras: |
+#         models/Lora
+#         models/LyCORIS
+#    upscale_models: |
+#                  models/ESRGAN
+#                  models/RealESRGAN
+#                  models/SwinIR
+#    embeddings: embeddings
+#    hypernetworks: models/hypernetworks
+#    controlnet: models/ControlNet
+
 # For a full list of supported keys (style_models, vae_approx, hypernetworks, photomaker,
 # model_patches, audio_encoders, classifiers, etc.) see folder_paths.py.

From e693e4db6a2df8482599eed348be15f87799b910 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 13 Oct 2025 11:57:27 -0700
Subject: [PATCH 2/5] Always set diffusion model to eval() mode. (#10331)
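
As background for this one-line change: torch.nn.Module.eval() recursively
sets module.training = False, which disables dropout and makes normalization
layers use their running statistics. A tiny illustration in plain PyTorch
(not ComfyUI code):

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Dropout(p=0.5))
    model.eval()  # recursively clears the training flag on every submodule
    assert all(not m.training for m in model.modules())

    x = torch.randn(1, 8)
    with torch.no_grad():
        # with dropout inactive, repeated forward passes are deterministic
        assert torch.equal(model(x), model(x))

Calling eval() once in BaseModel.__init__ covers every subclass, which is why
the per-subclass calls are dropped below.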
---
 comfy/model_base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index b0b9cde7d..8274c7dea 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -138,6 +138,7 @@ class BaseModel(torch.nn.Module):
         else:
             operations = model_config.custom_operations
         self.diffusion_model = unet_model(**unet_config, device=device, operations=operations)
+        self.diffusion_model.eval()
         if comfy.model_management.force_channels_last():
             self.diffusion_model.to(memory_format=torch.channels_last)
             logging.debug("using channels last mode for diffusion model")
@@ -669,7 +670,6 @@ class Lotus(BaseModel):
 class StableCascade_C(BaseModel):
     def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=StageC)
-        self.diffusion_model.eval().requires_grad_(False)
 
     def extra_conds(self, **kwargs):
         out = {}
@@ -698,7 +698,6 @@ class StableCascade_C(BaseModel):
 class StableCascade_B(BaseModel):
     def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None):
         super().__init__(model_config, model_type, device=device, unet_model=StageB)
-        self.diffusion_model.eval().requires_grad_(False)
 
     def extra_conds(self, **kwargs):
         out = {}

From 27ffd12c45d4237338fe8789779313db9bab59f1 Mon Sep 17 00:00:00 2001
From: Daniel Harte
Date: Mon, 13 Oct 2025 20:14:52 +0100
Subject: [PATCH 3/5] add indent=4 kwarg to json.dumps() (#10307)

---
 comfy_extras/nodes_preview_any.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy_extras/nodes_preview_any.py b/comfy_extras/nodes_preview_any.py
index e6805696f..e749fa6ae 100644
--- a/comfy_extras/nodes_preview_any.py
+++ b/comfy_extras/nodes_preview_any.py
@@ -25,7 +25,7 @@ class PreviewAny():
             value = str(source)
         elif source is not None:
             try:
-                value = json.dumps(source)
+                value = json.dumps(source, indent=4)
             except Exception:
                 try:
                     value = str(source)

From 95ca2e56c82c1c714dba685bd81ebf3f7baf8efa Mon Sep 17 00:00:00 2001
From: rattus128 <46076784+rattus128@users.noreply.github.com>
Date: Tue, 14 Oct 2025 05:23:11 +1000
Subject: [PATCH 4/5] WAN2.2: Fix cache VRAM leak on error (#10308)

Same change pattern as 7e8dd275c243ad460ed5015d2e13611d81d2a569, applied
to WAN2.2.

If this suffers an exception (such as a VRAM OOM), it will leave the
encode() and decode() methods early, which skips the cleanup of the WAN
feature cache. The comfy node cache then ultimately keeps a reference to
this object, which is in turn referencing large tensors from the failed
execution.

The feature cache is currently set up as a class variable on the
encoder/decoder; however, the encode and decode functions always clear
it on both entry and exit of normal execution. It's likely the design
intent was for this to be usable as a streaming encoder where the input
comes in batches, but the functions as they are today don't support
that.

So simplify by bringing the cache back to a local variable, so that if a
VRAM OOM does occur, the cache itself is properly garbage collected when
the encode()/decode() functions disappear from the stack.
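
To make the failure mode concrete, here is a minimal self-contained sketch of
the before/after pattern (hypothetical class names, not the actual vae2_2.py
code):

    import torch

    class LeakyVAE(torch.nn.Module):
        # Parks the feature cache on self, like the old clear_cache() design.
        def encode(self, x):
            self._feat_map = [x * 2.0]   # large tensors accumulate here
            out = self._feat_map[0].sum()
            if not torch.isfinite(out):  # stand-in for a mid-loop VRAM OOM
                raise RuntimeError("OOM")
            self._feat_map = None        # never reached on the error path, so
            return out                   # self keeps the cached tensors alive

    class FixedVAE(torch.nn.Module):
        # Keeps the cache in a local variable, as this patch does.
        def encode(self, x):
            feat_map = [x * 2.0]         # local to this call
            out = feat_map[0].sum()
            if not torch.isfinite(out):
                raise RuntimeError("OOM")
            return out                   # if an exception unwinds the frame,
                                         # feat_map becomes garbage immediately

If a long-lived cache keeps a reference to a LeakyVAE whose encode() raised,
_feat_map and every tensor it holds stay alive until the next successful
call; with FixedVAE the cache dies with the stack frame.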
---
 comfy/ldm/wan/vae2_2.py | 37 ++++++++++++++-----------------------
 1 file changed, 14 insertions(+), 23 deletions(-)

diff --git a/comfy/ldm/wan/vae2_2.py b/comfy/ldm/wan/vae2_2.py
index 1f6d584a2..8e1593a54 100644
--- a/comfy/ldm/wan/vae2_2.py
+++ b/comfy/ldm/wan/vae2_2.py
@@ -657,51 +657,51 @@ class WanVAE(nn.Module):
         )
 
     def encode(self, x):
-        self.clear_cache()
+        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.encoder)
         x = patchify(x, patch_size=2)
         t = x.shape[2]
         iter_ = 1 + (t - 1) // 4
         for i in range(iter_):
-            self._enc_conv_idx = [0]
+            conv_idx = [0]
             if i == 0:
                 out = self.encoder(
                     x[:, :, :1, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx,
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx,
                 )
             else:
                 out_ = self.encoder(
                     x[:, :, 1 + 4 * (i - 1):1 + 4 * i, :, :],
-                    feat_cache=self._enc_feat_map,
-                    feat_idx=self._enc_conv_idx,
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx,
                 )
                 out = torch.cat([out, out_], 2)
         mu, log_var = self.conv1(out).chunk(2, dim=1)
-        self.clear_cache()
         return mu
 
     def decode(self, z):
-        self.clear_cache()
+        conv_idx = [0]
+        feat_map = [None] * count_conv3d(self.decoder)
         iter_ = z.shape[2]
         x = self.conv2(z)
         for i in range(iter_):
-            self._conv_idx = [0]
+            conv_idx = [0]
             if i == 0:
                 out = self.decoder(
                     x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx,
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx,
                     first_chunk=True,
                 )
             else:
                 out_ = self.decoder(
                     x[:, :, i:i + 1, :, :],
-                    feat_cache=self._feat_map,
-                    feat_idx=self._conv_idx,
+                    feat_cache=feat_map,
+                    feat_idx=conv_idx,
                 )
                 out = torch.cat([out, out_], 2)
         out = unpatchify(out, patch_size=2)
-        self.clear_cache()
         return out
 
     def reparameterize(self, mu, log_var):
@@ -715,12 +715,3 @@ class WanVAE(nn.Module):
             return mu
         std = torch.exp(0.5 * log_var.clamp(-30.0, 20.0))
         return mu + std * torch.randn_like(std)
-
-    def clear_cache(self):
-        self._conv_num = count_conv3d(self.decoder)
-        self._conv_idx = [0]
-        self._feat_map = [None] * self._conv_num
-        # cache encode
-        self._enc_conv_num = count_conv3d(self.encoder)
-        self._enc_conv_idx = [0]
-        self._enc_feat_map = [None] * self._enc_conv_num

From 3dfdcf66b643b6c191743d3b30fd8198ce690f2d Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Mon, 13 Oct 2025 22:36:26 +0300
Subject: [PATCH 5/5] convert nodes_hunyuan.py to V3 schema (#10136)

---
 comfy_extras/nodes_hunyuan.py | 241 +++++++++++++++++++++-------------
 1 file changed, 150 insertions(+), 91 deletions(-)

diff --git a/comfy_extras/nodes_hunyuan.py b/comfy_extras/nodes_hunyuan.py
index db398cdf1..f7c34d059 100644
--- a/comfy_extras/nodes_hunyuan.py
+++ b/comfy_extras/nodes_hunyuan.py
@@ -2,42 +2,60 @@ import nodes
 import node_helpers
 import torch
 import comfy.model_management
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 
 
-class CLIPTextEncodeHunyuanDiT:
+class CLIPTextEncodeHunyuanDiT(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "clip": ("CLIP", ),
-            "bert": ("STRING", {"multiline": True, "dynamicPrompts": True}),
-            "mt5xl": ("STRING", {"multiline": True, "dynamicPrompts": True}),
-            }}
-    RETURN_TYPES = ("CONDITIONING",)
-    FUNCTION = "encode"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="CLIPTextEncodeHunyuanDiT",
+            category="advanced/conditioning",
+            inputs=[
+                io.Clip.Input("clip"),
+                io.String.Input("bert", multiline=True, dynamic_prompts=True),
+                io.String.Input("mt5xl", multiline=True, dynamic_prompts=True),
+            ],
+            outputs=[
+                io.Conditioning.Output(),
+            ],
+ ) - CATEGORY = "advanced/conditioning" - - def encode(self, clip, bert, mt5xl): + @classmethod + def execute(cls, clip, bert, mt5xl) -> io.NodeOutput: tokens = clip.tokenize(bert) tokens["mt5xl"] = clip.tokenize(mt5xl)["mt5xl"] - return (clip.encode_from_tokens_scheduled(tokens), ) + return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens)) -class EmptyHunyuanLatentVideo: + encode = execute # TODO: remove + + +class EmptyHunyuanLatentVideo(io.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { "width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}), - "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}), - "length": ("INT", {"default": 25, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}), - "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}} - RETURN_TYPES = ("LATENT",) - FUNCTION = "generate" + def define_schema(cls): + return io.Schema( + node_id="EmptyHunyuanLatentVideo", + category="latent/video", + inputs=[ + io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16), + io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), + io.Int.Input("length", default=25, min=1, max=nodes.MAX_RESOLUTION, step=4), + io.Int.Input("batch_size", default=1, min=1, max=4096), + ], + outputs=[ + io.Latent.Output(), + ], + ) - CATEGORY = "latent/video" - - def generate(self, width, height, length, batch_size=1): + @classmethod + def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput: latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device()) - return ({"samples":latent}, ) + return io.NodeOutput({"samples":latent}) + + generate = execute # TODO: remove + PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = ( "<|start_header_id|>system<|end_header_id|>\n\n\nDescribe the video by detailing the following aspects according to the reference image: " @@ -50,45 +68,61 @@ PROMPT_TEMPLATE_ENCODE_VIDEO_I2V = ( "<|start_header_id|>assistant<|end_header_id|>\n\n" ) -class TextEncodeHunyuanVideo_ImageToVideo: +class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode): @classmethod - def INPUT_TYPES(s): - return {"required": { - "clip": ("CLIP", ), - "clip_vision_output": ("CLIP_VISION_OUTPUT", ), - "prompt": ("STRING", {"multiline": True, "dynamicPrompts": True}), - "image_interleave": ("INT", {"default": 2, "min": 1, "max": 512, "tooltip": "How much the image influences things vs the text prompt. Higher number means more influence from the text prompt."}), - }} - RETURN_TYPES = ("CONDITIONING",) - FUNCTION = "encode" + def define_schema(cls): + return io.Schema( + node_id="TextEncodeHunyuanVideo_ImageToVideo", + category="advanced/conditioning", + inputs=[ + io.Clip.Input("clip"), + io.ClipVisionOutput.Input("clip_vision_output"), + io.String.Input("prompt", multiline=True, dynamic_prompts=True), + io.Int.Input( + "image_interleave", + default=2, + min=1, + max=512, + tooltip="How much the image influences things vs the text prompt. 
+                ),
+            ],
+            outputs=[
+                io.Conditioning.Output(),
+            ],
+        )
 
-    CATEGORY = "advanced/conditioning"
-
-    def encode(self, clip, clip_vision_output, prompt, image_interleave):
+    @classmethod
+    def execute(cls, clip, clip_vision_output, prompt, image_interleave) -> io.NodeOutput:
         tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave)
-        return (clip.encode_from_tokens_scheduled(tokens), )
+        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
 
-class HunyuanImageToVideo:
+    encode = execute  # TODO: remove
+
+
+class HunyuanImageToVideo(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "vae": ("VAE", ),
-                             "width": ("INT", {"default": 848, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
-                             "length": ("INT", {"default": 53, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
-                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
-                             "guidance_type": (["v1 (concat)", "v2 (replace)", "custom"], )
-                             },
-                "optional": {"start_image": ("IMAGE", ),
-                             }}
+    def define_schema(cls):
+        return io.Schema(
+            node_id="HunyuanImageToVideo",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Vae.Input("vae"),
+                io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
+                io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
+                io.Int.Input("length", default=53, min=1, max=nodes.MAX_RESOLUTION, step=4),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Combo.Input("guidance_type", options=["v1 (concat)", "v2 (replace)", "custom"]),
+                io.Image.Input("start_image", optional=True),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
 
-    RETURN_TYPES = ("CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "latent")
-    FUNCTION = "encode"
-
-    CATEGORY = "conditioning/video_models"
-
-    def encode(self, positive, vae, width, height, length, batch_size, guidance_type, start_image=None):
+    @classmethod
+    def execute(cls, positive, vae, width, height, length, batch_size, guidance_type, start_image=None) -> io.NodeOutput:
         latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
 
         out_latent = {}
@@ -111,51 +145,76 @@ class HunyuanImageToVideo:
 
         positive = node_helpers.conditioning_set_values(positive, cond)
 
         out_latent["samples"] = latent
-        return (positive, out_latent)
+        return io.NodeOutput(positive, out_latent)
 
-class EmptyHunyuanImageLatent:
+    encode = execute  # TODO: remove
+
+
+class EmptyHunyuanImageLatent(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": { "width": ("INT", {"default": 2048, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                              "height": ("INT", {"default": 2048, "min": 64, "max": nodes.MAX_RESOLUTION, "step": 32}),
-                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096})}}
-    RETURN_TYPES = ("LATENT",)
-    FUNCTION = "generate"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="EmptyHunyuanImageLatent",
+            category="latent",
+            inputs=[
+                io.Int.Input("width", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("height", default=2048, min=64, max=nodes.MAX_RESOLUTION, step=32),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+            ],
+            outputs=[
+                io.Latent.Output(),
+            ],
+        )
 
-    CATEGORY = "latent"
-
-    def generate(self, width, height, batch_size=1):
+    @classmethod
+    def execute(cls, width, height, batch_size=1) -> io.NodeOutput:
         latent = torch.zeros([batch_size, 64, height // 32, width // 32], device=comfy.model_management.intermediate_device())
-        return ({"samples":latent}, )
+        return io.NodeOutput({"samples":latent})
 
-class HunyuanRefinerLatent:
+    generate = execute  # TODO: remove
+
+
+class HunyuanRefinerLatent(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {"positive": ("CONDITIONING", ),
-                             "negative": ("CONDITIONING", ),
-                             "latent": ("LATENT", ),
-                             "noise_augmentation": ("FLOAT", {"default": 0.10, "min": 0.0, "max": 1.0, "step": 0.01}),
-                             }}
+    def define_schema(cls):
+        return io.Schema(
+            node_id="HunyuanRefinerLatent",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Latent.Input("latent"),
+                io.Float.Input("noise_augmentation", default=0.10, min=0.0, max=1.0, step=0.01),
 
-    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
-    RETURN_NAMES = ("positive", "negative", "latent")
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
 
-    FUNCTION = "execute"
-
-    def execute(self, positive, negative, latent, noise_augmentation):
+    @classmethod
+    def execute(cls, positive, negative, latent, noise_augmentation) -> io.NodeOutput:
         latent = latent["samples"]
         positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": latent, "noise_augmentation": noise_augmentation})
         negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": latent, "noise_augmentation": noise_augmentation})
 
         out_latent = {}
         out_latent["samples"] = torch.zeros([latent.shape[0], 32, latent.shape[-3], latent.shape[-2], latent.shape[-1]], device=comfy.model_management.intermediate_device())
-        return (positive, negative, out_latent)
+        return io.NodeOutput(positive, negative, out_latent)
 
-NODE_CLASS_MAPPINGS = {
-    "CLIPTextEncodeHunyuanDiT": CLIPTextEncodeHunyuanDiT,
-    "TextEncodeHunyuanVideo_ImageToVideo": TextEncodeHunyuanVideo_ImageToVideo,
-    "EmptyHunyuanLatentVideo": EmptyHunyuanLatentVideo,
-    "HunyuanImageToVideo": HunyuanImageToVideo,
-    "EmptyHunyuanImageLatent": EmptyHunyuanImageLatent,
-    "HunyuanRefinerLatent": HunyuanRefinerLatent,
-}
+class HunyuanExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            CLIPTextEncodeHunyuanDiT,
+            TextEncodeHunyuanVideo_ImageToVideo,
+            EmptyHunyuanLatentVideo,
+            HunyuanImageToVideo,
+            EmptyHunyuanImageLatent,
+            HunyuanRefinerLatent,
+        ]
+
+
+async def comfy_entrypoint() -> HunyuanExtension:
+    return HunyuanExtension()
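
The conversion above follows one recipe throughout: INPUT_TYPES,
RETURN_TYPES, RETURN_NAMES, FUNCTION, and CATEGORY collapse into a
declarative io.Schema; the entry point becomes a stateless classmethod
execute() returning io.NodeOutput; and registration moves from
NODE_CLASS_MAPPINGS to a ComfyExtension. A minimal standalone sketch of the
same pattern, using a hypothetical ScaleLatent node (the io API surface is
assumed to be exactly as used in the diff above):

    from typing_extensions import override
    from comfy_api.latest import ComfyExtension, io

    class ScaleLatent(io.ComfyNode):
        @classmethod
        def define_schema(cls):
            return io.Schema(
                node_id="ScaleLatent",
                category="latent",
                inputs=[
                    io.Latent.Input("latent"),
                    io.Float.Input("factor", default=1.0, min=0.0, max=10.0, step=0.01),
                ],
                outputs=[
                    io.Latent.Output(),
                ],
            )

        @classmethod
        def execute(cls, latent, factor) -> io.NodeOutput:
            # V3 nodes are stateless: inputs arrive as arguments named after
            # the schema inputs, and outputs are wrapped in NodeOutput.
            return io.NodeOutput({"samples": latent["samples"] * factor})

    class ExampleExtension(ComfyExtension):
        @override
        async def get_node_list(self) -> list[type[io.ComfyNode]]:
            return [ScaleLatent]

    async def comfy_entrypoint() -> ExampleExtension:
        return ExampleExtension()

The "encode = execute  # TODO: remove" aliases in the diff exist only so that
any caller still invoking the old V1 method names keeps working during the
transition.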