From af81cb962d9dd283ddb551962cc223b5a186a1ce Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 08:40:32 -0800
Subject: [PATCH 01/24] Add Flux 2 support to README. (#10882)

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 28beec427..b9300ab07 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
    - [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
    - [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
    - [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/)
+   - [Flux 2](https://comfyanonymous.github.io/ComfyUI_examples/flux2/)
 - Image Editing Models
    - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
    - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)

From 828b1b9953175b6df79459f417d1032869d0b46a Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Tue, 25 Nov 2025 12:40:58 -0500
Subject: [PATCH 02/24] ComfyUI version v0.3.72

---
 comfyui_version.py | 2 +-
 pyproject.toml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfyui_version.py b/comfyui_version.py
index b4655d553..dac038c26 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.71"
+__version__ = "0.3.72"
diff --git a/pyproject.toml b/pyproject.toml
index 280dbaf53..75df8fb7c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.71"
+version = "0.3.72"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

From dff996ca39d86265bbabf15e666484e051f0b3d5 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 11:30:24 -0800
Subject: [PATCH 03/24] Fix crash. (#10885)

---
 comfy/text_encoders/flux.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py
index 8dbbca16e..024504a5b 100644
--- a/comfy/text_encoders/flux.py
+++ b/comfy/text_encoders/flux.py
@@ -87,6 +87,7 @@ def load_mistral_tokenizer(data):
     vocab = {}
 
     max_vocab = mistral_vocab["config"]["default_vocab_size"]
+    max_vocab -= len(mistral_vocab["special_tokens"])
 
     for w in mistral_vocab["vocab"]:
         r = w["rank"]

From 18b79acba95d44b4ea00bbbfc1856bc71bd58841 Mon Sep 17 00:00:00 2001
From: ComfyUI Wiki <contact@comfyui-wiki.com>
Date: Wed, 26 Nov 2025 03:58:21 +0800
Subject: [PATCH 04/24] Update workflow templates to v0.7.20 (#10883)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b7014f956..5f20816d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 comfyui-frontend-package==1.30.6
-comfyui-workflow-templates==0.7.9
+comfyui-workflow-templates==0.7.20
 comfyui-embedded-docs==0.3.1
 torch
 torchsde

From d196a905bb379a6d800d0c13f9b4fdea3965311a Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 11:58:39 -0800
Subject: [PATCH 05/24] Lower vram usage for flux 2 text encoder. (#10887)

---
 comfy/sd1_clip.py            |  7 ++++---
 comfy/text_encoders/flux.py  |  4 ++--
 comfy/text_encoders/llama.py | 12 +++++++++---
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index 8f509bab1..0fc9ab3db 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -90,7 +90,6 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
                  special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, zero_out_masked=False,
                  return_projected_pooled=True, return_attention_masks=False, model_options={}):  # clip-vit-base-patch32
         super().__init__()
-        assert layer in self.LAYERS
 
         if textmodel_json_config is None:
             textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_clip_config.json")
@@ -164,7 +163,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
     def set_clip_options(self, options):
         layer_idx = options.get("layer", self.layer_idx)
         self.return_projected_pooled = options.get("projected_pooled", self.return_projected_pooled)
-        if self.layer == "all":
+        if isinstance(self.layer, list) or self.layer == "all":
             pass
         elif layer_idx is None or abs(layer_idx) > self.num_layers:
             self.layer = "last"
@@ -266,7 +265,9 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         if self.enable_attention_masks:
             attention_mask_model = attention_mask
 
-        if self.layer == "all":
+        if isinstance(self.layer, list):
+            intermediate_output = self.layer
+        elif self.layer == "all":
             intermediate_output = "all"
         else:
             intermediate_output = self.layer_idx
diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py
index 024504a5b..99f4812bb 100644
--- a/comfy/text_encoders/flux.py
+++ b/comfy/text_encoders/flux.py
@@ -138,7 +138,7 @@ class Flux2Tokenizer(sd1_clip.SD1Tokenizer):
         return tokens
 
 class Mistral3_24BModel(sd1_clip.SDClipModel):
-    def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
+    def __init__(self, device="cpu", layer=[10, 20, 30], layer_idx=None, dtype=None, attention_mask=True, model_options={}):
         textmodel_json_config = {}
         num_layers = model_options.get("num_layers", None)
         if num_layers is not None:
@@ -154,7 +154,7 @@ class Flux2TEModel(sd1_clip.SD1ClipModel):
     def encode_token_weights(self, token_weight_pairs):
         out, pooled, extra = super().encode_token_weights(token_weight_pairs)
 
-        out = torch.stack((out[:, 10], out[:, 20], out[:, 30]), dim=1)
+        out = torch.stack((out[:, 0], out[:, 1], out[:, 2]), dim=1)
         out = out.movedim(1, 2)
         out = out.reshape(out.shape[0], out.shape[1], -1)
         return out, pooled, extra
diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py
index 749ff581b..d47ed27bc 100644
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -434,8 +434,12 @@ class Llama2_(nn.Module):
 
         intermediate = None
         all_intermediate = None
+        only_layers = None
         if intermediate_output is not None:
-            if intermediate_output == "all":
+            if isinstance(intermediate_output, list):
+                all_intermediate = []
+                only_layers = set(intermediate_output)
+            elif intermediate_output == "all":
                 all_intermediate = []
                 intermediate_output = None
             elif intermediate_output < 0:
@@ -443,7 +447,8 @@ class Llama2_(nn.Module):
 
         for i, layer in enumerate(self.layers):
             if all_intermediate is not None:
-                all_intermediate.append(x.unsqueeze(1).clone())
+                if only_layers is None or (i in only_layers):
+                    all_intermediate.append(x.unsqueeze(1).clone())
             x = layer(
                 x=x,
                 attention_mask=mask,
@@ -457,7 +462,8 @@ class Llama2_(nn.Module):
             x = self.norm(x)
 
         if all_intermediate is not None:
-            all_intermediate.append(x.unsqueeze(1).clone())
+            if only_layers is None or ((i + 1) in only_layers):
+                all_intermediate.append(x.unsqueeze(1).clone())
 
         if all_intermediate is not None:
             intermediate = torch.cat(all_intermediate, dim=1)

From 0c18842acbdf546883b08808dd9feea7605d7649 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Tue, 25 Nov 2025 14:59:37 -0500
Subject: [PATCH 06/24] ComfyUI v0.3.73

---
 comfyui_version.py | 2 +-
 pyproject.toml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfyui_version.py b/comfyui_version.py
index dac038c26..f8818838e 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.72"
+__version__ = "0.3.73"
diff --git a/pyproject.toml b/pyproject.toml
index 75df8fb7c..7e4bac12d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.72"
+version = "0.3.73"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

From e9aae31fa241a6a63a368800146ea91629d4e8c2 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 15:41:45 -0800
Subject: [PATCH 07/24] Z Image model. (#10892)

---
 comfy/ldm/lumina/model.py                   | 219 +++++++-------------
 comfy/ldm/modules/diffusionmodules/mmdit.py |   6 +-
 comfy/model_base.py                         |   4 +
 comfy/model_detection.py                    |  29 ++-
 comfy/sd.py                                 |   8 +
 comfy/text_encoders/llama.py                |  31 +++
 comfy/text_encoders/z_image.py              |  48 +++++
 7 files changed, 196 insertions(+), 149 deletions(-)
 create mode 100644 comfy/text_encoders/z_image.py

diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py
index b4494a51d..c8643eb82 100644
--- a/comfy/ldm/lumina/model.py
+++ b/comfy/ldm/lumina/model.py
@@ -11,6 +11,7 @@ import comfy.ldm.common_dit
 from comfy.ldm.modules.diffusionmodules.mmdit import TimestepEmbedder
 from comfy.ldm.modules.attention import optimized_attention_masked
 from comfy.ldm.flux.layers import EmbedND
+from comfy.ldm.flux.math import apply_rope
 import comfy.patcher_extension
 
 
@@ -31,6 +32,7 @@ class JointAttention(nn.Module):
         n_heads: int,
         n_kv_heads: Optional[int],
         qk_norm: bool,
+        out_bias: bool = False,
         operation_settings={},
     ):
         """
@@ -59,7 +61,7 @@ class JointAttention(nn.Module):
         self.out = operation_settings.get("operations").Linear(
             n_heads * self.head_dim,
             dim,
-            bias=False,
+            bias=out_bias,
             device=operation_settings.get("device"),
             dtype=operation_settings.get("dtype"),
         )
@@ -70,35 +72,6 @@ class JointAttention(nn.Module):
         else:
             self.q_norm = self.k_norm = nn.Identity()
 
-    @staticmethod
-    def apply_rotary_emb(
-        x_in: torch.Tensor,
-        freqs_cis: torch.Tensor,
-    ) -> torch.Tensor:
-        """
-        Apply rotary embeddings to input tensors using the given frequency
-        tensor.
-
-        This function applies rotary embeddings to the given query 'xq' and
-        key 'xk' tensors using the provided frequency tensor 'freqs_cis'. The
-        input tensors are reshaped as complex numbers, and the frequency tensor
-        is reshaped for broadcasting compatibility. The resulting tensors
-        contain rotary embeddings and are returned as real tensors.
-
-        Args:
-            x_in (torch.Tensor): Query or Key tensor to apply rotary embeddings.
-            freqs_cis (torch.Tensor): Precomputed frequency tensor for complex
-                exponentials.
-
-        Returns:
-            Tuple[torch.Tensor, torch.Tensor]: Tuple of modified query tensor
-                and key tensor with rotary embeddings.
-        """
-
-        t_ = x_in.reshape(*x_in.shape[:-1], -1, 1, 2)
-        t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1]
-        return t_out.reshape(*x_in.shape)
-
     def forward(
         self,
         x: torch.Tensor,
@@ -134,8 +107,7 @@ class JointAttention(nn.Module):
         xq = self.q_norm(xq)
         xk = self.k_norm(xk)
 
-        xq = JointAttention.apply_rotary_emb(xq, freqs_cis=freqs_cis)
-        xk = JointAttention.apply_rotary_emb(xk, freqs_cis=freqs_cis)
+        xq, xk = apply_rope(xq, xk, freqs_cis)
 
         n_rep = self.n_local_heads // self.n_local_kv_heads
         if n_rep >= 1:
@@ -215,6 +187,8 @@ class JointTransformerBlock(nn.Module):
         norm_eps: float,
         qk_norm: bool,
         modulation=True,
+        z_image_modulation=False,
+        attn_out_bias=False,
         operation_settings={},
     ) -> None:
         """
@@ -235,10 +209,10 @@ class JointTransformerBlock(nn.Module):
         super().__init__()
         self.dim = dim
         self.head_dim = dim // n_heads
-        self.attention = JointAttention(dim, n_heads, n_kv_heads, qk_norm, operation_settings=operation_settings)
+        self.attention = JointAttention(dim, n_heads, n_kv_heads, qk_norm, out_bias=attn_out_bias, operation_settings=operation_settings)
         self.feed_forward = FeedForward(
             dim=dim,
-            hidden_dim=4 * dim,
+            hidden_dim=dim,
             multiple_of=multiple_of,
             ffn_dim_multiplier=ffn_dim_multiplier,
             operation_settings=operation_settings,
@@ -252,16 +226,27 @@ class JointTransformerBlock(nn.Module):
 
         self.modulation = modulation
         if modulation:
-            self.adaLN_modulation = nn.Sequential(
-                nn.SiLU(),
-                operation_settings.get("operations").Linear(
-                    min(dim, 1024),
-                    4 * dim,
-                    bias=True,
-                    device=operation_settings.get("device"),
-                    dtype=operation_settings.get("dtype"),
-                ),
-            )
+            if z_image_modulation:
+                self.adaLN_modulation = nn.Sequential(
+                    operation_settings.get("operations").Linear(
+                        min(dim, 256),
+                        4 * dim,
+                        bias=True,
+                        device=operation_settings.get("device"),
+                        dtype=operation_settings.get("dtype"),
+                    ),
+                )
+            else:
+                self.adaLN_modulation = nn.Sequential(
+                    nn.SiLU(),
+                    operation_settings.get("operations").Linear(
+                        min(dim, 1024),
+                        4 * dim,
+                        bias=True,
+                        device=operation_settings.get("device"),
+                        dtype=operation_settings.get("dtype"),
+                    ),
+                )
 
     def forward(
         self,
@@ -323,7 +308,7 @@ class FinalLayer(nn.Module):
     The final layer of NextDiT.
     """
 
-    def __init__(self, hidden_size, patch_size, out_channels, operation_settings={}):
+    def __init__(self, hidden_size, patch_size, out_channels, z_image_modulation=False, operation_settings={}):
         super().__init__()
         self.norm_final = operation_settings.get("operations").LayerNorm(
             hidden_size,
@@ -340,10 +325,15 @@ class FinalLayer(nn.Module):
             dtype=operation_settings.get("dtype"),
         )
 
+        if z_image_modulation:
+            min_mod = 256
+        else:
+            min_mod = 1024
+
         self.adaLN_modulation = nn.Sequential(
             nn.SiLU(),
             operation_settings.get("operations").Linear(
-                min(hidden_size, 1024),
+                min(hidden_size, min_mod),
                 hidden_size,
                 bias=True,
                 device=operation_settings.get("device"),
@@ -373,12 +363,16 @@ class NextDiT(nn.Module):
         n_heads: int = 32,
         n_kv_heads: Optional[int] = None,
         multiple_of: int = 256,
-        ffn_dim_multiplier: Optional[float] = None,
+        ffn_dim_multiplier: float = 4.0,
         norm_eps: float = 1e-5,
         qk_norm: bool = False,
         cap_feat_dim: int = 5120,
         axes_dims: List[int] = (16, 56, 56),
         axes_lens: List[int] = (1, 512, 512),
+        rope_theta=10000.0,
+        z_image_modulation=False,
+        time_scale=1.0,
+        pad_tokens_multiple=None,
         image_model=None,
         device=None,
         dtype=None,
@@ -390,6 +384,8 @@ class NextDiT(nn.Module):
         self.in_channels = in_channels
         self.out_channels = in_channels
         self.patch_size = patch_size
+        self.time_scale = time_scale
+        self.pad_tokens_multiple = pad_tokens_multiple
 
         self.x_embedder = operation_settings.get("operations").Linear(
             in_features=patch_size * patch_size * in_channels,
@@ -411,6 +407,7 @@ class NextDiT(nn.Module):
                     norm_eps,
                     qk_norm,
                     modulation=True,
+                    z_image_modulation=z_image_modulation,
                     operation_settings=operation_settings,
                 )
                 for layer_id in range(n_refiner_layers)
@@ -434,7 +431,7 @@ class NextDiT(nn.Module):
             ]
         )
 
-        self.t_embedder = TimestepEmbedder(min(dim, 1024), **operation_settings)
+        self.t_embedder = TimestepEmbedder(min(dim, 1024), output_size=256 if z_image_modulation else None, **operation_settings)
         self.cap_embedder = nn.Sequential(
             operation_settings.get("operations").RMSNorm(cap_feat_dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype")),
             operation_settings.get("operations").Linear(
@@ -457,18 +454,24 @@ class NextDiT(nn.Module):
                     ffn_dim_multiplier,
                     norm_eps,
                     qk_norm,
+                    z_image_modulation=z_image_modulation,
+                    attn_out_bias=False,
                     operation_settings=operation_settings,
                 )
                 for layer_id in range(n_layers)
             ]
         )
         self.norm_final = operation_settings.get("operations").RMSNorm(dim, eps=norm_eps, elementwise_affine=True, device=operation_settings.get("device"), dtype=operation_settings.get("dtype"))
-        self.final_layer = FinalLayer(dim, patch_size, self.out_channels, operation_settings=operation_settings)
+        self.final_layer = FinalLayer(dim, patch_size, self.out_channels, z_image_modulation=z_image_modulation, operation_settings=operation_settings)
+
+        if self.pad_tokens_multiple is not None:
+            self.x_pad_token = nn.Parameter(torch.empty((1, dim), device=device, dtype=dtype))
+            self.cap_pad_token = nn.Parameter(torch.empty((1, dim), device=device, dtype=dtype))
 
         assert (dim // n_heads) == sum(axes_dims)
         self.axes_dims = axes_dims
         self.axes_lens = axes_lens
-        self.rope_embedder = EmbedND(dim=dim // n_heads, theta=10000.0, axes_dim=axes_dims)
+        self.rope_embedder = EmbedND(dim=dim // n_heads, theta=rope_theta, axes_dim=axes_dims)
         self.dim = dim
         self.n_heads = n_heads
 
@@ -503,108 +506,42 @@ class NextDiT(nn.Module):
         bsz = len(x)
         pH = pW = self.patch_size
         device = x[0].device
-        dtype = x[0].dtype
 
-        if cap_mask is not None:
-            l_effective_cap_len = cap_mask.sum(dim=1).tolist()
-        else:
-            l_effective_cap_len = [num_tokens] * bsz
+        if self.pad_tokens_multiple is not None:
+            pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple
+            cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)
 
-        if cap_mask is not None and not torch.is_floating_point(cap_mask):
-            cap_mask = (cap_mask - 1).to(dtype) * torch.finfo(dtype).max
+        cap_pos_ids = torch.zeros(bsz, cap_feats.shape[1], 3, dtype=torch.float32, device=device)
+        cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0
 
-        img_sizes = [(img.size(1), img.size(2)) for img in x]
-        l_effective_img_len = [(H // pH) * (W // pW) for (H, W) in img_sizes]
+        B, C, H, W = x.shape
+        x = self.x_embedder(x.view(B, C, H // pH, pH, W // pW, pW).permute(0, 2, 4, 3, 5, 1).flatten(3).flatten(1, 2))
 
-        max_seq_len = max(
-            (cap_len+img_len for cap_len, img_len in zip(l_effective_cap_len, l_effective_img_len))
-        )
-        max_cap_len = max(l_effective_cap_len)
-        max_img_len = max(l_effective_img_len)
+        H_tokens, W_tokens = H // pH, W // pW
+        x_pos_ids = torch.zeros((bsz, x.shape[1], 3), dtype=torch.float32, device=device)
+        x_pos_ids[:, :, 0] = cap_feats.shape[1] + 1
+        x_pos_ids[:, :, 1] = torch.arange(H_tokens, dtype=torch.float32, device=device).view(-1, 1).repeat(1, W_tokens).flatten()
+        x_pos_ids[:, :, 2] = torch.arange(W_tokens, dtype=torch.float32, device=device).view(1, -1).repeat(H_tokens, 1).flatten()
 
-        position_ids = torch.zeros(bsz, max_seq_len, 3, dtype=torch.float32, device=device)
+        if self.pad_tokens_multiple is not None:
+            pad_extra = (-x.shape[1]) % self.pad_tokens_multiple
+            x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
+            x_pos_ids = torch.nn.functional.pad(x_pos_ids, (0, 0, 0, pad_extra))
 
-        for i in range(bsz):
-            cap_len = l_effective_cap_len[i]
-            img_len = l_effective_img_len[i]
-            H, W = img_sizes[i]
-            H_tokens, W_tokens = H // pH, W // pW
-            assert H_tokens * W_tokens == img_len
-
-            rope_options = transformer_options.get("rope_options", None)
-            h_scale = 1.0
-            w_scale = 1.0
-            h_start = 0
-            w_start = 0
-            if rope_options is not None:
-                h_scale = rope_options.get("scale_y", 1.0)
-                w_scale = rope_options.get("scale_x", 1.0)
-
-                h_start = rope_options.get("shift_y", 0.0)
-                w_start = rope_options.get("shift_x", 0.0)
-
-            position_ids[i, :cap_len, 0] = torch.arange(cap_len, dtype=torch.float32, device=device)
-            position_ids[i, cap_len:cap_len+img_len, 0] = cap_len
-            row_ids = (torch.arange(H_tokens, dtype=torch.float32, device=device) * h_scale + h_start).view(-1, 1).repeat(1, W_tokens).flatten()
-            col_ids = (torch.arange(W_tokens, dtype=torch.float32, device=device) * w_scale + w_start).view(1, -1).repeat(H_tokens, 1).flatten()
-            position_ids[i, cap_len:cap_len+img_len, 1] = row_ids
-            position_ids[i, cap_len:cap_len+img_len, 2] = col_ids
-
-        freqs_cis = self.rope_embedder(position_ids).movedim(1, 2).to(dtype)
-
-        # build freqs_cis for cap and image individually
-        cap_freqs_cis_shape = list(freqs_cis.shape)
-        # cap_freqs_cis_shape[1] = max_cap_len
-        cap_freqs_cis_shape[1] = cap_feats.shape[1]
-        cap_freqs_cis = torch.zeros(*cap_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        img_freqs_cis_shape = list(freqs_cis.shape)
-        img_freqs_cis_shape[1] = max_img_len
-        img_freqs_cis = torch.zeros(*img_freqs_cis_shape, device=device, dtype=freqs_cis.dtype)
-
-        for i in range(bsz):
-            cap_len = l_effective_cap_len[i]
-            img_len = l_effective_img_len[i]
-            cap_freqs_cis[i, :cap_len] = freqs_cis[i, :cap_len]
-            img_freqs_cis[i, :img_len] = freqs_cis[i, cap_len:cap_len+img_len]
+        freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)
 
         # refine context
         for layer in self.context_refiner:
-            cap_feats = layer(cap_feats, cap_mask, cap_freqs_cis, transformer_options=transformer_options)
+            cap_feats = layer(cap_feats, cap_mask, freqs_cis[:, :cap_pos_ids.shape[1]], transformer_options=transformer_options)
 
-        # refine image
-        flat_x = []
-        for i in range(bsz):
-            img = x[i]
-            C, H, W = img.size()
-            img = img.view(C, H // pH, pH, W // pW, pW).permute(1, 3, 2, 4, 0).flatten(2).flatten(0, 1)
-            flat_x.append(img)
-        x = flat_x
-        padded_img_embed = torch.zeros(bsz, max_img_len, x[0].shape[-1], device=device, dtype=x[0].dtype)
-        padded_img_mask = torch.zeros(bsz, max_img_len, dtype=dtype, device=device)
-        for i in range(bsz):
-            padded_img_embed[i, :l_effective_img_len[i]] = x[i]
-            padded_img_mask[i, l_effective_img_len[i]:] = -torch.finfo(dtype).max
-
-        padded_img_embed = self.x_embedder(padded_img_embed)
-        padded_img_mask = padded_img_mask.unsqueeze(1)
+        padded_img_mask = None
         for layer in self.noise_refiner:
-            padded_img_embed = layer(padded_img_embed, padded_img_mask, img_freqs_cis, t, transformer_options=transformer_options)
-
-        if cap_mask is not None:
-            mask = torch.zeros(bsz, max_seq_len, dtype=dtype, device=device)
-            mask[:, :max_cap_len] = cap_mask[:, :max_cap_len]
-        else:
-            mask = None
-
-        padded_full_embed = torch.zeros(bsz, max_seq_len, self.dim, device=device, dtype=x[0].dtype)
-        for i in range(bsz):
-            cap_len = l_effective_cap_len[i]
-            img_len = l_effective_img_len[i]
-
-            padded_full_embed[i, :cap_len] = cap_feats[i, :cap_len]
-            padded_full_embed[i, cap_len:cap_len+img_len] = padded_img_embed[i, :img_len]
+            x = layer(x, padded_img_mask, freqs_cis[:, cap_pos_ids.shape[1]:], t, transformer_options=transformer_options)
 
+        padded_full_embed = torch.cat((cap_feats, x), dim=1)
+        mask = None
+        img_sizes = [(H, W)] * bsz
+        l_effective_cap_len = [cap_feats.shape[1]] * bsz
         return padded_full_embed, mask, img_sizes, l_effective_cap_len, freqs_cis
 
     def forward(self, x, timesteps, context, num_tokens, attention_mask=None, **kwargs):
@@ -627,7 +564,7 @@ class NextDiT(nn.Module):
         y: (N,) tensor of text tokens/features
         """
 
-        t = self.t_embedder(t, dtype=x.dtype)  # (N, D)
+        t = self.t_embedder(t * self.time_scale, dtype=x.dtype)  # (N, D)
         adaln_input = t
 
         cap_feats = self.cap_embedder(cap_feats)  # (N, L, D)  # todo check if able to batchify w.o. redundant compute
diff --git a/comfy/ldm/modules/diffusionmodules/mmdit.py b/comfy/ldm/modules/diffusionmodules/mmdit.py
index 42f406f1a..0dc8fe789 100644
--- a/comfy/ldm/modules/diffusionmodules/mmdit.py
+++ b/comfy/ldm/modules/diffusionmodules/mmdit.py
@@ -211,12 +211,14 @@ class TimestepEmbedder(nn.Module):
     Embeds scalar timesteps into vector representations.
     """
 
-    def __init__(self, hidden_size, frequency_embedding_size=256, dtype=None, device=None, operations=None):
+    def __init__(self, hidden_size, frequency_embedding_size=256, output_size=None, dtype=None, device=None, operations=None):
         super().__init__()
+        if output_size is None:
+            output_size = hidden_size
         self.mlp = nn.Sequential(
             operations.Linear(frequency_embedding_size, hidden_size, bias=True, dtype=dtype, device=device),
             nn.SiLU(),
-            operations.Linear(hidden_size, hidden_size, bias=True, dtype=dtype, device=device),
+            operations.Linear(hidden_size, output_size, bias=True, dtype=dtype, device=device),
         )
         self.frequency_embedding_size = frequency_embedding_size
 
diff --git a/comfy/model_base.py b/comfy/model_base.py
index cad79ecbd..cc21b1de9 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1114,9 +1114,13 @@ class Lumina2(BaseModel):
             if torch.numel(attention_mask) != attention_mask.sum():
                 out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
             out['num_tokens'] = comfy.conds.CONDConstant(max(1, torch.sum(attention_mask).item()))
+
         cross_attn = kwargs.get("cross_attn", None)
         if cross_attn is not None:
             out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)
+            if 'num_tokens' not in out:
+                out['num_tokens'] = comfy.conds.CONDConstant(cross_attn.shape[1])
+
         return out
 
 class WAN21(BaseModel):
diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index b2ba1459d..7afe4a798 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -416,14 +416,31 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["image_model"] = "lumina2"
         dit_config["patch_size"] = 2
         dit_config["in_channels"] = 16
-        dit_config["dim"] = 2304
-        dit_config["cap_feat_dim"] = state_dict['{}cap_embedder.1.weight'.format(key_prefix)].shape[1]
+        w = state_dict['{}cap_embedder.1.weight'.format(key_prefix)]
+        dit_config["dim"] = w.shape[0]
+        dit_config["cap_feat_dim"] = w.shape[1]
         dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
-        dit_config["n_heads"] = 24
-        dit_config["n_kv_heads"] = 8
         dit_config["qk_norm"] = True
-        dit_config["axes_dims"] = [32, 32, 32]
-        dit_config["axes_lens"] = [300, 512, 512]
+
+        if dit_config["dim"] == 2304: # Original Lumina 2
+            dit_config["n_heads"] = 24
+            dit_config["n_kv_heads"] = 8
+            dit_config["axes_dims"] = [32, 32, 32]
+            dit_config["axes_lens"] = [300, 512, 512]
+            dit_config["rope_theta"] = 10000.0
+            dit_config["ffn_dim_multiplier"] = 4.0
+        elif dit_config["dim"] == 3840:  # Z image
+            dit_config["n_heads"] = 30
+            dit_config["n_kv_heads"] = 30
+            dit_config["axes_dims"] = [32, 48, 48]
+            dit_config["axes_lens"] = [1536, 512, 512]
+            dit_config["rope_theta"] = 256.0
+            dit_config["ffn_dim_multiplier"] = (8.0 / 3.0)
+            dit_config["z_image_modulation"] = True
+            dit_config["time_scale"] = 1000.0
+            if '{}cap_pad_token'.format(key_prefix) in state_dict_keys:
+                dit_config["pad_tokens_multiple"] = 32
+
         return dit_config
 
     if '{}head.modulation'.format(key_prefix) in state_dict_keys:  # Wan 2.1
diff --git a/comfy/sd.py b/comfy/sd.py
index 14dd8944c..350fae92b 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -52,6 +52,7 @@ import comfy.text_encoders.ace
 import comfy.text_encoders.omnigen2
 import comfy.text_encoders.qwen_image
 import comfy.text_encoders.hunyuan_image
+import comfy.text_encoders.z_image
 
 import comfy.model_patcher
 import comfy.lora
@@ -953,6 +954,8 @@ class TEModel(Enum):
     GEMMA_3_4B = 13
     MISTRAL3_24B = 14
     MISTRAL3_24B_PRUNED_FLUX2 = 15
+    QWEN3_4B = 16
+
 
 def detect_te_model(sd):
     if "text_model.encoder.layers.30.mlp.fc1.weight" in sd:
@@ -985,6 +988,8 @@ def detect_te_model(sd):
         if weight.shape[0] == 512:
             return TEModel.QWEN25_7B
     if "model.layers.0.post_attention_layernorm.weight" in sd:
+        if 'model.layers.0.self_attn.q_norm.weight' in sd:
+            return TEModel.QWEN3_4B
         weight = sd['model.layers.0.post_attention_layernorm.weight']
         if weight.shape[0] == 5120:
             if "model.layers.39.post_attention_layernorm.weight" in sd:
@@ -1110,6 +1115,9 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
             clip_target.clip = comfy.text_encoders.flux.flux2_te(**llama_detect(clip_data), pruned=te_model == TEModel.MISTRAL3_24B_PRUNED_FLUX2)
             clip_target.tokenizer = comfy.text_encoders.flux.Flux2Tokenizer
             tokenizer_data["tekken_model"] = clip_data[0].get("tekken_model", None)
+        elif te_model == TEModel.QWEN3_4B:
+            clip_target.clip = comfy.text_encoders.z_image.te(**llama_detect(clip_data))
+            clip_target.tokenizer = comfy.text_encoders.z_image.ZImageTokenizer
         else:
             # clip_l
             if clip_type == CLIPType.SD3:
diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py
index d47ed27bc..cd4b5f76c 100644
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -78,6 +78,28 @@ class Qwen25_3BConfig:
     rope_scale = None
     final_norm: bool = True
 
+@dataclass
+class Qwen3_4BConfig:
+    vocab_size: int = 151936
+    hidden_size: int = 2560
+    intermediate_size: int = 9728
+    num_hidden_layers: int = 36
+    num_attention_heads: int = 32
+    num_key_value_heads: int = 8
+    max_position_embeddings: int = 40960
+    rms_norm_eps: float = 1e-6
+    rope_theta: float = 1000000.0
+    transformer_type: str = "llama"
+    head_dim = 128
+    rms_norm_add = False
+    mlp_activation = "silu"
+    qkv_bias = False
+    rope_dims = None
+    q_norm = "gemma3"
+    k_norm = "gemma3"
+    rope_scale = None
+    final_norm: bool = True
+
 @dataclass
 class Qwen25_7BVLI_Config:
     vocab_size: int = 152064
@@ -511,6 +533,15 @@ class Qwen25_3B(BaseLlama, torch.nn.Module):
         self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
         self.dtype = dtype
 
+class Qwen3_4B(BaseLlama, torch.nn.Module):
+    def __init__(self, config_dict, dtype, device, operations):
+        super().__init__()
+        config = Qwen3_4BConfig(**config_dict)
+        self.num_layers = config.num_hidden_layers
+
+        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
+        self.dtype = dtype
+
 class Qwen25_7BVLI(BaseLlama, torch.nn.Module):
     def __init__(self, config_dict, dtype, device, operations):
         super().__init__()
diff --git a/comfy/text_encoders/z_image.py b/comfy/text_encoders/z_image.py
new file mode 100644
index 000000000..bb9273b20
--- /dev/null
+++ b/comfy/text_encoders/z_image.py
@@ -0,0 +1,48 @@
+from transformers import Qwen2Tokenizer
+import comfy.text_encoders.llama
+from comfy import sd1_clip
+import os
+
+class Qwen3Tokenizer(sd1_clip.SDTokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer")
+        super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2560, embedding_key='qwen3_4b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data)
+
+
+class ZImageTokenizer(sd1_clip.SD1Tokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="qwen3_4b", tokenizer=Qwen3Tokenizer)
+        self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
+
+    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, **kwargs):
+        if llama_template is None:
+            llama_text = self.llama_template.format(text)
+        else:
+            llama_text = llama_template.format(text)
+
+        tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
+        return tokens
+
+
+class Qwen3_4BModel(sd1_clip.SDClipModel):
+    def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
+        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
+
+
+class ZImageTEModel(sd1_clip.SD1ClipModel):
+    def __init__(self, device="cpu", dtype=None, model_options={}):
+        super().__init__(device=device, dtype=dtype, name="qwen3_4b", clip_model=Qwen3_4BModel, model_options=model_options)
+
+
+def te(dtype_llama=None, llama_scaled_fp8=None, llama_quantization_metadata=None):
+    class ZImageTEModel_(ZImageTEModel):
+        def __init__(self, device="cpu", dtype=None, model_options={}):
+            if llama_scaled_fp8 is not None and "scaled_fp8" not in model_options:
+                model_options = model_options.copy()
+                model_options["scaled_fp8"] = llama_scaled_fp8
+            if dtype_llama is not None:
+                dtype = dtype_llama
+            if llama_quantization_metadata is not None:
+                model_options["quantization_metadata"] = llama_quantization_metadata
+            super().__init__(device=device, dtype=dtype, model_options=model_options)
+    return ZImageTEModel_

From 0e24dbb19f34f242edb77c550396cf6806f7b22f Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 16:02:51 -0800
Subject: [PATCH 08/24] Adjustments to Z Image. (#10893)

---
 comfy/supported_models.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index 8fe8e63f6..af8120400 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -21,6 +21,7 @@ import comfy.text_encoders.ace
 import comfy.text_encoders.omnigen2
 import comfy.text_encoders.qwen_image
 import comfy.text_encoders.hunyuan_image
+import comfy.text_encoders.z_image
 
 from . import supported_models_base
 from . import latent_formats
@@ -994,7 +995,7 @@ class Lumina2(supported_models_base.BASE):
         "shift": 6.0,
     }
 
-    memory_usage_factor = 1.2
+    memory_usage_factor = 1.4
 
     unet_extra_config = {}
     latent_format = latent_formats.Flux
@@ -1013,6 +1014,24 @@ class Lumina2(supported_models_base.BASE):
         hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}gemma2_2b.transformer.".format(pref))
         return supported_models_base.ClipTarget(comfy.text_encoders.lumina2.LuminaTokenizer, comfy.text_encoders.lumina2.te(**hunyuan_detect))
 
+class ZImage(Lumina2):
+    unet_config = {
+        "image_model": "lumina2",
+        "dim": 3840,
+    }
+
+    sampling_settings = {
+        "multiplier": 1.0,
+        "shift": 3.0,
+    }
+
+    memory_usage_factor = 1.7
+
+    def clip_target(self, state_dict={}):
+        pref = self.text_encoder_key_prefix[0]
+        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3_4b.transformer.".format(pref))
+        return supported_models_base.ClipTarget(comfy.text_encoders.z_image.ZImageTokenizer, comfy.text_encoders.z_image.te(**hunyuan_detect))
+
 class WAN21_T2V(supported_models_base.BASE):
     unet_config = {
         "image_model": "wan2.1",
@@ -1453,7 +1472,7 @@ class HunyuanVideo15_SR_Distilled(HunyuanVideo):
         hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_7b.transformer.".format(pref))
         return supported_models_base.ClipTarget(comfy.text_encoders.hunyuan_video.HunyuanVideo15Tokenizer, comfy.text_encoders.hunyuan_image.te(**hunyuan_detect))
 
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2]
 
 
 models += [SVD_img2vid]

From bdb10a583f1b1e495ee00dbd1674f11016a6a93e Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 21:07:58 -0800
Subject: [PATCH 09/24] Fix loras not working on mixed fp8. (#10899)

---
 comfy/model_patcher.py       |  2 +-
 comfy/ops.py                 | 22 +++++++++++++++++++++-
 comfy/quant_ops.py           | 21 ++++++++++++++-------
 comfy/weight_adapter/lora.py |  1 +
 4 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 6551ced5a..73adc7f70 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -132,7 +132,7 @@ class LowVramPatch:
     def __call__(self, weight):
         intermediate_dtype = weight.dtype
         if self.convert_func is not None:
-            weight = self.convert_func(weight.to(dtype=torch.float32, copy=True), inplace=True)
+            weight = self.convert_func(weight, inplace=False)
 
         if intermediate_dtype not in [torch.float32, torch.float16, torch.bfloat16]: #intermediate_dtype has to be one that is supported in math ops
             intermediate_dtype = torch.float32
diff --git a/comfy/ops.py b/comfy/ops.py
index 785aa1c9f..a0ff4e8f1 100644
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -117,6 +117,8 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of
     if weight_has_function or weight.dtype != dtype:
         with wf_context:
             weight = weight.to(dtype=dtype)
+            if isinstance(weight, QuantizedTensor):
+                weight = weight.dequantize()
             for f in s.weight_function:
                 weight = f(weight)
 
@@ -502,7 +504,7 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None
                     weight *= self.scale_weight.to(device=weight.device, dtype=weight.dtype)
                     return weight
                 else:
-                    return weight * self.scale_weight.to(device=weight.device, dtype=weight.dtype)
+                    return weight.to(dtype=torch.float32) * self.scale_weight.to(device=weight.device, dtype=torch.float32)
 
             def set_weight(self, weight, inplace_update=False, seed=None, return_weight=False, **kwargs):
                 weight = comfy.float.stochastic_rounding(weight / self.scale_weight.to(device=weight.device, dtype=weight.dtype), self.weight.dtype, seed=seed)
@@ -643,6 +645,24 @@ def mixed_precision_ops(layer_quant_config={}, compute_dtype=torch.bfloat16, ful
                     not isinstance(input, QuantizedTensor)):
                     input = QuantizedTensor.from_float(input, self.layout_type, scale=self.input_scale, dtype=self.weight.dtype)
                 return self._forward(input, self.weight, self.bias)
+
+            def convert_weight(self, weight, inplace=False, **kwargs):
+                if isinstance(weight, QuantizedTensor):
+                    return weight.dequantize()
+                else:
+                    return weight
+
+            def set_weight(self, weight, inplace_update=False, seed=None, return_weight=False, **kwargs):
+                if getattr(self, 'layout_type', None) is not None:
+                    weight = QuantizedTensor.from_float(weight, self.layout_type, scale=None, dtype=self.weight.dtype, stochastic_rounding=seed, inplace_ops=True)
+                else:
+                    weight = weight.to(self.weight.dtype)
+                if return_weight:
+                    return weight
+
+                assert inplace_update is False  # TODO: eventually remove the inplace_update stuff
+                self.weight = torch.nn.Parameter(weight, requires_grad=False)
+
     return MixedPrecisionOps
 
 def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None, model_config=None):
diff --git a/comfy/quant_ops.py b/comfy/quant_ops.py
index 0c16bcf8d..d2f3e7397 100644
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -1,6 +1,7 @@
 import torch
 import logging
 from typing import Tuple, Dict
+import comfy.float
 
 _LAYOUT_REGISTRY = {}
 _GENERIC_UTILS = {}
@@ -393,7 +394,7 @@ class TensorCoreFP8Layout(QuantizedLayout):
     - orig_dtype: Original dtype before quantization (for casting back)
     """
     @classmethod
-    def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn):
+    def quantize(cls, tensor, scale=None, dtype=torch.float8_e4m3fn, stochastic_rounding=0, inplace_ops=False):
         orig_dtype = tensor.dtype
 
         if scale is None:
@@ -403,17 +404,23 @@ class TensorCoreFP8Layout(QuantizedLayout):
             scale = torch.tensor(scale)
         scale = scale.to(device=tensor.device, dtype=torch.float32)
 
-        tensor_scaled = tensor * (1.0 / scale).to(tensor.dtype)
-        # TODO: uncomment this if it's actually needed because the clamp has a small performance penality'
-        lp_amax = torch.finfo(dtype).max
-        torch.clamp(tensor_scaled, min=-lp_amax, max=lp_amax, out=tensor_scaled)
-        qdata = tensor_scaled.to(dtype, memory_format=torch.contiguous_format)
+        if inplace_ops:
+            tensor *= (1.0 / scale).to(tensor.dtype)
+        else:
+            tensor = tensor * (1.0 / scale).to(tensor.dtype)
+
+        if stochastic_rounding > 0:
+            tensor = comfy.float.stochastic_rounding(tensor, dtype=dtype, seed=stochastic_rounding)
+        else:
+            lp_amax = torch.finfo(dtype).max
+            torch.clamp(tensor, min=-lp_amax, max=lp_amax, out=tensor)
+            tensor = tensor.to(dtype, memory_format=torch.contiguous_format)
 
         layout_params = {
             'scale': scale,
             'orig_dtype': orig_dtype
         }
-        return qdata, layout_params
+        return tensor, layout_params
 
     @staticmethod
     def dequantize(qdata, scale, orig_dtype, **kwargs):
diff --git a/comfy/weight_adapter/lora.py b/comfy/weight_adapter/lora.py
index 4db004e50..3cc60bb1b 100644
--- a/comfy/weight_adapter/lora.py
+++ b/comfy/weight_adapter/lora.py
@@ -194,6 +194,7 @@ class LoRAAdapter(WeightAdapterBase):
             lora_diff = torch.mm(
                 mat1.flatten(start_dim=1), mat2.flatten(start_dim=1)
             ).reshape(weight.shape)
+            del mat1, mat2
             if dora_scale is not None:
                 weight = weight_decompose(
                     dora_scale,

From 90b3995ec842335e44d70e0521ff6ff6c3ff9aaa Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Wed, 26 Nov 2025 00:34:15 -0500
Subject: [PATCH 10/24] ComfyUI v0.3.74

---
 comfyui_version.py | 2 +-
 pyproject.toml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfyui_version.py b/comfyui_version.py
index f8818838e..b565c7367 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.73"
+__version__ = "0.3.74"
diff --git a/pyproject.toml b/pyproject.toml
index 7e4bac12d..ccf0fcdb9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.73"
+version = "0.3.74"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

From 58b85746618e2bc2dd32024c89403926aad59f48 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Tue, 25 Nov 2025 23:36:19 -0800
Subject: [PATCH 11/24] Fix Flux2 reference image mem estimation. (#10905)

---
 comfy/model_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index cc21b1de9..9b76c285e 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -926,7 +926,7 @@ class Flux(BaseModel):
         out = {}
         ref_latents = kwargs.get("reference_latents", None)
         if ref_latents is not None:
-            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
+            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()[2:]), ref_latents))])
         return out
 
 class Flux2(Flux):

From 8402c8700a29a97bc5d706d6a0b14c41bc2c2d8a Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Wed, 26 Nov 2025 02:41:13 -0500
Subject: [PATCH 12/24] ComfyUI version v0.3.75

---
 comfyui_version.py | 2 +-
 pyproject.toml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfyui_version.py b/comfyui_version.py
index b565c7367..fa4b4f4b0 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.74"
+__version__ = "0.3.75"
diff --git a/pyproject.toml b/pyproject.toml
index ccf0fcdb9..9009e65fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.74"
+version = "0.3.75"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

From f16219e3aadcb7a301a1a313ab8989c3ebe53764 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 26 Nov 2025 01:00:43 -0800
Subject: [PATCH 13/24] Add cheap latent preview for flux 2. (#10907)

Thank you to the person who calculated them. You saved me a percent of my
time.
---
 comfy/latent_formats.py | 40 ++++++++++++++++++++++++++++++++++++++++
 latent_preview.py       |  7 +++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py
index e98c7d6d8..8e110f45d 100644
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -6,6 +6,7 @@ class LatentFormat:
     latent_dimensions = 2
     latent_rgb_factors = None
     latent_rgb_factors_bias = None
+    latent_rgb_factors_reshape = None
     taesd_decoder_name = None
 
     def process_in(self, latent):
@@ -181,6 +182,45 @@ class Flux(SD3):
 class Flux2(LatentFormat):
     latent_channels = 128
 
+    def __init__(self):
+        self.latent_rgb_factors =[
+            [0.0058, 0.0113, 0.0073],
+            [0.0495, 0.0443, 0.0836],
+            [-0.0099, 0.0096, 0.0644],
+            [0.2144, 0.3009, 0.3652],
+            [0.0166, -0.0039, -0.0054],
+            [0.0157, 0.0103, -0.0160],
+            [-0.0398, 0.0902, -0.0235],
+            [-0.0052, 0.0095, 0.0109],
+            [-0.3527, -0.2712, -0.1666],
+            [-0.0301, -0.0356, -0.0180],
+            [-0.0107, 0.0078, 0.0013],
+            [0.0746, 0.0090, -0.0941],
+            [0.0156, 0.0169, 0.0070],
+            [-0.0034, -0.0040, -0.0114],
+            [0.0032, 0.0181, 0.0080],
+            [-0.0939, -0.0008, 0.0186],
+            [0.0018, 0.0043, 0.0104],
+            [0.0284, 0.0056, -0.0127],
+            [-0.0024, -0.0022, -0.0030],
+            [0.1207, -0.0026, 0.0065],
+            [0.0128, 0.0101, 0.0142],
+            [0.0137, -0.0072, -0.0007],
+            [0.0095, 0.0092, -0.0059],
+            [0.0000, -0.0077, -0.0049],
+            [-0.0465, -0.0204, -0.0312],
+            [0.0095, 0.0012, -0.0066],
+            [0.0290, -0.0034, 0.0025],
+            [0.0220, 0.0169, -0.0048],
+            [-0.0332, -0.0457, -0.0468],
+            [-0.0085, 0.0389, 0.0609],
+            [-0.0076, 0.0003, -0.0043],
+            [-0.0111, -0.0460, -0.0614],
+        ]
+
+        self.latent_rgb_factors_bias = [-0.0329, -0.0718, -0.0851]
+        self.latent_rgb_factors_reshape = lambda t: t.reshape(t.shape[0], 32, 2, 2, t.shape[-2], t.shape[-1]).permute(0, 1, 4, 2, 5, 3).reshape(t.shape[0], 32, t.shape[-2] * 2, t.shape[-1] * 2)
+
     def process_in(self, latent):
         return latent
 
diff --git a/latent_preview.py b/latent_preview.py
index 95d3cb733..ddf6dcf49 100644
--- a/latent_preview.py
+++ b/latent_preview.py
@@ -37,13 +37,16 @@ class TAESDPreviewerImpl(LatentPreviewer):
 
 
 class Latent2RGBPreviewer(LatentPreviewer):
-    def __init__(self, latent_rgb_factors, latent_rgb_factors_bias=None):
+    def __init__(self, latent_rgb_factors, latent_rgb_factors_bias=None, latent_rgb_factors_reshape=None):
         self.latent_rgb_factors = torch.tensor(latent_rgb_factors, device="cpu").transpose(0, 1)
         self.latent_rgb_factors_bias = None
         if latent_rgb_factors_bias is not None:
             self.latent_rgb_factors_bias = torch.tensor(latent_rgb_factors_bias, device="cpu")
+        self.latent_rgb_factors_reshape = latent_rgb_factors_reshape
 
     def decode_latent_to_preview(self, x0):
+        if self.latent_rgb_factors_reshape is not None:
+            x0 = self.latent_rgb_factors_reshape(x0)
         self.latent_rgb_factors = self.latent_rgb_factors.to(dtype=x0.dtype, device=x0.device)
         if self.latent_rgb_factors_bias is not None:
             self.latent_rgb_factors_bias = self.latent_rgb_factors_bias.to(dtype=x0.dtype, device=x0.device)
@@ -85,7 +88,7 @@ def get_previewer(device, latent_format):
 
         if previewer is None:
             if latent_format.latent_rgb_factors is not None:
-                previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors, latent_format.latent_rgb_factors_bias)
+                previewer = Latent2RGBPreviewer(latent_format.latent_rgb_factors, latent_format.latent_rgb_factors_bias, latent_format.latent_rgb_factors_reshape)
     return previewer
 
 def prepare_callback(model, steps, x0_output_dict=None):

From 8938aa3f3064415758fa8f3a628476535a676183 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Wed, 26 Nov 2025 19:14:02 +0200
Subject: [PATCH 14/24] add Veo3 First-Last-Frame node (#10878)

---
 comfy_api_nodes/apis/veo_api.py |  38 +++-----
 comfy_api_nodes/nodes_veo2.py   | 155 ++++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+), 25 deletions(-)

diff --git a/comfy_api_nodes/apis/veo_api.py b/comfy_api_nodes/apis/veo_api.py
index a55137afb..8328d1aa4 100644
--- a/comfy_api_nodes/apis/veo_api.py
+++ b/comfy_api_nodes/apis/veo_api.py
@@ -1,34 +1,21 @@
-from typing import Optional, Union
-from enum import Enum
+from typing import Optional
 
 from pydantic import BaseModel, Field
 
 
-class Image2(BaseModel):
-    bytesBase64Encoded: str
-    gcsUri: Optional[str] = None
-    mimeType: Optional[str] = None
+class VeoRequestInstanceImage(BaseModel):
+    bytesBase64Encoded: str | None = Field(None)
+    gcsUri: str | None = Field(None)
+    mimeType: str | None = Field(None)
 
 
-class Image3(BaseModel):
-    bytesBase64Encoded: Optional[str] = None
-    gcsUri: str
-    mimeType: Optional[str] = None
-
-
-class Instance1(BaseModel):
-    image: Optional[Union[Image2, Image3]] = Field(
-        None, description='Optional image to guide video generation'
-    )
+class VeoRequestInstance(BaseModel):
+    image: VeoRequestInstanceImage | None = Field(None)
+    lastFrame: VeoRequestInstanceImage | None = Field(None)
     prompt: str = Field(..., description='Text description of the video')
 
 
-class PersonGeneration1(str, Enum):
-    ALLOW = 'ALLOW'
-    BLOCK = 'BLOCK'
-
-
-class Parameters1(BaseModel):
+class VeoRequestParameters(BaseModel):
     aspectRatio: Optional[str] = Field(None, examples=['16:9'])
     durationSeconds: Optional[int] = None
     enhancePrompt: Optional[bool] = None
@@ -37,17 +24,18 @@ class Parameters1(BaseModel):
         description='Generate audio for the video. Only supported by veo 3 models.',
     )
     negativePrompt: Optional[str] = None
-    personGeneration: Optional[PersonGeneration1] = None
+    personGeneration: str | None = Field(None, description="ALLOW or BLOCK")
     sampleCount: Optional[int] = None
     seed: Optional[int] = None
     storageUri: Optional[str] = Field(
         None, description='Optional Cloud Storage URI to upload the video'
     )
+    resolution: str | None = Field(None)
 
 
 class VeoGenVidRequest(BaseModel):
-    instances: Optional[list[Instance1]] = None
-    parameters: Optional[Parameters1] = None
+    instances: list[VeoRequestInstance] | None = Field(None)
+    parameters: VeoRequestParameters | None = Field(None)
 
 
 class VeoGenVidResponse(BaseModel):
diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py
index d37e9e9b4..a54dc13ab 100644
--- a/comfy_api_nodes/nodes_veo2.py
+++ b/comfy_api_nodes/nodes_veo2.py
@@ -1,6 +1,7 @@
 import base64
 from io import BytesIO
 
+import torch
 from typing_extensions import override
 
 from comfy_api.input_impl.video_types import VideoFromFile
@@ -10,6 +11,9 @@ from comfy_api_nodes.apis.veo_api import (
     VeoGenVidPollResponse,
     VeoGenVidRequest,
     VeoGenVidResponse,
+    VeoRequestInstance,
+    VeoRequestInstanceImage,
+    VeoRequestParameters,
 )
 from comfy_api_nodes.util import (
     ApiEndpoint,
@@ -346,12 +350,163 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode):
         )
 
 
+class Veo3FirstLastFrameNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="Veo3FirstLastFrameNode",
+            display_name="Google Veo 3 First-Last-Frame to Video",
+            category="api node/video/Veo",
+            description="Generate video using prompt and first and last frames.",
+            inputs=[
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Text description of the video",
+                ),
+                IO.String.Input(
+                    "negative_prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Negative text prompt to guide what to avoid in the video",
+                ),
+                IO.Combo.Input("resolution", options=["720p", "1080p"]),
+                IO.Combo.Input(
+                    "aspect_ratio",
+                    options=["16:9", "9:16"],
+                    default="16:9",
+                    tooltip="Aspect ratio of the output video",
+                ),
+                IO.Int.Input(
+                    "duration",
+                    default=8,
+                    min=4,
+                    max=8,
+                    step=2,
+                    display_mode=IO.NumberDisplay.slider,
+                    tooltip="Duration of the output video in seconds",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFF,
+                    step=1,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed for video generation",
+                ),
+                IO.Image.Input("first_frame", tooltip="Start frame"),
+                IO.Image.Input("last_frame", tooltip="End frame"),
+                IO.Combo.Input(
+                    "model",
+                    options=["veo-3.1-generate", "veo-3.1-fast-generate"],
+                    default="veo-3.1-fast-generate",
+                ),
+                IO.Boolean.Input(
+                    "generate_audio",
+                    default=True,
+                    tooltip="Generate audio for the video.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        prompt: str,
+        negative_prompt: str,
+        resolution: str,
+        aspect_ratio: str,
+        duration: int,
+        seed: int,
+        first_frame: torch.Tensor,
+        last_frame: torch.Tensor,
+        model: str,
+        generate_audio: bool,
+    ):
+        model = MODELS_MAP[model]
+        initial_response = await sync_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/veo/{model}/generate", method="POST"),
+            response_model=VeoGenVidResponse,
+            data=VeoGenVidRequest(
+                instances=[
+                    VeoRequestInstance(
+                        prompt=prompt,
+                        image=VeoRequestInstanceImage(
+                            bytesBase64Encoded=tensor_to_base64_string(first_frame), mimeType="image/png"
+                        ),
+                        lastFrame=VeoRequestInstanceImage(
+                            bytesBase64Encoded=tensor_to_base64_string(last_frame), mimeType="image/png"
+                        ),
+                    ),
+                ],
+                parameters=VeoRequestParameters(
+                    aspectRatio=aspect_ratio,
+                    personGeneration="ALLOW",
+                    durationSeconds=duration,
+                    enhancePrompt=True,  # cannot be False for Veo3
+                    seed=seed,
+                    generateAudio=generate_audio,
+                    negativePrompt=negative_prompt,
+                    resolution=resolution,
+                ),
+            ),
+        )
+        poll_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/veo/{model}/poll", method="POST"),
+            response_model=VeoGenVidPollResponse,
+            status_extractor=lambda r: "completed" if r.done else "pending",
+            data=VeoGenVidPollRequest(
+                operationName=initial_response.name,
+            ),
+            poll_interval=5.0,
+            estimated_duration=AVERAGE_DURATION_VIDEO_GEN,
+        )
+
+        if poll_response.error:
+            raise Exception(f"Veo API error: {poll_response.error.message} (code: {poll_response.error.code})")
+
+        response = poll_response.response
+        filtered_count = response.raiMediaFilteredCount
+        if filtered_count:
+            reasons = response.raiMediaFilteredReasons or []
+            reason_part = f": {reasons[0]}" if reasons else ""
+            raise Exception(
+                f"Content blocked by Google's Responsible AI filters{reason_part} "
+                f"({filtered_count} video{'s' if filtered_count != 1 else ''} filtered)."
+            )
+
+        if response.videos:
+            video = response.videos[0]
+            if video.bytesBase64Encoded:
+                return IO.NodeOutput(VideoFromFile(BytesIO(base64.b64decode(video.bytesBase64Encoded))))
+            if video.gcsUri:
+                return IO.NodeOutput(await download_url_to_video_output(video.gcsUri))
+            raise Exception("Video returned but no data or URL was provided")
+        raise Exception("Video generation completed but no video was returned")
+
+
 class VeoExtension(ComfyExtension):
     @override
     async def get_node_list(self) -> list[type[IO.ComfyNode]]:
         return [
             VeoVideoGenerationNode,
             Veo3VideoGenerationNode,
+            Veo3FirstLastFrameNode,
         ]
 
 

From 1105e0d139001ad602d0f883406bfce41e54ae67 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Wed, 26 Nov 2025 19:23:14 +0200
Subject: [PATCH 15/24] improve UX for batch uploads in
 upload_images_to_comfyapi (#10913)

---
 comfy_api_nodes/util/upload_helpers.py | 43 +++++++++++++-------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/comfy_api_nodes/util/upload_helpers.py b/comfy_api_nodes/util/upload_helpers.py
index 632450d9b..b9019841f 100644
--- a/comfy_api_nodes/util/upload_helpers.py
+++ b/comfy_api_nodes/util/upload_helpers.py
@@ -4,7 +4,7 @@ import logging
 import time
 import uuid
 from io import BytesIO
-from typing import Optional, Union
+from typing import Optional
 from urllib.parse import urlparse
 
 import aiohttp
@@ -48,8 +48,9 @@ async def upload_images_to_comfyapi(
     image: torch.Tensor,
     *,
     max_images: int = 8,
-    mime_type: Optional[str] = None,
-    wait_label: Optional[str] = "Uploading",
+    mime_type: str | None = None,
+    wait_label: str | None = "Uploading",
+    show_batch_index: bool = True,
 ) -> list[str]:
     """
     Uploads images to ComfyUI API and returns download URLs.
@@ -59,11 +60,18 @@ async def upload_images_to_comfyapi(
     download_urls: list[str] = []
     is_batch = len(image.shape) > 3
     batch_len = image.shape[0] if is_batch else 1
+    num_to_upload = min(batch_len, max_images)
+    batch_start_ts = time.monotonic()
 
-    for idx in range(min(batch_len, max_images)):
+    for idx in range(num_to_upload):
         tensor = image[idx] if is_batch else image
         img_io = tensor_to_bytesio(tensor, mime_type=mime_type)
-        url = await upload_file_to_comfyapi(cls, img_io, img_io.name, mime_type, wait_label)
+
+        effective_label = wait_label
+        if wait_label and show_batch_index and num_to_upload > 1:
+            effective_label = f"{wait_label} ({idx + 1}/{num_to_upload})"
+
+        url = await upload_file_to_comfyapi(cls, img_io, img_io.name, mime_type, effective_label, batch_start_ts)
         download_urls.append(url)
     return download_urls
 
@@ -126,8 +134,9 @@ async def upload_file_to_comfyapi(
     cls: type[IO.ComfyNode],
     file_bytes_io: BytesIO,
     filename: str,
-    upload_mime_type: Optional[str],
-    wait_label: Optional[str] = "Uploading",
+    upload_mime_type: str | None,
+    wait_label: str | None = "Uploading",
+    progress_origin_ts: float | None = None,
 ) -> str:
     """Uploads a single file to ComfyUI API and returns its download URL."""
     if upload_mime_type is None:
@@ -148,6 +157,7 @@ async def upload_file_to_comfyapi(
         file_bytes_io,
         content_type=upload_mime_type,
         wait_label=wait_label,
+        progress_origin_ts=progress_origin_ts,
     )
     return create_resp.download_url
 
@@ -155,27 +165,18 @@ async def upload_file_to_comfyapi(
 async def upload_file(
     cls: type[IO.ComfyNode],
     upload_url: str,
-    file: Union[BytesIO, str],
+    file: BytesIO | str,
     *,
-    content_type: Optional[str] = None,
+    content_type: str | None = None,
     max_retries: int = 3,
     retry_delay: float = 1.0,
     retry_backoff: float = 2.0,
-    wait_label: Optional[str] = None,
+    wait_label: str | None = None,
+    progress_origin_ts: float | None = None,
 ) -> None:
     """
     Upload a file to a signed URL (e.g., S3 pre-signed PUT) with retries, Comfy progress display, and interruption.
 
-    Args:
-        cls: Node class (provides auth context + UI progress hooks).
-        upload_url: Pre-signed PUT URL.
-        file: BytesIO or path string.
-        content_type: Explicit MIME type. If None, we *suppress* Content-Type.
-        max_retries: Maximum retry attempts.
-        retry_delay: Initial delay in seconds.
-        retry_backoff: Exponential backoff factor.
-        wait_label: Progress label shown in Comfy UI.
-
     Raises:
         ProcessingInterrupted, LocalNetworkError, ApiServerError, Exception
     """
@@ -198,7 +199,7 @@ async def upload_file(
 
     attempt = 0
     delay = retry_delay
-    start_ts = time.monotonic()
+    start_ts = progress_origin_ts if progress_origin_ts is not None else time.monotonic()
     op_uuid = uuid.uuid4().hex[:8]
     while True:
         attempt += 1

From 8908ee262862f1252d1363d55c59872fb3361066 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Wed, 26 Nov 2025 20:38:30 +0200
Subject: [PATCH 16/24] fix(gemini): use first 10 images as fileData (URLs) and
 remaining images as inline base64 (#10918)

---
 comfy_api_nodes/apis/gemini_api.py |  6 ++++
 comfy_api_nodes/nodes_gemini.py    | 55 ++++++++++++++++++++----------
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/comfy_api_nodes/apis/gemini_api.py b/comfy_api_nodes/apis/gemini_api.py
index d34590d28..a380ecc86 100644
--- a/comfy_api_nodes/apis/gemini_api.py
+++ b/comfy_api_nodes/apis/gemini_api.py
@@ -58,8 +58,14 @@ class GeminiInlineData(BaseModel):
     mimeType: GeminiMimeType | None = Field(None)
 
 
+class GeminiFileData(BaseModel):
+    fileUri: str | None = Field(None)
+    mimeType: GeminiMimeType | None = Field(None)
+
+
 class GeminiPart(BaseModel):
     inlineData: GeminiInlineData | None = Field(None)
+    fileData: GeminiFileData | None = Field(None)
     text: str | None = Field(None)
 
 
diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py
index 938a20f84..976d9c225 100644
--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@@ -20,6 +20,7 @@ from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api.util import VideoCodec, VideoContainer
 from comfy_api_nodes.apis.gemini_api import (
     GeminiContent,
+    GeminiFileData,
     GeminiGenerateContentRequest,
     GeminiGenerateContentResponse,
     GeminiImageConfig,
@@ -38,6 +39,7 @@ from comfy_api_nodes.util import (
     get_number_of_images,
     sync_op,
     tensor_to_base64_string,
+    upload_images_to_comfyapi,
     validate_string,
     video_to_base64_string,
 )
@@ -68,24 +70,43 @@ class GeminiImageModel(str, Enum):
     gemini_2_5_flash_image = "gemini-2.5-flash-image"
 
 
-def create_image_parts(image_input: torch.Tensor) -> list[GeminiPart]:
-    """
-    Convert image tensor input to Gemini API compatible parts.
-
-    Args:
-        image_input: Batch of image tensors from ComfyUI.
-
-    Returns:
-        List of GeminiPart objects containing the encoded images.
-    """
+async def create_image_parts(
+    cls: type[IO.ComfyNode],
+    images: torch.Tensor,
+    image_limit: int = 0,
+) -> list[GeminiPart]:
     image_parts: list[GeminiPart] = []
-    for image_index in range(image_input.shape[0]):
-        image_as_b64 = tensor_to_base64_string(image_input[image_index].unsqueeze(0))
+    if image_limit < 0:
+        raise ValueError("image_limit must be greater than or equal to 0 when creating Gemini image parts.")
+    total_images = get_number_of_images(images)
+    if total_images <= 0:
+        raise ValueError("No images provided to create_image_parts; at least one image is required.")
+
+    # If image_limit == 0 --> use all images; otherwise clamp to image_limit.
+    effective_max = total_images if image_limit == 0 else min(total_images, image_limit)
+
+    # Number of images we'll send as URLs (fileData)
+    num_url_images = min(effective_max, 10)  # Vertex API max number of image links
+    reference_images_urls = await upload_images_to_comfyapi(
+        cls,
+        images,
+        max_images=num_url_images,
+    )
+    for reference_image_url in reference_images_urls:
+        image_parts.append(
+            GeminiPart(
+                fileData=GeminiFileData(
+                    mimeType=GeminiMimeType.image_png,
+                    fileUri=reference_image_url,
+                )
+            )
+        )
+    for idx in range(num_url_images, effective_max):
         image_parts.append(
             GeminiPart(
                 inlineData=GeminiInlineData(
                     mimeType=GeminiMimeType.image_png,
-                    data=image_as_b64,
+                    data=tensor_to_base64_string(images[idx]),
                 )
             )
         )
@@ -338,8 +359,7 @@ class GeminiNode(IO.ComfyNode):
 
         # Add other modal parts
         if images is not None:
-            image_parts = create_image_parts(images)
-            parts.extend(image_parts)
+            parts.extend(await create_image_parts(cls, images))
         if audio is not None:
             parts.extend(cls.create_audio_parts(audio))
         if video is not None:
@@ -562,8 +582,7 @@ class GeminiImage(IO.ComfyNode):
         image_config = GeminiImageConfig(aspectRatio=aspect_ratio)
 
         if images is not None:
-            image_parts = create_image_parts(images)
-            parts.extend(image_parts)
+            parts.extend(await create_image_parts(cls, images))
         if files is not None:
             parts.extend(files)
 
@@ -702,7 +721,7 @@ class GeminiImage2(IO.ComfyNode):
         if images is not None:
             if get_number_of_images(images) > 14:
                 raise ValueError("The current maximum number of supported images is 14.")
-            parts.extend(create_image_parts(images))
+            parts.extend(await create_image_parts(cls, images))
         if files is not None:
             parts.extend(files)
 

From 234c3dc85f7e61a537bbf6d8999c5880c5e0b746 Mon Sep 17 00:00:00 2001
From: Christian Byrne <cbyrne@comfy.org>
Date: Wed, 26 Nov 2025 11:58:08 -0800
Subject: [PATCH 17/24] Bump frontend to 1.32.9 (#10867)

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 5f20816d6..9291552d3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.30.6
+comfyui-frontend-package==1.32.9
 comfyui-workflow-templates==0.7.20
 comfyui-embedded-docs==0.3.1
 torch

From 58c6ed541d5aaf6d9b12f63bc23c33164e1cf7a3 Mon Sep 17 00:00:00 2001
From: Terry Jia <terryjia88@gmail.com>
Date: Wed, 26 Nov 2025 14:58:27 -0500
Subject: [PATCH 18/24] Merge 3d animation node (#10025)

---
 comfy_extras/nodes_load_3d.py | 110 +++++++---------------------------
 1 file changed, 23 insertions(+), 87 deletions(-)

diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py
index 899608149..54c66ef68 100644
--- a/comfy_extras/nodes_load_3d.py
+++ b/comfy_extras/nodes_load_3d.py
@@ -7,6 +7,10 @@ from comfy_api.input_impl import VideoFromFile
 
 from pathlib import Path
 
+from PIL import Image
+import numpy as np
+
+import uuid
 
 def normalize_path(path):
     return path.replace('\\', '/')
@@ -34,58 +38,6 @@ class Load3D():
             "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
         }}
 
-    RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "IMAGE", "LOAD3D_CAMERA", IO.VIDEO)
-    RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "lineart", "camera_info", "recording_video")
-
-    FUNCTION = "process"
-    EXPERIMENTAL = True
-
-    CATEGORY = "3d"
-
-    def process(self, model_file, image, **kwargs):
-        image_path = folder_paths.get_annotated_filepath(image['image'])
-        mask_path = folder_paths.get_annotated_filepath(image['mask'])
-        normal_path = folder_paths.get_annotated_filepath(image['normal'])
-        lineart_path = folder_paths.get_annotated_filepath(image['lineart'])
-
-        load_image_node = nodes.LoadImage()
-        output_image, ignore_mask = load_image_node.load_image(image=image_path)
-        ignore_image, output_mask = load_image_node.load_image(image=mask_path)
-        normal_image, ignore_mask2 = load_image_node.load_image(image=normal_path)
-        lineart_image, ignore_mask3 = load_image_node.load_image(image=lineart_path)
-
-        video = None
-
-        if image['recording'] != "":
-            recording_video_path = folder_paths.get_annotated_filepath(image['recording'])
-
-            video = VideoFromFile(recording_video_path)
-
-        return output_image, output_mask, model_file, normal_image, lineart_image, image['camera_info'], video
-
-class Load3DAnimation():
-    @classmethod
-    def INPUT_TYPES(s):
-        input_dir = os.path.join(folder_paths.get_input_directory(), "3d")
-
-        os.makedirs(input_dir, exist_ok=True)
-
-        input_path = Path(input_dir)
-        base_path = Path(folder_paths.get_input_directory())
-
-        files = [
-            normalize_path(str(file_path.relative_to(base_path)))
-            for file_path in input_path.rglob("*")
-            if file_path.suffix.lower() in {'.gltf', '.glb', '.fbx'}
-        ]
-
-        return {"required": {
-            "model_file": (sorted(files), {"file_upload": True}),
-            "image": ("LOAD_3D_ANIMATION", {}),
-            "width": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
-            "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}),
-        }}
-
     RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "LOAD3D_CAMERA", IO.VIDEO)
     RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "camera_info", "recording_video")
 
@@ -120,7 +72,8 @@ class Preview3D():
             "model_file": ("STRING", {"default": "", "multiline": False}),
         },
         "optional": {
-            "camera_info": ("LOAD3D_CAMERA", {})
+            "camera_info": ("LOAD3D_CAMERA", {}),
+            "bg_image": ("IMAGE", {})
         }}
 
     OUTPUT_NODE = True
@@ -133,50 +86,33 @@ class Preview3D():
 
     def process(self, model_file, **kwargs):
         camera_info = kwargs.get("camera_info", None)
+        bg_image = kwargs.get("bg_image", None)
+
+        bg_image_path = None
+        if bg_image is not None:
+
+            img_array = (bg_image[0].cpu().numpy() * 255).astype(np.uint8)
+            img = Image.fromarray(img_array)
+
+            temp_dir = folder_paths.get_temp_directory()
+            filename = f"bg_{uuid.uuid4().hex}.png"
+            bg_image_path = os.path.join(temp_dir, filename)
+            img.save(bg_image_path, compress_level=1)
+
+            bg_image_path = f"temp/{filename}"
 
         return {
             "ui": {
-                "result": [model_file, camera_info]
-            }
-        }
-
-class Preview3DAnimation():
-    @classmethod
-    def INPUT_TYPES(s):
-        return {"required": {
-            "model_file": ("STRING", {"default": "", "multiline": False}),
-        },
-        "optional": {
-            "camera_info": ("LOAD3D_CAMERA", {})
-        }}
-
-    OUTPUT_NODE = True
-    RETURN_TYPES = ()
-
-    CATEGORY = "3d"
-
-    FUNCTION = "process"
-    EXPERIMENTAL = True
-
-    def process(self, model_file, **kwargs):
-        camera_info = kwargs.get("camera_info", None)
-
-        return {
-            "ui": {
-                "result": [model_file, camera_info]
+                "result": [model_file, camera_info, bg_image_path]
             }
         }
 
 NODE_CLASS_MAPPINGS = {
     "Load3D": Load3D,
-    "Load3DAnimation": Load3DAnimation,
     "Preview3D": Preview3D,
-    "Preview3DAnimation": Preview3DAnimation
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
-    "Load3D": "Load 3D",
-    "Load3DAnimation": "Load 3D - Animation",
-    "Preview3D": "Preview 3D",
-    "Preview3DAnimation": "Preview 3D - Animation"
+    "Load3D": "Load 3D & Animation",
+    "Preview3D": "Preview 3D & Animation",
 }

From 55f654db3ddaf5a10ac6dbe79774c23c350d279d Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 26 Nov 2025 12:16:40 -0800
Subject: [PATCH 19/24] Fix the CSP offline feature. (#10923)

---
 server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server.py b/server.py
index 0fd2e49e3..fca5050bd 100644
--- a/server.py
+++ b/server.py
@@ -174,7 +174,7 @@ def create_block_external_middleware():
         else:
             response = await handler(request)
 
-        response.headers['Content-Security-Policy'] = "default-src 'self'; script-src 'self' 'unsafe-inline' blob:; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self'; connect-src 'self'; frame-src 'self'; object-src 'self';"
+        response.headers['Content-Security-Policy'] = "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; font-src 'self'; connect-src 'self'; frame-src 'self'; object-src 'self';"
         return response
 
     return block_external_middleware

From dd41b745497cdbbafb0bd745f590726b0e41f9f3 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 26 Nov 2025 12:36:38 -0800
Subject: [PATCH 20/24] Add Z Image to readme. (#10924)

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index b9300ab07..91fb510e1 100644
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
    - [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
    - [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/)
    - [Flux 2](https://comfyanonymous.github.io/ComfyUI_examples/flux2/)
+   - [Z Image](https://comfyanonymous.github.io/ComfyUI_examples/z_image/)
 - Image Editing Models
    - [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
    - [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)

From d8433c63fdacef24f40da401b02ebba272bf1fbb Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Thu, 27 Nov 2025 00:42:01 +0200
Subject: [PATCH 21/24] chore(api-nodes): remove chat widgets from
 OpenAI/Gemini nodes (#10861)

---
 comfy_api_nodes/nodes_gemini.py | 77 +--------------------------------
 comfy_api_nodes/nodes_openai.py | 46 ++++----------------
 2 files changed, 11 insertions(+), 112 deletions(-)

diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py
index 976d9c225..08f7b0f64 100644
--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@@ -4,10 +4,7 @@ See: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/infer
 """
 
 import base64
-import json
 import os
-import time
-import uuid
 from enum import Enum
 from io import BytesIO
 from typing import Literal
@@ -43,7 +40,6 @@ from comfy_api_nodes.util import (
     validate_string,
     video_to_base64_string,
 )
-from server import PromptServer
 
 GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
 GEMINI_MAX_INPUT_FILE_SIZE = 20 * 1024 * 1024  # 20 MB
@@ -384,29 +380,6 @@ class GeminiNode(IO.ComfyNode):
         )
 
         output_text = get_text_from_response(response)
-        if output_text:
-            # Not a true chat history like the OpenAI Chat node. It is emulated so the frontend can show a copy button.
-            render_spec = {
-                "node_id": cls.hidden.unique_id,
-                "component": "ChatHistoryWidget",
-                "props": {
-                    "history": json.dumps(
-                        [
-                            {
-                                "prompt": prompt,
-                                "response": output_text,
-                                "response_id": str(uuid.uuid4()),
-                                "timestamp": time.time(),
-                            }
-                        ]
-                    ),
-                },
-            }
-            PromptServer.instance.send_sync(
-                "display_component",
-                render_spec,
-            )
-
         return IO.NodeOutput(output_text or "Empty response from Gemini model...")
 
 
@@ -601,30 +574,7 @@ class GeminiImage(IO.ComfyNode):
             response_model=GeminiGenerateContentResponse,
             price_extractor=calculate_tokens_price,
         )
-
-        output_text = get_text_from_response(response)
-        if output_text:
-            render_spec = {
-                "node_id": cls.hidden.unique_id,
-                "component": "ChatHistoryWidget",
-                "props": {
-                    "history": json.dumps(
-                        [
-                            {
-                                "prompt": prompt,
-                                "response": output_text,
-                                "response_id": str(uuid.uuid4()),
-                                "timestamp": time.time(),
-                            }
-                        ]
-                    ),
-                },
-            }
-            PromptServer.instance.send_sync(
-                "display_component",
-                render_spec,
-            )
-        return IO.NodeOutput(get_image_from_response(response), output_text)
+        return IO.NodeOutput(get_image_from_response(response), get_text_from_response(response))
 
 
 class GeminiImage2(IO.ComfyNode):
@@ -744,30 +694,7 @@ class GeminiImage2(IO.ComfyNode):
             response_model=GeminiGenerateContentResponse,
             price_extractor=calculate_tokens_price,
         )
-
-        output_text = get_text_from_response(response)
-        if output_text:
-            render_spec = {
-                "node_id": cls.hidden.unique_id,
-                "component": "ChatHistoryWidget",
-                "props": {
-                    "history": json.dumps(
-                        [
-                            {
-                                "prompt": prompt,
-                                "response": output_text,
-                                "response_id": str(uuid.uuid4()),
-                                "timestamp": time.time(),
-                            }
-                        ]
-                    ),
-                },
-            }
-            PromptServer.instance.send_sync(
-                "display_component",
-                render_spec,
-            )
-        return IO.NodeOutput(get_image_from_response(response), output_text)
+        return IO.NodeOutput(get_image_from_response(response), get_text_from_response(response))
 
 
 class GeminiExtension(ComfyExtension):
diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py
index acf35d276..c8da5464b 100644
--- a/comfy_api_nodes/nodes_openai.py
+++ b/comfy_api_nodes/nodes_openai.py
@@ -1,15 +1,10 @@
 from io import BytesIO
-from typing import Optional, Union
-import json
 import os
-import time
-import uuid
 from enum import Enum
 from inspect import cleandoc
 import numpy as np
 import torch
 from PIL import Image
-from server import PromptServer
 import folder_paths
 import base64
 from comfy_api.latest import IO, ComfyExtension
@@ -587,11 +582,11 @@ class OpenAIChatNode(IO.ComfyNode):
     def create_input_message_contents(
         cls,
         prompt: str,
-        image: Optional[torch.Tensor] = None,
-        files: Optional[list[InputFileContent]] = None,
+        image: torch.Tensor | None = None,
+        files: list[InputFileContent] | None = None,
     ) -> InputMessageContentList:
         """Create a list of input message contents from prompt and optional image."""
-        content_list: list[Union[InputContent, InputTextContent, InputImageContent, InputFileContent]] = [
+        content_list: list[InputContent | InputTextContent | InputImageContent | InputFileContent] = [
             InputTextContent(text=prompt, type="input_text"),
         ]
         if image is not None:
@@ -617,9 +612,9 @@ class OpenAIChatNode(IO.ComfyNode):
         prompt: str,
         persist_context: bool = False,
         model: SupportedOpenAIModel = SupportedOpenAIModel.gpt_5.value,
-        images: Optional[torch.Tensor] = None,
-        files: Optional[list[InputFileContent]] = None,
-        advanced_options: Optional[CreateModelResponseProperties] = None,
+        images: torch.Tensor | None = None,
+        files: list[InputFileContent] | None = None,
+        advanced_options: CreateModelResponseProperties | None = None,
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
 
@@ -660,30 +655,7 @@ class OpenAIChatNode(IO.ComfyNode):
                 status_extractor=lambda response: response.status,
                 completed_statuses=["incomplete", "completed"]
             )
-        output_text = cls.get_text_from_message_content(cls.get_message_content_from_response(result_response))
-
-        # Update history
-        render_spec = {
-            "node_id": cls.hidden.unique_id,
-            "component": "ChatHistoryWidget",
-            "props": {
-                "history": json.dumps(
-                    [
-                        {
-                            "prompt": prompt,
-                            "response": output_text,
-                            "response_id": str(uuid.uuid4()),
-                            "timestamp": time.time(),
-                        }
-                    ]
-                ),
-            },
-        }
-        PromptServer.instance.send_sync(
-            "display_component",
-            render_spec,
-        )
-        return IO.NodeOutput(output_text)
+        return IO.NodeOutput(cls.get_text_from_message_content(cls.get_message_content_from_response(result_response)))
 
 
 class OpenAIInputFiles(IO.ComfyNode):
@@ -790,8 +762,8 @@ class OpenAIChatConfig(IO.ComfyNode):
     def execute(
         cls,
         truncation: bool,
-        instructions: Optional[str] = None,
-        max_output_tokens: Optional[int] = None,
+        instructions: str | None = None,
+        max_output_tokens: int | None = None,
     ) -> IO.NodeOutput:
         """
         Configure advanced options for the OpenAI Chat Node.

From a2d60aad0f8e03657d501842460123f6eaaf6791 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Thu, 27 Nov 2025 00:55:31 +0200
Subject: [PATCH 22/24] convert nodes_customer_sampler.py to V3 schema (#10206)

---
 comfy_extras/nodes_custom_sampler.py | 1182 ++++++++++++++------------
 1 file changed, 633 insertions(+), 549 deletions(-)

diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py
index d011f433b..fbb080886 100644
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@@ -3,272 +3,312 @@ import comfy.samplers
 import comfy.sample
 from comfy.k_diffusion import sampling as k_diffusion_sampling
 from comfy.k_diffusion import sa_solver
-from comfy.comfy_types import IO, ComfyNodeABC, InputTypeDict
 import latent_preview
 import torch
 import comfy.utils
 import node_helpers
+from typing_extensions import override
+from comfy_api.latest import ComfyExtension, io
 
 
-class BasicScheduler:
+class BasicScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                     "scheduler": (comfy.samplers.SCHEDULER_NAMES, ),
-                     "steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                      }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="BasicScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Model.Input("model"),
+                io.Combo.Input("scheduler", options=comfy.samplers.SCHEDULER_NAMES),
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, model, scheduler, steps, denoise):
+    @classmethod
+    def execute(cls, model, scheduler, steps, denoise) -> io.NodeOutput:
         total_steps = steps
         if denoise < 1.0:
             if denoise <= 0.0:
-                return (torch.FloatTensor([]),)
+                return io.NodeOutput(torch.FloatTensor([]))
             total_steps = int(steps/denoise)
 
         sigmas = comfy.samplers.calculate_sigmas(model.get_model_object("model_sampling"), scheduler, total_steps).cpu()
         sigmas = sigmas[-(steps + 1):]
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
+
+    get_sigmas = execute
 
 
-class KarrasScheduler:
+class KarrasScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "rho": ("FLOAT", {"default": 7.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="KarrasScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("rho", default=7.0, min=0.0, max=100.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, steps, sigma_max, sigma_min, rho):
+    @classmethod
+    def execute(cls, steps, sigma_max, sigma_min, rho) -> io.NodeOutput:
         sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, rho=rho)
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class ExponentialScheduler:
+    get_sigmas = execute
+
+class ExponentialScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ExponentialScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, steps, sigma_max, sigma_min):
+    @classmethod
+    def execute(cls, steps, sigma_max, sigma_min) -> io.NodeOutput:
         sigmas = k_diffusion_sampling.get_sigmas_exponential(n=steps, sigma_min=sigma_min, sigma_max=sigma_max)
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class PolyexponentialScheduler:
+    get_sigmas = execute
+
+class PolyexponentialScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "rho": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="PolyexponentialScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("rho", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, steps, sigma_max, sigma_min, rho):
+    @classmethod
+    def execute(cls, steps, sigma_max, sigma_min, rho) -> io.NodeOutput:
         sigmas = k_diffusion_sampling.get_sigmas_polyexponential(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, rho=rho)
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class LaplaceScheduler:
+    get_sigmas = execute
+
+class LaplaceScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "sigma_max": ("FLOAT", {"default": 14.614642, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "sigma_min": ("FLOAT", {"default": 0.0291675, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "mu": ("FLOAT", {"default": 0.0, "min": -10.0, "max": 10.0, "step":0.1, "round": False}),
-                     "beta": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 10.0, "step":0.1, "round": False}),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LaplaceScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.1, round=False),
+                io.Float.Input("beta", default=0.5, min=0.0, max=10.0, step=0.1, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, steps, sigma_max, sigma_min, mu, beta):
+    @classmethod
+    def execute(cls, steps, sigma_max, sigma_min, mu, beta) -> io.NodeOutput:
         sigmas = k_diffusion_sampling.get_sigmas_laplace(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, mu=mu, beta=beta)
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
+
+    get_sigmas = execute
 
 
-class SDTurboScheduler:
+class SDTurboScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                     "steps": ("INT", {"default": 1, "min": 1, "max": 10}),
-                     "denoise": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
-                      }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SDTurboScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Model.Input("model"),
+                io.Int.Input("steps", default=1, min=1, max=10),
+                io.Float.Input("denoise", default=1.0, min=0, max=1.0, step=0.01),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, model, steps, denoise):
+    @classmethod
+    def execute(cls, model, steps, denoise) -> io.NodeOutput:
         start_step = 10 - int(10 * denoise)
         timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps]
         sigmas = model.get_model_object("model_sampling").sigma(timesteps)
         sigmas = torch.cat([sigmas, sigmas.new_zeros([1])])
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class BetaSamplingScheduler:
+    get_sigmas = execute
+
+class BetaSamplingScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                     "steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "alpha": ("FLOAT", {"default": 0.6, "min": 0.0, "max": 50.0, "step":0.01, "round": False}),
-                     "beta": ("FLOAT", {"default": 0.6, "min": 0.0, "max": 50.0, "step":0.01, "round": False}),
-                      }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="BetaSamplingScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Model.Input("model"),
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("alpha", default=0.6, min=0.0, max=50.0, step=0.01, round=False),
+                io.Float.Input("beta", default=0.6, min=0.0, max=50.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, model, steps, alpha, beta):
+    @classmethod
+    def execute(cls, model, steps, alpha, beta) -> io.NodeOutput:
         sigmas = comfy.samplers.beta_scheduler(model.get_model_object("model_sampling"), steps, alpha=alpha, beta=beta)
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class VPScheduler:
+    get_sigmas = execute
+
+class VPScheduler(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"steps": ("INT", {"default": 20, "min": 1, "max": 10000}),
-                     "beta_d": ("FLOAT", {"default": 19.9, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}), #TODO: fix default values
-                     "beta_min": ("FLOAT", {"default": 0.1, "min": 0.0, "max": 5000.0, "step":0.01, "round": False}),
-                     "eps_s": ("FLOAT", {"default": 0.001, "min": 0.0, "max": 1.0, "step":0.0001, "round": False}),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/schedulers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="VPScheduler",
+            category="sampling/custom_sampling/schedulers",
+            inputs=[
+                io.Int.Input("steps", default=20, min=1, max=10000),
+                io.Float.Input("beta_d", default=19.9, min=0.0, max=5000.0, step=0.01, round=False), #TODO: fix default values
+                io.Float.Input("beta_min", default=0.1, min=0.0, max=5000.0, step=0.01, round=False),
+                io.Float.Input("eps_s", default=0.001, min=0.0, max=1.0, step=0.0001, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, steps, beta_d, beta_min, eps_s):
+    @classmethod
+    def execute(cls, steps, beta_d, beta_min, eps_s) -> io.NodeOutput:
         sigmas = k_diffusion_sampling.get_sigmas_vp(n=steps, beta_d=beta_d, beta_min=beta_min, eps_s=eps_s)
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class SplitSigmas:
+    get_sigmas = execute
+
+class SplitSigmas(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"sigmas": ("SIGMAS", ),
-                    "step": ("INT", {"default": 0, "min": 0, "max": 10000}),
-                     }
-                }
-    RETURN_TYPES = ("SIGMAS","SIGMAS")
-    RETURN_NAMES = ("high_sigmas", "low_sigmas")
-    CATEGORY = "sampling/custom_sampling/sigmas"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SplitSigmas",
+            category="sampling/custom_sampling/sigmas",
+            inputs=[
+                io.Sigmas.Input("sigmas"),
+                io.Int.Input("step", default=0, min=0, max=10000),
+            ],
+            outputs=[
+                io.Sigmas.Output(display_name="high_sigmas"),
+                io.Sigmas.Output(display_name="low_sigmas"),
+            ]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, sigmas, step):
+    @classmethod
+    def execute(cls, sigmas, step) -> io.NodeOutput:
         sigmas1 = sigmas[:step + 1]
         sigmas2 = sigmas[step:]
-        return (sigmas1, sigmas2)
+        return io.NodeOutput(sigmas1, sigmas2)
 
-class SplitSigmasDenoise:
+    get_sigmas = execute
+
+class SplitSigmasDenoise(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"sigmas": ("SIGMAS", ),
-                    "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
-                     }
-                }
-    RETURN_TYPES = ("SIGMAS","SIGMAS")
-    RETURN_NAMES = ("high_sigmas", "low_sigmas")
-    CATEGORY = "sampling/custom_sampling/sigmas"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SplitSigmasDenoise",
+            category="sampling/custom_sampling/sigmas",
+            inputs=[
+                io.Sigmas.Input("sigmas"),
+                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
+            ],
+            outputs=[
+                io.Sigmas.Output(display_name="high_sigmas"),
+                io.Sigmas.Output(display_name="low_sigmas"),
+            ]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, sigmas, denoise):
+    @classmethod
+    def execute(cls, sigmas, denoise) -> io.NodeOutput:
         steps = max(sigmas.shape[-1] - 1, 0)
         total_steps = round(steps * denoise)
         sigmas1 = sigmas[:-(total_steps)]
         sigmas2 = sigmas[-(total_steps + 1):]
-        return (sigmas1, sigmas2)
+        return io.NodeOutput(sigmas1, sigmas2)
 
-class FlipSigmas:
+    get_sigmas = execute
+
+class FlipSigmas(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"sigmas": ("SIGMAS", ),
-                     }
-                }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/sigmas"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="FlipSigmas",
+            category="sampling/custom_sampling/sigmas",
+            inputs=[io.Sigmas.Input("sigmas")],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "get_sigmas"
-
-    def get_sigmas(self, sigmas):
+    @classmethod
+    def execute(cls, sigmas) -> io.NodeOutput:
         if len(sigmas) == 0:
-            return (sigmas,)
+            return io.NodeOutput(sigmas)
 
         sigmas = sigmas.flip(0)
         if sigmas[0] == 0:
             sigmas[0] = 0.0001
-        return (sigmas,)
+        return io.NodeOutput(sigmas)
 
-class SetFirstSigma:
+    get_sigmas = execute
+
+class SetFirstSigma(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"sigmas": ("SIGMAS", ),
-                     "sigma": ("FLOAT", {"default": 136.0, "min": 0.0, "max": 20000.0, "step": 0.001, "round": False}),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/sigmas"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SetFirstSigma",
+            category="sampling/custom_sampling/sigmas",
+            inputs=[
+                io.Sigmas.Input("sigmas"),
+                io.Float.Input("sigma", default=136.0, min=0.0, max=20000.0, step=0.001, round=False),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "set_first_sigma"
-
-    def set_first_sigma(self, sigmas, sigma):
+    @classmethod
+    def execute(cls, sigmas, sigma) -> io.NodeOutput:
         sigmas = sigmas.clone()
         sigmas[0] = sigma
-        return (sigmas, )
+        return io.NodeOutput(sigmas)
 
-class ExtendIntermediateSigmas:
+    set_first_sigma = execute
+
+class ExtendIntermediateSigmas(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"sigmas": ("SIGMAS", ),
-                     "steps": ("INT", {"default": 2, "min": 1, "max": 100}),
-                     "start_at_sigma": ("FLOAT", {"default": -1.0, "min": -1.0, "max": 20000.0, "step": 0.01, "round": False}),
-                     "end_at_sigma": ("FLOAT", {"default": 12.0, "min":  0.0, "max": 20000.0, "step": 0.01, "round": False}),
-                     "spacing": (['linear', 'cosine', 'sine'],),
-                    }
-               }
-    RETURN_TYPES = ("SIGMAS",)
-    CATEGORY = "sampling/custom_sampling/sigmas"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ExtendIntermediateSigmas",
+            category="sampling/custom_sampling/sigmas",
+            inputs=[
+                io.Sigmas.Input("sigmas"),
+                io.Int.Input("steps", default=2, min=1, max=100),
+                io.Float.Input("start_at_sigma", default=-1.0, min=-1.0, max=20000.0, step=0.01, round=False),
+                io.Float.Input("end_at_sigma", default=12.0, min=0.0, max=20000.0, step=0.01, round=False),
+                io.Combo.Input("spacing", options=['linear', 'cosine', 'sine']),
+            ],
+            outputs=[io.Sigmas.Output()]
+        )
 
-    FUNCTION = "extend"
-
-    def extend(self, sigmas: torch.Tensor, steps: int, start_at_sigma: float, end_at_sigma: float, spacing: str):
+    @classmethod
+    def execute(cls, sigmas: torch.Tensor, steps: int, start_at_sigma: float, end_at_sigma: float, spacing: str) -> io.NodeOutput:
         if start_at_sigma < 0:
             start_at_sigma = float("inf")
 
@@ -299,27 +339,27 @@ class ExtendIntermediateSigmas:
 
         extended_sigmas = torch.FloatTensor(extended_sigmas)
 
-        return (extended_sigmas,)
+        return io.NodeOutput(extended_sigmas)
+
+    extend = execute
 
 
-class SamplingPercentToSigma:
+class SamplingPercentToSigma(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "model": (IO.MODEL, {}),
-                "sampling_percent": (IO.FLOAT, {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.0001}),
-                "return_actual_sigma": (IO.BOOLEAN, {"default": False, "tooltip": "Return the actual sigma value instead of the value used for interval checks.\nThis only affects results at 0.0 and 1.0."}),
-            }
-        }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplingPercentToSigma",
+            category="sampling/custom_sampling/sigmas",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("sampling_percent", default=0.0, min=0.0, max=1.0, step=0.0001),
+                io.Boolean.Input("return_actual_sigma", default=False, tooltip="Return the actual sigma value instead of the value used for interval checks.\nThis only affects results at 0.0 and 1.0."),
+            ],
+            outputs=[io.Float.Output(display_name="sigma_value")]
+        )
 
-    RETURN_TYPES = (IO.FLOAT,)
-    RETURN_NAMES = ("sigma_value",)
-    CATEGORY = "sampling/custom_sampling/sigmas"
-
-    FUNCTION = "get_sigma"
-
-    def get_sigma(self, model, sampling_percent, return_actual_sigma):
+    @classmethod
+    def execute(cls, model, sampling_percent, return_actual_sigma) -> io.NodeOutput:
         model_sampling = model.get_model_object("model_sampling")
         sigma_val = model_sampling.percent_to_sigma(sampling_percent)
         if return_actual_sigma:
@@ -327,212 +367,234 @@ class SamplingPercentToSigma:
                 sigma_val = model_sampling.sigma_max.item()
             elif sampling_percent == 1.0:
                 sigma_val = model_sampling.sigma_min.item()
-        return (sigma_val,)
+        return io.NodeOutput(sigma_val)
+
+    get_sigma = execute
 
 
-class KSamplerSelect:
+class KSamplerSelect(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"sampler_name": (comfy.samplers.SAMPLER_NAMES, ),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="KSamplerSelect",
+            category="sampling/custom_sampling/samplers",
+            inputs=[io.Combo.Input("sampler_name", options=comfy.samplers.SAMPLER_NAMES)],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, sampler_name):
+    @classmethod
+    def execute(cls, sampler_name) -> io.NodeOutput:
         sampler = comfy.samplers.sampler_object(sampler_name)
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerDPMPP_3M_SDE:
+    get_sampler = execute
+
+class SamplerDPMPP_3M_SDE(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "noise_device": (['gpu', 'cpu'], ),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerDPMPP_3M_SDE",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Combo.Input("noise_device", options=['gpu', 'cpu']),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, eta, s_noise, noise_device):
+    @classmethod
+    def execute(cls, eta, s_noise, noise_device) -> io.NodeOutput:
         if noise_device == 'cpu':
             sampler_name = "dpmpp_3m_sde"
         else:
             sampler_name = "dpmpp_3m_sde_gpu"
         sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise})
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerDPMPP_2M_SDE:
+    get_sampler = execute
+
+class SamplerDPMPP_2M_SDE(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"solver_type": (['midpoint', 'heun'], ),
-                     "eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "noise_device": (['gpu', 'cpu'], ),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerDPMPP_2M_SDE",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Combo.Input("solver_type", options=['midpoint', 'heun']),
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Combo.Input("noise_device", options=['gpu', 'cpu']),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, solver_type, eta, s_noise, noise_device):
+    @classmethod
+    def execute(cls, solver_type, eta, s_noise, noise_device) -> io.NodeOutput:
         if noise_device == 'cpu':
             sampler_name = "dpmpp_2m_sde"
         else:
             sampler_name = "dpmpp_2m_sde_gpu"
         sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "solver_type": solver_type})
-        return (sampler, )
+        return io.NodeOutput(sampler)
+
+    get_sampler = execute
 
 
-class SamplerDPMPP_SDE:
+class SamplerDPMPP_SDE(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "r": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "noise_device": (['gpu', 'cpu'], ),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerDPMPP_SDE",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("r", default=0.5, min=0.0, max=100.0, step=0.01, round=False),
+                io.Combo.Input("noise_device", options=['gpu', 'cpu']),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, eta, s_noise, r, noise_device):
+    @classmethod
+    def execute(cls, eta, s_noise, r, noise_device) -> io.NodeOutput:
         if noise_device == 'cpu':
             sampler_name = "dpmpp_sde"
         else:
             sampler_name = "dpmpp_sde_gpu"
         sampler = comfy.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "r": r})
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerDPMPP_2S_Ancestral:
+    get_sampler = execute
+
+class SamplerDPMPP_2S_Ancestral(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerDPMPP_2S_Ancestral",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, eta, s_noise):
+    @classmethod
+    def execute(cls, eta, s_noise) -> io.NodeOutput:
         sampler = comfy.samplers.ksampler("dpmpp_2s_ancestral", {"eta": eta, "s_noise": s_noise})
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerEulerAncestral:
+    get_sampler = execute
+
+class SamplerEulerAncestral(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerEulerAncestral",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, eta, s_noise):
+    @classmethod
+    def execute(cls, eta, s_noise) -> io.NodeOutput:
         sampler = comfy.samplers.ksampler("euler_ancestral", {"eta": eta, "s_noise": s_noise})
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerEulerAncestralCFGPP:
+    get_sampler = execute
+
+class SamplerEulerAncestralCFGPP(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "eta": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step":0.01, "round": False}),
-                "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step":0.01, "round": False}),
-            }}
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerEulerAncestralCFGPP",
+            display_name="SamplerEulerAncestralCFG++",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Float.Input("eta", default=1.0, min=0.0, max=1.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=10.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, eta, s_noise):
+    @classmethod
+    def execute(cls, eta, s_noise) -> io.NodeOutput:
         sampler = comfy.samplers.ksampler(
             "euler_ancestral_cfg_pp",
             {"eta": eta, "s_noise": s_noise})
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerLMS:
+    get_sampler = execute
+
+class SamplerLMS(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"order": ("INT", {"default": 4, "min": 1, "max": 100}),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerLMS",
+            category="sampling/custom_sampling/samplers",
+            inputs=[io.Int.Input("order", default=4, min=1, max=100)],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, order):
+    @classmethod
+    def execute(cls, order) -> io.NodeOutput:
         sampler = comfy.samplers.ksampler("lms", {"order": order})
-        return (sampler, )
+        return io.NodeOutput(sampler)
 
-class SamplerDPMAdaptative:
+    get_sampler = execute
+
+class SamplerDPMAdaptative(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"order": ("INT", {"default": 3, "min": 2, "max": 3}),
-                     "rtol": ("FLOAT", {"default": 0.05, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "atol": ("FLOAT", {"default": 0.0078, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "h_init": ("FLOAT", {"default": 0.05, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "pcoeff": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "icoeff": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "dcoeff": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "accept_safety": ("FLOAT", {"default": 0.81, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "eta": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                     "s_noise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0, "step":0.01, "round": False}),
-                      }
-               }
-    RETURN_TYPES = ("SAMPLER",)
-    CATEGORY = "sampling/custom_sampling/samplers"
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerDPMAdaptative",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Int.Input("order", default=3, min=2, max=3),
+                io.Float.Input("rtol", default=0.05, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("atol", default=0.0078, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("h_init", default=0.05, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("pcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("icoeff", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("dcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("accept_safety", default=0.81, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("eta", default=0.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise):
+    @classmethod
+    def execute(cls, order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise) -> io.NodeOutput:
         sampler = comfy.samplers.ksampler("dpm_adaptive", {"order": order, "rtol": rtol, "atol": atol, "h_init": h_init, "pcoeff": pcoeff,
                                                               "icoeff": icoeff, "dcoeff": dcoeff, "accept_safety": accept_safety, "eta": eta,
                                                               "s_noise":s_noise })
-        return (sampler, )
+        return io.NodeOutput(sampler)
+
+    get_sampler = execute
 
 
-class SamplerER_SDE(ComfyNodeABC):
+class SamplerER_SDE(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "solver_type": (IO.COMBO, {"options": ["ER-SDE", "Reverse-time SDE", "ODE"]}),
-                "max_stage": (IO.INT, {"default": 3, "min": 1, "max": 3}),
-                "eta": (
-                    IO.FLOAT,
-                    {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False, "tooltip": "Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type."},
-                ),
-                "s_noise": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False}),
-            }
-        }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerER_SDE",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Combo.Input("solver_type", options=["ER-SDE", "Reverse-time SDE", "ODE"]),
+                io.Int.Input("max_stage", default=3, min=1, max=3),
+                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type."),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    RETURN_TYPES = (IO.SAMPLER,)
-    CATEGORY = "sampling/custom_sampling/samplers"
-
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, solver_type, max_stage, eta, s_noise):
+    @classmethod
+    def execute(cls, solver_type, max_stage, eta, s_noise) -> io.NodeOutput:
         if solver_type == "ODE" or (solver_type == "Reverse-time SDE" and eta == 0):
             eta = 0
             s_noise = 0
@@ -548,32 +610,33 @@ class SamplerER_SDE(ComfyNodeABC):
 
         sampler_name = "er_sde"
         sampler = comfy.samplers.ksampler(sampler_name, {"s_noise": s_noise, "noise_scaler": noise_scaler, "max_stage": max_stage})
-        return (sampler,)
+        return io.NodeOutput(sampler)
+
+    get_sampler = execute
 
 
-class SamplerSASolver(ComfyNodeABC):
+class SamplerSASolver(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(cls) -> InputTypeDict:
-        return {
-            "required": {
-                "model": (IO.MODEL, {}),
-                "eta": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "round": False},),
-                "sde_start_percent": (IO.FLOAT, {"default": 0.2, "min": 0.0, "max": 1.0, "step": 0.001},),
-                "sde_end_percent": (IO.FLOAT, {"default": 0.8, "min": 0.0, "max": 1.0, "step": 0.001},),
-                "s_noise": (IO.FLOAT, {"default": 1.0, "min": 0.0, "max": 100.0, "step": 0.01, "round": False},),
-                "predictor_order": (IO.INT, {"default": 3, "min": 1, "max": 6}),
-                "corrector_order": (IO.INT, {"default": 4, "min": 0, "max": 6}),
-                "use_pece": (IO.BOOLEAN, {}),
-                "simple_order_2": (IO.BOOLEAN, {}),
-            }
-        }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerSASolver",
+            category="sampling/custom_sampling/samplers",
+            inputs=[
+                io.Model.Input("model"),
+                io.Float.Input("eta", default=1.0, min=0.0, max=10.0, step=0.01, round=False),
+                io.Float.Input("sde_start_percent", default=0.2, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("sde_end_percent", default=0.8, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
+                io.Int.Input("predictor_order", default=3, min=1, max=6),
+                io.Int.Input("corrector_order", default=4, min=0, max=6),
+                io.Boolean.Input("use_pece"),
+                io.Boolean.Input("simple_order_2"),
+            ],
+            outputs=[io.Sampler.Output()]
+        )
 
-    RETURN_TYPES = (IO.SAMPLER,)
-    CATEGORY = "sampling/custom_sampling/samplers"
-
-    FUNCTION = "get_sampler"
-
-    def get_sampler(self, model, eta, sde_start_percent, sde_end_percent, s_noise, predictor_order, corrector_order, use_pece, simple_order_2):
+    @classmethod
+    def execute(cls, model, eta, sde_start_percent, sde_end_percent, s_noise, predictor_order, corrector_order, use_pece, simple_order_2) -> io.NodeOutput:
         model_sampling = model.get_model_object("model_sampling")
         start_sigma = model_sampling.percent_to_sigma(sde_start_percent)
         end_sigma = model_sampling.percent_to_sigma(sde_end_percent)
@@ -591,7 +654,9 @@ class SamplerSASolver(ComfyNodeABC):
                 "simple_order_2": simple_order_2,
             },
         )
-        return (sampler,)
+        return io.NodeOutput(sampler)
+
+    get_sampler = execute
 
 
 class Noise_EmptyNoise:
@@ -612,30 +677,31 @@ class Noise_RandomNoise:
         batch_inds = input_latent["batch_index"] if "batch_index" in input_latent else None
         return comfy.sample.prepare_noise(latent_image, self.seed, batch_inds)
 
-class SamplerCustom:
+class SamplerCustom(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                    "add_noise": ("BOOLEAN", {"default": True}),
-                    "noise_seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff, "control_after_generate": True}),
-                    "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
-                    "positive": ("CONDITIONING", ),
-                    "negative": ("CONDITIONING", ),
-                    "sampler": ("SAMPLER", ),
-                    "sigmas": ("SIGMAS", ),
-                    "latent_image": ("LATENT", ),
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerCustom",
+            category="sampling/custom_sampling",
+            inputs=[
+                io.Model.Input("model"),
+                io.Boolean.Input("add_noise", default=True),
+                io.Int.Input("noise_seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True),
+                io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01),
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Sampler.Input("sampler"),
+                io.Sigmas.Input("sigmas"),
+                io.Latent.Input("latent_image"),
+            ],
+            outputs=[
+                io.Latent.Output(display_name="output"),
+                io.Latent.Output(display_name="denoised_output"),
+            ]
+        )
 
-    RETURN_TYPES = ("LATENT","LATENT")
-    RETURN_NAMES = ("output", "denoised_output")
-
-    FUNCTION = "sample"
-
-    CATEGORY = "sampling/custom_sampling"
-
-    def sample(self, model, add_noise, noise_seed, cfg, positive, negative, sampler, sigmas, latent_image):
+    @classmethod
+    def execute(cls, model, add_noise, noise_seed, cfg, positive, negative, sampler, sigmas, latent_image) -> io.NodeOutput:
         latent = latent_image
         latent_image = latent["samples"]
         latent = latent.copy()
@@ -664,52 +730,58 @@ class SamplerCustom:
             out_denoised["samples"] = model.model.process_latent_out(x0_output["x0"].cpu())
         else:
             out_denoised = out
-        return (out, out_denoised)
+        return io.NodeOutput(out, out_denoised)
+
+    sample = execute
 
 class Guider_Basic(comfy.samplers.CFGGuider):
     def set_conds(self, positive):
         self.inner_set_conds({"positive": positive})
 
-class BasicGuider:
+class BasicGuider(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                    "conditioning": ("CONDITIONING", ),
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="BasicGuider",
+            category="sampling/custom_sampling/guiders",
+            inputs=[
+                io.Model.Input("model"),
+                io.Conditioning.Input("conditioning"),
+            ],
+            outputs=[io.Guider.Output()]
+        )
 
-    RETURN_TYPES = ("GUIDER",)
-
-    FUNCTION = "get_guider"
-    CATEGORY = "sampling/custom_sampling/guiders"
-
-    def get_guider(self, model, conditioning):
+    @classmethod
+    def execute(cls, model, conditioning) -> io.NodeOutput:
         guider = Guider_Basic(model)
         guider.set_conds(conditioning)
-        return (guider,)
+        return io.NodeOutput(guider)
 
-class CFGGuider:
+    get_guider = execute
+
+class CFGGuider(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                    "positive": ("CONDITIONING", ),
-                    "negative": ("CONDITIONING", ),
-                    "cfg": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="CFGGuider",
+            category="sampling/custom_sampling/guiders",
+            inputs=[
+                io.Model.Input("model"),
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01),
+            ],
+            outputs=[io.Guider.Output()]
+        )
 
-    RETURN_TYPES = ("GUIDER",)
-
-    FUNCTION = "get_guider"
-    CATEGORY = "sampling/custom_sampling/guiders"
-
-    def get_guider(self, model, positive, negative, cfg):
+    @classmethod
+    def execute(cls, model, positive, negative, cfg) -> io.NodeOutput:
         guider = comfy.samplers.CFGGuider(model)
         guider.set_conds(positive, negative)
         guider.set_cfg(cfg)
-        return (guider,)
+        return io.NodeOutput(guider)
+
+    get_guider = execute
 
 class Guider_DualCFG(comfy.samplers.CFGGuider):
     def set_cfg(self, cfg1, cfg2, nested=False):
@@ -740,84 +812,88 @@ class Guider_DualCFG(comfy.samplers.CFGGuider):
             out = comfy.samplers.calc_cond_batch(self.inner_model, [negative_cond, middle_cond, positive_cond], x, timestep, model_options)
             return comfy.samplers.cfg_function(self.inner_model, out[1], out[0], self.cfg2, x, timestep, model_options=model_options, cond=middle_cond, uncond=negative_cond) + (out[2] - out[1]) * self.cfg1
 
-class DualCFGGuider:
+class DualCFGGuider(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                    "cond1": ("CONDITIONING", ),
-                    "cond2": ("CONDITIONING", ),
-                    "negative": ("CONDITIONING", ),
-                    "cfg_conds": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
-                    "cfg_cond2_negative": ("FLOAT", {"default": 8.0, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}),
-                    "style": (["regular", "nested"],),
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="DualCFGGuider",
+            category="sampling/custom_sampling/guiders",
+            inputs=[
+                io.Model.Input("model"),
+                io.Conditioning.Input("cond1"),
+                io.Conditioning.Input("cond2"),
+                io.Conditioning.Input("negative"),
+                io.Float.Input("cfg_conds", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01),
+                io.Float.Input("cfg_cond2_negative", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01),
+                io.Combo.Input("style", options=["regular", "nested"]),
+            ],
+            outputs=[io.Guider.Output()]
+        )
 
-    RETURN_TYPES = ("GUIDER",)
-
-    FUNCTION = "get_guider"
-    CATEGORY = "sampling/custom_sampling/guiders"
-
-    def get_guider(self, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative, style):
+    @classmethod
+    def execute(cls, model, cond1, cond2, negative, cfg_conds, cfg_cond2_negative, style) -> io.NodeOutput:
         guider = Guider_DualCFG(model)
         guider.set_conds(cond1, cond2, negative)
         guider.set_cfg(cfg_conds, cfg_cond2_negative, nested=(style == "nested"))
-        return (guider,)
+        return io.NodeOutput(guider)
 
-class DisableNoise:
+    get_guider = execute
+
+class DisableNoise(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":{
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="DisableNoise",
+            category="sampling/custom_sampling/noise",
+            inputs=[],
+            outputs=[io.Noise.Output()]
+        )
 
-    RETURN_TYPES = ("NOISE",)
-    FUNCTION = "get_noise"
-    CATEGORY = "sampling/custom_sampling/noise"
-
-    def get_noise(self):
-        return (Noise_EmptyNoise(),)
-
-
-class RandomNoise(DisableNoise):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "noise_seed": ("INT", {
-                    "default": 0,
-                    "min": 0,
-                    "max": 0xffffffffffffffff,
-                    "control_after_generate": True,
-                }),
-            }
-        }
+    def execute(cls) -> io.NodeOutput:
+        return io.NodeOutput(Noise_EmptyNoise())
 
-    def get_noise(self, noise_seed):
-        return (Noise_RandomNoise(noise_seed),)
+    get_noise = execute
 
 
-class SamplerCustomAdvanced:
+class RandomNoise(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"noise": ("NOISE", ),
-                    "guider": ("GUIDER", ),
-                    "sampler": ("SAMPLER", ),
-                    "sigmas": ("SIGMAS", ),
-                    "latent_image": ("LATENT", ),
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="RandomNoise",
+            category="sampling/custom_sampling/noise",
+            inputs=[io.Int.Input("noise_seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True)],
+            outputs=[io.Noise.Output()]
+        )
 
-    RETURN_TYPES = ("LATENT","LATENT")
-    RETURN_NAMES = ("output", "denoised_output")
+    @classmethod
+    def execute(cls, noise_seed) -> io.NodeOutput:
+        return io.NodeOutput(Noise_RandomNoise(noise_seed))
 
-    FUNCTION = "sample"
+    get_noise = execute
 
-    CATEGORY = "sampling/custom_sampling"
 
-    def sample(self, noise, guider, sampler, sigmas, latent_image):
+class SamplerCustomAdvanced(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SamplerCustomAdvanced",
+            category="sampling/custom_sampling",
+            inputs=[
+                io.Noise.Input("noise"),
+                io.Guider.Input("guider"),
+                io.Sampler.Input("sampler"),
+                io.Sigmas.Input("sigmas"),
+                io.Latent.Input("latent_image"),
+            ],
+            outputs=[
+                io.Latent.Output(display_name="output"),
+                io.Latent.Output(display_name="denoised_output"),
+            ]
+        )
+
+    @classmethod
+    def execute(cls, noise, guider, sampler, sigmas, latent_image) -> io.NodeOutput:
         latent = latent_image
         latent_image = latent["samples"]
         latent = latent.copy()
@@ -842,28 +918,32 @@ class SamplerCustomAdvanced:
             out_denoised["samples"] = guider.model_patcher.model.process_latent_out(x0_output["x0"].cpu())
         else:
             out_denoised = out
-        return (out, out_denoised)
+        return io.NodeOutput(out, out_denoised)
 
-class AddNoise:
+    sample = execute
+
+class AddNoise(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {"required":
-                    {"model": ("MODEL",),
-                     "noise": ("NOISE", ),
-                     "sigmas": ("SIGMAS", ),
-                     "latent_image": ("LATENT", ),
-                     }
-                }
+    def define_schema(cls):
+        return io.Schema(
+            node_id="AddNoise",
+            category="_for_testing/custom_sampling/noise",
+            is_experimental=True,
+            inputs=[
+                io.Model.Input("model"),
+                io.Noise.Input("noise"),
+                io.Sigmas.Input("sigmas"),
+                io.Latent.Input("latent_image"),
+            ],
+            outputs=[
+                io.Latent.Output(),
+            ]
+        )
 
-    RETURN_TYPES = ("LATENT",)
-
-    FUNCTION = "add_noise"
-
-    CATEGORY = "_for_testing/custom_sampling/noise"
-
-    def add_noise(self, model, noise, sigmas, latent_image):
+    @classmethod
+    def execute(cls, model, noise, sigmas, latent_image) -> io.NodeOutput:
         if len(sigmas) == 0:
-            return latent_image
+            return io.NodeOutput(latent_image)
 
         latent = latent_image
         latent_image = latent["samples"]
@@ -887,46 +967,50 @@ class AddNoise:
 
         out = latent.copy()
         out["samples"] = noisy
-        return (out,)
+        return io.NodeOutput(out)
+
+    add_noise = execute
 
 
-NODE_CLASS_MAPPINGS = {
-    "SamplerCustom": SamplerCustom,
-    "BasicScheduler": BasicScheduler,
-    "KarrasScheduler": KarrasScheduler,
-    "ExponentialScheduler": ExponentialScheduler,
-    "PolyexponentialScheduler": PolyexponentialScheduler,
-    "LaplaceScheduler": LaplaceScheduler,
-    "VPScheduler": VPScheduler,
-    "BetaSamplingScheduler": BetaSamplingScheduler,
-    "SDTurboScheduler": SDTurboScheduler,
-    "KSamplerSelect": KSamplerSelect,
-    "SamplerEulerAncestral": SamplerEulerAncestral,
-    "SamplerEulerAncestralCFGPP": SamplerEulerAncestralCFGPP,
-    "SamplerLMS": SamplerLMS,
-    "SamplerDPMPP_3M_SDE": SamplerDPMPP_3M_SDE,
-    "SamplerDPMPP_2M_SDE": SamplerDPMPP_2M_SDE,
-    "SamplerDPMPP_SDE": SamplerDPMPP_SDE,
-    "SamplerDPMPP_2S_Ancestral": SamplerDPMPP_2S_Ancestral,
-    "SamplerDPMAdaptative": SamplerDPMAdaptative,
-    "SamplerER_SDE": SamplerER_SDE,
-    "SamplerSASolver": SamplerSASolver,
-    "SplitSigmas": SplitSigmas,
-    "SplitSigmasDenoise": SplitSigmasDenoise,
-    "FlipSigmas": FlipSigmas,
-    "SetFirstSigma": SetFirstSigma,
-    "ExtendIntermediateSigmas": ExtendIntermediateSigmas,
-    "SamplingPercentToSigma": SamplingPercentToSigma,
+class CustomSamplersExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            SamplerCustom,
+            BasicScheduler,
+            KarrasScheduler,
+            ExponentialScheduler,
+            PolyexponentialScheduler,
+            LaplaceScheduler,
+            VPScheduler,
+            BetaSamplingScheduler,
+            SDTurboScheduler,
+            KSamplerSelect,
+            SamplerEulerAncestral,
+            SamplerEulerAncestralCFGPP,
+            SamplerLMS,
+            SamplerDPMPP_3M_SDE,
+            SamplerDPMPP_2M_SDE,
+            SamplerDPMPP_SDE,
+            SamplerDPMPP_2S_Ancestral,
+            SamplerDPMAdaptative,
+            SamplerER_SDE,
+            SamplerSASolver,
+            SplitSigmas,
+            SplitSigmasDenoise,
+            FlipSigmas,
+            SetFirstSigma,
+            ExtendIntermediateSigmas,
+            SamplingPercentToSigma,
+            CFGGuider,
+            DualCFGGuider,
+            BasicGuider,
+            RandomNoise,
+            DisableNoise,
+            AddNoise,
+            SamplerCustomAdvanced,
+        ]
 
-    "CFGGuider": CFGGuider,
-    "DualCFGGuider": DualCFGGuider,
-    "BasicGuider": BasicGuider,
-    "RandomNoise": RandomNoise,
-    "DisableNoise": DisableNoise,
-    "AddNoise": AddNoise,
-    "SamplerCustomAdvanced": SamplerCustomAdvanced,
-}
 
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "SamplerEulerAncestralCFGPP": "SamplerEulerAncestralCFG++",
-}
+async def comfy_entrypoint() -> CustomSamplersExtension:
+    return CustomSamplersExtension()

From cc6a8dcd1ad9cc9ef7602ee141174a0cea0ed4ce Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Thu, 27 Nov 2025 08:18:08 +0800
Subject: [PATCH 23/24] Dataset Processing Nodes and Improved LoRA Trainer
 Nodes with multi resolution supports. (#10708)

* Create nodes_dataset.py

* Add encoded dataset caching mechanism

* make training node to work with our dataset system

* allow trainer node to get different resolution dataset

* move all dataset related implementation to nodes_dataset

* Rewrite dataset system with new io schema

* Rewrite training system with new io schema

* add ui pbar

* Add outputs' id/name

* Fix bad id/naming

* use single process instead of input list when no need

* fix wrong output_list flag

* use torch.load/save and fix bad behaviors
---
 comfy_extras/nodes_dataset.py | 1532 +++++++++++++++++++++++++++++++++
 comfy_extras/nodes_train.py   |  967 ++++++++++-----------
 nodes.py                      |    1 +
 3 files changed, 1980 insertions(+), 520 deletions(-)
 create mode 100644 comfy_extras/nodes_dataset.py

diff --git a/comfy_extras/nodes_dataset.py b/comfy_extras/nodes_dataset.py
new file mode 100644
index 000000000..b23867505
--- /dev/null
+++ b/comfy_extras/nodes_dataset.py
@@ -0,0 +1,1532 @@
+import logging
+import os
+import math
+import json
+
+import numpy as np
+import torch
+from PIL import Image
+from typing_extensions import override
+
+import folder_paths
+import node_helpers
+from comfy_api.latest import ComfyExtension, io
+
+
+def load_and_process_images(image_files, input_dir):
+    """Utility function to load and process a list of images.
+
+    Args:
+        image_files: List of image filenames
+        input_dir: Base directory containing the images
+        resize_method: How to handle images of different sizes ("None", "Stretch", "Crop", "Pad")
+
+    Returns:
+        torch.Tensor: Batch of processed images
+    """
+    if not image_files:
+        raise ValueError("No valid images found in input")
+
+    output_images = []
+
+    for file in image_files:
+        image_path = os.path.join(input_dir, file)
+        img = node_helpers.pillow(Image.open, image_path)
+
+        if img.mode == "I":
+            img = img.point(lambda i: i * (1 / 255))
+        img = img.convert("RGB")
+        img_array = np.array(img).astype(np.float32) / 255.0
+        img_tensor = torch.from_numpy(img_array)[None,]
+        output_images.append(img_tensor)
+
+    return output_images
+
+
+class LoadImageDataSetFromFolderNode(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LoadImageDataSetFromFolder",
+            display_name="Load Image Dataset from Folder",
+            category="dataset",
+            is_experimental=True,
+            inputs=[
+                io.Combo.Input(
+                    "folder",
+                    options=folder_paths.get_input_subfolders(),
+                    tooltip="The folder to load images from.",
+                )
+            ],
+            outputs=[
+                io.Image.Output(
+                    display_name="images",
+                    is_output_list=True,
+                    tooltip="List of loaded images",
+                )
+            ],
+        )
+
+    @classmethod
+    def execute(cls, folder):
+        sub_input_dir = os.path.join(folder_paths.get_input_directory(), folder)
+        valid_extensions = [".png", ".jpg", ".jpeg", ".webp"]
+        image_files = [
+            f
+            for f in os.listdir(sub_input_dir)
+            if any(f.lower().endswith(ext) for ext in valid_extensions)
+        ]
+        output_tensor = load_and_process_images(image_files, sub_input_dir)
+        return io.NodeOutput(output_tensor)
+
+
+class LoadImageTextDataSetFromFolderNode(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LoadImageTextDataSetFromFolder",
+            display_name="Load Image and Text Dataset from Folder",
+            category="dataset",
+            is_experimental=True,
+            inputs=[
+                io.Combo.Input(
+                    "folder",
+                    options=folder_paths.get_input_subfolders(),
+                    tooltip="The folder to load images from.",
+                )
+            ],
+            outputs=[
+                io.Image.Output(
+                    display_name="images",
+                    is_output_list=True,
+                    tooltip="List of loaded images",
+                ),
+                io.String.Output(
+                    display_name="texts",
+                    is_output_list=True,
+                    tooltip="List of text captions",
+                ),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, folder):
+        logging.info(f"Loading images from folder: {folder}")
+
+        sub_input_dir = os.path.join(folder_paths.get_input_directory(), folder)
+        valid_extensions = [".png", ".jpg", ".jpeg", ".webp"]
+
+        image_files = []
+        for item in os.listdir(sub_input_dir):
+            path = os.path.join(sub_input_dir, item)
+            if any(item.lower().endswith(ext) for ext in valid_extensions):
+                image_files.append(path)
+            elif os.path.isdir(path):
+                # Support kohya-ss/sd-scripts folder structure
+                repeat = 1
+                if item.split("_")[0].isdigit():
+                    repeat = int(item.split("_")[0])
+                image_files.extend(
+                    [
+                        os.path.join(path, f)
+                        for f in os.listdir(path)
+                        if any(f.lower().endswith(ext) for ext in valid_extensions)
+                    ]
+                    * repeat
+                )
+
+        caption_file_path = [
+            f.replace(os.path.splitext(f)[1], ".txt") for f in image_files
+        ]
+        captions = []
+        for caption_file in caption_file_path:
+            caption_path = os.path.join(sub_input_dir, caption_file)
+            if os.path.exists(caption_path):
+                with open(caption_path, "r", encoding="utf-8") as f:
+                    caption = f.read().strip()
+                    captions.append(caption)
+            else:
+                captions.append("")
+
+        output_tensor = load_and_process_images(image_files, sub_input_dir)
+
+        logging.info(f"Loaded {len(output_tensor)} images from {sub_input_dir}.")
+        return io.NodeOutput(output_tensor, captions)
+
+
+def save_images_to_folder(image_list, output_dir, prefix="image"):
+    """Utility function to save a list of image tensors to disk.
+
+    Args:
+        image_list: List of image tensors (each [1, H, W, C] or [H, W, C] or [C, H, W])
+        output_dir: Directory to save images to
+        prefix: Filename prefix
+
+    Returns:
+        List of saved filenames
+    """
+    os.makedirs(output_dir, exist_ok=True)
+    saved_files = []
+
+    for idx, img_tensor in enumerate(image_list):
+        # Handle different tensor shapes
+        if isinstance(img_tensor, torch.Tensor):
+            # Remove batch dimension if present [1, H, W, C] -> [H, W, C]
+            if img_tensor.dim() == 4 and img_tensor.shape[0] == 1:
+                img_tensor = img_tensor.squeeze(0)
+
+            # If tensor is [C, H, W], permute to [H, W, C]
+            if img_tensor.dim() == 3 and img_tensor.shape[0] in [1, 3, 4]:
+                if (
+                    img_tensor.shape[0] <= 4
+                    and img_tensor.shape[1] > 4
+                    and img_tensor.shape[2] > 4
+                ):
+                    img_tensor = img_tensor.permute(1, 2, 0)
+
+            # Convert to numpy and scale to 0-255
+            img_array = img_tensor.cpu().numpy()
+            img_array = np.clip(img_array * 255.0, 0, 255).astype(np.uint8)
+
+            # Convert to PIL Image
+            img = Image.fromarray(img_array)
+        else:
+            raise ValueError(f"Expected torch.Tensor, got {type(img_tensor)}")
+
+        # Save image
+        filename = f"{prefix}_{idx:05d}.png"
+        filepath = os.path.join(output_dir, filename)
+        img.save(filepath)
+        saved_files.append(filename)
+
+    return saved_files
+
+
+class SaveImageDataSetToFolderNode(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SaveImageDataSetToFolder",
+            display_name="Save Image Dataset to Folder",
+            category="dataset",
+            is_experimental=True,
+            is_output_node=True,
+            is_input_list=True,  # Receive images as list
+            inputs=[
+                io.Image.Input("images", tooltip="List of images to save."),
+                io.String.Input(
+                    "folder_name",
+                    default="dataset",
+                    tooltip="Name of the folder to save images to (inside output directory).",
+                ),
+                io.String.Input(
+                    "filename_prefix",
+                    default="image",
+                    tooltip="Prefix for saved image filenames.",
+                ),
+            ],
+            outputs=[],
+        )
+
+    @classmethod
+    def execute(cls, images, folder_name, filename_prefix):
+        # Extract scalar values
+        folder_name = folder_name[0]
+        filename_prefix = filename_prefix[0]
+
+        output_dir = os.path.join(folder_paths.get_output_directory(), folder_name)
+        saved_files = save_images_to_folder(images, output_dir, filename_prefix)
+
+        logging.info(f"Saved {len(saved_files)} images to {output_dir}.")
+        return io.NodeOutput()
+
+
+class SaveImageTextDataSetToFolderNode(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SaveImageTextDataSetToFolder",
+            display_name="Save Image and Text Dataset to Folder",
+            category="dataset",
+            is_experimental=True,
+            is_output_node=True,
+            is_input_list=True,  # Receive both images and texts as lists
+            inputs=[
+                io.Image.Input("images", tooltip="List of images to save."),
+                io.String.Input("texts", tooltip="List of text captions to save."),
+                io.String.Input(
+                    "folder_name",
+                    default="dataset",
+                    tooltip="Name of the folder to save images to (inside output directory).",
+                ),
+                io.String.Input(
+                    "filename_prefix",
+                    default="image",
+                    tooltip="Prefix for saved image filenames.",
+                ),
+            ],
+            outputs=[],
+        )
+
+    @classmethod
+    def execute(cls, images, texts, folder_name, filename_prefix):
+        # Extract scalar values
+        folder_name = folder_name[0]
+        filename_prefix = filename_prefix[0]
+
+        output_dir = os.path.join(folder_paths.get_output_directory(), folder_name)
+        saved_files = save_images_to_folder(images, output_dir, filename_prefix)
+
+        # Save captions
+        for idx, (filename, caption) in enumerate(zip(saved_files, texts)):
+            caption_filename = filename.replace(".png", ".txt")
+            caption_path = os.path.join(output_dir, caption_filename)
+            with open(caption_path, "w", encoding="utf-8") as f:
+                f.write(caption)
+
+        logging.info(f"Saved {len(saved_files)} images and captions to {output_dir}.")
+        return io.NodeOutput()
+
+
+# ========== Helper Functions for Transform Nodes ==========
+
+
+def tensor_to_pil(img_tensor):
+    """Convert tensor to PIL Image."""
+    if img_tensor.dim() == 4 and img_tensor.shape[0] == 1:
+        img_tensor = img_tensor.squeeze(0)
+    img_array = (img_tensor.cpu().numpy() * 255).clip(0, 255).astype(np.uint8)
+    return Image.fromarray(img_array)
+
+
+def pil_to_tensor(img):
+    """Convert PIL Image to tensor."""
+    img_array = np.array(img).astype(np.float32) / 255.0
+    return torch.from_numpy(img_array)[None,]
+
+
+# ========== Base Classes for Transform Nodes ==========
+
+
+class ImageProcessingNode(io.ComfyNode):
+    """Base class for image processing nodes that operate on images.
+
+    Child classes should set:
+        node_id: Unique node identifier (required)
+        display_name: Display name (optional, defaults to node_id)
+        description: Node description (optional)
+        extra_inputs: List of additional io.Input objects beyond "images" (optional)
+        is_group_process: None (auto-detect), True (group), or False (individual) (optional)
+        is_output_list: True (list output) or False (single output) (optional, default True)
+
+    Child classes must implement ONE of:
+        _process(cls, image, **kwargs) -> tensor  (for single-item processing)
+        _group_process(cls, images, **kwargs) -> list[tensor]  (for group processing)
+    """
+
+    node_id = None
+    display_name = None
+    description = None
+    extra_inputs = []
+    is_group_process = None  # None = auto-detect, True/False = explicit
+    is_output_list = None  # None = auto-detect based on processing mode
+
+    @classmethod
+    def _detect_processing_mode(cls):
+        """Detect whether this node uses group or individual processing.
+
+        Returns:
+            bool: True if group processing, False if individual processing
+        """
+        # Explicit setting takes precedence
+        if cls.is_group_process is not None:
+            return cls.is_group_process
+
+        # Check which method is overridden by looking at the defining class in MRO
+        base_class = ImageProcessingNode
+
+        # Find which class in MRO defines _process
+        process_definer = None
+        for klass in cls.__mro__:
+            if "_process" in klass.__dict__:
+                process_definer = klass
+                break
+
+        # Find which class in MRO defines _group_process
+        group_definer = None
+        for klass in cls.__mro__:
+            if "_group_process" in klass.__dict__:
+                group_definer = klass
+                break
+
+        # Check what was overridden (not defined in base class)
+        has_process = process_definer is not None and process_definer is not base_class
+        has_group = group_definer is not None and group_definer is not base_class
+
+        if has_process and has_group:
+            raise ValueError(
+                f"{cls.__name__}: Cannot override both _process and _group_process. "
+                "Override only one, or set is_group_process explicitly."
+            )
+        if not has_process and not has_group:
+            raise ValueError(
+                f"{cls.__name__}: Must override either _process or _group_process"
+            )
+
+        return has_group
+
+    @classmethod
+    def define_schema(cls):
+        if cls.node_id is None:
+            raise NotImplementedError(f"{cls.__name__} must set node_id class variable")
+
+        is_group = cls._detect_processing_mode()
+
+        # Auto-detect is_output_list if not explicitly set
+        # Single processing: False (backend collects results into list)
+        # Group processing: True by default (can be False for single-output nodes)
+        output_is_list = (
+            cls.is_output_list if cls.is_output_list is not None else is_group
+        )
+
+        inputs = [
+            io.Image.Input(
+                "images",
+                tooltip=(
+                    "List of images to process." if is_group else "Image to process."
+                ),
+            )
+        ]
+        inputs.extend(cls.extra_inputs)
+
+        return io.Schema(
+            node_id=cls.node_id,
+            display_name=cls.display_name or cls.node_id,
+            category="dataset/image",
+            is_experimental=True,
+            is_input_list=is_group,  # True for group, False for individual
+            inputs=inputs,
+            outputs=[
+                io.Image.Output(
+                    display_name="images",
+                    is_output_list=output_is_list,
+                    tooltip="Processed images",
+                )
+            ],
+        )
+
+    @classmethod
+    def execute(cls, images, **kwargs):
+        """Execute the node. Routes to _process or _group_process based on mode."""
+        is_group = cls._detect_processing_mode()
+
+        # Extract scalar values from lists for parameters
+        params = {}
+        for k, v in kwargs.items():
+            if isinstance(v, list) and len(v) == 1:
+                params[k] = v[0]
+            else:
+                params[k] = v
+
+        if is_group:
+            # Group processing: images is list, call _group_process
+            result = cls._group_process(images, **params)
+        else:
+            # Individual processing: images is single item, call _process
+            result = cls._process(images, **params)
+
+        return io.NodeOutput(result)
+
+    @classmethod
+    def _process(cls, image, **kwargs):
+        """Override this method for single-item processing.
+
+        Args:
+            image: tensor - Single image tensor
+            **kwargs: Additional parameters (already extracted from lists)
+
+        Returns:
+            tensor - Processed image
+        """
+        raise NotImplementedError(f"{cls.__name__} must implement _process method")
+
+    @classmethod
+    def _group_process(cls, images, **kwargs):
+        """Override this method for group processing.
+
+        Args:
+            images: list[tensor] - List of image tensors
+            **kwargs: Additional parameters (already extracted from lists)
+
+        Returns:
+            list[tensor] - Processed images
+        """
+        raise NotImplementedError(
+            f"{cls.__name__} must implement _group_process method"
+        )
+
+
+class TextProcessingNode(io.ComfyNode):
+    """Base class for text processing nodes that operate on texts.
+
+    Child classes should set:
+        node_id: Unique node identifier (required)
+        display_name: Display name (optional, defaults to node_id)
+        description: Node description (optional)
+        extra_inputs: List of additional io.Input objects beyond "texts" (optional)
+        is_group_process: None (auto-detect), True (group), or False (individual) (optional)
+        is_output_list: True (list output) or False (single output) (optional, default True)
+
+    Child classes must implement ONE of:
+        _process(cls, text, **kwargs) -> str  (for single-item processing)
+        _group_process(cls, texts, **kwargs) -> list[str]  (for group processing)
+    """
+
+    node_id = None
+    display_name = None
+    description = None
+    extra_inputs = []
+    is_group_process = None  # None = auto-detect, True/False = explicit
+    is_output_list = None  # None = auto-detect based on processing mode
+
+    @classmethod
+    def _detect_processing_mode(cls):
+        """Detect whether this node uses group or individual processing.
+
+        Returns:
+            bool: True if group processing, False if individual processing
+        """
+        # Explicit setting takes precedence
+        if cls.is_group_process is not None:
+            return cls.is_group_process
+
+        # Check which method is overridden by looking at the defining class in MRO
+        base_class = TextProcessingNode
+
+        # Find which class in MRO defines _process
+        process_definer = None
+        for klass in cls.__mro__:
+            if "_process" in klass.__dict__:
+                process_definer = klass
+                break
+
+        # Find which class in MRO defines _group_process
+        group_definer = None
+        for klass in cls.__mro__:
+            if "_group_process" in klass.__dict__:
+                group_definer = klass
+                break
+
+        # Check what was overridden (not defined in base class)
+        has_process = process_definer is not None and process_definer is not base_class
+        has_group = group_definer is not None and group_definer is not base_class
+
+        if has_process and has_group:
+            raise ValueError(
+                f"{cls.__name__}: Cannot override both _process and _group_process. "
+                "Override only one, or set is_group_process explicitly."
+            )
+        if not has_process and not has_group:
+            raise ValueError(
+                f"{cls.__name__}: Must override either _process or _group_process"
+            )
+
+        return has_group
+
+    @classmethod
+    def define_schema(cls):
+        if cls.node_id is None:
+            raise NotImplementedError(f"{cls.__name__} must set node_id class variable")
+
+        is_group = cls._detect_processing_mode()
+
+        inputs = [
+            io.String.Input(
+                "texts",
+                tooltip="List of texts to process." if is_group else "Text to process.",
+            )
+        ]
+        inputs.extend(cls.extra_inputs)
+
+        return io.Schema(
+            node_id=cls.node_id,
+            display_name=cls.display_name or cls.node_id,
+            category="dataset/text",
+            is_experimental=True,
+            is_input_list=is_group,  # True for group, False for individual
+            inputs=inputs,
+            outputs=[
+                io.String.Output(
+                    display_name="texts",
+                    is_output_list=cls.is_output_list,
+                    tooltip="Processed texts",
+                )
+            ],
+        )
+
+    @classmethod
+    def execute(cls, texts, **kwargs):
+        """Execute the node. Routes to _process or _group_process based on mode."""
+        is_group = cls._detect_processing_mode()
+
+        # Extract scalar values from lists for parameters
+        params = {}
+        for k, v in kwargs.items():
+            if isinstance(v, list) and len(v) == 1:
+                params[k] = v[0]
+            else:
+                params[k] = v
+
+        if is_group:
+            # Group processing: texts is list, call _group_process
+            result = cls._group_process(texts, **params)
+        else:
+            # Individual processing: texts is single item, call _process
+            result = cls._process(texts, **params)
+
+        # Wrap result based on is_output_list
+        if cls.is_output_list:
+            # Result should already be a list (or will be for individual)
+            return io.NodeOutput(result if is_group else [result])
+        else:
+            # Single output - wrap in list for NodeOutput
+            return io.NodeOutput([result])
+
+    @classmethod
+    def _process(cls, text, **kwargs):
+        """Override this method for single-item processing.
+
+        Args:
+            text: str - Single text string
+            **kwargs: Additional parameters (already extracted from lists)
+
+        Returns:
+            str - Processed text
+        """
+        raise NotImplementedError(f"{cls.__name__} must implement _process method")
+
+    @classmethod
+    def _group_process(cls, texts, **kwargs):
+        """Override this method for group processing.
+
+        Args:
+            texts: list[str] - List of text strings
+            **kwargs: Additional parameters (already extracted from lists)
+
+        Returns:
+            list[str] - Processed texts
+        """
+        raise NotImplementedError(
+            f"{cls.__name__} must implement _group_process method"
+        )
+
+
+# ========== Image Transform Nodes ==========
+
+
+class ResizeImagesToSameSizeNode(ImageProcessingNode):
+    node_id = "ResizeImagesToSameSize"
+    display_name = "Resize Images to Same Size"
+    description = "Resize all images to the same width and height."
+    extra_inputs = [
+        io.Int.Input("width", default=512, min=1, max=8192, tooltip="Target width."),
+        io.Int.Input("height", default=512, min=1, max=8192, tooltip="Target height."),
+        io.Combo.Input(
+            "mode",
+            options=["stretch", "crop_center", "pad"],
+            default="stretch",
+            tooltip="Resize mode.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, width, height, mode):
+        img = tensor_to_pil(image)
+
+        if mode == "stretch":
+            img = img.resize((width, height), Image.Resampling.LANCZOS)
+        elif mode == "crop_center":
+            left = max(0, (img.width - width) // 2)
+            top = max(0, (img.height - height) // 2)
+            right = min(img.width, left + width)
+            bottom = min(img.height, top + height)
+            img = img.crop((left, top, right, bottom))
+            if img.width != width or img.height != height:
+                img = img.resize((width, height), Image.Resampling.LANCZOS)
+        elif mode == "pad":
+            img.thumbnail((width, height), Image.Resampling.LANCZOS)
+            new_img = Image.new("RGB", (width, height), (0, 0, 0))
+            paste_x = (width - img.width) // 2
+            paste_y = (height - img.height) // 2
+            new_img.paste(img, (paste_x, paste_y))
+            img = new_img
+
+        return pil_to_tensor(img)
+
+
+class ResizeImagesToPixelCountNode(ImageProcessingNode):
+    node_id = "ResizeImagesToPixelCount"
+    display_name = "Resize Images to Pixel Count"
+    description = "Resize images so that the total pixel count matches the specified number while preserving aspect ratio."
+    extra_inputs = [
+        io.Int.Input(
+            "pixel_count",
+            default=512 * 512,
+            min=1,
+            max=8192 * 8192,
+            tooltip="Target pixel count.",
+        ),
+        io.Int.Input(
+            "steps",
+            default=64,
+            min=1,
+            max=128,
+            tooltip="The stepping for resize width/height.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, pixel_count, steps):
+        img = tensor_to_pil(image)
+        w, h = img.size
+        pixel_count_ratio = math.sqrt(pixel_count / (w * h))
+        new_w = int(w * pixel_count_ratio / steps) * steps
+        new_h = int(h * pixel_count_ratio / steps) * steps
+        logging.info(f"Resizing from {w}x{h} to {new_w}x{new_h}")
+        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        return pil_to_tensor(img)
+
+
+class ResizeImagesByShorterEdgeNode(ImageProcessingNode):
+    node_id = "ResizeImagesByShorterEdge"
+    display_name = "Resize Images by Shorter Edge"
+    description = "Resize images so that the shorter edge matches the specified length while preserving aspect ratio."
+    extra_inputs = [
+        io.Int.Input(
+            "shorter_edge",
+            default=512,
+            min=1,
+            max=8192,
+            tooltip="Target length for the shorter edge.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, shorter_edge):
+        img = tensor_to_pil(image)
+        w, h = img.size
+        if w < h:
+            new_w = shorter_edge
+            new_h = int(h * (shorter_edge / w))
+        else:
+            new_h = shorter_edge
+            new_w = int(w * (shorter_edge / h))
+        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        return pil_to_tensor(img)
+
+
+class ResizeImagesByLongerEdgeNode(ImageProcessingNode):
+    node_id = "ResizeImagesByLongerEdge"
+    display_name = "Resize Images by Longer Edge"
+    description = "Resize images so that the longer edge matches the specified length while preserving aspect ratio."
+    extra_inputs = [
+        io.Int.Input(
+            "longer_edge",
+            default=1024,
+            min=1,
+            max=8192,
+            tooltip="Target length for the longer edge.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, longer_edge):
+        img = tensor_to_pil(image)
+        w, h = img.size
+        if w > h:
+            new_w = longer_edge
+            new_h = int(h * (longer_edge / w))
+        else:
+            new_h = longer_edge
+            new_w = int(w * (longer_edge / h))
+        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        return pil_to_tensor(img)
+
+
+class CenterCropImagesNode(ImageProcessingNode):
+    node_id = "CenterCropImages"
+    display_name = "Center Crop Images"
+    description = "Center crop all images to the specified dimensions."
+    extra_inputs = [
+        io.Int.Input("width", default=512, min=1, max=8192, tooltip="Crop width."),
+        io.Int.Input("height", default=512, min=1, max=8192, tooltip="Crop height."),
+    ]
+
+    @classmethod
+    def _process(cls, image, width, height):
+        img = tensor_to_pil(image)
+        left = max(0, (img.width - width) // 2)
+        top = max(0, (img.height - height) // 2)
+        right = min(img.width, left + width)
+        bottom = min(img.height, top + height)
+        img = img.crop((left, top, right, bottom))
+        return pil_to_tensor(img)
+
+
+class RandomCropImagesNode(ImageProcessingNode):
+    node_id = "RandomCropImages"
+    display_name = "Random Crop Images"
+    description = (
+        "Randomly crop all images to the specified dimensions (for data augmentation)."
+    )
+    extra_inputs = [
+        io.Int.Input("width", default=512, min=1, max=8192, tooltip="Crop width."),
+        io.Int.Input("height", default=512, min=1, max=8192, tooltip="Crop height."),
+        io.Int.Input(
+            "seed", default=0, min=0, max=0xFFFFFFFFFFFFFFFF, tooltip="Random seed."
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, width, height, seed):
+        np.random.seed(seed % (2**32 - 1))
+        img = tensor_to_pil(image)
+        max_left = max(0, img.width - width)
+        max_top = max(0, img.height - height)
+        left = np.random.randint(0, max_left + 1) if max_left > 0 else 0
+        top = np.random.randint(0, max_top + 1) if max_top > 0 else 0
+        right = min(img.width, left + width)
+        bottom = min(img.height, top + height)
+        img = img.crop((left, top, right, bottom))
+        return pil_to_tensor(img)
+
+
+class FlipImagesNode(ImageProcessingNode):
+    node_id = "FlipImages"
+    display_name = "Flip Images"
+    description = "Flip all images horizontally or vertically."
+    extra_inputs = [
+        io.Combo.Input(
+            "direction",
+            options=["horizontal", "vertical"],
+            default="horizontal",
+            tooltip="Flip direction.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, direction):
+        img = tensor_to_pil(image)
+        if direction == "horizontal":
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+        else:
+            img = img.transpose(Image.FLIP_TOP_BOTTOM)
+        return pil_to_tensor(img)
+
+
+class NormalizeImagesNode(ImageProcessingNode):
+    node_id = "NormalizeImages"
+    display_name = "Normalize Images"
+    description = "Normalize images using mean and standard deviation."
+    extra_inputs = [
+        io.Float.Input(
+            "mean",
+            default=0.5,
+            min=0.0,
+            max=1.0,
+            tooltip="Mean value for normalization.",
+        ),
+        io.Float.Input(
+            "std",
+            default=0.5,
+            min=0.001,
+            max=1.0,
+            tooltip="Standard deviation for normalization.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, mean, std):
+        return (image - mean) / std
+
+
+class AdjustBrightnessNode(ImageProcessingNode):
+    node_id = "AdjustBrightness"
+    display_name = "Adjust Brightness"
+    description = "Adjust brightness of all images."
+    extra_inputs = [
+        io.Float.Input(
+            "factor",
+            default=1.0,
+            min=0.0,
+            max=2.0,
+            tooltip="Brightness factor. 1.0 = no change, <1.0 = darker, >1.0 = brighter.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, factor):
+        return (image * factor).clamp(0.0, 1.0)
+
+
+class AdjustContrastNode(ImageProcessingNode):
+    node_id = "AdjustContrast"
+    display_name = "Adjust Contrast"
+    description = "Adjust contrast of all images."
+    extra_inputs = [
+        io.Float.Input(
+            "factor",
+            default=1.0,
+            min=0.0,
+            max=2.0,
+            tooltip="Contrast factor. 1.0 = no change, <1.0 = less contrast, >1.0 = more contrast.",
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, image, factor):
+        return ((image - 0.5) * factor + 0.5).clamp(0.0, 1.0)
+
+
+class ShuffleDatasetNode(ImageProcessingNode):
+    node_id = "ShuffleDataset"
+    display_name = "Shuffle Image Dataset"
+    description = "Randomly shuffle the order of images in the dataset."
+    is_group_process = True  # Requires full list to shuffle
+    extra_inputs = [
+        io.Int.Input(
+            "seed", default=0, min=0, max=0xFFFFFFFFFFFFFFFF, tooltip="Random seed."
+        ),
+    ]
+
+    @classmethod
+    def _group_process(cls, images, seed):
+        np.random.seed(seed % (2**32 - 1))
+        indices = np.random.permutation(len(images))
+        return [images[i] for i in indices]
+
+
+class ShuffleImageTextDatasetNode(io.ComfyNode):
+    """Special node that shuffles both images and texts together."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="ShuffleImageTextDataset",
+            display_name="Shuffle Image-Text Dataset",
+            category="dataset/image",
+            is_experimental=True,
+            is_input_list=True,
+            inputs=[
+                io.Image.Input("images", tooltip="List of images to shuffle."),
+                io.String.Input("texts", tooltip="List of texts to shuffle."),
+                io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    tooltip="Random seed.",
+                ),
+            ],
+            outputs=[
+                io.Image.Output(
+                    display_name="images",
+                    is_output_list=True,
+                    tooltip="Shuffled images",
+                ),
+                io.String.Output(
+                    display_name="texts", is_output_list=True, tooltip="Shuffled texts"
+                ),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, images, texts, seed):
+        seed = seed[0]  # Extract scalar
+        np.random.seed(seed % (2**32 - 1))
+        indices = np.random.permutation(len(images))
+        shuffled_images = [images[i] for i in indices]
+        shuffled_texts = [texts[i] for i in indices]
+        return io.NodeOutput(shuffled_images, shuffled_texts)
+
+
+# ========== Text Transform Nodes ==========
+
+
+class TextToLowercaseNode(TextProcessingNode):
+    node_id = "TextToLowercase"
+    display_name = "Text to Lowercase"
+    description = "Convert all texts to lowercase."
+
+    @classmethod
+    def _process(cls, text):
+        return text.lower()
+
+
+class TextToUppercaseNode(TextProcessingNode):
+    node_id = "TextToUppercase"
+    display_name = "Text to Uppercase"
+    description = "Convert all texts to uppercase."
+
+    @classmethod
+    def _process(cls, text):
+        return text.upper()
+
+
+class TruncateTextNode(TextProcessingNode):
+    node_id = "TruncateText"
+    display_name = "Truncate Text"
+    description = "Truncate all texts to a maximum length."
+    extra_inputs = [
+        io.Int.Input(
+            "max_length", default=77, min=1, max=10000, tooltip="Maximum text length."
+        ),
+    ]
+
+    @classmethod
+    def _process(cls, text, max_length):
+        return text[:max_length]
+
+
+class AddTextPrefixNode(TextProcessingNode):
+    node_id = "AddTextPrefix"
+    display_name = "Add Text Prefix"
+    description = "Add a prefix to all texts."
+    extra_inputs = [
+        io.String.Input("prefix", default="", tooltip="Prefix to add."),
+    ]
+
+    @classmethod
+    def _process(cls, text, prefix):
+        return prefix + text
+
+
+class AddTextSuffixNode(TextProcessingNode):
+    node_id = "AddTextSuffix"
+    display_name = "Add Text Suffix"
+    description = "Add a suffix to all texts."
+    extra_inputs = [
+        io.String.Input("suffix", default="", tooltip="Suffix to add."),
+    ]
+
+    @classmethod
+    def _process(cls, text, suffix):
+        return text + suffix
+
+
+class ReplaceTextNode(TextProcessingNode):
+    node_id = "ReplaceText"
+    display_name = "Replace Text"
+    description = "Replace text in all texts."
+    extra_inputs = [
+        io.String.Input("find", default="", tooltip="Text to find."),
+        io.String.Input("replace", default="", tooltip="Text to replace with."),
+    ]
+
+    @classmethod
+    def _process(cls, text, find, replace):
+        return text.replace(find, replace)
+
+
+class StripWhitespaceNode(TextProcessingNode):
+    node_id = "StripWhitespace"
+    display_name = "Strip Whitespace"
+    description = "Strip leading and trailing whitespace from all texts."
+
+    @classmethod
+    def _process(cls, text):
+        return text.strip()
+
+
+# ========== Group Processing Example Nodes ==========
+
+
+class ImageDeduplicationNode(ImageProcessingNode):
+    """Remove duplicate or very similar images from the dataset using perceptual hashing."""
+
+    node_id = "ImageDeduplication"
+    display_name = "Image Deduplication"
+    description = "Remove duplicate or very similar images from the dataset."
+    is_group_process = True  # Requires full list to compare images
+    extra_inputs = [
+        io.Float.Input(
+            "similarity_threshold",
+            default=0.95,
+            min=0.0,
+            max=1.0,
+            tooltip="Similarity threshold (0-1). Higher means more similar. Images above this threshold are considered duplicates.",
+        ),
+    ]
+
+    @classmethod
+    def _group_process(cls, images, similarity_threshold):
+        """Remove duplicate images using perceptual hashing."""
+        if len(images) == 0:
+            return []
+
+        # Compute simple perceptual hash for each image
+        def compute_hash(img_tensor):
+            """Compute a simple perceptual hash by resizing to 8x8 and comparing to average."""
+            img = tensor_to_pil(img_tensor)
+            # Resize to 8x8
+            img_small = img.resize((8, 8), Image.Resampling.LANCZOS).convert("L")
+            # Get pixels
+            pixels = list(img_small.getdata())
+            # Compute average
+            avg = sum(pixels) / len(pixels)
+            # Create hash (1 if above average, 0 otherwise)
+            hash_bits = "".join("1" if p > avg else "0" for p in pixels)
+            return hash_bits
+
+        def hamming_distance(hash1, hash2):
+            """Compute Hamming distance between two hash strings."""
+            return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))
+
+        # Compute hashes for all images
+        hashes = [compute_hash(img) for img in images]
+
+        # Find duplicates
+        keep_indices = []
+        for i in range(len(images)):
+            is_duplicate = False
+            for j in keep_indices:
+                # Compare hashes
+                distance = hamming_distance(hashes[i], hashes[j])
+                similarity = 1.0 - (distance / 64.0)  # 64 bits total
+                if similarity >= similarity_threshold:
+                    is_duplicate = True
+                    logging.info(
+                        f"Image {i} is similar to image {j} (similarity: {similarity:.3f}), skipping"
+                    )
+                    break
+
+            if not is_duplicate:
+                keep_indices.append(i)
+
+        # Return only unique images
+        unique_images = [images[i] for i in keep_indices]
+        logging.info(
+            f"Deduplication: kept {len(unique_images)} out of {len(images)} images"
+        )
+        return unique_images
+
+
+class ImageGridNode(ImageProcessingNode):
+    """Combine multiple images into a single grid/collage."""
+
+    node_id = "ImageGrid"
+    display_name = "Image Grid"
+    description = "Arrange multiple images into a grid layout."
+    is_group_process = True  # Requires full list to create grid
+    is_output_list = False  # Outputs single grid image
+    extra_inputs = [
+        io.Int.Input(
+            "columns",
+            default=4,
+            min=1,
+            max=20,
+            tooltip="Number of columns in the grid.",
+        ),
+        io.Int.Input(
+            "cell_width",
+            default=256,
+            min=32,
+            max=2048,
+            tooltip="Width of each cell in the grid.",
+        ),
+        io.Int.Input(
+            "cell_height",
+            default=256,
+            min=32,
+            max=2048,
+            tooltip="Height of each cell in the grid.",
+        ),
+        io.Int.Input(
+            "padding", default=4, min=0, max=50, tooltip="Padding between images."
+        ),
+    ]
+
+    @classmethod
+    def _group_process(cls, images, columns, cell_width, cell_height, padding):
+        """Arrange images into a grid."""
+        if len(images) == 0:
+            raise ValueError("Cannot create grid from empty image list")
+
+        # Calculate grid dimensions
+        num_images = len(images)
+        rows = (num_images + columns - 1) // columns  # Ceiling division
+
+        # Calculate total grid size
+        grid_width = columns * cell_width + (columns - 1) * padding
+        grid_height = rows * cell_height + (rows - 1) * padding
+
+        # Create blank grid
+        grid = Image.new("RGB", (grid_width, grid_height), (0, 0, 0))
+
+        # Place images
+        for idx, img_tensor in enumerate(images):
+            row = idx // columns
+            col = idx % columns
+
+            # Convert to PIL and resize to cell size
+            img = tensor_to_pil(img_tensor)
+            img = img.resize((cell_width, cell_height), Image.Resampling.LANCZOS)
+
+            # Calculate position
+            x = col * (cell_width + padding)
+            y = row * (cell_height + padding)
+
+            # Paste into grid
+            grid.paste(img, (x, y))
+
+        logging.info(
+            f"Created {columns}x{rows} grid with {num_images} images ({grid_width}x{grid_height})"
+        )
+        return pil_to_tensor(grid)
+
+
+class MergeImageListsNode(ImageProcessingNode):
+    """Merge multiple image lists into a single list."""
+
+    node_id = "MergeImageLists"
+    display_name = "Merge Image Lists"
+    description = "Concatenate multiple image lists into one."
+    is_group_process = True  # Receives images as list
+
+    @classmethod
+    def _group_process(cls, images):
+        """Simply return the images list (already merged by input handling)."""
+        # When multiple list inputs are connected, they're concatenated
+        # For now, this is a simple pass-through
+        logging.info(f"Merged image list contains {len(images)} images")
+        return images
+
+
+class MergeTextListsNode(TextProcessingNode):
+    """Merge multiple text lists into a single list."""
+
+    node_id = "MergeTextLists"
+    display_name = "Merge Text Lists"
+    description = "Concatenate multiple text lists into one."
+    is_group_process = True  # Receives texts as list
+
+    @classmethod
+    def _group_process(cls, texts):
+        """Simply return the texts list (already merged by input handling)."""
+        # When multiple list inputs are connected, they're concatenated
+        # For now, this is a simple pass-through
+        logging.info(f"Merged text list contains {len(texts)} texts")
+        return texts
+
+
+# ========== Training Dataset Nodes ==========
+
+
+class MakeTrainingDataset(io.ComfyNode):
+    """Encode images with VAE and texts with CLIP to create a training dataset."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="MakeTrainingDataset",
+            display_name="Make Training Dataset",
+            category="dataset",
+            is_experimental=True,
+            is_input_list=True,  # images and texts as lists
+            inputs=[
+                io.Image.Input("images", tooltip="List of images to encode."),
+                io.Vae.Input(
+                    "vae", tooltip="VAE model for encoding images to latents."
+                ),
+                io.Clip.Input(
+                    "clip", tooltip="CLIP model for encoding text to conditioning."
+                ),
+                io.String.Input(
+                    "texts",
+                    optional=True,
+                    tooltip="List of text captions. Can be length n (matching images), 1 (repeated for all), or omitted (uses empty string).",
+                ),
+            ],
+            outputs=[
+                io.Latent.Output(
+                    display_name="latents",
+                    is_output_list=True,
+                    tooltip="List of latent dicts",
+                ),
+                io.Conditioning.Output(
+                    display_name="conditioning",
+                    is_output_list=True,
+                    tooltip="List of conditioning lists",
+                ),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, images, vae, clip, texts=None):
+        # Extract scalars (vae and clip are single values wrapped in lists)
+        vae = vae[0]
+        clip = clip[0]
+
+        # Handle text list
+        num_images = len(images)
+
+        if texts is None or len(texts) == 0:
+            # Treat as [""] for unconditional training
+            texts = [""]
+
+        if len(texts) == 1 and num_images > 1:
+            # Repeat single text for all images
+            texts = texts * num_images
+        elif len(texts) != num_images:
+            raise ValueError(
+                f"Number of texts ({len(texts)}) does not match number of images ({num_images}). "
+                f"Text list should have length {num_images}, 1, or 0."
+            )
+
+        # Encode images with VAE
+        logging.info(f"Encoding {num_images} images with VAE...")
+        latents_list = []  # list[{"samples": tensor}]
+        for img_tensor in images:
+            # img_tensor is [1, H, W, 3]
+            latent_tensor = vae.encode(img_tensor[:, :, :, :3])
+            latents_list.append({"samples": latent_tensor})
+
+        # Encode texts with CLIP
+        logging.info(f"Encoding {len(texts)} texts with CLIP...")
+        conditioning_list = []  # list[list[cond]]
+        for text in texts:
+            if text == "":
+                cond = clip.encode_from_tokens_scheduled(clip.tokenize(""))
+            else:
+                tokens = clip.tokenize(text)
+                cond = clip.encode_from_tokens_scheduled(tokens)
+            conditioning_list.append(cond)
+
+        logging.info(
+            f"Created dataset with {len(latents_list)} latents and {len(conditioning_list)} conditioning."
+        )
+        return io.NodeOutput(latents_list, conditioning_list)
+
+
+class SaveTrainingDataset(io.ComfyNode):
+    """Save encoded training dataset (latents + conditioning) to disk."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SaveTrainingDataset",
+            display_name="Save Training Dataset",
+            category="dataset",
+            is_experimental=True,
+            is_output_node=True,
+            is_input_list=True,  # Receive lists
+            inputs=[
+                io.Latent.Input(
+                    "latents",
+                    tooltip="List of latent dicts from MakeTrainingDataset.",
+                ),
+                io.Conditioning.Input(
+                    "conditioning",
+                    tooltip="List of conditioning lists from MakeTrainingDataset.",
+                ),
+                io.String.Input(
+                    "folder_name",
+                    default="training_dataset",
+                    tooltip="Name of folder to save dataset (inside output directory).",
+                ),
+                io.Int.Input(
+                    "shard_size",
+                    default=1000,
+                    min=1,
+                    max=100000,
+                    tooltip="Number of samples per shard file.",
+                ),
+            ],
+            outputs=[],
+        )
+
+    @classmethod
+    def execute(cls, latents, conditioning, folder_name, shard_size):
+        # Extract scalars
+        folder_name = folder_name[0]
+        shard_size = shard_size[0]
+
+        # latents: list[{"samples": tensor}]
+        # conditioning: list[list[cond]]
+
+        # Validate lengths match
+        if len(latents) != len(conditioning):
+            raise ValueError(
+                f"Number of latents ({len(latents)}) does not match number of conditions ({len(conditioning)}). "
+                f"Something went wrong in dataset preparation."
+            )
+
+        # Create output directory
+        output_dir = os.path.join(folder_paths.get_output_directory(), folder_name)
+        os.makedirs(output_dir, exist_ok=True)
+
+        # Prepare data pairs
+        num_samples = len(latents)
+        num_shards = (num_samples + shard_size - 1) // shard_size  # Ceiling division
+
+        logging.info(
+            f"Saving {num_samples} samples to {num_shards} shards in {output_dir}..."
+        )
+
+        # Save data in shards
+        for shard_idx in range(num_shards):
+            start_idx = shard_idx * shard_size
+            end_idx = min(start_idx + shard_size, num_samples)
+
+            # Get shard data (list of latent dicts and conditioning lists)
+            shard_data = {
+                "latents": latents[start_idx:end_idx],
+                "conditioning": conditioning[start_idx:end_idx],
+            }
+
+            # Save shard
+            shard_filename = f"shard_{shard_idx:04d}.pkl"
+            shard_path = os.path.join(output_dir, shard_filename)
+
+            with open(shard_path, "wb") as f:
+                torch.save(shard_data, f)
+
+            logging.info(
+                f"Saved shard {shard_idx + 1}/{num_shards}: {shard_filename} ({end_idx - start_idx} samples)"
+            )
+
+        # Save metadata
+        metadata = {
+            "num_samples": num_samples,
+            "num_shards": num_shards,
+            "shard_size": shard_size,
+        }
+        metadata_path = os.path.join(output_dir, "metadata.json")
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f, indent=2)
+
+        logging.info(f"Successfully saved {num_samples} samples to {output_dir}.")
+        return io.NodeOutput()
+
+
+class LoadTrainingDataset(io.ComfyNode):
+    """Load encoded training dataset from disk."""
+
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LoadTrainingDataset",
+            display_name="Load Training Dataset",
+            category="dataset",
+            is_experimental=True,
+            inputs=[
+                io.String.Input(
+                    "folder_name",
+                    default="training_dataset",
+                    tooltip="Name of folder containing the saved dataset (inside output directory).",
+                ),
+            ],
+            outputs=[
+                io.Latent.Output(
+                    display_name="latents",
+                    is_output_list=True,
+                    tooltip="List of latent dicts",
+                ),
+                io.Conditioning.Output(
+                    display_name="conditioning",
+                    is_output_list=True,
+                    tooltip="List of conditioning lists",
+                ),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, folder_name):
+        # Get dataset directory
+        dataset_dir = os.path.join(folder_paths.get_output_directory(), folder_name)
+
+        if not os.path.exists(dataset_dir):
+            raise ValueError(f"Dataset directory not found: {dataset_dir}")
+
+        # Find all shard files
+        shard_files = sorted(
+            [
+                f
+                for f in os.listdir(dataset_dir)
+                if f.startswith("shard_") and f.endswith(".pkl")
+            ]
+        )
+
+        if not shard_files:
+            raise ValueError(f"No shard files found in {dataset_dir}")
+
+        logging.info(f"Loading {len(shard_files)} shards from {dataset_dir}...")
+
+        # Load all shards
+        all_latents = []  # list[{"samples": tensor}]
+        all_conditioning = []  # list[list[cond]]
+
+        for shard_file in shard_files:
+            shard_path = os.path.join(dataset_dir, shard_file)
+
+            with open(shard_path, "rb") as f:
+                shard_data = torch.load(f)
+
+            all_latents.extend(shard_data["latents"])
+            all_conditioning.extend(shard_data["conditioning"])
+
+            logging.info(f"Loaded {shard_file}: {len(shard_data['latents'])} samples")
+
+        logging.info(
+            f"Successfully loaded {len(all_latents)} samples from {dataset_dir}."
+        )
+        return io.NodeOutput(all_latents, all_conditioning)
+
+
+# ========== Extension Setup ==========
+
+
+class DatasetExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            # Data loading/saving nodes
+            LoadImageDataSetFromFolderNode,
+            LoadImageTextDataSetFromFolderNode,
+            SaveImageDataSetToFolderNode,
+            SaveImageTextDataSetToFolderNode,
+            # Image transform nodes
+            ResizeImagesToSameSizeNode,
+            ResizeImagesToPixelCountNode,
+            ResizeImagesByShorterEdgeNode,
+            ResizeImagesByLongerEdgeNode,
+            CenterCropImagesNode,
+            RandomCropImagesNode,
+            FlipImagesNode,
+            NormalizeImagesNode,
+            AdjustBrightnessNode,
+            AdjustContrastNode,
+            ShuffleDatasetNode,
+            ShuffleImageTextDatasetNode,
+            # Text transform nodes
+            TextToLowercaseNode,
+            TextToUppercaseNode,
+            TruncateTextNode,
+            AddTextPrefixNode,
+            AddTextSuffixNode,
+            ReplaceTextNode,
+            StripWhitespaceNode,
+            # Group processing examples
+            ImageDeduplicationNode,
+            ImageGridNode,
+            MergeImageListsNode,
+            MergeTextListsNode,
+            # Training dataset nodes
+            MakeTrainingDataset,
+            SaveTrainingDataset,
+            LoadTrainingDataset,
+        ]
+
+
+async def comfy_entrypoint() -> DatasetExtension:
+    return DatasetExtension()
diff --git a/comfy_extras/nodes_train.py b/comfy_extras/nodes_train.py
index 9e6ec6780..cb24ab709 100644
--- a/comfy_extras/nodes_train.py
+++ b/comfy_extras/nodes_train.py
@@ -1,15 +1,13 @@
-import datetime
-import json
 import logging
 import os
 
 import numpy as np
 import safetensors
 import torch
-from PIL import Image, ImageDraw, ImageFont
-from PIL.PngImagePlugin import PngInfo
 import torch.utils.checkpoint
-import tqdm
+from tqdm.auto import trange
+from PIL import Image, ImageDraw, ImageFont
+from typing_extensions import override
 
 import comfy.samplers
 import comfy.sd
@@ -18,9 +16,9 @@ import comfy.model_management
 import comfy_extras.nodes_custom_sampler
 import folder_paths
 import node_helpers
-from comfy.cli_args import args
-from comfy.comfy_types.node_typing import IO
 from comfy.weight_adapter import adapters, adapter_maps
+from comfy_api.latest import ComfyExtension, io, ui
+from comfy.utils import ProgressBar
 
 
 def make_batch_extra_option_dict(d, indicies, full_size=None):
@@ -56,7 +54,18 @@ def process_cond_list(d, prefix=""):
 
 
 class TrainSampler(comfy.samplers.Sampler):
-    def __init__(self, loss_fn, optimizer, loss_callback=None, batch_size=1, grad_acc=1, total_steps=1, seed=0, training_dtype=torch.bfloat16):
+    def __init__(
+        self,
+        loss_fn,
+        optimizer,
+        loss_callback=None,
+        batch_size=1,
+        grad_acc=1,
+        total_steps=1,
+        seed=0,
+        training_dtype=torch.bfloat16,
+        real_dataset=None,
+    ):
         self.loss_fn = loss_fn
         self.optimizer = optimizer
         self.loss_callback = loss_callback
@@ -65,54 +74,138 @@ class TrainSampler(comfy.samplers.Sampler):
         self.grad_acc = grad_acc
         self.seed = seed
         self.training_dtype = training_dtype
+        self.real_dataset: list[torch.Tensor] | None = real_dataset
 
-    def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
+    def fwd_bwd(
+        self,
+        model_wrap,
+        batch_sigmas,
+        batch_noise,
+        batch_latent,
+        cond,
+        indicies,
+        extra_args,
+        dataset_size,
+        bwd=True,
+    ):
+        xt = model_wrap.inner_model.model_sampling.noise_scaling(
+            batch_sigmas, batch_noise, batch_latent, False
+        )
+        x0 = model_wrap.inner_model.model_sampling.noise_scaling(
+            torch.zeros_like(batch_sigmas),
+            torch.zeros_like(batch_noise),
+            batch_latent,
+            False,
+        )
+
+        model_wrap.conds["positive"] = [cond[i] for i in indicies]
+        batch_extra_args = make_batch_extra_option_dict(
+            extra_args, indicies, full_size=dataset_size
+        )
+
+        with torch.autocast(xt.device.type, dtype=self.training_dtype):
+            x0_pred = model_wrap(
+                xt.requires_grad_(True),
+                batch_sigmas.requires_grad_(True),
+                **batch_extra_args,
+            )
+            loss = self.loss_fn(x0_pred, x0)
+        if bwd:
+            bwd_loss = loss / self.grad_acc
+            bwd_loss.backward()
+        return loss
+
+    def sample(
+        self,
+        model_wrap,
+        sigmas,
+        extra_args,
+        callback,
+        noise,
+        latent_image=None,
+        denoise_mask=None,
+        disable_pbar=False,
+    ):
         model_wrap.conds = process_cond_list(model_wrap.conds)
         cond = model_wrap.conds["positive"]
         dataset_size = sigmas.size(0)
         torch.cuda.empty_cache()
-        for i in (pbar:=tqdm.trange(self.total_steps, desc="Training LoRA", smoothing=0.01, disable=not comfy.utils.PROGRESS_BAR_ENABLED)):
-            noisegen = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(self.seed + i * 1000)
-            indicies = torch.randperm(dataset_size)[:self.batch_size].tolist()
-
-            batch_latent = torch.stack([latent_image[i] for i in indicies])
-            batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(batch_latent.device)
-            batch_sigmas = [
-                model_wrap.inner_model.model_sampling.percent_to_sigma(
-                    torch.rand((1,)).item()
-                ) for _ in range(min(self.batch_size, dataset_size))
-            ]
-            batch_sigmas = torch.tensor(batch_sigmas).to(batch_latent.device)
-
-            xt = model_wrap.inner_model.model_sampling.noise_scaling(
-                batch_sigmas,
-                batch_noise,
-                batch_latent,
-                False
+        ui_pbar = ProgressBar(self.total_steps)
+        for i in (
+            pbar := trange(
+                self.total_steps,
+                desc="Training LoRA",
+                smoothing=0.01,
+                disable=not comfy.utils.PROGRESS_BAR_ENABLED,
             )
-            x0 = model_wrap.inner_model.model_sampling.noise_scaling(
-                torch.zeros_like(batch_sigmas),
-                torch.zeros_like(batch_noise),
-                batch_latent,
-                False
+        ):
+            noisegen = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(
+                self.seed + i * 1000
             )
+            indicies = torch.randperm(dataset_size)[: self.batch_size].tolist()
 
-            model_wrap.conds["positive"] = [
-                cond[i] for i in indicies
-            ]
-            batch_extra_args = make_batch_extra_option_dict(extra_args, indicies, full_size=dataset_size)
+            if self.real_dataset is None:
+                batch_latent = torch.stack([latent_image[i] for i in indicies])
+                batch_noise = noisegen.generate_noise({"samples": batch_latent}).to(
+                    batch_latent.device
+                )
+                batch_sigmas = [
+                    model_wrap.inner_model.model_sampling.percent_to_sigma(
+                        torch.rand((1,)).item()
+                    )
+                    for _ in range(min(self.batch_size, dataset_size))
+                ]
+                batch_sigmas = torch.tensor(batch_sigmas).to(batch_latent.device)
 
-            with torch.autocast(xt.device.type, dtype=self.training_dtype):
-                x0_pred = model_wrap(xt, batch_sigmas, **batch_extra_args)
-                loss = self.loss_fn(x0_pred, x0)
-            loss.backward()
-            if self.loss_callback:
-                self.loss_callback(loss.item())
-            pbar.set_postfix({"loss": f"{loss.item():.4f}"})
+                loss = self.fwd_bwd(
+                    model_wrap,
+                    batch_sigmas,
+                    batch_noise,
+                    batch_latent,
+                    cond,
+                    indicies,
+                    extra_args,
+                    dataset_size,
+                    bwd=True,
+                )
+                if self.loss_callback:
+                    self.loss_callback(loss.item())
+                pbar.set_postfix({"loss": f"{loss.item():.4f}"})
+            else:
+                total_loss = 0
+                for index in indicies:
+                    single_latent = self.real_dataset[index].to(latent_image)
+                    batch_noise = noisegen.generate_noise(
+                        {"samples": single_latent}
+                    ).to(single_latent.device)
+                    batch_sigmas = (
+                        model_wrap.inner_model.model_sampling.percent_to_sigma(
+                            torch.rand((1,)).item()
+                        )
+                    )
+                    batch_sigmas = torch.tensor([batch_sigmas]).to(single_latent.device)
+                    loss = self.fwd_bwd(
+                        model_wrap,
+                        batch_sigmas,
+                        batch_noise,
+                        single_latent,
+                        cond,
+                        [index],
+                        extra_args,
+                        dataset_size,
+                        bwd=False,
+                    )
+                    total_loss += loss
+                total_loss = total_loss / self.grad_acc / len(indicies)
+                total_loss.backward()
+                if self.loss_callback:
+                    self.loss_callback(total_loss.item())
+                pbar.set_postfix({"loss": f"{total_loss.item():.4f}"})
 
-            if (i+1) % self.grad_acc == 0:
+            if (i + 1) % self.grad_acc == 0:
                 self.optimizer.step()
                 self.optimizer.zero_grad()
+                ui_pbar.update(1)
         torch.cuda.empty_cache()
         return torch.zeros_like(latent_image)
 
@@ -134,233 +227,6 @@ class BiasDiff(torch.nn.Module):
         return self.passive_memory_usage()
 
 
-def load_and_process_images(image_files, input_dir, resize_method="None", w=None, h=None):
-    """Utility function to load and process a list of images.
-
-    Args:
-        image_files: List of image filenames
-        input_dir: Base directory containing the images
-        resize_method: How to handle images of different sizes ("None", "Stretch", "Crop", "Pad")
-
-    Returns:
-        torch.Tensor: Batch of processed images
-    """
-    if not image_files:
-        raise ValueError("No valid images found in input")
-
-    output_images = []
-
-    for file in image_files:
-        image_path = os.path.join(input_dir, file)
-        img = node_helpers.pillow(Image.open, image_path)
-
-        if img.mode == "I":
-            img = img.point(lambda i: i * (1 / 255))
-        img = img.convert("RGB")
-
-        if w is None and h is None:
-            w, h = img.size[0], img.size[1]
-
-        # Resize image to first image
-        if img.size[0] != w or img.size[1] != h:
-            if resize_method == "Stretch":
-                img = img.resize((w, h), Image.Resampling.LANCZOS)
-            elif resize_method == "Crop":
-                img = img.crop((0, 0, w, h))
-            elif resize_method == "Pad":
-                img = img.resize((w, h), Image.Resampling.LANCZOS)
-            elif resize_method == "None":
-                raise ValueError(
-                    "Your input image size does not match the first image in the dataset. Either select a valid resize method or use the same size for all images."
-                )
-
-        img_array = np.array(img).astype(np.float32) / 255.0
-        img_tensor = torch.from_numpy(img_array)[None,]
-        output_images.append(img_tensor)
-
-    return torch.cat(output_images, dim=0)
-
-
-class LoadImageSetNode:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "images": (
-                    [
-                        f
-                        for f in os.listdir(folder_paths.get_input_directory())
-                        if f.endswith((".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif", ".jpe", ".apng", ".tif", ".tiff"))
-                    ],
-                    {"image_upload": True, "allow_batch": True},
-                )
-            },
-            "optional": {
-                "resize_method": (
-                    ["None", "Stretch", "Crop", "Pad"],
-                    {"default": "None"},
-                ),
-            },
-        }
-
-    INPUT_IS_LIST = True
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "load_images"
-    CATEGORY = "loaders"
-    EXPERIMENTAL = True
-    DESCRIPTION = "Loads a batch of images from a directory for training."
-
-    @classmethod
-    def VALIDATE_INPUTS(s, images, resize_method):
-        filenames = images[0] if isinstance(images[0], list) else images
-
-        for image in filenames:
-            if not folder_paths.exists_annotated_filepath(image):
-                return "Invalid image file: {}".format(image)
-        return True
-
-    def load_images(self, input_files, resize_method):
-        input_dir = folder_paths.get_input_directory()
-        valid_extensions = [".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif", ".jpe", ".apng", ".tif", ".tiff"]
-        image_files = [
-            f
-            for f in input_files
-            if any(f.lower().endswith(ext) for ext in valid_extensions)
-        ]
-        output_tensor = load_and_process_images(image_files, input_dir, resize_method)
-        return (output_tensor,)
-
-
-class LoadImageSetFromFolderNode:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "folder": (folder_paths.get_input_subfolders(), {"tooltip": "The folder to load images from."})
-            },
-            "optional": {
-                "resize_method": (
-                    ["None", "Stretch", "Crop", "Pad"],
-                    {"default": "None"},
-                ),
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE",)
-    FUNCTION = "load_images"
-    CATEGORY = "loaders"
-    EXPERIMENTAL = True
-    DESCRIPTION = "Loads a batch of images from a directory for training."
-
-    def load_images(self, folder, resize_method):
-        sub_input_dir = os.path.join(folder_paths.get_input_directory(), folder)
-        valid_extensions = [".png", ".jpg", ".jpeg", ".webp"]
-        image_files = [
-            f
-            for f in os.listdir(sub_input_dir)
-            if any(f.lower().endswith(ext) for ext in valid_extensions)
-        ]
-        output_tensor = load_and_process_images(image_files, sub_input_dir, resize_method)
-        return (output_tensor,)
-
-
-class LoadImageTextSetFromFolderNode:
-    @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "folder": (folder_paths.get_input_subfolders(), {"tooltip": "The folder to load images from."}),
-                "clip": (IO.CLIP, {"tooltip": "The CLIP model used for encoding the text."}),
-            },
-            "optional": {
-                "resize_method": (
-                    ["None", "Stretch", "Crop", "Pad"],
-                    {"default": "None"},
-                ),
-                "width": (
-                    IO.INT,
-                    {
-                        "default": -1,
-                        "min": -1,
-                        "max": 10000,
-                        "step": 1,
-                        "tooltip": "The width to resize the images to. -1 means use the original width.",
-                    },
-                ),
-                "height": (
-                    IO.INT,
-                    {
-                        "default": -1,
-                        "min": -1,
-                        "max": 10000,
-                        "step": 1,
-                        "tooltip": "The height to resize the images to. -1 means use the original height.",
-                    },
-                )
-            },
-        }
-
-    RETURN_TYPES = ("IMAGE", IO.CONDITIONING,)
-    FUNCTION = "load_images"
-    CATEGORY = "loaders"
-    EXPERIMENTAL = True
-    DESCRIPTION = "Loads a batch of images and caption from a directory for training."
-
-    def load_images(self, folder, clip, resize_method, width=None, height=None):
-        if clip is None:
-            raise RuntimeError("ERROR: clip input is invalid: None\n\nIf the clip is from a checkpoint loader node your checkpoint does not contain a valid clip or text encoder model.")
-
-        logging.info(f"Loading images from folder: {folder}")
-
-        sub_input_dir = os.path.join(folder_paths.get_input_directory(), folder)
-        valid_extensions = [".png", ".jpg", ".jpeg", ".webp"]
-
-        image_files = []
-        for item in os.listdir(sub_input_dir):
-            path = os.path.join(sub_input_dir, item)
-            if any(item.lower().endswith(ext) for ext in valid_extensions):
-                image_files.append(path)
-            elif os.path.isdir(path):
-                # Support kohya-ss/sd-scripts folder structure
-                repeat = 1
-                if item.split("_")[0].isdigit():
-                    repeat = int(item.split("_")[0])
-                image_files.extend([
-                    os.path.join(path, f) for f in os.listdir(path) if any(f.lower().endswith(ext) for ext in valid_extensions)
-                ] * repeat)
-
-        caption_file_path = [
-            f.replace(os.path.splitext(f)[1], ".txt")
-            for f in image_files
-        ]
-        captions = []
-        for caption_file in caption_file_path:
-            caption_path = os.path.join(sub_input_dir, caption_file)
-            if os.path.exists(caption_path):
-                with open(caption_path, "r", encoding="utf-8") as f:
-                    caption = f.read().strip()
-                    captions.append(caption)
-            else:
-                captions.append("")
-
-        width = width if width != -1 else None
-        height = height if height != -1 else None
-        output_tensor = load_and_process_images(image_files, sub_input_dir, resize_method, width, height)
-
-        logging.info(f"Loaded {len(output_tensor)} images from {sub_input_dir}.")
-
-        logging.info(f"Encoding captions from {sub_input_dir}.")
-        conditions = []
-        empty_cond = clip.encode_from_tokens_scheduled(clip.tokenize(""))
-        for text in captions:
-            if text == "":
-                conditions.append(empty_cond)
-            tokens = clip.tokenize(text)
-            conditions.extend(clip.encode_from_tokens_scheduled(tokens))
-        logging.info(f"Encoded {len(conditions)} captions from {sub_input_dir}.")
-        return (output_tensor, conditions)
-
-
 def draw_loss_graph(loss_map, steps):
     width, height = 500, 300
     img = Image.new("RGB", (width, height), "white")
@@ -379,10 +245,14 @@ def draw_loss_graph(loss_map, steps):
     return img
 
 
-def find_all_highest_child_module_with_forward(model: torch.nn.Module, result = None, name = None):
+def find_all_highest_child_module_with_forward(
+    model: torch.nn.Module, result=None, name=None
+):
     if result is None:
         result = []
-    elif hasattr(model, "forward") and not isinstance(model, (torch.nn.ModuleList, torch.nn.Sequential, torch.nn.ModuleDict)):
+    elif hasattr(model, "forward") and not isinstance(
+        model, (torch.nn.ModuleList, torch.nn.Sequential, torch.nn.ModuleDict)
+    ):
         result.append(model)
         logging.debug(f"Found module with forward: {name} ({model.__class__.__name__})")
         return result
@@ -396,12 +266,13 @@ def patch(m):
     if not hasattr(m, "forward"):
         return
     org_forward = m.forward
+
     def fwd(args, kwargs):
         return org_forward(*args, **kwargs)
+
     def checkpointing_fwd(*args, **kwargs):
-        return torch.utils.checkpoint.checkpoint(
-            fwd, args, kwargs, use_reentrant=False
-        )
+        return torch.utils.checkpoint.checkpoint(fwd, args, kwargs, use_reentrant=False)
+
     m.org_forward = org_forward
     m.forward = checkpointing_fwd
 
@@ -412,130 +283,126 @@ def unpatch(m):
         del m.org_forward
 
 
-class TrainLoraNode:
+class TrainLoraNode(io.ComfyNode):
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": (IO.MODEL, {"tooltip": "The model to train the LoRA on."}),
-                "latents": (
-                    "LATENT",
-                    {
-                        "tooltip": "The Latents to use for training, serve as dataset/input of the model."
-                    },
+    def define_schema(cls):
+        return io.Schema(
+            node_id="TrainLoraNode",
+            display_name="Train LoRA",
+            category="training",
+            is_experimental=True,
+            is_input_list=True,  # All inputs become lists
+            inputs=[
+                io.Model.Input("model", tooltip="The model to train the LoRA on."),
+                io.Latent.Input(
+                    "latents",
+                    tooltip="The Latents to use for training, serve as dataset/input of the model.",
                 ),
-                "positive": (
-                    IO.CONDITIONING,
-                    {"tooltip": "The positive conditioning to use for training."},
+                io.Conditioning.Input(
+                    "positive", tooltip="The positive conditioning to use for training."
                 ),
-                "batch_size": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 10000,
-                        "step": 1,
-                        "tooltip": "The batch size to use for training.",
-                    },
+                io.Int.Input(
+                    "batch_size",
+                    default=1,
+                    min=1,
+                    max=10000,
+                    tooltip="The batch size to use for training.",
                 ),
-                "grad_accumulation_steps": (
-                    IO.INT,
-                    {
-                        "default": 1,
-                        "min": 1,
-                        "max": 1024,
-                        "step": 1,
-                        "tooltip": "The number of gradient accumulation steps to use for training.",
-                    }
+                io.Int.Input(
+                    "grad_accumulation_steps",
+                    default=1,
+                    min=1,
+                    max=1024,
+                    tooltip="The number of gradient accumulation steps to use for training.",
                 ),
-                "steps": (
-                    IO.INT,
-                    {
-                        "default": 16,
-                        "min": 1,
-                        "max": 100000,
-                        "tooltip": "The number of steps to train the LoRA for.",
-                    },
+                io.Int.Input(
+                    "steps",
+                    default=16,
+                    min=1,
+                    max=100000,
+                    tooltip="The number of steps to train the LoRA for.",
                 ),
-                "learning_rate": (
-                    IO.FLOAT,
-                    {
-                        "default": 0.0005,
-                        "min": 0.0000001,
-                        "max": 1.0,
-                        "step": 0.000001,
-                        "tooltip": "The learning rate to use for training.",
-                    },
+                io.Float.Input(
+                    "learning_rate",
+                    default=0.0005,
+                    min=0.0000001,
+                    max=1.0,
+                    step=0.0000001,
+                    tooltip="The learning rate to use for training.",
                 ),
-                "rank": (
-                    IO.INT,
-                    {
-                        "default": 8,
-                        "min": 1,
-                        "max": 128,
-                        "tooltip": "The rank of the LoRA layers.",
-                    },
+                io.Int.Input(
+                    "rank",
+                    default=8,
+                    min=1,
+                    max=128,
+                    tooltip="The rank of the LoRA layers.",
                 ),
-                "optimizer": (
-                    ["AdamW", "Adam", "SGD", "RMSprop"],
-                    {
-                        "default": "AdamW",
-                        "tooltip": "The optimizer to use for training.",
-                    },
+                io.Combo.Input(
+                    "optimizer",
+                    options=["AdamW", "Adam", "SGD", "RMSprop"],
+                    default="AdamW",
+                    tooltip="The optimizer to use for training.",
                 ),
-                "loss_function": (
-                    ["MSE", "L1", "Huber", "SmoothL1"],
-                    {
-                        "default": "MSE",
-                        "tooltip": "The loss function to use for training.",
-                    },
+                io.Combo.Input(
+                    "loss_function",
+                    options=["MSE", "L1", "Huber", "SmoothL1"],
+                    default="MSE",
+                    tooltip="The loss function to use for training.",
                 ),
-                "seed": (
-                    IO.INT,
-                    {
-                        "default": 0,
-                        "min": 0,
-                        "max": 0xFFFFFFFFFFFFFFFF,
-                        "tooltip": "The seed to use for training (used in generator for LoRA weight initialization and noise sampling)",
-                    },
+                io.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    tooltip="The seed to use for training (used in generator for LoRA weight initialization and noise sampling)",
                 ),
-                "training_dtype": (
-                    ["bf16",  "fp32"],
-                    {"default": "bf16", "tooltip": "The dtype to use for training."},
+                io.Combo.Input(
+                    "training_dtype",
+                    options=["bf16", "fp32"],
+                    default="bf16",
+                    tooltip="The dtype to use for training.",
                 ),
-                "lora_dtype": (
-                    ["bf16", "fp32"],
-                    {"default": "bf16", "tooltip": "The dtype to use for lora."},
+                io.Combo.Input(
+                    "lora_dtype",
+                    options=["bf16", "fp32"],
+                    default="bf16",
+                    tooltip="The dtype to use for lora.",
                 ),
-                "algorithm": (
-                    list(adapter_maps.keys()),
-                    {"default": list(adapter_maps.keys())[0], "tooltip": "The algorithm to use for training."},
+                io.Combo.Input(
+                    "algorithm",
+                    options=list(adapter_maps.keys()),
+                    default=list(adapter_maps.keys())[0],
+                    tooltip="The algorithm to use for training.",
                 ),
-                "gradient_checkpointing": (
-                    IO.BOOLEAN,
-                    {
-                        "default": True,
-                        "tooltip": "Use gradient checkpointing for training.",
-                    }
+                io.Boolean.Input(
+                    "gradient_checkpointing",
+                    default=True,
+                    tooltip="Use gradient checkpointing for training.",
                 ),
-                "existing_lora": (
-                    folder_paths.get_filename_list("loras") + ["[None]"],
-                    {
-                        "default": "[None]",
-                        "tooltip": "The existing LoRA to append to. Set to None for new LoRA.",
-                    },
+                io.Combo.Input(
+                    "existing_lora",
+                    options=folder_paths.get_filename_list("loras") + ["[None]"],
+                    default="[None]",
+                    tooltip="The existing LoRA to append to. Set to None for new LoRA.",
                 ),
-            },
-        }
+            ],
+            outputs=[
+                io.Model.Output(
+                    display_name="model", tooltip="Model with LoRA applied"
+                ),
+                io.Custom("LORA_MODEL").Output(
+                    display_name="lora", tooltip="LoRA weights"
+                ),
+                io.Custom("LOSS_MAP").Output(
+                    display_name="loss_map", tooltip="Loss history"
+                ),
+                io.Int.Output(display_name="steps", tooltip="Total training steps"),
+            ],
+        )
 
-    RETURN_TYPES = (IO.MODEL, IO.LORA_MODEL, IO.LOSS_MAP, IO.INT)
-    RETURN_NAMES = ("model_with_lora", "lora", "loss", "steps")
-    FUNCTION = "train"
-    CATEGORY = "training"
-    EXPERIMENTAL = True
-
-    def train(
-        self,
+    @classmethod
+    def execute(
+        cls,
         model,
         latents,
         positive,
@@ -553,13 +420,74 @@ class TrainLoraNode:
         gradient_checkpointing,
         existing_lora,
     ):
+        # Extract scalars from lists (due to is_input_list=True)
+        model = model[0]
+        batch_size = batch_size[0]
+        steps = steps[0]
+        grad_accumulation_steps = grad_accumulation_steps[0]
+        learning_rate = learning_rate[0]
+        rank = rank[0]
+        optimizer = optimizer[0]
+        loss_function = loss_function[0]
+        seed = seed[0]
+        training_dtype = training_dtype[0]
+        lora_dtype = lora_dtype[0]
+        algorithm = algorithm[0]
+        gradient_checkpointing = gradient_checkpointing[0]
+        existing_lora = existing_lora[0]
+
+        # Handle latents - either single dict or list of dicts
+        if len(latents) == 1:
+            latents = latents[0]["samples"]  # Single latent dict
+        else:
+            latent_list = []
+            for latent in latents:
+                latent = latent["samples"]
+                bs = latent.shape[0]
+                if bs != 1:
+                    for sub_latent in latent:
+                        latent_list.append(sub_latent[None])
+                else:
+                    latent_list.append(latent)
+            latents = latent_list
+
+        # Handle conditioning - either single list or list of lists
+        if len(positive) == 1:
+            positive = positive[0]  # Single conditioning list
+        else:
+            # Multiple conditioning lists - flatten
+            flat_positive = []
+            for cond in positive:
+                if isinstance(cond, list):
+                    flat_positive.extend(cond)
+                else:
+                    flat_positive.append(cond)
+            positive = flat_positive
+
         mp = model.clone()
         dtype = node_helpers.string_to_torch_dtype(training_dtype)
         lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
         mp.set_model_compute_dtype(dtype)
 
-        latents = latents["samples"].to(dtype)
-        num_images = latents.shape[0]
+        # latents here can be list of different size latent or one large batch
+        if isinstance(latents, list):
+            all_shapes = set()
+            latents = [t.to(dtype) for t in latents]
+            for latent in latents:
+                all_shapes.add(latent.shape)
+            logging.info(f"Latent shapes: {all_shapes}")
+            if len(all_shapes) > 1:
+                multi_res = True
+            else:
+                multi_res = False
+                latents = torch.cat(latents, dim=0)
+            num_images = len(latents)
+        elif isinstance(latents, torch.Tensor):
+            latents = latents.to(dtype)
+            num_images = latents.shape[0]
+        else:
+            logging.error(f"Invalid latents type: {type(latents)}")
+
         logging.info(f"Total Images: {num_images}, Total Captions: {len(positive)}")
         if len(positive) == 1 and num_images > 1:
             positive = positive * num_images
@@ -591,9 +519,7 @@ class TrainLoraNode:
                         shape = m.weight.shape
                         if len(shape) >= 2:
                             alpha = float(existing_weights.get(f"{key}.alpha", 1.0))
-                            dora_scale = existing_weights.get(
-                                f"{key}.dora_scale", None
-                            )
+                            dora_scale = existing_weights.get(f"{key}.dora_scale", None)
                             for adapter_cls in adapters:
                                 existing_adapter = adapter_cls.load(
                                     n, existing_weights, alpha, dora_scale
@@ -605,7 +531,9 @@ class TrainLoraNode:
                                 adapter_cls = adapter_maps[algorithm]
 
                             if existing_adapter is not None:
-                                train_adapter = existing_adapter.to_train().to(lora_dtype)
+                                train_adapter = existing_adapter.to_train().to(
+                                    lora_dtype
+                                )
                             else:
                                 # Use LoRA with alpha=1.0 by default
                                 train_adapter = adapter_cls.create_train(
@@ -629,7 +557,9 @@ class TrainLoraNode:
                     if hasattr(m, "bias") and m.bias is not None:
                         key = "{}.bias".format(n)
                         bias = torch.nn.Parameter(
-                            torch.zeros(m.bias.shape, dtype=lora_dtype, requires_grad=True)
+                            torch.zeros(
+                                m.bias.shape, dtype=lora_dtype, requires_grad=True
+                            )
                         )
                         bias_module = BiasDiff(bias)
                         lora_sd["{}.diff_b".format(n)] = bias
@@ -657,24 +587,31 @@ class TrainLoraNode:
 
             # setup models
             if gradient_checkpointing:
-                for m in find_all_highest_child_module_with_forward(mp.model.diffusion_model):
+                for m in find_all_highest_child_module_with_forward(
+                    mp.model.diffusion_model
+                ):
                     patch(m)
             mp.model.requires_grad_(False)
-            comfy.model_management.load_models_gpu([mp], memory_required=1e20, force_full_load=True)
+            comfy.model_management.load_models_gpu(
+                [mp], memory_required=1e20, force_full_load=True
+            )
 
             # Setup sampler and guider like in test script
             loss_map = {"loss": []}
+
             def loss_callback(loss):
                 loss_map["loss"].append(loss)
+
             train_sampler = TrainSampler(
                 criterion,
                 optimizer,
                 loss_callback=loss_callback,
                 batch_size=batch_size,
                 grad_acc=grad_accumulation_steps,
-                total_steps=steps*grad_accumulation_steps,
+                total_steps=steps * grad_accumulation_steps,
                 seed=seed,
-                training_dtype=dtype
+                training_dtype=dtype,
+                real_dataset=latents if multi_res else None,
             )
             guider = comfy_extras.nodes_custom_sampler.Guider_Basic(mp)
             guider.set_conds(positive)  # Set conditioning from input
@@ -684,12 +621,15 @@ class TrainLoraNode:
                 # Generate dummy sigmas and noise
                 sigmas = torch.tensor(range(num_images))
                 noise = comfy_extras.nodes_custom_sampler.Noise_RandomNoise(seed)
+                if multi_res:
+                    # use first latent as dummy latent if multi_res
+                    latents = latents[0].repeat(num_images, 1, 1, 1)
                 guider.sample(
                     noise.generate_noise({"samples": latents}),
                     latents,
                     train_sampler,
                     sigmas,
-                    seed=noise.seed
+                    seed=noise.seed,
                 )
             finally:
                 for m in mp.model.modules():
@@ -702,111 +642,118 @@ class TrainLoraNode:
             for param in lora_sd:
                 lora_sd[param] = lora_sd[param].to(lora_dtype)
 
-            return (mp, lora_sd, loss_map, steps + existing_steps)
+            return io.NodeOutput(mp, lora_sd, loss_map, steps + existing_steps)
 
 
-class LoraModelLoader:
-    def __init__(self):
-        self.loaded_lora = None
+class LoraModelLoader(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LoraModelLoader",
+            display_name="Load LoRA Model",
+            category="loaders",
+            is_experimental=True,
+            inputs=[
+                io.Model.Input(
+                    "model", tooltip="The diffusion model the LoRA will be applied to."
+                ),
+                io.Custom("LORA_MODEL").Input(
+                    "lora", tooltip="The LoRA model to apply to the diffusion model."
+                ),
+                io.Float.Input(
+                    "strength_model",
+                    default=1.0,
+                    min=-100.0,
+                    max=100.0,
+                    tooltip="How strongly to modify the diffusion model. This value can be negative.",
+                ),
+            ],
+            outputs=[
+                io.Model.Output(
+                    display_name="model", tooltip="The modified diffusion model."
+                ),
+            ],
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "model": ("MODEL", {"tooltip": "The diffusion model the LoRA will be applied to."}),
-                "lora": (IO.LORA_MODEL, {"tooltip": "The LoRA model to apply to the diffusion model."}),
-                "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01, "tooltip": "How strongly to modify the diffusion model. This value can be negative."}),
-            }
-        }
-
-    RETURN_TYPES = ("MODEL",)
-    OUTPUT_TOOLTIPS = ("The modified diffusion model.",)
-    FUNCTION = "load_lora_model"
-
-    CATEGORY = "loaders"
-    DESCRIPTION = "Load Trained LoRA weights from Train LoRA node."
-    EXPERIMENTAL = True
-
-    def load_lora_model(self, model, lora, strength_model):
+    def execute(cls, model, lora, strength_model):
         if strength_model == 0:
-            return (model, )
+            return io.NodeOutput(model)
 
-        model_lora, _ = comfy.sd.load_lora_for_models(model, None, lora, strength_model, 0)
-        return (model_lora, )
+        model_lora, _ = comfy.sd.load_lora_for_models(
+            model, None, lora, strength_model, 0
+        )
+        return io.NodeOutput(model_lora)
 
 
-class SaveLoRA:
-    def __init__(self):
-        self.output_dir = folder_paths.get_output_directory()
+class SaveLoRA(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="SaveLoRA",
+            display_name="Save LoRA Weights",
+            category="loaders",
+            is_experimental=True,
+            is_output_node=True,
+            inputs=[
+                io.Custom("LORA_MODEL").Input(
+                    "lora",
+                    tooltip="The LoRA model to save. Do not use the model with LoRA layers.",
+                ),
+                io.String.Input(
+                    "prefix",
+                    default="loras/ComfyUI_trained_lora",
+                    tooltip="The prefix to use for the saved LoRA file.",
+                ),
+                io.Int.Input(
+                    "steps",
+                    optional=True,
+                    tooltip="Optional: The number of steps to LoRA has been trained for, used to name the saved file.",
+                ),
+            ],
+            outputs=[],
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "lora": (
-                    IO.LORA_MODEL,
-                    {
-                        "tooltip": "The LoRA model to save. Do not use the model with LoRA layers."
-                    },
-                ),
-                "prefix": (
-                    "STRING",
-                    {
-                        "default": "loras/ComfyUI_trained_lora",
-                        "tooltip": "The prefix to use for the saved LoRA file.",
-                    },
-                ),
-            },
-            "optional": {
-                "steps": (
-                    IO.INT,
-                    {
-                        "forceInput": True,
-                        "tooltip": "Optional: The number of steps to LoRA has been trained for, used to name the saved file.",
-                    },
-                ),
-            },
-        }
-
-    RETURN_TYPES = ()
-    FUNCTION = "save"
-    CATEGORY = "loaders"
-    EXPERIMENTAL = True
-    OUTPUT_NODE = True
-
-    def save(self, lora, prefix, steps=None):
-        full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(prefix, self.output_dir)
+    def execute(cls, lora, prefix, steps=None):
+        output_dir = folder_paths.get_output_directory()
+        full_output_folder, filename, counter, subfolder, filename_prefix = (
+            folder_paths.get_save_image_path(prefix, output_dir)
+        )
         if steps is None:
             output_checkpoint = f"{filename}_{counter:05}_.safetensors"
         else:
             output_checkpoint = f"{filename}_{steps}_steps_{counter:05}_.safetensors"
         output_checkpoint = os.path.join(full_output_folder, output_checkpoint)
         safetensors.torch.save_file(lora, output_checkpoint)
-        return {}
+        return io.NodeOutput()
 
 
-class LossGraphNode:
-    def __init__(self):
-        self.output_dir = folder_paths.get_temp_directory()
+class LossGraphNode(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="LossGraphNode",
+            display_name="Plot Loss Graph",
+            category="training",
+            is_experimental=True,
+            is_output_node=True,
+            inputs=[
+                io.Custom("LOSS_MAP").Input(
+                    "loss", tooltip="Loss map from training node."
+                ),
+                io.String.Input(
+                    "filename_prefix",
+                    default="loss_graph",
+                    tooltip="Prefix for the saved loss graph image.",
+                ),
+            ],
+            outputs=[],
+            hidden=[io.Hidden.prompt, io.Hidden.extra_pnginfo],
+        )
 
     @classmethod
-    def INPUT_TYPES(s):
-        return {
-            "required": {
-                "loss": (IO.LOSS_MAP, {"default": {}}),
-                "filename_prefix": (IO.STRING, {"default": "loss_graph"}),
-            },
-            "hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
-        }
-
-    RETURN_TYPES = ()
-    FUNCTION = "plot_loss"
-    OUTPUT_NODE = True
-    CATEGORY = "training"
-    EXPERIMENTAL = True
-    DESCRIPTION = "Plots the loss graph and saves it to the output directory."
-
-    def plot_loss(self, loss, filename_prefix, prompt=None, extra_pnginfo=None):
+    def execute(cls, loss, filename_prefix, prompt=None, extra_pnginfo=None):
         loss_values = loss["loss"]
         width, height = 800, 480
         margin = 40
@@ -849,47 +796,27 @@ class LossGraphNode:
             (margin - 30, height - 10), f"{min_loss:.2f}", font=font, fill="black"
         )
 
-        metadata = None
-        if not args.disable_metadata:
-            metadata = PngInfo()
-            if prompt is not None:
-                metadata.add_text("prompt", json.dumps(prompt))
-            if extra_pnginfo is not None:
-                for x in extra_pnginfo:
-                    metadata.add_text(x, json.dumps(extra_pnginfo[x]))
+        # Convert PIL image to tensor for PreviewImage
+        img_array = np.array(img).astype(np.float32) / 255.0
+        img_tensor = torch.from_numpy(img_array)[None,]  # [1, H, W, 3]
 
-        date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        img.save(
-            os.path.join(self.output_dir, f"{filename_prefix}_{date}.png"),
-            pnginfo=metadata,
-        )
-        return {
-            "ui": {
-                "images": [
-                    {
-                        "filename": f"{filename_prefix}_{date}.png",
-                        "subfolder": "",
-                        "type": "temp",
-                    }
-                ]
-            }
-        }
+        # Return preview UI
+        return io.NodeOutput(ui=ui.PreviewImage(img_tensor, cls=cls))
 
 
-NODE_CLASS_MAPPINGS = {
-    "TrainLoraNode": TrainLoraNode,
-    "SaveLoRANode": SaveLoRA,
-    "LoraModelLoader": LoraModelLoader,
-    "LoadImageSetFromFolderNode": LoadImageSetFromFolderNode,
-    "LoadImageTextSetFromFolderNode": LoadImageTextSetFromFolderNode,
-    "LossGraphNode": LossGraphNode,
-}
+# ========== Extension Setup ==========
 
-NODE_DISPLAY_NAME_MAPPINGS = {
-    "TrainLoraNode": "Train LoRA",
-    "SaveLoRANode": "Save LoRA Weights",
-    "LoraModelLoader": "Load LoRA Model",
-    "LoadImageSetFromFolderNode": "Load Image Dataset from Folder",
-    "LoadImageTextSetFromFolderNode": "Load Image and Text Dataset from Folder",
-    "LossGraphNode": "Plot Loss Graph",
-}
+
+class TrainingExtension(ComfyExtension):
+    @override
+    async def get_node_list(self) -> list[type[io.ComfyNode]]:
+        return [
+            TrainLoraNode,
+            LoraModelLoader,
+            SaveLoRA,
+            LossGraphNode,
+        ]
+
+
+async def comfy_entrypoint() -> TrainingExtension:
+    return TrainingExtension()
diff --git a/nodes.py b/nodes.py
index f4835c02e..bf73eb90e 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2278,6 +2278,7 @@ async def init_builtin_extra_nodes():
         "nodes_images.py",
         "nodes_video_model.py",
         "nodes_train.py",
+        "nodes_dataset.py",
         "nodes_sag.py",
         "nodes_perpneg.py",
         "nodes_stable3d.py",

From eaf68c9b5bbfbcdac8988741f3948678c9465c1d Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Wed, 26 Nov 2025 16:25:32 -0800
Subject: [PATCH 24/24] Make lora training work on Z Image and remove some
 redundant nodes. (#10927)

---
 comfy/ldm/lumina/model.py     |   4 +-
 comfy_extras/nodes_dataset.py | 102 +---------------------------------
 2 files changed, 3 insertions(+), 103 deletions(-)

diff --git a/comfy/ldm/lumina/model.py b/comfy/ldm/lumina/model.py
index c8643eb82..565400b54 100644
--- a/comfy/ldm/lumina/model.py
+++ b/comfy/ldm/lumina/model.py
@@ -509,7 +509,7 @@ class NextDiT(nn.Module):
 
         if self.pad_tokens_multiple is not None:
             pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple
-            cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)
+            cap_feats = torch.cat((cap_feats, self.cap_pad_token.to(device=cap_feats.device, dtype=cap_feats.dtype, copy=True).unsqueeze(0).repeat(cap_feats.shape[0], pad_extra, 1)), dim=1)
 
         cap_pos_ids = torch.zeros(bsz, cap_feats.shape[1], 3, dtype=torch.float32, device=device)
         cap_pos_ids[:, :, 0] = torch.arange(cap_feats.shape[1], dtype=torch.float32, device=device) + 1.0
@@ -525,7 +525,7 @@ class NextDiT(nn.Module):
 
         if self.pad_tokens_multiple is not None:
             pad_extra = (-x.shape[1]) % self.pad_tokens_multiple
-            x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
+            x = torch.cat((x, self.x_pad_token.to(device=x.device, dtype=x.dtype, copy=True).unsqueeze(0).repeat(x.shape[0], pad_extra, 1)), dim=1)
             x_pos_ids = torch.nn.functional.pad(x_pos_ids, (0, 0, 0, pad_extra))
 
         freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)
diff --git a/comfy_extras/nodes_dataset.py b/comfy_extras/nodes_dataset.py
index b23867505..4789d7d53 100644
--- a/comfy_extras/nodes_dataset.py
+++ b/comfy_extras/nodes_dataset.py
@@ -1,6 +1,5 @@
 import logging
 import os
-import math
 import json
 
 import numpy as np
@@ -624,79 +623,6 @@ class TextProcessingNode(io.ComfyNode):
 # ========== Image Transform Nodes ==========
 
 
-class ResizeImagesToSameSizeNode(ImageProcessingNode):
-    node_id = "ResizeImagesToSameSize"
-    display_name = "Resize Images to Same Size"
-    description = "Resize all images to the same width and height."
-    extra_inputs = [
-        io.Int.Input("width", default=512, min=1, max=8192, tooltip="Target width."),
-        io.Int.Input("height", default=512, min=1, max=8192, tooltip="Target height."),
-        io.Combo.Input(
-            "mode",
-            options=["stretch", "crop_center", "pad"],
-            default="stretch",
-            tooltip="Resize mode.",
-        ),
-    ]
-
-    @classmethod
-    def _process(cls, image, width, height, mode):
-        img = tensor_to_pil(image)
-
-        if mode == "stretch":
-            img = img.resize((width, height), Image.Resampling.LANCZOS)
-        elif mode == "crop_center":
-            left = max(0, (img.width - width) // 2)
-            top = max(0, (img.height - height) // 2)
-            right = min(img.width, left + width)
-            bottom = min(img.height, top + height)
-            img = img.crop((left, top, right, bottom))
-            if img.width != width or img.height != height:
-                img = img.resize((width, height), Image.Resampling.LANCZOS)
-        elif mode == "pad":
-            img.thumbnail((width, height), Image.Resampling.LANCZOS)
-            new_img = Image.new("RGB", (width, height), (0, 0, 0))
-            paste_x = (width - img.width) // 2
-            paste_y = (height - img.height) // 2
-            new_img.paste(img, (paste_x, paste_y))
-            img = new_img
-
-        return pil_to_tensor(img)
-
-
-class ResizeImagesToPixelCountNode(ImageProcessingNode):
-    node_id = "ResizeImagesToPixelCount"
-    display_name = "Resize Images to Pixel Count"
-    description = "Resize images so that the total pixel count matches the specified number while preserving aspect ratio."
-    extra_inputs = [
-        io.Int.Input(
-            "pixel_count",
-            default=512 * 512,
-            min=1,
-            max=8192 * 8192,
-            tooltip="Target pixel count.",
-        ),
-        io.Int.Input(
-            "steps",
-            default=64,
-            min=1,
-            max=128,
-            tooltip="The stepping for resize width/height.",
-        ),
-    ]
-
-    @classmethod
-    def _process(cls, image, pixel_count, steps):
-        img = tensor_to_pil(image)
-        w, h = img.size
-        pixel_count_ratio = math.sqrt(pixel_count / (w * h))
-        new_w = int(w * pixel_count_ratio / steps) * steps
-        new_h = int(h * pixel_count_ratio / steps) * steps
-        logging.info(f"Resizing from {w}x{h} to {new_w}x{new_h}")
-        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-        return pil_to_tensor(img)
-
-
 class ResizeImagesByShorterEdgeNode(ImageProcessingNode):
     node_id = "ResizeImagesByShorterEdge"
     display_name = "Resize Images by Shorter Edge"
@@ -801,29 +727,6 @@ class RandomCropImagesNode(ImageProcessingNode):
         return pil_to_tensor(img)
 
 
-class FlipImagesNode(ImageProcessingNode):
-    node_id = "FlipImages"
-    display_name = "Flip Images"
-    description = "Flip all images horizontally or vertically."
-    extra_inputs = [
-        io.Combo.Input(
-            "direction",
-            options=["horizontal", "vertical"],
-            default="horizontal",
-            tooltip="Flip direction.",
-        ),
-    ]
-
-    @classmethod
-    def _process(cls, image, direction):
-        img = tensor_to_pil(image)
-        if direction == "horizontal":
-            img = img.transpose(Image.FLIP_LEFT_RIGHT)
-        else:
-            img = img.transpose(Image.FLIP_TOP_BOTTOM)
-        return pil_to_tensor(img)
-
-
 class NormalizeImagesNode(ImageProcessingNode):
     node_id = "NormalizeImages"
     display_name = "Normalize Images"
@@ -1470,7 +1373,7 @@ class LoadTrainingDataset(io.ComfyNode):
             shard_path = os.path.join(dataset_dir, shard_file)
 
             with open(shard_path, "rb") as f:
-                shard_data = torch.load(f)
+                shard_data = torch.load(f, weights_only=True)
 
             all_latents.extend(shard_data["latents"])
             all_conditioning.extend(shard_data["conditioning"])
@@ -1496,13 +1399,10 @@ class DatasetExtension(ComfyExtension):
             SaveImageDataSetToFolderNode,
             SaveImageTextDataSetToFolderNode,
             # Image transform nodes
-            ResizeImagesToSameSizeNode,
-            ResizeImagesToPixelCountNode,
             ResizeImagesByShorterEdgeNode,
             ResizeImagesByLongerEdgeNode,
             CenterCropImagesNode,
             RandomCropImagesNode,
-            FlipImagesNode,
             NormalizeImagesNode,
             AdjustBrightnessNode,
             AdjustContrastNode,