Merge branch 'Comfy-Org:master' into master

2026-06-26 01:39:25 +08:00 · 2026-06-05 12:57:52 +02:00 · 2026-06-05 12:57:52 +02:00 · 674c12a72c
commit 674c12a72c
parent 83f1dfed27 5aa71b9bc2
25 changed files with 6150 additions and 11754 deletions
--- a/comfy/ldm/ideogram4/model.py
+++ b/comfy/ldm/ideogram4/model.py
@ -0,0 +1,297 @@
 """
 The Ideogram 4 transformer is a NextDiT/Lumina2-family single-stream model. It
 consumes Qwen3-VL hidden-state features (concatenated from 13 layers -> 53248 dims) and
 packs ``[text tokens, image tokens]`` into one sequence with block-diagonal segment attention and 3D interleaved MRoPE.
 """
 from __future__ import annotations
 import math
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import comfy.patcher_extension
 from comfy.ldm.lumina.model import FeedForward
 from comfy.ldm.modules.attention import optimized_attention_masked
 from comfy.text_encoders.llama import apply_rope, precompute_freqs_cis
 # Per-token role indicators
 # Segment id written onto padded text positions when the attention mask is built.
 SEQUENCE_PADDING_INDICATOR = -1
 # Role code of image (latent patch) tokens; only these rows keep their latent input.
 OUTPUT_IMAGE_INDICATOR = 2
 # Role code of text tokens that carry LLM hidden-state features.
 LLM_TOKEN_INDICATOR = 3
 # Image grid coordinates are offset so they never collide with text positions
 # (text positions count up from 0 along the prompt).
 IMAGE_POSITION_OFFSET = 65536
 class Ideogram4Attention(nn.Module):
    def __init__(self, hidden_size, num_heads, eps=1e-5, dtype=None, device=None, operations=None):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        self.hidden_size = hidden_size
        self.qkv = operations.Linear(hidden_size, hidden_size * 3, bias=False, dtype=dtype, device=device)
        self.norm_q = operations.RMSNorm(self.head_dim, eps=eps, elementwise_affine=True, dtype=dtype, device=device)
        self.norm_k = operations.RMSNorm(self.head_dim, eps=eps, elementwise_affine=True, dtype=dtype, device=device)
        self.o = operations.Linear(hidden_size, hidden_size, bias=False, dtype=dtype, device=device)
    def forward(self, x, attn_mask, freqs_cis, transformer_options={}):
        batch_size, seq_len, _ = x.shape
        qkv = self.qkv(x).view(batch_size, seq_len, 3, self.num_heads, self.head_dim)
        q, k, v = qkv.unbind(dim=2)
        q = self.norm_q(q)
        k = self.norm_k(k)
        # (B, heads, L, head_dim)
        q = q.transpose(1, 2)
        k = k.transpose(1, 2)
        v = v.transpose(1, 2)
        q, k = apply_rope(q, k, freqs_cis)
        out = optimized_attention_masked(q, k, v, self.num_heads, attn_mask, skip_reshape=True, transformer_options=transformer_options)
        return self.o(out)
 class Ideogram4TransformerBlock(nn.Module):
    """Transformer block with RMSNorm before and after each sub-layer.

    A linear map of the conditioning vector produces a scale and a tanh gate for
    the attention branch and for the feed-forward branch.
    """

    def __init__(self, hidden_size, intermediate_size, num_heads, norm_eps, adaln_dim, dtype=None, device=None, operations=None):
        super().__init__()
        self.attention = Ideogram4Attention(hidden_size, num_heads, eps=1e-5, dtype=dtype, device=device, operations=operations)
        ffn_settings = {"operations": operations, "dtype": dtype, "device": device}
        self.feed_forward = FeedForward(
            dim=hidden_size,
            hidden_dim=intermediate_size,
            multiple_of=1,
            ffn_dim_multiplier=None,
            operation_settings=ffn_settings,
        )

        def make_norm():
            return operations.RMSNorm(hidden_size, eps=norm_eps, elementwise_affine=True, dtype=dtype, device=device)

        # Created in the order attention_norm1, ffn_norm1, attention_norm2, ffn_norm2.
        self.attention_norm1 = make_norm()
        self.ffn_norm1 = make_norm()
        self.attention_norm2 = make_norm()
        self.ffn_norm2 = make_norm()
        self.adaln_modulation = operations.Linear(adaln_dim, 4 * hidden_size, bias=True, dtype=dtype, device=device)

    def forward(self, x, attn_mask, freqs_cis, adaln_input, transformer_options={}):
        # Chunk order: attention scale, attention gate, feed-forward scale, feed-forward gate.
        scale_attn, gate_attn, scale_ffn, gate_ffn = self.adaln_modulation(adaln_input).chunk(4, dim=-1)
        gate_attn = torch.tanh(gate_attn)
        gate_ffn = torch.tanh(gate_ffn)

        attn_in = self.attention_norm1(x) * (1.0 + scale_attn)
        attn_out = self.attention(attn_in, attn_mask, freqs_cis, transformer_options=transformer_options)
        x = x + gate_attn * self.attention_norm2(attn_out)

        ffn_in = self.ffn_norm1(x) * (1.0 + scale_ffn)
        ffn_out = self.feed_forward(ffn_in)
        return x + gate_ffn * self.ffn_norm2(ffn_out)
 def _sinusoidal_embedding(t, dim, scale=1e4):
    """Return sinusoidal features of ``t`` with a trailing axis of size ``dim``.

    The first ``dim // 2`` channels are sines and the next ``dim // 2`` are
    cosines, at frequencies spaced geometrically from 1 down to ``1 / scale``.
    An odd ``dim`` gets one trailing zero channel.

    Args:
        t: tensor of scalar values, any shape; computed in float32.
        dim: size of the output feature axis.
        scale: ratio between the highest and the lowest frequency.

    Returns:
        A float32 tensor of shape ``t.shape + (dim,)``.
    """
    t = t.to(torch.float32)
    half = dim // 2
    # With a single frequency pair (dim of 2 or 3) there is no spacing to compute;
    # clamping the divisor avoids a ZeroDivisionError and keeps that frequency at 1.
    step = math.log(scale) / max(half - 1, 1)
    freq = torch.exp(torch.arange(half, dtype=torch.float32, device=t.device) * -step)
    emb = t.unsqueeze(-1) * freq
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1)
    if dim % 2 == 1:
        emb = F.pad(emb, (0, 1))
    return emb
 class Ideogram4EmbedScalar(nn.Module):
    """Embed a bounded scalar: sinusoidal features followed by a two-layer SiLU MLP."""

    def __init__(self, dim, input_range=(0.0, 1.0), dtype=None, device=None, operations=None):
        super().__init__()
        low, high = input_range
        self.dim = dim
        self.range_min = low
        self.range_max = high
        self.mlp_in = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)
        self.mlp_out = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)

    def forward(self, x):
        span = self.range_max - self.range_min
        # Map [range_min, range_max] onto [0, 1e4] before taking sinusoidal features.
        position = 1e4 * (x.to(torch.float32) - self.range_min) / span
        features = _sinusoidal_embedding(position, self.dim)
        # The features are float32; cast to the MLP weight dtype before projecting.
        hidden = self.mlp_in(features.to(self.mlp_in.weight.dtype))
        return self.mlp_out(F.silu(hidden))
 class Ideogram4FinalLayer(nn.Module):
    """Output head: affine-free LayerNorm, conditioning-driven scale, then a linear projection."""

    def __init__(self, hidden_size, out_channels, adaln_dim, dtype=None, device=None, operations=None):
        super().__init__()
        factory = dict(dtype=dtype, device=device)
        self.norm_final = operations.LayerNorm(hidden_size, eps=1e-6, elementwise_affine=False, **factory)
        self.linear = operations.Linear(hidden_size, out_channels, bias=True, **factory)
        self.adaln_modulation = operations.Linear(adaln_dim, hidden_size, bias=True, **factory)

    def forward(self, x, c):
        # The scale is residual around 1, so a zero modulation leaves the normed input unchanged.
        modulation = self.adaln_modulation(F.silu(c))
        normed = self.norm_final(x)
        return self.linear(normed * (1.0 + modulation))
 class Ideogram4Transformer(nn.Module):
    """A single Ideogram 4 backbone operating on a packed token sequence.

    Holds the input projections, the timestep embedding, the stack of
    transformer blocks and the output head; ``_backbone`` runs them in order.
    """
    def __init__(self, emb_dim, num_layers, num_heads, intermediate_size, adaln_dim,
                 in_channels, llm_features_dim, rope_theta, mrope_section, norm_eps,
                 dtype=None, device=None, operations=None):
        """Create the sub-modules.

        Args:
            emb_dim: width of the token stream.
            num_layers: number of transformer blocks.
            num_heads: attention heads per block; ``emb_dim // num_heads`` is the head size.
            intermediate_size: hidden width passed to each block's feed-forward.
            adaln_dim: width of the conditioning vector derived from the timestep.
            in_channels: size of one latent token; also the output size of the head.
            llm_features_dim: size of one text-feature token.
            rope_theta: base passed to the rotary frequency computation.
            mrope_section: per-axis rotary split, stored as a tuple.
            norm_eps: epsilon for the block-level RMSNorm layers.
            dtype, device, operations: forwarded to every layer constructor.
        """
        super().__init__()
        self.head_dim = emb_dim // num_heads
        self.rope_theta = rope_theta
        self.mrope_section = tuple(mrope_section)
        self.input_proj = operations.Linear(in_channels, emb_dim, bias=True, dtype=dtype, device=device)
        self.llm_cond_norm = operations.RMSNorm(llm_features_dim, eps=1e-6, elementwise_affine=True, dtype=dtype, device=device)
        self.llm_cond_proj = operations.Linear(llm_features_dim, emb_dim, bias=True, dtype=dtype, device=device)
        self.t_embedding = Ideogram4EmbedScalar(emb_dim, input_range=(0.0, 1.0), dtype=dtype, device=device, operations=operations)
        self.adaln_proj = operations.Linear(emb_dim, adaln_dim, bias=True, dtype=dtype, device=device)
        # Two rows: index 0 for non-image tokens, index 1 for output-image tokens.
        self.embed_image_indicator = operations.Embedding(2, emb_dim, dtype=dtype, device=device)
        self.layers = nn.ModuleList([
            Ideogram4TransformerBlock(emb_dim, intermediate_size, num_heads, norm_eps, adaln_dim,
                                      dtype=dtype, device=device, operations=operations)
            for _ in range(num_layers)
        ])
        self.final_layer = Ideogram4FinalLayer(emb_dim, in_channels, adaln_dim, dtype=dtype, device=device, operations=operations)
    def _backbone(self, llm_features, x, t, position_ids, attn_mask, indicator, transformer_options={}):
        """Run the packed sequence through every block and the output head.

        Args:
            llm_features: text features for the leading positions of the sequence,
                or ``None`` for an image-only sequence.
            x: per-token latent input; rows that are not output-image tokens are zeroed.
            t: timestep value(s) fed to the timestep embedding.
            position_ids: ``(B, L, 3)`` rotary positions; only batch entry 0 is read.
            attn_mask: optional attention mask; a boolean mask (True = attend) is
                converted to an additive mask in the dtype of the token stream.
            indicator: per-token role codes, compared against the module-level indicators.
            transformer_options: forwarded unchanged to every block.

        Returns:
            The output of ``final_layer`` for every position of the sequence.
        """
        indicator = indicator.to(torch.long)
        # 1 on output-image rows, 0 elsewhere; applied before and after the projection
        # so the projection bias does not leak onto non-image rows.
        output_image_mask = (indicator == OUTPUT_IMAGE_INDICATOR).to(x.dtype).unsqueeze(-1)
        x = x * output_image_mask
        h = self.input_proj(x) * output_image_mask
        t_cond = self.t_embedding(t)
        # A 1-D ``t`` yields a 2-D embedding; add a token axis so it broadcasts over the sequence.
        if t.dim() == 1:
            t_cond = t_cond.unsqueeze(1)
        adaln_input = F.silu(self.adaln_proj(t_cond))
        # h is zero on the text rows (content lives only on image rows), add writes the text features in place
        if llm_features is not None:
            L_text = llm_features.shape[1]
            # Rows whose indicator is not the LLM-token code are zeroed before and after the projection.
            text_mask = (indicator[:, :L_text] == LLM_TOKEN_INDICATOR).to(x.dtype).unsqueeze(-1)
            llm = self.llm_cond_norm(llm_features * text_mask)
            llm = self.llm_cond_proj(llm) * text_mask
            h[:, :L_text] = h[:, :L_text] + llm
        # Every row receives one of the two indicator embeddings (image vs. not image).
        h = h + self.embed_image_indicator((indicator == OUTPUT_IMAGE_INDICATOR).to(torch.long), out_dtype=h.dtype)
        # Qwen3-VL interleaved MRoPE; position_ids (B, L, 3) -> (3, L) (same across batch).
        freqs_cis = precompute_freqs_cis(
            self.head_dim, position_ids[0].transpose(0, 1), self.rope_theta,
            rope_dims=self.mrope_section, interleaved_mrope=True, device=position_ids.device,
        )
        # Boolean mask -> additive mask: 0 where attention is allowed, the most negative
        # finite value of h's dtype where it is blocked.
        if attn_mask is not None and attn_mask.dtype == torch.bool:
            attn_mask = torch.zeros_like(attn_mask, dtype=h.dtype).masked_fill_(~attn_mask, -torch.finfo(h.dtype).max)
        for layer in self.layers:
            h = layer(h, attn_mask, freqs_cis, adaln_input, transformer_options=transformer_options)
        return self.final_layer(h, adaln_input)
 class Ideogram4Transformer2DModel(Ideogram4Transformer):
    """Ideogram 4 single-stream DiT.
    Runs a packed ``[text, image]`` sequence when text context is supplied, or an image-only sequence when ``context is None``.
    """
    def __init__(self, image_model=None, in_channels=128, num_layers=34, num_attention_heads=18, attention_head_dim=256, intermediate_size=12288,
                 adaln_dim=512, llm_features_dim=53248, rope_theta=5000000, mrope_section=(24, 20, 20), norm_eps=1e-5,
                 dtype=None, device=None, operations=None, **kwargs):
        """Configure the backbone; ``image_model`` and extra ``kwargs`` are accepted but not used here."""
        emb_dim = num_attention_heads * attention_head_dim
        super().__init__(
            emb_dim=emb_dim, num_layers=num_layers, num_heads=num_attention_heads,
            intermediate_size=intermediate_size, adaln_dim=adaln_dim, in_channels=in_channels,
            llm_features_dim=llm_features_dim, rope_theta=rope_theta, mrope_section=mrope_section,
            norm_eps=norm_eps, dtype=dtype, device=device, operations=operations)
        self.dtype = dtype
        self.in_channels = in_channels
        self.out_channels = in_channels
        # 128-dim token = patch (2x2) * ae_channels (32).
        self.patch_size = 2
        self.ae_channels = in_channels // (self.patch_size * self.patch_size)
    def _img_to_tokens(self, x):
        """Turn a ``(B, C, gh, gw)`` latent into ``(B, gh * gw, C)`` tokens, one per grid cell."""
        B, C, gh, gw = x.shape
        # The channel axis is read as (ae_channels, patch_row, patch_col).
        x = x.view(B, self.ae_channels, self.patch_size, self.patch_size, gh, gw)
        x = x.permute(0, 4, 5, 2, 3, 1)  # (B, gh, gw, pi, pj, c)
        return x.reshape(B, gh * gw, C)
    def _tokens_to_img(self, tokens, gh, gw):
        """Inverse of ``_img_to_tokens``: ``(B, gh * gw, C)`` tokens back to ``(B, C, gh, gw)``."""
        B = tokens.shape[0]
        C = tokens.shape[-1]
        x = tokens.reshape(B, gh, gw, self.patch_size, self.patch_size, self.ae_channels)
        x = x.permute(0, 5, 3, 4, 1, 2)  # (B, c, pi, pj, gh, gw)
        return x.reshape(B, C, gh, gw)
    def _image_position_ids(self, gh, gw, device):
        """Return row-major ``(t, h, w)`` positions for a ``gh x gw`` grid, with ``t`` fixed at 0.

        ``IMAGE_POSITION_OFFSET`` is added to all three components.
        """
        h_idx = torch.arange(gh, device=device).view(-1, 1).expand(gh, gw).reshape(-1)
        w_idx = torch.arange(gw, device=device).view(1, -1).expand(gh, gw).reshape(-1)
        t_idx = torch.zeros_like(h_idx)
        return torch.stack([t_idx, h_idx, w_idx], dim=1) + IMAGE_POSITION_OFFSET  # (L_img, 3)
    def _run_conditional(self, x_chunk, context_chunk, attn_mask_chunk, t_chunk, gh, gw, transformer_options):
        """Run the backbone on a packed ``[text, image]`` sequence and return the image part.

        Args:
            x_chunk: ``(B, C, gh, gw)`` latent.
            context_chunk: text features; its second axis is the text length.
            attn_mask_chunk: optional text mask where 0 marks padding
                (assumed to be ``(B, L_text)`` -- TODO confirm against the caller).
            t_chunk: timestep value(s) passed through to the backbone.
            gh, gw: latent grid height and width.
            transformer_options: forwarded to the backbone.

        Returns:
            A tensor shaped like ``x_chunk`` built from the image positions of the output.
        """
        B = x_chunk.shape[0]
        device = x_chunk.device
        img_tokens = self._img_to_tokens(x_chunk)
        L_img = img_tokens.shape[1]
        L_text = context_chunk.shape[1]
        L = L_text + L_img
        latent_dim = img_tokens.shape[-1]
        # Text rows carry no latent content, so they stay zero in the packed latent input.
        x_full = torch.zeros(B, L, latent_dim, dtype=img_tokens.dtype, device=device)
        x_full[:, L_text:] = img_tokens
        # Text tokens use the same index on all three rotary axes.
        text_pos = torch.arange(L_text, device=device).view(-1, 1).expand(L_text, 3)
        img_pos = self._image_position_ids(gh, gw, device)
        position_ids = torch.cat([text_pos, img_pos], dim=0).unsqueeze(0).expand(B, L, 3)
        indicator = torch.empty(B, L, dtype=torch.long, device=device)
        indicator[:, :L_text] = LLM_TOKEN_INDICATOR
        indicator[:, L_text:] = OUTPUT_IMAGE_INDICATOR
        attn_mask = None
        if attn_mask_chunk is not None:
            # Real tokens (text and image) share segment 1; padded text goes to its own segment.
            segment_ids = torch.ones(B, L, dtype=torch.long, device=device)
            pad = (attn_mask_chunk == 0)
            segment_ids[:, :L_text][pad] = SEQUENCE_PADDING_INDICATOR
            # Indicator 0 matches neither role code, so padded rows get no text or latent content.
            indicator[:, :L_text][pad] = 0
            # Block-diagonal mask from segment ids: (B, 1, L, L), True = attend.
            attn_mask = (segment_ids.unsqueeze(2) == segment_ids.unsqueeze(1)).unsqueeze(1)
        out = self._backbone(context_chunk, x_full, t_chunk, position_ids, attn_mask, indicator,
                             transformer_options=transformer_options)
        return self._tokens_to_img(out[:, L_text:], gh, gw)
    def _run_image_only(self, x_chunk, t_chunk, gh, gw, transformer_options):
        """Run the backbone on image tokens alone (no text features, no attention mask)."""
        B = x_chunk.shape[0]
        device = x_chunk.device
        img_tokens = self._img_to_tokens(x_chunk)
        L_img = img_tokens.shape[1]
        position_ids = self._image_position_ids(gh, gw, device).unsqueeze(0).expand(B, L_img, 3)
        indicator = torch.full((B, L_img), OUTPUT_IMAGE_INDICATOR, dtype=torch.long, device=device)
        # Image-only sequence is a single segment -> no mask, full attention, no LLM context.
        out = self._backbone(None, img_tokens, t_chunk, position_ids, None, indicator, transformer_options=transformer_options)
        return self._tokens_to_img(out, gh, gw)
    def forward(self, x, timesteps, context=None, attention_mask=None, transformer_options={}, **kwargs):
        """Execute ``_forward`` through the DIFFUSION_MODEL wrappers looked up in ``transformer_options``."""
        return comfy.patcher_extension.WrapperExecutor.new_class_executor(
            self._forward,
            self,
            comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options),
        ).execute(x, timesteps, context, attention_mask, transformer_options, **kwargs)
    def _forward(self, x, timesteps, context=None, attention_mask=None, transformer_options={}, **kwargs):
        """Denoise ``x``; takes the image-only path when ``context`` is ``None``.

        The timestep is flipped to ``1 - t`` and the network output is negated
        (presumably to bridge the model's convention and the sampler's -- confirm).
        """
        # Only gh and gw are used below.
        bs, c, gh, gw = x.shape
        timesteps = 1.0 - timesteps
        # unconditional pass
        if context is None:
            return -self._run_image_only(x, timesteps, gh, gw, transformer_options)
        return -self._run_conditional(x, context, attention_mask, timesteps, gh, gw, transformer_options)
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@ -56,6 +56,7 @@ import comfy.ldm.pixeldit.pid
 import comfy.ldm.ace.model
 import comfy.ldm.omnigen.omnigen2
 import comfy.ldm.qwen_image.model
 import comfy.ldm.ideogram4.model
 import comfy.ldm.kandinsky5.model
 import comfy.ldm.anima.model
 import comfy.ldm.ace.ace_step15
@ -2024,6 +2025,21 @@ class QwenImage(BaseModel):
            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
        return out
 class Ideogram4(BaseModel):
    """Flow-model wrapper that uses the Ideogram 4 transformer as its diffusion model."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(
            model_config,
            model_type,
            device=device,
            unet_model=comfy.ldm.ideogram4.model.Ideogram4Transformer2DModel,
        )

    def extra_conds(self, **kwargs):
        """Extend the base conditioning with the text attention mask and the text features."""
        conds = super().extra_conds(**kwargs)
        mask = kwargs.get("attention_mask")
        # A mask whose sum equals its element count (all ones for a 0/1 mask) is left out.
        if mask is not None and torch.numel(mask) != mask.sum():
            conds['attention_mask'] = comfy.conds.CONDRegular(mask)
        text_features = kwargs.get("cross_attn")
        if text_features is not None:
            conds['c_crossattn'] = comfy.conds.CONDRegular(text_features)
        return conds
 class HunyuanImage21(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan_video.model.HunyuanVideo)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@ -857,6 +857,13 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["default_ref_method"] = "negative_index"
        return dit_config
    if '{}embed_image_indicator.weight'.format(key_prefix) in state_dict_keys:  # Ideogram 4
        dit_config = {}
        dit_config["image_model"] = "ideogram4"
        dit_config["in_channels"] = state_dict['{}input_proj.weight'.format(key_prefix)].shape[1]
        dit_config["num_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
        return dit_config
    if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5
        dit_config = {}
        model_dim = state_dict['{}visual_embeddings.in_layer.bias'.format(key_prefix)].shape[0]
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -58,6 +58,7 @@ import comfy.text_encoders.omnigen2
 import comfy.text_encoders.qwen_image
 import comfy.text_encoders.hunyuan_image
 import comfy.text_encoders.z_image
 import comfy.text_encoders.ideogram4
 import comfy.text_encoders.ovis
 import comfy.text_encoders.kandinsky5
 import comfy.text_encoders.jina_clip_2
@ -1312,6 +1313,7 @@ class CLIPType(Enum):
    COGVIDEOX = 27
    LENS = 28
    PIXELDIT = 29
    IDEOGRAM4 = 30
@ -1610,8 +1612,12 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.ovis.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.ovis.OvisTokenizer
        elif te_model == TEModel.QWEN3_8B:
-            clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b")
+            if clip_type == CLIPType.IDEOGRAM4:
-            clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B
+                clip_target.clip = comfy.text_encoders.ideogram4.te(**llama_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Tokenizer
            else:
                clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b")
                clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B
        elif te_model == TEModel.JINA_CLIP_2:
            clip_target.clip = comfy.text_encoders.jina_clip_2.JinaClip2TextModelWrapper
            clip_target.tokenizer = comfy.text_encoders.jina_clip_2.JinaClip2TokenizerWrapper
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@ -24,6 +24,7 @@ import comfy.text_encoders.qwen_image
 import comfy.text_encoders.hunyuan_image
 import comfy.text_encoders.kandinsky5
 import comfy.text_encoders.z_image
 import comfy.text_encoders.ideogram4
 import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
 import comfy.text_encoders.longcat_image
@ -1755,6 +1756,44 @@ class Omnigen2(supported_models_base.BASE):
        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref))
        return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.Omnigen2Tokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect))
 class Ideogram4(supported_models_base.BASE):
    """Supported-model entry describing Ideogram 4 checkpoints."""

    unet_config = dict(image_model="ideogram4")
    sampling_settings = dict(multiplier=1.0, shift=1.0)
    memory_usage_factor = 11.6
    # Transformer hyper-parameters supplied here as constants.
    unet_extra_config = dict(
        num_attention_heads=18,
        attention_head_dim=256,
        intermediate_size=12288,
        adaln_dim=512,
        llm_features_dim=53248,
        rope_theta=5000000,
        mrope_section=[24, 20, 20],
        norm_eps=1e-5,
    )
    latent_format = latent_formats.Flux2
    supported_inference_dtypes = [torch.bfloat16, torch.float32]
    vae_key_prefix = ["vae."]
    text_encoder_key_prefix = ["text_encoders."]

    def get_model(self, state_dict, prefix="", device=None):
        """Instantiate the ``model_base.Ideogram4`` wrapper for this config."""
        return model_base.Ideogram4(self, device=device)

    def clip_target(self, state_dict={}):
        """Pair the Ideogram 4 tokenizer with a text encoder configured from ``state_dict``."""
        te_prefix = self.text_encoder_key_prefix[0]
        detected = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, f"{te_prefix}qwen3vl_8b.transformer.")
        text_encoder = comfy.text_encoders.ideogram4.te(**detected)
        return supported_models_base.ClipTarget(comfy.text_encoders.ideogram4.Ideogram4Tokenizer, text_encoder)
 class QwenImage(supported_models_base.BASE):
    unet_config = {
        "image_model": "qwen_image",
@ -2243,6 +2282,7 @@ models = [
    ACEStep15,
    Omnigen2,
    QwenImage,
    Ideogram4,
    Flux2,
    Lens,
    Kandinsky5Image,
--- a/comfy/text_encoders/ideogram4.py
+++ b/comfy/text_encoders/ideogram4.py
@ -0,0 +1,77 @@
 """Ideogram 4 text encoder: Qwen3-VL-8B language model, 13-layer tap.
 Ideogram 4 conditions on the concatenation of hidden states from 13 layers of
 Qwen3-VL (layers 0,3,...,33,35), giving a 4096*13 = 53248-dim feature per token.
 """
 import os
 from transformers import Qwen2Tokenizer
 import comfy.text_encoders.llama
 from comfy import sd1_clip
 # Reference taps outputs of layers (0,3,...,33,35); comfy captures layer inputs,
 # so every index below is the reference index offset by +1.
 IDEOGRAM4_TAP_LAYERS = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 36]
 class Qwen3VLTokenizer(sd1_clip.SDTokenizer):
    """Tokenizer for the Qwen3-VL text encoder, loaded from the ``qwen25_tokenizer`` directory next to this module."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        module_dir = os.path.dirname(os.path.realpath(__file__))
        tokenizer_path = os.path.join(module_dir, "qwen25_tokenizer")
        super().__init__(
            tokenizer_path,
            pad_with_end=False,
            embedding_directory=embedding_directory,
            embedding_size=4096,
            embedding_key='qwen3vl_8b',
            tokenizer_class=Qwen2Tokenizer,
            has_start_token=False,
            has_end_token=False,
            pad_to_max_length=False,
            max_length=99999999,
            min_length=1,
            pad_token=151643,
            tokenizer_data=tokenizer_data,
        )
 class Ideogram4Tokenizer(sd1_clip.SD1Tokenizer):
    """Tokenizer wrapper that places the prompt inside a chat template before tokenizing."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(
            embedding_directory=embedding_directory,
            tokenizer_data=tokenizer_data,
            name="qwen3vl_8b",
            tokenizer=Qwen3VLTokenizer,
        )
        # Default template: a single user turn followed by the opening of the assistant turn.
        self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"

    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, **kwargs):
        """Format ``text`` with ``llama_template`` (or the default template) and tokenize with weights disabled."""
        template = self.llama_template if llama_template is None else llama_template
        prompt = template.format(text)
        return super().tokenize_with_weights(prompt, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
 # rope_theta: Qwen3-VL-8B uses 5e6 (vs plain Qwen3-8B's 1e6)
 # final_norm/lm_head off -> Ideogram only reads raw tapped hidden states
 QWEN3VL_8B_CONFIG = {"rope_theta": 5000000.0, "final_norm": False, "lm_head": False}
 class Qwen3VL8BModel(sd1_clip.SDClipModel):
    """Qwen3-8B text model configured to return the Ideogram 4 tap-layer hidden states."""

    def __init__(self, device="cpu", layer="hidden", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
        # ``layer`` and ``layer_idx`` are accepted but not used: the tap layers are fixed.
        text_config = dict(QWEN3VL_8B_CONFIG)
        super().__init__(
            device=device,
            layer=IDEOGRAM4_TAP_LAYERS,
            layer_idx=None,
            textmodel_json_config=text_config,
            dtype=dtype,
            special_tokens={"pad": 151643},
            layer_norm_hidden_state=False,
            model_class=comfy.text_encoders.llama.Qwen3_8B,
            enable_attention_masks=attention_mask,
            return_attention_masks=attention_mask,
            model_options=model_options,
        )
 class Ideogram4TEModel(sd1_clip.SD1ClipModel):
    """Text-encoder wrapper that flattens the stacked tap-layer states into one feature axis."""

    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(
            device=device,
            dtype=dtype,
            name="qwen3vl_8b",
            clip_model=Qwen3VL8BModel,
            model_options=model_options,
        )

    def encode_token_weights(self, token_weight_pairs):
        stacked, pooled, extra = super().encode_token_weights(token_weight_pairs)
        # stacked: (B, n_taps=13, seq, 4096), taps in ascending layer order.
        batch, n_taps, seq_len, width = stacked.shape
        # Move the tap axis last -> (B, seq, H, taps), then flatten to (B, seq, 4096*13).
        features = stacked.permute(0, 2, 3, 1).reshape(batch, seq_len, width * n_taps)
        return features, pooled, extra
 def te(dtype_llama=None, llama_quantization_metadata=None):
    """Return an ``Ideogram4TEModel`` subclass with the given dtype/quantization overrides baked in.

    ``dtype_llama`` replaces the constructor's ``dtype`` when it is not ``None``;
    ``llama_quantization_metadata`` is injected into ``model_options`` when it is not ``None``.
    """
    class Ideogram4TEModel_(Ideogram4TEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
            effective_dtype = dtype if dtype_llama is None else dtype_llama
            options = model_options
            if llama_quantization_metadata is not None:
                # Copy first so the caller's dict is left untouched.
                options = model_options.copy()
                options["quantization_metadata"] = llama_quantization_metadata
            super().__init__(device=device, dtype=effective_dtype, model_options=options)
    return Ideogram4TEModel_
--- a/comfy_api/latest/_ui.py
+++ b/comfy_api/latest/_ui.py
@ -285,7 +285,7 @@ class AudioSaveHelper:
        results = []
        for batch_number, waveform in enumerate(audio["waveform"].cpu()):
            filename_with_batch_num = filename.replace("%batch_num%", str(batch_number))
-            file = f"{filename_with_batch_num}_{counter:05}_.{format}"
+            file = f"{filename_with_batch_num}_{counter:05}.{format}"
            output_path = os.path.join(full_output_folder, file)
            # Use original sample rate initially
--- a/comfy_api_nodes/apis/bfl.py
+++ b/comfy_api_nodes/apis/bfl.py
@ -43,6 +43,7 @@ class BFLFluxEraseRequest(BaseModel):
        "white (255) marks areas to remove, black (0) marks areas to preserve.",
    )
    dilate_pixels: int = Field(10)
    seed: int | None = Field(None)
    output_format: str = Field("png")
--- a/comfy_api_nodes/apis/bria.py
+++ b/comfy_api_nodes/apis/bria.py
@ -97,3 +97,28 @@ class BriaRemoveVideoBackgroundResult(BaseModel):
 class BriaRemoveVideoBackgroundResponse(BaseModel):
    status: str = Field(...)
    result: BriaRemoveVideoBackgroundResult | None = Field(None)
 class BriaVideoGreenScreenRequest(BaseModel):
    # Request fields for the Bria video green-screen operation.
    video: str = Field(default=..., description="Publicly accessible URL of the input video.")
    green_shade: str = Field(
        "broadcast_green",
        description=(
            "Solid chroma-key shade applied behind the foreground "
            "(broadcast_green, chroma_green, or blue_screen)."
        ),
    )
    output_container_and_codec: str = Field(default=...)
    preserve_audio: bool = Field(default=True)
    seed: int = Field(default=...)
 class BriaVideoReplaceBackgroundRequest(BaseModel):
    # Request fields for the Bria video background-replacement operation.
    video: str = Field(default=..., description="Publicly accessible URL of the input (foreground) video.")
    background_url: str = Field(
        default=...,
        description=(
            "Publicly accessible URL of the background image or video to composite behind "
            "the foreground. Stretched to the foreground frame; match its aspect ratio for "
            "undistorted results."
        ),
    )
    output_container_and_codec: str = Field(default=...)
    preserve_audio: bool = Field(default=True)
    seed: int = Field(default=...)
--- a/comfy_api_nodes/apis/ideogram.py
+++ b/comfy_api_nodes/apis/ideogram.py
@ -290,3 +290,19 @@ class IdeogramV3Request(BaseModel):
        None,
        description='Optional masks for character reference images. When provided, must match the number of character_reference_images. Each mask should be a grayscale image of the same dimensions as the corresponding character reference image. The images should be in JPEG, PNG or WebP format.'
    )
 class IdeogramV4Request(BaseModel):
    # Request fields for Ideogram V4; every field is optional and defaults to None.
    # The two prompt descriptions say exactly one of them should be supplied; that
    # rule is not enforced by this model.
    text_prompt: str | None = Field(
        default=None,
        description=(
            "Natural-language prompt; Magic Prompt is applied automatically. "
            "Supply exactly one of text_prompt or json_prompt."
        ),
    )
    json_prompt: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Structured V4 prompt object consumed directly (disables Magic Prompt). "
            "Supply exactly one of text_prompt or json_prompt."
        ),
    )
    resolution: str | None = Field(
        default=None,
        description="Output resolution in WIDTHxHEIGHT (e.g. '2048x2048').",
    )
    rendering_speed: str | None = Field(
        default=None,
        description="Rendering speed: 'TURBO', 'DEFAULT', or 'QUALITY'.",
    )
    enable_copyright_detection: bool | None = Field(
        default=None,
        description="Opt into post-generation copyright detection.",
    )
--- a/comfy_api_nodes/nodes_bfl.py
+++ b/comfy_api_nodes/nodes_bfl.py
@ -534,6 +534,15 @@ class FluxEraseNode(IO.ComfyNode):
                    max=25,
                    tooltip="Expands the mask boundaries to ensure clean coverage of the object's edges.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    control_after_generate=True,
                    tooltip="The random seed used for creating the noise.",
                    optional=True,
                ),
            ],
            outputs=[IO.Image.Output()],
            hidden=[
@ -553,6 +562,7 @@ class FluxEraseNode(IO.ComfyNode):
        image: Input.Image,
        mask: Input.Image,
        dilate_pixels: int = 10,
        seed: int = 0,
    ) -> IO.NodeOutput:
        validate_image_dimensions(image, min_width=256, min_height=256)
        mask = resize_mask_to_image(mask, image)
@ -565,6 +575,7 @@ class FluxEraseNode(IO.ComfyNode):
                image=tensor_to_base64_string(image[:, :, :, :3]),  # make sure image will have alpha channel removed
                mask=mask,
                dilate_pixels=dilate_pixels,
                seed=seed,
            ),
        )
--- a/comfy_api_nodes/nodes_bria.py
+++ b/comfy_api_nodes/nodes_bria.py
@ -1,14 +1,19 @@
 import av
 import torch
 from av.codec import CodecContext
 from typing_extensions import override
 from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis.bria import (
    BriaEditImageRequest,
    BriaImageEditResponse,
    BriaRemoveBackgroundRequest,
    BriaRemoveBackgroundResponse,
    BriaRemoveVideoBackgroundRequest,
    BriaRemoveVideoBackgroundResponse,
    BriaImageEditResponse,
    BriaStatusResponse,
    BriaVideoGreenScreenRequest,
    BriaVideoReplaceBackgroundRequest,
    InputModerationSettings,
 )
 from comfy_api_nodes.util import (
@ -316,6 +321,248 @@ class BriaRemoveVideoBackground(IO.ComfyNode):
        return IO.NodeOutput(await download_url_to_video_output(response.result.video_url))
 class BriaVideoGreenScreen(IO.ComfyNode):
    """API node that swaps a video's background for a solid chroma-key colour via Bria."""

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs, single video output, hidden auth fields and price badge."""
        video_input = IO.Video.Input("video")
        shade_input = IO.Combo.Input(
            "green_shade",
            options=["broadcast_green", "chroma_green", "blue_screen"],
            tooltip="Solid chroma-key shade applied behind the foreground: "
            "broadcast_green (#00B140), chroma_green (#00FF00), or blue_screen (#0000FF).",
        )
        seed_input = IO.Int.Input(
            "seed",
            default=0,
            min=0,
            max=2147483647,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
            tooltip="Seed controls whether the node should re-run; "
            "results are non-deterministic regardless of seed.",
        )
        hidden_fields = [
            IO.Hidden.auth_token_comfy_org,
            IO.Hidden.api_key_comfy_org,
            IO.Hidden.unique_id,
        ]
        badge = IO.PriceBadge(
            expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
        )
        return IO.Schema(
            node_id="BriaVideoGreenScreen",
            display_name="Bria Video Green Screen",
            category="partner/video/Bria",
            description="Replace a video's background with a solid chroma-key screen using Bria.",
            inputs=[video_input, shade_input, seed_input],
            outputs=[IO.Video.Output()],
            hidden=hidden_fields,
            is_api_node=True,
            price_badge=badge,
        )

    @classmethod
    async def execute(
        cls,
        video: Input.Video,
        green_shade: str,
        seed: int,
    ) -> IO.NodeOutput:
        """Upload the video, submit a green-screen job, poll its status and return the result video.

        The input is checked with ``validate_video_duration(..., max_duration=60.0)`` before upload.
        """
        validate_video_duration(video, max_duration=60.0)
        submit_endpoint = ApiEndpoint(path="/proxy/bria/v2/video/edit/green_screen", method="POST")
        payload = BriaVideoGreenScreenRequest(
            video=await upload_video_to_comfyapi(cls, video),
            green_shade=green_shade,
            output_container_and_codec="mp4_h264",
            seed=seed,
        )
        submitted = await sync_op(
            cls,
            submit_endpoint,
            data=payload,
            response_model=BriaStatusResponse,
        )
        # The status endpoint is keyed by the request id returned from the submit call.
        finished = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/bria/v2/status/{submitted.request_id}"),
            status_extractor=lambda r: r.status,
            response_model=BriaRemoveVideoBackgroundResponse,
        )
        result_video = await download_url_to_video_output(finished.result.video_url)
        return IO.NodeOutput(result_video)
 class BriaVideoReplaceBackground(IO.ComfyNode):
    """API node that composites a foreground video over a supplied background image or video via Bria."""

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs, single video output, hidden auth fields and price badge."""
        return IO.Schema(
            node_id="BriaVideoReplaceBackground",
            display_name="Bria Video Replace Background",
            category="partner/video/Bria",
            description="Replace a video's background with a supplied image or video using Bria. "
            "The output keeps the foreground's resolution and frame rate; a background with a "
            "different aspect ratio is stretched to fit, so match it for undistorted results.",
            inputs=[
                IO.Video.Input("video", tooltip="Foreground video whose background is replaced."),
                IO.Image.Input(
                    "background_image",
                    optional=True,
                    tooltip="Background image to composite behind the foreground. "
                    "Provide either a background image or a background video, not both.",
                ),
                IO.Video.Input(
                    "background_video",
                    optional=True,
                    tooltip="Background video to composite behind the foreground. "
                    "Provide either a background image or a background video, not both.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed controls whether the node should re-run; "
                    "results are non-deterministic regardless of seed.",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        video: Input.Video,
        seed: int,
        background_image: Input.Image | None = None,
        background_video: Input.Video | None = None,
    ) -> IO.NodeOutput:
        """Upload foreground and background, submit a replace-background job, poll it and return the video.

        Exactly one of ``background_image`` / ``background_video`` must be given.

        Raises:
            ValueError: if both backgrounds are supplied, or if neither is.
        """
        # Report the two invalid combinations separately so the message matches what the user did.
        if background_image is not None and background_video is not None:
            raise ValueError("Provide either a background image or a background video, not both.")
        if background_image is None and background_video is None:
            raise ValueError("Provide a background image or a background video.")
        validate_video_duration(video, max_duration=60.0)
        if background_video is not None:
            validate_video_duration(background_video, max_duration=60.0)
            background_url = await upload_video_to_comfyapi(cls, background_video, wait_label="Uploading background")
        else:
            background_url = await upload_image_to_comfyapi(cls, background_image, wait_label="Uploading background")
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/bria/v2/video/edit/replace_background", method="POST"),
            data=BriaVideoReplaceBackgroundRequest(
                video=await upload_video_to_comfyapi(cls, video),
                background_url=background_url,
                output_container_and_codec="mp4_h264",
                seed=seed,
            ),
            response_model=BriaStatusResponse,
        )
        # The status endpoint is keyed by the request id returned from the submit call.
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"),
            status_extractor=lambda r: r.status,
            response_model=BriaRemoveVideoBackgroundResponse,
        )
        return IO.NodeOutput(await download_url_to_video_output(response.result.video_url))
 def _video_to_images_and_mask(video: Input.Video) -> tuple[Input.Image, Input.Mask]:
    """Split a transparent webm (VP9 + alpha) into colour frames and an inverted-alpha mask.

    PyAV's default vp9 decoder drops the alpha side layer, so vp9 streams are decoded through an
    explicit libvpx-vp9 codec context; any other codec uses the packet's own decoder.
    Returns RGB images [B,H,W,3] in 0..1 and a mask [B,H,W] following the Load Image convention
    (1 = transparent), suitable for compositing or Save WEBM.
    """
    colour: list[torch.Tensor] = []
    alpha: list[torch.Tensor] = []
    with av.open(video.get_stream_source(), mode="r") as container:
        stream = container.streams.video[0]
        decoder = None
        if stream.codec_context.name == "vp9":
            decoder = CodecContext.create("libvpx-vp9", "r")
        for packet in container.demux(stream):
            frames = packet.decode() if decoder is None else decoder.decode(packet)
            for frame in frames:
                pixels = torch.from_numpy(frame.to_ndarray(format="rgba")).float() / 255.0
                colour.append(pixels[..., :3])
                alpha.append(pixels[..., 3])
    if colour:
        images = torch.stack(colour)
    else:
        images = torch.zeros(0, 0, 0, 3)
    if alpha:
        mask = 1.0 - torch.stack(alpha)  # invert: opaque alpha (1) becomes mask 0
    else:
        mask = torch.zeros((images.shape[0], 64, 64))
    return images, mask
 class BriaTransparentVideoBackground(IO.ComfyNode):
    """API node that removes a video's background via Bria and returns frames plus an alpha mask."""

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs, image/mask outputs, hidden auth fields and price badge."""
        seed_input = IO.Int.Input(
            "seed",
            default=0,
            min=0,
            max=2147483647,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
            tooltip="Seed controls whether the node should re-run; "
            "results are non-deterministic regardless of seed.",
        )
        node_outputs = [
            IO.Image.Output(display_name="images"),
            IO.Mask.Output(display_name="mask"),
        ]
        hidden_fields = [
            IO.Hidden.auth_token_comfy_org,
            IO.Hidden.api_key_comfy_org,
            IO.Hidden.unique_id,
        ]
        return IO.Schema(
            node_id="BriaTransparentVideoBackground",
            display_name="Bria Remove Video Background (Transparent)",
            category="partner/video/Bria",
            description="Remove the background from a video using Bria and return the cut-out frames "
            "plus an alpha mask. Connect both to a compositing node, or feed them to Save WEBM to "
            "write a transparent video.",
            inputs=[IO.Video.Input("video"), seed_input],
            outputs=node_outputs,
            hidden=hidden_fields,
            is_api_node=True,
            price_badge=IO.PriceBadge(
                expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
            ),
        )

    @classmethod
    async def execute(
        cls,
        video: Input.Video,
        seed: int,
    ) -> IO.NodeOutput:
        """Submit a transparent remove-background job, poll it, then decode the webm result.

        The request asks for a "Transparent" background in the "webm_vp9" container so the
        downloaded video can be split into images and a mask by ``_video_to_images_and_mask``.
        """
        validate_video_duration(video, max_duration=60.0)
        submit_endpoint = ApiEndpoint(path="/proxy/bria/v2/video/edit/remove_background", method="POST")
        payload = BriaRemoveVideoBackgroundRequest(
            video=await upload_video_to_comfyapi(cls, video),
            background_color="Transparent",
            output_container_and_codec="webm_vp9",
            seed=seed,
        )
        submitted = await sync_op(
            cls,
            submit_endpoint,
            data=payload,
            response_model=BriaStatusResponse,
        )
        finished = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/bria/v2/status/{submitted.request_id}"),
            status_extractor=lambda r: r.status,
            response_model=BriaRemoveVideoBackgroundResponse,
        )
        transparent_video = await download_url_to_video_output(finished.result.video_url)
        frames, alpha_mask = _video_to_images_and_mask(transparent_video)
        return IO.NodeOutput(frames, alpha_mask)
 class BriaExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -323,6 +570,9 @@ class BriaExtension(ComfyExtension):
            BriaImageEditNode,
            BriaRemoveImageBackground,
            BriaRemoveVideoBackground,
            BriaVideoGreenScreen,
            # BriaVideoReplaceBackground,  # server returns Status 500 when we pass background video
            BriaTransparentVideoBackground,
        ]
--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@ -7,6 +7,7 @@ from io import BytesIO
 import torch
 from typing_extensions import override
 from comfy.utils import common_upscale
 from comfy_api.latest import IO, ComfyExtension, Input, Types
 from comfy_api_nodes.apis.bytedance import (
    RECOMMENDED_PRESETS,
@ -131,6 +132,44 @@ def _prepare_seedance_image(image: Input.Image) -> Input.Image:
    return image
 # Supported output aspect ratios, used to pre-size FLF frames to matching pixel pair to avoid the 1080p stretch jump.
 SEEDANCE2_RATIO_WH = {
    "16:9": (16, 9),
    "4:3": (4, 3),
    "1:1": (1, 1),
    "3:4": (3, 4),
    "9:16": (9, 16),
    "21:9": (21, 9),
 }
 SEEDANCE2_RES_SHORT_SIDE = {"480p": 480, "720p": 720, "1080p": 1080}
 def _seedance2_target_dims(resolution: str, ratio: str, image: torch.Tensor) -> tuple[int, int]:
    """Exact supported output (width, height) for (resolution, ratio).
    The shorter side equals the resolution number (e.g. 1080p 16:9 -> 1920x1080). For ratio
    "adaptive" (or any unexpected value) the ratio is derived from the image's own aspect, snapped
    to the nearest supported ratio, so the output keeps the frame's orientation.
    """
    short = SEEDANCE2_RES_SHORT_SIDE[resolution]
    if ratio not in SEEDANCE2_RATIO_WH:
        aspect = image.shape[-2] / image.shape[-3]  # W / H; tensor is (B, H, W, C)
        ratio = min(SEEDANCE2_RATIO_WH, key=lambda k: abs(SEEDANCE2_RATIO_WH[k][0] / SEEDANCE2_RATIO_WH[k][1] - aspect))
    rw, rh = SEEDANCE2_RATIO_WH[ratio]
    if rw >= rh:  # landscape or square: shorter side is the height
        out_w, out_h = round(short * rw / rh), short
    else:  # portrait: shorter side is the width
        out_w, out_h = short, round(short * rh / rw)
    return out_w - out_w % 2, out_h - out_h % 2
 def _resize_to_exact(image: torch.Tensor, width: int, height: int) -> torch.Tensor:
    """Resize a channels-last image batch to exactly width x height.

    Delegates to ``common_upscale`` with the "lanczos" method and "center" crop, which
    center-crops to the target aspect before resizing.
    """
    # common_upscale is fed channels-first data: (B, H, W, C) -> (B, C, H, W).
    channels_first = image.movedim(-1, 1)
    scaled = common_upscale(channels_first, width, height, "lanczos", "center")
    # Back to the channels-last layout the caller passed in.
    return scaled.movedim(1, -1)
 async def _resolve_reference_assets(
    cls: type[IO.ComfyNode],
    asset_ids: list[str],
@ -1790,10 +1829,28 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
        if last_frame is not None and last_frame_asset_id:
            raise ValueError("Provide only one of last_frame or last_frame_asset_id, not both.")
-        if first_frame is not None:
+        request_ratio = model["ratio"]
-            first_frame = _prepare_seedance_image(first_frame)
+        if first_frame_asset_id or last_frame_asset_id:
-        if last_frame is not None:
+            if first_frame is not None:
-            last_frame = _prepare_seedance_image(last_frame)
+                first_frame = _prepare_seedance_image(first_frame)
            if last_frame is not None:
                last_frame = _prepare_seedance_image(last_frame)
        else:
            # The 1080p FLF stretch fix (pre-size frames to a supported pixel pair + submit ratio="adaptive")
            # only applies to local image inputs we can resize.
            request_ratio = "adaptive"
            target_dims: tuple[int, int] | None = None
            if first_frame is not None:
                validate_image_aspect_ratio(first_frame, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
                validate_image_dimensions(first_frame, min_width=300, min_height=300)
                target_dims = _seedance2_target_dims(model["resolution"], model["ratio"], first_frame)
                first_frame = _resize_to_exact(first_frame, *target_dims)
            if last_frame is not None:
                validate_image_aspect_ratio(last_frame, (2, 5), (5, 2), strict=False)  # 0.4 to 2.5
                validate_image_dimensions(last_frame, min_width=300, min_height=300)
                if target_dims is None:
                    target_dims = _seedance2_target_dims(model["resolution"], model["ratio"], last_frame)
                last_frame = _resize_to_exact(last_frame, *target_dims)
        asset_ids_to_resolve = [a for a in (first_frame_asset_id, last_frame_asset_id) if a]
        image_assets: dict[str, str] = {}
@ -1844,7 +1901,7 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
                content=content,
                generate_audio=model["generate_audio"],
                resolution=model["resolution"],
-                ratio=model["ratio"],
+                ratio=request_ratio,
                duration=model["duration"],
                seed=seed,
                watermark=watermark,
--- a/comfy_api_nodes/nodes_ideogram.py
+++ b/comfy_api_nodes/nodes_ideogram.py
@ -10,6 +10,7 @@ from comfy_api_nodes.apis.ideogram import (
    ImageRequest,
    IdeogramV3Request,
    IdeogramV3EditRequest,
    IdeogramV4Request,
 )
 from comfy_api_nodes.util import (
    ApiEndpoint,
@ -17,6 +18,7 @@ from comfy_api_nodes.util import (
    download_url_as_bytesio,
    resize_mask_to_image,
    sync_op,
    validate_string,
 )
 V1_V1_RES_MAP = {
@ -798,6 +800,119 @@ class IdeogramV3(IO.ComfyNode):
        return IO.NodeOutput(await download_and_process_images(image_urls))
 class IdeogramV4(IO.ComfyNode):
    """API node that generates images from a text prompt with the Ideogram 4.0 model."""

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs, image output, hidden auth fields and speed-based price badge."""
        # Each entry after "Auto" is "<width>x<height> (<aspect>)"; execute() sends only the first token.
        resolution_options = [
            "Auto",
            "2048x2048 (1:1)",
            "1440x2880 (1:2)",
            "2880x1440 (2:1)",
            "1664x2496 (2:3)",
            "2496x1664 (3:2)",
            "1792x2240 (4:5)",
            "2240x1792 (5:4)",
            "1440x2560 (9:16)",
            "2560x1440 (16:9)",
            "1600x2560 (5:8)",
            "2560x1600 (8:5)",
            "1728x2304 (3:4)",
            "2304x1728 (4:3)",
            "1296x3168 (9:22)",
            "3168x1296 (22:9)",
            "1152x2944 (9:23)",
            "2944x1152 (23:9)",
            "1248x3328 (3:8)",
            "3328x1248 (8:3)",
            "1280x3072 (5:12)",
            "3072x1280 (12:5)",
        ]
        prompt_input = IO.String.Input(
            "prompt",
            multiline=True,
            default="",
            tooltip="Text prompt for the image generation.",
        )
        resolution_input = IO.Combo.Input(
            "resolution",
            options=resolution_options,
            default="Auto",
        )
        speed_input = IO.Combo.Input(
            "rendering_speed",
            options=["DEFAULT", "TURBO", "QUALITY"],
            default="DEFAULT",
            tooltip="Controls the trade-off between generation speed and quality.",
        )
        seed_input = IO.Int.Input(
            "seed",
            default=0,
            min=0,
            max=2147483647,
            step=1,
            control_after_generate=True,
            display_mode=IO.NumberDisplay.number,
        )
        return IO.Schema(
            node_id="IdeogramV4",
            display_name="Ideogram V4",
            category="partner/image/Ideogram",
            description="Generates images using the Ideogram 4.0 model from a text prompt.",
            inputs=[prompt_input, resolution_input, speed_input, seed_input],
            outputs=[
                IO.Image.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["rendering_speed"]),
                expr="""
                (
                  $speed := widgets.rendering_speed;
                  $price :=
                    $contains($speed,"turbo") ? 0.0429 :
                    $contains($speed,"quality") ? 0.143 :
                    0.0858;
                  {"type":"usd","usd": $price}
                )
                """,
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        resolution: str,
        rendering_speed: str,
        seed: int,
    ):
        """Send the prompt to the Ideogram V4 generate endpoint and return the downloaded images.

        ``seed`` is accepted from the schema but is not placed in the request body here.

        Raises:
            Exception: if the response carries no image entries, or none of them has a URL.
        """
        validate_string(prompt, strip_whitespace=True, min_length=1)
        # "Auto" sends no resolution; otherwise only the "<width>x<height>" token is sent.
        if resolution != "Auto":
            requested_resolution = resolution.split(" ")[0]
        else:
            requested_resolution = None
        request = IdeogramV4Request(
            text_prompt=prompt,
            resolution=requested_resolution,
            rendering_speed=rendering_speed,
        )
        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/ideogram/ideogram-v4/generate", method="POST"),
            response_model=IdeogramGenerateResponse,
            data=request,
            max_retries=1,
        )
        if not response.data:
            raise Exception("No images were generated in the response")
        image_urls = [entry.url for entry in response.data if entry.url]
        if not image_urls:
            raise Exception("No image URLs were generated in the response")
        downloaded = await download_and_process_images(image_urls)
        return IO.NodeOutput(downloaded)
 class IdeogramExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -805,6 +920,7 @@ class IdeogramExtension(ComfyExtension):
            IdeogramV1,
            IdeogramV2,
            IdeogramV3,
            IdeogramV4,
        ]
--- a/comfy_api_nodes/nodes_krea.py
+++ b/comfy_api_nodes/nodes_krea.py
@ -42,9 +42,11 @@ async def _upload_image_to_krea_assets(cls: type[IO.ComfyNode], image: Input.Ima
 _MODEL_MEDIUM = "Krea 2 Medium"
 _MODEL_MEDIUM_TURBO = "Krea 2 Medium Turbo"
 _MODEL_LARGE = "Krea 2 Large"
 _MODEL_ENDPOINTS: dict[str, str] = {
    _MODEL_MEDIUM: "/proxy/krea/generate/image/krea/krea-2/medium",
    _MODEL_MEDIUM_TURBO: "/proxy/krea/generate/image/krea/krea-2/medium-turbo",
    _MODEL_LARGE: "/proxy/krea/generate/image/krea/krea-2/large",
 }
@ -57,7 +59,7 @@ _UUID_RE = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F
 def _krea_model_inputs() -> list:
-    """Nested inputs shared by both Krea 2 Medium and Large under the DynamicCombo."""
+    """Nested inputs shared by Krea 2 Medium, Medium Turbo and Large under the DynamicCombo."""
    return [
        IO.Combo.Input(
            "aspect_ratio",
@ -123,6 +125,7 @@ class Krea2ImageNode(IO.ComfyNode):
                    "model",
                    options=[
                        IO.DynamicCombo.Option(_MODEL_MEDIUM, _krea_model_inputs()),
                        IO.DynamicCombo.Option(_MODEL_MEDIUM_TURBO, _krea_model_inputs()),
                        IO.DynamicCombo.Option(_MODEL_LARGE, _krea_model_inputs()),
                    ],
                    tooltip="Krea 2 Medium is best for expressive illustrations; "
@ -151,14 +154,15 @@ class Krea2ImageNode(IO.ComfyNode):
                ),
                expr="""
                (
-                  $isLarge := widgets.model = "krea 2 large";
+                  $rates := {
                    "krea 2 medium turbo": {"text": 0.015, "style": 0.0175, "moodboard": 0.02},
                    "krea 2 medium": {"text": 0.03, "style": 0.035, "moodboard": 0.04},
                    "krea 2 large": {"text": 0.06, "style": 0.065, "moodboard": 0.07}
                  };
                  $r := $lookup($rates, widgets.model);
                  $hasMoodboard := $length($lookup(widgets, "model.moodboard_id")) > 0;
                  $hasStyle := $lookup(inputs, "model.style_reference").connected;
-                  $usd := $hasMoodboard
+                  $usd := $hasMoodboard ? $r.moodboard : ($hasStyle ? $r.style : $r.text);
                    ? ($isLarge ? 0.07 : 0.04)
                    : ($hasStyle
                        ? ($isLarge ? 0.065 : 0.035)
                        : ($isLarge ? 0.06 : 0.03));
                  {"type":"usd","usd": $usd}
                )
                """,
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@ -158,7 +158,7 @@ class SaveAudio(IO.ComfyNode):
        return IO.Schema(
            node_id="SaveAudio",
            search_aliases=["export flac"],
-            display_name="Save Audio (FLAC)",
+            display_name="Save Audio (FLAC) (Deprecated)",
            category="audio",
            essentials_category="Audio",
            inputs=[
@ -167,6 +167,7 @@ class SaveAudio(IO.ComfyNode):
            ],
            hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
            is_output_node=True,
            is_deprecated=True,
        )
    @classmethod
@ -186,7 +187,7 @@ class SaveAudioMP3(IO.ComfyNode):
        return IO.Schema(
            node_id="SaveAudioMP3",
            search_aliases=["export mp3"],
-            display_name="Save Audio (MP3)",
+            display_name="Save Audio (MP3) (Deprecated)",
            category="audio",
            essentials_category="Audio",
            inputs=[
@ -196,6 +197,7 @@ class SaveAudioMP3(IO.ComfyNode):
            ],
            hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
            is_output_node=True,
            is_deprecated=True,
        )
    @classmethod
@ -217,7 +219,7 @@ class SaveAudioOpus(IO.ComfyNode):
        return IO.Schema(
            node_id="SaveAudioOpus",
            search_aliases=["export opus"],
-            display_name="Save Audio (Opus)",
+            display_name="Save Audio (Opus) (Deprecated)",
            category="audio",
            inputs=[
                IO.Audio.Input("audio"),
@ -226,6 +228,7 @@ class SaveAudioOpus(IO.ComfyNode):
            ],
            hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
            is_output_node=True,
            is_deprecated=True,
        )
    @classmethod
@ -241,6 +244,54 @@ class SaveAudioOpus(IO.ComfyNode):
    save_opus = execute  # TODO: remove
 class SaveAudioAdvanced(IO.ComfyNode):
    """Output node that saves audio as flac, mp3 or opus, with a quality choice for mp3 and opus."""

    @classmethod
    def define_schema(cls):
        """Declare the audio input, filename prefix and the format selector with its nested options."""
        mp3_quality = IO.Combo.Input("quality", options=["V0", "128k", "320k"], default="V0")
        opus_quality = IO.Combo.Input("quality", options=["64k", "96k", "128k", "192k", "320k"], default="128k")
        format_input = IO.DynamicCombo.Input(
            "format",
            options=[
                IO.DynamicCombo.Option("flac", []),
                IO.DynamicCombo.Option("mp3", [mp3_quality]),
                IO.DynamicCombo.Option("opus", [opus_quality]),
            ],
            tooltip="The file format in which to save the audio.",
        )
        prefix_input = IO.String.Input(
            "filename_prefix",
            default="audio/ComfyUI",
            tooltip=(
                "The prefix for the file to save. May include formatting tokens "
                "such as %date:yyyy-MM-dd%."
            ),
        )
        return IO.Schema(
            node_id="SaveAudioAdvanced",
            search_aliases=["save audio", "export audio", "output audio", "write audio", "flac", "mp3", "opus"],
            display_name="Save Audio (Advanced)",
            description="Saves the input audio to your ComfyUI output directory.",
            category="audio",
            inputs=[
                IO.Audio.Input("audio", tooltip="The audio to save."),
                prefix_input,
                format_input,
            ],
            hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo],
            is_output_node=True,
        )

    @classmethod
    def execute(cls, audio, filename_prefix: str, format: dict) -> IO.NodeOutput:
        """Save the audio through ``UI.AudioSaveHelper.get_save_audio_ui`` and return its UI payload.

        ``format`` is the selector dict: its "format" entry names the file format and its optional
        "quality" entry is forwarded only when it is truthy.
        """
        save_kwargs = {
            "filename_prefix": filename_prefix,
            "cls": cls,
            "format": format.get("format", None),
        }
        quality = format.get("quality", None)
        if quality:
            save_kwargs["quality"] = quality
        saved_ui = UI.AudioSaveHelper.get_save_audio_ui(audio, **save_kwargs)
        return IO.NodeOutput(ui=saved_ui)
 class PreviewAudio(IO.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -822,6 +873,7 @@ class AudioExtension(ComfyExtension):
            SaveAudio,
            SaveAudioMP3,
            SaveAudioOpus,
            SaveAudioAdvanced,
            LoadAudio,
            PreviewAudio,
            ConditioningStableAudio,
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@ -1,5 +1,7 @@
 import math
 import comfy.samplers
 import comfy.sampler_helpers
 import comfy.patcher_extension
 import comfy.sample
 from comfy.k_diffusion import sampling as k_diffusion_sampling
 from comfy.k_diffusion import sa_solver
@ -894,6 +896,85 @@ class DualCFGGuider(io.ComfyNode):
    get_guider = execute
 class Guider_DualModel(comfy.samplers.CFGGuider):
    # Runs the positive (cond) pass on the main model and the negative (uncond) pass on a separate model
    def __init__(self, model_patcher, uncond_model_patcher):
        """Store the patcher for the negative pass; the main patcher goes to the base guider."""
        super().__init__(model_patcher)
        self.uncond_model_patcher = uncond_model_patcher
        # Set by outer_sample() once the negative model has been prepared; None means "cond only".
        self.uncond_inner = None
    def outer_sample(self, noise, latent_image, sampler, sigmas, denoise_mask=None, callback=None, disable_pbar=False, seed=None, latent_shapes=None):
        """Prepare the negative model (unless cfg is ~1.0), run the base outer_sample, then clean up.

        Per-run state (uncond_inner, uncond_loaded, _uncond_neg) is reset on every call.
        """
        self.uncond_inner = None
        self.uncond_loaded = []
        self._uncond_neg = None
        # skip at cfg 1.0
        if not math.isclose(self.cfg, 1.0):
            # Shallow-copy each negative cond entry so preparation works on copies of self.conds.
            uc = {"negative": list(map(lambda a: a.copy(), self.conds["negative"]))}
            self.uncond_inner, uc, self.uncond_loaded = comfy.sampler_helpers.prepare_sampling(
                self.uncond_model_patcher, noise.shape, uc, self.uncond_model_patcher.model_options)
            self._uncond_neg = uc["negative"]
            self.uncond_model_patcher.pre_run()
        try:
            return super().outer_sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
        finally:
            # Only undo the negative-model setup when it was actually performed above.
            if self.uncond_inner is not None:
                self.uncond_model_patcher.cleanup()
                comfy.sampler_helpers.cleanup_models({"negative": self._uncond_neg}, self.uncond_loaded)
                self.uncond_inner = None
    def inner_sample(self, noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=None):
        """Process the negative conds against the negative model, then run the base inner_sample.

        The processed conds are cached on self._uncond_conds for predict_noise().
        """
        if self.uncond_inner is not None:
            li = latent_image
            # Only a latent with at least one non-zero value is passed through process_latent_in.
            if li is not None and torch.count_nonzero(li) > 0:
                li = self.uncond_inner.process_latent_in(li)
            self._uncond_conds = comfy.samplers.process_conds(
                self.uncond_inner, noise, {"negative": self._uncond_neg}, device, li, denoise_mask, seed, latent_shapes=latent_shapes)["negative"]
        return super().inner_sample(noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
    def predict_noise(self, x, timestep, model_options={}, seed=None):
        """Evaluate cond on the main model and uncond on the negative model, combined by cfg_function.

        Returns the cond prediction alone when the negative model is not loaded, or when cfg is
        ~1.0 and "disable_cfg1_optimization" is not set in model_options.
        """
        positive = self.conds.get("positive", None)
        cond = comfy.samplers.calc_cond_batch(self.inner_model, [positive], x, timestep, model_options)[0]
        # uncond model not loaded (base cfg==1/no negative), or cfg driven to 1.0 this step -> single model, cond only
        if self.uncond_inner is None or (math.isclose(self.cfg, 1.0) and not model_options.get("disable_cfg1_optimization", False)):
            return cond
        uncond_model_options = model_options
        if "multigpu_clones" in model_options: # TODO: support multigpu instead of just running uncond on a single GPU
            # Build a filtered copy so the caller's model_options dict is left untouched.
            uncond_model_options = {k: v for k, v in model_options.items() if k != "multigpu_clones"}
        uncond = comfy.samplers.calc_cond_batch(self.uncond_inner, [self._uncond_conds], x, timestep, uncond_model_options)[0]
        return comfy.samplers.cfg_function(self.inner_model, cond, uncond, self.cfg, x, timestep,
                                           model_options=model_options, cond=positive, uncond=self._uncond_conds)
 class DualModelGuider(io.ComfyNode):
    """Node that builds a CFG guider whose negative pass can run on a second model."""

    @classmethod
    def define_schema(cls):
        """Declare the two model inputs, the conditionings, the cfg scale and the guider output."""
        model_inputs = [
            io.Model.Input("model", tooltip="Model used for the positive (conditional) pass."),
            io.Model.Input("model_negative", optional=True, tooltip="Model used for the negative (unconditional) pass. Use the same model for ordinary CFG."),
        ]
        cfg_input = io.Float.Input("cfg", default=4.0, min=0.0, max=100.0, step=0.1, round=0.01)
        negative_input = io.Conditioning.Input("negative", optional=True, tooltip="Negative conditioning run on the negative model. Leave unconnected for a text-free (image-only) unconditional pass.")
        return io.Schema(
            node_id="DualModelGuider",
            display_name="Dual Model CFG Guider",
            category="model/sampling/guiders",
            is_experimental=True,
            inputs=[
                *model_inputs,
                io.Conditioning.Input("positive"),
                cfg_input,
                negative_input,
            ],
            outputs=[io.Guider.Output()],
        )

    @classmethod
    def execute(cls, model, positive, cfg, model_negative=None, negative=None) -> io.NodeOutput:
        """Create the guider, attach the conditionings and cfg, and return it.

        A plain CFGGuider is used when no negative model is connected; otherwise a Guider_DualModel.
        """
        if negative is None:
            # null cond -> no cross_attn -> model runs image-only
            negative = [[None, {}]]
        if model_negative is None:
            guider = comfy.samplers.CFGGuider(model)
        else:
            guider = Guider_DualModel(model, model_negative)
        guider.set_conds(positive, negative)
        guider.set_cfg(cfg)
        return io.NodeOutput(guider)

    get_guider = execute
 class DisableNoise(io.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -1054,11 +1135,53 @@ class ManualSigmas(io.ComfyNode):
        sigmas = torch.FloatTensor(sigmas)
        return io.NodeOutput(sigmas)
 class CFGOverride(io.ComfyNode):
    """Model patch node that forces cfg to a fixed value inside a sigma window."""
    @classmethod
    def define_schema(cls) -> io.Schema:
        """Return the node schema: model, cfg and a [start, end] percent range -> model."""
        node_inputs = [
            io.Model.Input("model"),
            io.Float.Input("cfg", default=1.0, min=0.0, max=100.0, step=0.1, round=0.01),
            io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
            io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
        ]
        return io.Schema(
            node_id="CFGOverride",
            display_name="CFG Override",
            description="Override cfg to a fixed value over a [start, end] percent (sigma) range. "
                        "With multiple overrides, the one nearest the sampler wins on overlap.",
            category="sampling/custom_sampling",
            inputs=node_inputs,
            outputs=[io.Model.Output()],
        )
    @classmethod
    def execute(cls, model, cfg, start_percent, end_percent) -> io.NodeOutput:
        """Clone ``model`` and register a PREDICT_NOISE wrapper that swaps in ``cfg``.
        The wrapper only acts while the current sigma lies within the bounds derived
        from ``start_percent`` / ``end_percent``; outside them it is a pass-through.
        """
        sampling = model.get_model_object("model_sampling")
        # percent->sigma decreasing, so the start percent yields the upper bound
        upper_sigma = sampling.percent_to_sigma(start_percent)
        lower_sigma = sampling.percent_to_sigma(end_percent)
        def predict_noise_wrapper(executor, *args, **kwargs):
            # args = (x, timestep, model_options, seed); first timestep entry is the sigma
            current_sigma = float(args[1].flatten()[0])
            if lower_sigma <= current_sigma <= upper_sigma:
                target = executor.class_obj            # guider whose cfg feeds cond_scale
                previous_cfg = target.cfg
                target.cfg = cfg
                try:
                    return executor(*args, **kwargs)
                finally:
                    # restore so other steps/overrides see the original value
                    target.cfg = previous_cfg
            return executor(*args, **kwargs)
        patched = model.clone()
        patched.add_wrapper(comfy.patcher_extension.WrappersMP.PREDICT_NOISE, predict_noise_wrapper)
        return io.NodeOutput(patched)
 class CustomSamplersExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [
            SamplerCustom,
            CFGOverride,
            BasicScheduler,
            KarrasScheduler,
            ExponentialScheduler,
@ -1087,6 +1210,7 @@ class CustomSamplersExtension(ComfyExtension):
            SamplingPercentToSigma,
            CFGGuider,
            DualCFGGuider,
            DualModelGuider,
            BasicGuider,
            RandomNoise,
            DisableNoise,
--- a/comfy_extras/nodes_dataset.py
+++ b/comfy_extras/nodes_dataset.py
@ -411,6 +411,21 @@ class ImageProcessingNode(io.ComfyNode):
        return has_group
    @classmethod
    def _ensure_image_list(cls, images):
        """Flatten ``images`` into a list of single-image ``[1, H, W, C]`` tensors.
        ``images`` is either one 4D tensor or an iterable of 4D tensors; every
        batch is cut into one-element slices along dim 0.
        Raises:
            ValueError: if a tensor is not 4D or an item is not a tensor.
        """
        def _split_batch(batch):
            # Slicing (not indexing) keeps the leading batch dimension of size 1.
            return [batch[idx:idx + 1] for idx in range(batch.shape[0])]
        if isinstance(images, torch.Tensor):
            if images.ndim == 4:
                return _split_batch(images)
            raise ValueError(f"Expected 4D image tensor, got shape {tuple(images.shape)}")
        frames = []
        for entry in images:
            is_batch = isinstance(entry, torch.Tensor) and entry.ndim == 4
            if not is_batch:
                raise ValueError(f"Expected 4D image tensor, got {type(entry).__name__} shape {getattr(entry, 'shape', None)}")
            frames += _split_batch(entry)
        return frames
    @classmethod
    def define_schema(cls):
        if cls.node_id is None:
@ -458,6 +473,9 @@ class ImageProcessingNode(io.ComfyNode):
        """Execute the node. Routes to _process or _group_process based on mode."""
        is_group = cls._detect_processing_mode()
        if is_group:
            images = cls._ensure_image_list(images)
        # Extract scalar values from lists for parameters
        params = {}
        for k, v in kwargs.items():
--- a/comfy_extras/nodes_ideogram4.py
+++ b/comfy_extras/nodes_ideogram4.py
@ -0,0 +1,64 @@
 """Ideogram 4 sampling helper
 """
 import math
 import torch
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
 _LOGSNR_MIN = -15.0
 _LOGSNR_MAX = 18.0
 def _logit_normal_schedule(u, mean, std):
    """Map quantiles ``u`` to clamped reference times as a float64 tensor.
    Reference time (0=noise..1=clean) is obtained through the probit/ndtri
    quantile, then clamped to the bounds implied by _LOGSNR_MAX / _LOGSNR_MIN.
    """
    quantiles = torch.as_tensor(u, dtype=torch.float64)
    logits = mean + std * torch.special.ndtri(quantiles)
    ref_time = 1.0 - torch.special.expit(logits)
    # Clamp bounds come from the module-level logSNR limits.
    lower = 1.0 / (1.0 + math.exp(0.5 * _LOGSNR_MAX))
    upper = 1.0 / (1.0 + math.exp(0.5 * _LOGSNR_MIN))
    return torch.clamp(ref_time, min=lower, max=upper)
 def ideogram4_sigmas(num_steps, width, height, mu, std):
    """Return the descending float32 sigma schedule with ``num_steps + 1`` entries.
    The logSNR shift is ``mu`` plus a resolution term relative to 512x512;
    ``std`` controls the spread of the schedule.
    """
    resolution_shift = 0.5 * math.log((width * height) / (512 * 512))
    mean = mu + resolution_shift
    quantiles = torch.linspace(0.0, 1.0, num_steps + 1, dtype=torch.float64)
    reference_t = _logit_normal_schedule(quantiles, mean, std)
    schedule = torch.flip(1.0 - reference_t, dims=(0,))
    # clamp leaves ~6e-4 on the final entry; force full denoise
    schedule[-1] = 0.0
    return schedule.to(torch.float32)
 class Ideogram4Scheduler(io.ComfyNode):
    """Node exposing ``ideogram4_sigmas`` as a sigma scheduler."""
    @classmethod
    def define_schema(cls) -> io.Schema:
        """Return the node schema: steps, resolution, mu and std -> sigmas."""
        node_inputs = [
            io.Int.Input("steps", default=20, min=1, max=200),
            io.Int.Input("width", default=1024, min=256, max=8192, step=16),
            io.Int.Input("height", default=1024, min=256, max=8192, step=16),
            io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.05),
            io.Float.Input("std", default=1.75, min=0.1, max=5.0, step=0.05),
        ]
        return io.Schema(
            node_id="Ideogram4Scheduler",
            display_name="Ideogram 4 Scheduler",
            category="sampling/custom_sampling/schedulers",
            inputs=node_inputs,
            outputs=[io.Sigmas.Output()],
        )
    @classmethod
    def execute(cls, steps, width, height, mu, std) -> io.NodeOutput:
        """Compute the sigma schedule for the given settings and wrap it as node output."""
        sigmas = ideogram4_sigmas(steps, width, height, mu, std)
        return io.NodeOutput(sigmas)
 class Ideogram4Extension(ComfyExtension):
    """Extension that registers the Ideogram 4 nodes defined in this module."""
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return the node classes contributed by this extension."""
        node_classes = [Ideogram4Scheduler]
        return node_classes
 async def comfy_entrypoint() -> Ideogram4Extension:
    """Create and return this module's extension instance."""
    extension = Ideogram4Extension()
    return extension
--- a/comfy_extras/nodes_video.py
+++ b/comfy_extras/nodes_video.py
@ -19,7 +19,7 @@ class SaveWEBM(io.ComfyNode):
            category="video",
            is_experimental=True,
            inputs=[
-                io.Image.Input("images"),
+                io.Image.Input("images", tooltip="RGBA images are saved with their alpha channel as transparency (vp9 codec only)."),
                io.String.Input("filename_prefix", default="ComfyUI"),
                io.Combo.Input("codec", options=["vp9", "av1"]),
                io.Float.Input("fps", default=24.0, min=0.01, max=1000.0, step=0.01),
@ -45,18 +45,25 @@ class SaveWEBM(io.ComfyNode):
            for x in cls.hidden.extra_pnginfo:
                container.metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x])
        # Save transparency when the images carry an alpha channel (RGBA) and the codec supports it.
        # vp9 -> yuva420p; other codecs have no usable alpha path, so the alpha is ignored.
        save_alpha = images.shape[-1] == 4 and codec == "vp9"
        codec_map = {"vp9": "libvpx-vp9", "av1": "libsvtav1"}
        stream = container.add_stream(codec_map[codec], rate=Fraction(round(fps * 1000), 1000))
        stream.width = images.shape[-2]
        stream.height = images.shape[-3]
-        stream.pix_fmt = "yuv420p10le" if codec == "av1" else "yuv420p"
+        stream.pix_fmt = "yuva420p" if save_alpha else ("yuv420p10le" if codec == "av1" else "yuv420p")
        stream.bit_rate = 0
        stream.options = {'crf': str(crf)}
        if codec == "av1":
            stream.options["preset"] = "6"
        for frame in images:
-            frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :3] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgb24")
+            if save_alpha:
                frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :4] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgba")
            else:
                frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :3] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgb24")
            for packet in stream.encode(frame):
                container.mux(packet)
        container.mux(stream.encode())
--- a/comfyui_version.py
+++ b/comfyui_version.py
@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.23.0"
+__version__ = "0.24.0"
--- a/nodes.py
+++ b/nodes.py
@ -969,7 +969,7 @@ class CLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
-                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit"], ),
+                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4"], ),
                              },
                "optional": {
                              "device": (["default", "cpu"], {"advanced": True}),
@ -2362,6 +2362,7 @@ async def init_builtin_extra_nodes():
        "nodes_model_downscale.py",
        "nodes_images.py",
        "nodes_video_model.py",
        "nodes_ideogram4.py",
        "nodes_train.py",
        "nodes_dataset.py",
        "nodes_sag.py",
--- a/openapi.yaml
+++ b/openapi.yaml
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.23.0"
+version = "0.24.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.10"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
-comfyui-frontend-package==1.44.19
+comfyui-frontend-package==1.45.15
-comfyui-workflow-templates==0.9.92
+comfyui-workflow-templates==0.9.98
 comfyui-embedded-docs==0.5.2
 torch
 torchsde