mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-15 20:39:48 +08:00
Merge branch 'master' of https://github.com/Comfy-Org/ComfyUI into alexis/add_output_save_nodes
This commit is contained in:
commit
cf66f99b94
@ -38,6 +38,8 @@ class ChromaRadianceParams(ChromaParams):
|
||||
# None means use the same dtype as the model.
|
||||
nerf_embedder_dtype: Optional[torch.dtype]
|
||||
use_x0: bool
|
||||
# Use sequential txt_ids instead of zeros
|
||||
use_sequential_txt_ids: bool
|
||||
|
||||
class ChromaRadiance(Chroma):
|
||||
"""
|
||||
@ -162,6 +164,9 @@ class ChromaRadiance(Chroma):
|
||||
if params.use_x0:
|
||||
self.register_buffer("__x0__", torch.tensor([]))
|
||||
|
||||
if params.use_sequential_txt_ids:
|
||||
self.register_buffer("__sequential__", torch.tensor([]))
|
||||
|
||||
@property
|
||||
def _nerf_final_layer(self) -> nn.Module:
|
||||
if self.params.nerf_final_head_type == "linear":
|
||||
@ -313,6 +318,9 @@ class ChromaRadiance(Chroma):
|
||||
img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
|
||||
img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)
|
||||
txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
|
||||
# Radiance after 2026-05-22 uses sequential txt_ids instead of zeros
|
||||
if params.use_sequential_txt_ids:
|
||||
txt_ids[:, :, 0] = torch.arange(context.shape[1], device=x.device, dtype=x.dtype).unsqueeze(0).expand(bs, -1)
|
||||
|
||||
img_out = self.forward_orig(
|
||||
img,
|
||||
|
||||
@ -4,7 +4,7 @@ from torch import Tensor
|
||||
|
||||
from comfy.ldm.modules.attention import optimized_attention
|
||||
import comfy.model_management
|
||||
import logging
|
||||
import comfy.quant_ops
|
||||
|
||||
|
||||
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None, transformer_options={}) -> Tensor:
|
||||
@ -44,21 +44,15 @@ def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor):
|
||||
return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
|
||||
|
||||
|
||||
try:
|
||||
import comfy.quant_ops
|
||||
q_apply_rope = comfy.quant_ops.ck.apply_rope
|
||||
q_apply_rope1 = comfy.quant_ops.ck.apply_rope1
|
||||
def apply_rope(xq, xk, freqs_cis):
|
||||
if comfy.model_management.in_training:
|
||||
return _apply_rope(xq, xk, freqs_cis)
|
||||
else:
|
||||
return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis)
|
||||
def apply_rope1(x, freqs_cis):
|
||||
if comfy.model_management.in_training:
|
||||
return _apply_rope1(x, freqs_cis)
|
||||
else:
|
||||
return q_apply_rope1(x, freqs_cis)
|
||||
except:
|
||||
logging.warning("No comfy kitchen, using old apply_rope functions.")
|
||||
apply_rope = _apply_rope
|
||||
apply_rope1 = _apply_rope1
|
||||
def apply_rope(xq, xk, freqs_cis):
    """Apply rotary position embeddings to a query/key pair.

    While ``comfy.model_management.in_training`` is truthy this module's
    ``_apply_rope`` is used; otherwise the call is forwarded to
    ``comfy.quant_ops.ck.apply_rope``.
    """
    # The conditional expression only evaluates the selected branch, so the
    # ``ck`` attribute is still looked up lazily, exactly as before.
    rope_fn = _apply_rope if comfy.model_management.in_training else comfy.quant_ops.ck.apply_rope
    return rope_fn(xq, xk, freqs_cis)
|
||||
|
||||
|
||||
def apply_rope1(x, freqs_cis):
    """Apply rotary position embeddings to a single tensor.

    While ``comfy.model_management.in_training`` is truthy this module's
    ``_apply_rope1`` is used; otherwise the call is forwarded to
    ``comfy.quant_ops.ck.apply_rope1``.
    """
    # Only the selected branch of the conditional expression is evaluated.
    rope_fn = _apply_rope1 if comfy.model_management.in_training else comfy.quant_ops.ck.apply_rope1
    return rope_fn(x, freqs_cis)
|
||||
|
||||
297
comfy/ldm/ideogram4/model.py
Normal file
297
comfy/ldm/ideogram4/model.py
Normal file
@ -0,0 +1,297 @@
|
||||
"""
|
||||
The Ideogram 4 transformer is a NextDiT/Lumina2-family single-stream model. It
consumes Qwen3-VL hidden-state features (concatenated from 13 layers -> 53248 dims) and
packs ``[text tokens, image tokens]`` into one sequence with block-diagonal segment attention and 3D interleaved MRoPE.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
import comfy.patcher_extension
|
||||
from comfy.ldm.lumina.model import FeedForward
|
||||
from comfy.ldm.modules.attention import optimized_attention_masked
|
||||
from comfy.text_encoders.llama import apply_rope, precompute_freqs_cis
|
||||
|
||||
# Per-token role indicators
|
||||
SEQUENCE_PADDING_INDICATOR = -1
|
||||
OUTPUT_IMAGE_INDICATOR = 2
|
||||
LLM_TOKEN_INDICATOR = 3
|
||||
# Image grid coordinates are offset so they never collide with text positions
|
||||
IMAGE_POSITION_OFFSET = 65536
|
||||
|
||||
|
||||
class Ideogram4Attention(nn.Module):
|
||||
def __init__(self, hidden_size, num_heads, eps=1e-5, dtype=None, device=None, operations=None):
|
||||
super().__init__()
|
||||
self.num_heads = num_heads
|
||||
self.head_dim = hidden_size // num_heads
|
||||
self.hidden_size = hidden_size
|
||||
|
||||
self.qkv = operations.Linear(hidden_size, hidden_size * 3, bias=False, dtype=dtype, device=device)
|
||||
self.norm_q = operations.RMSNorm(self.head_dim, eps=eps, elementwise_affine=True, dtype=dtype, device=device)
|
||||
self.norm_k = operations.RMSNorm(self.head_dim, eps=eps, elementwise_affine=True, dtype=dtype, device=device)
|
||||
self.o = operations.Linear(hidden_size, hidden_size, bias=False, dtype=dtype, device=device)
|
||||
|
||||
def forward(self, x, attn_mask, freqs_cis, transformer_options={}):
|
||||
batch_size, seq_len, _ = x.shape
|
||||
qkv = self.qkv(x).view(batch_size, seq_len, 3, self.num_heads, self.head_dim)
|
||||
q, k, v = qkv.unbind(dim=2)
|
||||
|
||||
q = self.norm_q(q)
|
||||
k = self.norm_k(k)
|
||||
|
||||
# (B, heads, L, head_dim)
|
||||
q = q.transpose(1, 2)
|
||||
k = k.transpose(1, 2)
|
||||
v = v.transpose(1, 2)
|
||||
|
||||
q, k = apply_rope(q, k, freqs_cis)
|
||||
|
||||
out = optimized_attention_masked(q, k, v, self.num_heads, attn_mask, skip_reshape=True, transformer_options=transformer_options)
|
||||
return self.o(out)
|
||||
|
||||
|
||||
class Ideogram4TransformerBlock(nn.Module):
    """One transformer layer: attention and feed-forward branches, each pre/post-normalised and adaLN-modulated."""

    def __init__(self, hidden_size, intermediate_size, num_heads, norm_eps, adaln_dim, dtype=None, device=None, operations=None):
        """Build the attention, feed-forward, four RMS norms and the adaLN projection."""
        super().__init__()

        def make_norm():
            return operations.RMSNorm(hidden_size, eps=norm_eps, elementwise_affine=True, dtype=dtype, device=device)

        self.attention = Ideogram4Attention(hidden_size, num_heads, eps=1e-5, dtype=dtype, device=device, operations=operations)
        self.feed_forward = FeedForward(
            dim=hidden_size,
            hidden_dim=intermediate_size,
            multiple_of=1,
            ffn_dim_multiplier=None,
            operation_settings={"operations": operations, "dtype": dtype, "device": device},
        )

        # Registration order is kept identical to preserve state_dict key order.
        self.attention_norm1 = make_norm()
        self.ffn_norm1 = make_norm()
        self.attention_norm2 = make_norm()
        self.ffn_norm2 = make_norm()

        self.adaln_modulation = operations.Linear(adaln_dim, 4 * hidden_size, bias=True, dtype=dtype, device=device)

    def forward(self, x, attn_mask, freqs_cis, adaln_input, transformer_options={}):
        """Apply the gated attention and feed-forward residual updates to ``x``."""
        # One projection yields a (scale, gate) pair for each of the two branches.
        scale_msa, gate_msa, scale_mlp, gate_mlp = self.adaln_modulation(adaln_input).chunk(4, dim=-1)
        gate_msa = torch.tanh(gate_msa)
        gate_mlp = torch.tanh(gate_mlp)
        scale_msa = 1.0 + scale_msa
        scale_mlp = 1.0 + scale_mlp

        attn_in = self.attention_norm1(x) * scale_msa
        attn_out = self.attention(attn_in, attn_mask, freqs_cis, transformer_options=transformer_options)
        x = x + gate_msa * self.attention_norm2(attn_out)

        ffn_in = self.ffn_norm1(x) * scale_mlp
        ffn_out = self.feed_forward(ffn_in)
        x = x + gate_mlp * self.ffn_norm2(ffn_out)
        return x
|
||||
|
||||
|
||||
def _sinusoidal_embedding(t, dim, scale=1e4):
|
||||
t = t.to(torch.float32)
|
||||
half = dim // 2
|
||||
freq = math.log(scale) / (half - 1)
|
||||
freq = torch.exp(torch.arange(half, dtype=torch.float32, device=t.device) * -freq)
|
||||
emb = t.unsqueeze(-1) * freq
|
||||
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1)
|
||||
if dim % 2 == 1:
|
||||
emb = F.pad(emb, (0, 1))
|
||||
return emb
|
||||
|
||||
|
||||
class Ideogram4EmbedScalar(nn.Module):
    """Embed a scalar by rescaling it, taking a sinusoidal embedding and passing it through a two-layer MLP."""

    def __init__(self, dim, input_range=(0.0, 1.0), dtype=None, device=None, operations=None):
        """Store the input range and create the two ``dim -> dim`` linear layers."""
        super().__init__()
        lower, upper = input_range
        self.dim = dim
        self.range_min = lower
        self.range_max = upper
        self.mlp_in = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)
        self.mlp_out = operations.Linear(dim, dim, bias=True, dtype=dtype, device=device)

    def forward(self, x):
        """Map ``x`` from ``[range_min, range_max]`` onto ``[0, 1e4]`` and embed it."""
        span = self.range_max - self.range_min
        scaled = 1e4 * (x.to(torch.float32) - self.range_min) / span
        # The sinusoid is computed in float32, then cast to the MLP weight dtype.
        features = _sinusoidal_embedding(scaled, self.dim).to(self.mlp_in.weight.dtype)
        hidden = F.silu(self.mlp_in(features))
        return self.mlp_out(hidden)
|
||||
|
||||
|
||||
class Ideogram4FinalLayer(nn.Module):
    """Output head: affine-free LayerNorm, a conditioning-derived scale, then a linear projection."""

    def __init__(self, hidden_size, out_channels, adaln_dim, dtype=None, device=None, operations=None):
        """Create the norm, the output projection and the adaLN scale projection."""
        super().__init__()
        factory = {"dtype": dtype, "device": device}
        self.norm_final = operations.LayerNorm(hidden_size, eps=1e-6, elementwise_affine=False, **factory)
        self.linear = operations.Linear(hidden_size, out_channels, bias=True, **factory)
        self.adaln_modulation = operations.Linear(adaln_dim, hidden_size, bias=True, **factory)

    def forward(self, x, c):
        """Scale the normalised ``x`` by ``1 + adaln_modulation(silu(c))`` and project it to ``out_channels``."""
        conditioning = F.silu(c)
        scale = 1.0 + self.adaln_modulation(conditioning)
        modulated = self.norm_final(x) * scale
        return self.linear(modulated)
|
||||
|
||||
|
||||
class Ideogram4Transformer(nn.Module):
    """A single Ideogram 4 backbone operating on a packed token sequence."""

    def __init__(self, emb_dim, num_layers, num_heads, intermediate_size, adaln_dim,
                 in_channels, llm_features_dim, rope_theta, mrope_section, norm_eps,
                 dtype=None, device=None, operations=None):
        """Create the input/conditioning projections, ``num_layers`` transformer blocks and the output head."""
        super().__init__()
        factory = {"dtype": dtype, "device": device}

        self.head_dim = emb_dim // num_heads
        self.rope_theta = rope_theta
        self.mrope_section = tuple(mrope_section)

        # Sub-modules are registered in the original order so state_dict key order is unchanged.
        self.input_proj = operations.Linear(in_channels, emb_dim, bias=True, **factory)
        self.llm_cond_norm = operations.RMSNorm(llm_features_dim, eps=1e-6, elementwise_affine=True, **factory)
        self.llm_cond_proj = operations.Linear(llm_features_dim, emb_dim, bias=True, **factory)
        self.t_embedding = Ideogram4EmbedScalar(emb_dim, input_range=(0.0, 1.0), operations=operations, **factory)
        self.adaln_proj = operations.Linear(emb_dim, adaln_dim, bias=True, **factory)

        self.embed_image_indicator = operations.Embedding(2, emb_dim, **factory)

        blocks = [
            Ideogram4TransformerBlock(emb_dim, intermediate_size, num_heads, norm_eps, adaln_dim,
                                      operations=operations, **factory)
            for _ in range(num_layers)
        ]
        self.layers = nn.ModuleList(blocks)

        self.final_layer = Ideogram4FinalLayer(emb_dim, in_channels, adaln_dim, operations=operations, **factory)

    def _backbone(self, llm_features, x, t, position_ids, attn_mask, indicator, transformer_options={}):
        """Run the packed sequence through every layer and the output head.

        ``indicator`` marks each token's role: rows equal to
        ``OUTPUT_IMAGE_INDICATOR`` carry latent content from ``x``, and rows
        equal to ``LLM_TOKEN_INDICATOR`` receive the projected ``llm_features``
        (skipped when ``llm_features`` is ``None``). A boolean ``attn_mask`` is
        converted to an additive mask in the hidden-state dtype.
        """
        indicator = indicator.to(torch.long)
        is_image = indicator == OUTPUT_IMAGE_INDICATOR
        image_rows = is_image.to(x.dtype).unsqueeze(-1)

        # Zero non-image rows both before and after the projection (the bias would otherwise leak in).
        h = self.input_proj(x * image_rows) * image_rows

        t_cond = self.t_embedding(t)
        if t.dim() == 1:
            t_cond = t_cond.unsqueeze(1)
        adaln_input = F.silu(self.adaln_proj(t_cond))

        # h is zero on the text rows (content lives only on image rows), add writes the text features in place
        if llm_features is not None:
            n_text = llm_features.shape[1]
            text_rows = (indicator[:, :n_text] == LLM_TOKEN_INDICATOR).to(x.dtype).unsqueeze(-1)
            llm = self.llm_cond_norm(llm_features * text_rows)
            llm = self.llm_cond_proj(llm) * text_rows
            h[:, :n_text] = h[:, :n_text] + llm

        h = h + self.embed_image_indicator(is_image.to(torch.long))

        # Qwen3-VL interleaved MRoPE; position_ids (B, L, 3) -> (3, L) (same across batch).
        freqs_cis = precompute_freqs_cis(
            self.head_dim,
            position_ids[0].transpose(0, 1),
            self.rope_theta,
            rope_dims=self.mrope_section,
            interleaved_mrope=True,
            device=position_ids.device,
        )

        if attn_mask is not None and attn_mask.dtype == torch.bool:
            # True = attend -> 0, False -> most negative finite value of the hidden dtype.
            blocked = ~attn_mask
            fill_value = -torch.finfo(h.dtype).max
            attn_mask = torch.zeros_like(attn_mask, dtype=h.dtype).masked_fill_(blocked, fill_value)

        for block in self.layers:
            h = block(h, attn_mask, freqs_cis, adaln_input, transformer_options=transformer_options)

        return self.final_layer(h, adaln_input)
|
||||
|
||||
|
||||
class Ideogram4Transformer2DModel(Ideogram4Transformer):
    """Ideogram 4 single-stream DiT.

    Runs a packed ``[text, image]`` sequence when text context is supplied, or an image-only sequence when ``context is None``.
    """

    def __init__(self, image_model=None, in_channels=128, num_layers=34, num_attention_heads=18, attention_head_dim=256, intermediate_size=12288,
                 adaln_dim=512, llm_features_dim=53248, rope_theta=5000000, mrope_section=(24, 20, 20), norm_eps=1e-5,
                 dtype=None, device=None, operations=None, **kwargs):
        """Configure the backbone; the embedding width is ``num_attention_heads * attention_head_dim``."""
        super().__init__(
            emb_dim=num_attention_heads * attention_head_dim,
            num_layers=num_layers,
            num_heads=num_attention_heads,
            intermediate_size=intermediate_size,
            adaln_dim=adaln_dim,
            in_channels=in_channels,
            llm_features_dim=llm_features_dim,
            rope_theta=rope_theta,
            mrope_section=mrope_section,
            norm_eps=norm_eps,
            dtype=dtype,
            device=device,
            operations=operations,
        )
        self.dtype = dtype
        self.in_channels = in_channels
        self.out_channels = in_channels
        # 128-dim token = patch (2x2) * ae_channels (32).
        self.patch_size = 2
        self.ae_channels = in_channels // (self.patch_size * self.patch_size)

    def _img_to_tokens(self, x):
        """Flatten a ``(B, C, gh, gw)`` latent into ``(B, gh * gw, C)`` tokens."""
        batch, channels, gh, gw = x.shape
        p = self.patch_size
        split = x.view(batch, self.ae_channels, p, p, gh, gw)
        split = split.permute(0, 4, 5, 2, 3, 1)  # (B, gh, gw, pi, pj, c)
        return split.reshape(batch, gh * gw, channels)

    def _tokens_to_img(self, tokens, gh, gw):
        """Inverse of ``_img_to_tokens``: rebuild a ``(B, C, gh, gw)`` latent from tokens."""
        batch, channels = tokens.shape[0], tokens.shape[-1]
        p = self.patch_size
        grid = tokens.reshape(batch, gh, gw, p, p, self.ae_channels)
        grid = grid.permute(0, 5, 3, 4, 1, 2)  # (B, c, pi, pj, gh, gw)
        return grid.reshape(batch, channels, gh, gw)

    def _image_position_ids(self, gh, gw, device):
        """Return ``(gh * gw, 3)`` position ids ``(0, row, col)`` shifted by ``IMAGE_POSITION_OFFSET``."""
        rows = torch.arange(gh, device=device).unsqueeze(1).expand(gh, gw).reshape(-1)
        cols = torch.arange(gw, device=device).unsqueeze(0).expand(gh, gw).reshape(-1)
        frames = torch.zeros_like(rows)
        return torch.stack([frames, rows, cols], dim=1) + IMAGE_POSITION_OFFSET  # (L_img, 3)

    def _run_conditional(self, x_chunk, context_chunk, attn_mask_chunk, t_chunk, gh, gw, transformer_options):
        """Pack ``[text, image]`` tokens, run the backbone and return the image part as a latent."""
        batch = x_chunk.shape[0]
        device = x_chunk.device
        img_tokens = self._img_to_tokens(x_chunk).to(self.dtype)
        n_img = img_tokens.shape[1]
        n_text = context_chunk.shape[1]
        total = n_text + n_img

        # Text rows of the latent input stay zero; their content comes from the LLM features.
        packed = torch.zeros(batch, total, img_tokens.shape[-1], dtype=img_tokens.dtype, device=device)
        packed[:, n_text:] = img_tokens

        text_pos = torch.arange(n_text, device=device).view(-1, 1).expand(n_text, 3)
        img_pos = self._image_position_ids(gh, gw, device)
        position_ids = torch.cat([text_pos, img_pos], dim=0).unsqueeze(0).expand(batch, total, 3)

        indicator = torch.full((batch, total), OUTPUT_IMAGE_INDICATOR, dtype=torch.long, device=device)
        indicator[:, :n_text] = LLM_TOKEN_INDICATOR

        attn_mask = None
        if attn_mask_chunk is not None:
            segment_ids = torch.ones(batch, total, dtype=torch.long, device=device)
            pad = attn_mask_chunk == 0
            # Indexing a slice view with a boolean mask writes through to the full tensor.
            segment_ids[:, :n_text][pad] = SEQUENCE_PADDING_INDICATOR
            indicator[:, :n_text][pad] = 0
            # Block-diagonal mask from segment ids: (B, 1, L, L), True = attend.
            same_segment = segment_ids.unsqueeze(2) == segment_ids.unsqueeze(1)
            attn_mask = same_segment.unsqueeze(1)

        out = self._backbone(context_chunk, packed, t_chunk, position_ids, attn_mask, indicator,
                             transformer_options=transformer_options)
        return self._tokens_to_img(out[:, n_text:], gh, gw)

    def _run_image_only(self, x_chunk, t_chunk, gh, gw, transformer_options):
        """Run the backbone on image tokens alone and return the result as a latent."""
        batch = x_chunk.shape[0]
        device = x_chunk.device
        img_tokens = self._img_to_tokens(x_chunk).to(self.dtype)
        n_img = img_tokens.shape[1]

        position_ids = self._image_position_ids(gh, gw, device).unsqueeze(0).expand(batch, n_img, 3)
        indicator = torch.full((batch, n_img), OUTPUT_IMAGE_INDICATOR, dtype=torch.long, device=device)

        # Image-only sequence is a single segment -> no mask, full attention, no LLM context.
        out = self._backbone(None, img_tokens, t_chunk, position_ids, None, indicator, transformer_options=transformer_options)
        return self._tokens_to_img(out, gh, gw)

    def forward(self, x, timesteps, context=None, attention_mask=None, transformer_options={}, **kwargs):
        """Run ``_forward`` through the DIFFUSION_MODEL wrappers found in ``transformer_options``."""
        wrappers = comfy.patcher_extension.get_all_wrappers(
            comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options
        )
        executor = comfy.patcher_extension.WrapperExecutor.new_class_executor(self._forward, self, wrappers)
        return executor.execute(x, timesteps, context, attention_mask, transformer_options, **kwargs)

    def _forward(self, x, timesteps, context=None, attention_mask=None, transformer_options={}, **kwargs):
        """Return the negated model output for a 4-D latent ``x``, using ``1 - timesteps`` as the time input."""
        _, _, gh, gw = x.shape

        timesteps = 1.0 - timesteps

        if context is None:
            # unconditional pass
            result = self._run_image_only(x, timesteps, gh, gw, transformer_options)
        else:
            result = self._run_conditional(x, context, attention_mask, timesteps, gh, gw, transformer_options)
        return -result
|
||||
@ -55,6 +55,7 @@ import comfy.ldm.pixeldit.pid
|
||||
import comfy.ldm.ace.model
|
||||
import comfy.ldm.omnigen.omnigen2
|
||||
import comfy.ldm.qwen_image.model
|
||||
import comfy.ldm.ideogram4.model
|
||||
import comfy.ldm.kandinsky5.model
|
||||
import comfy.ldm.anima.model
|
||||
import comfy.ldm.ace.ace_step15
|
||||
@ -2018,6 +2019,21 @@ class QwenImage(BaseModel):
|
||||
out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()), ref_latents)) // 16])
|
||||
return out
|
||||
|
||||
class Ideogram4(BaseModel):
    """Base-model wrapper that instantiates ``Ideogram4Transformer2DModel`` as the diffusion model."""

    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(
            model_config,
            model_type,
            device=device,
            unet_model=comfy.ldm.ideogram4.model.Ideogram4Transformer2DModel,
        )

    def extra_conds(self, **kwargs):
        """Extend the parent's conds with the text attention mask and the cross-attention features."""
        conds = super().extra_conds(**kwargs)

        mask = kwargs.get("attention_mask", None)
        # The mask is only forwarded when its sum differs from its element count.
        if mask is not None and torch.numel(mask) != mask.sum():
            conds['attention_mask'] = comfy.conds.CONDRegular(mask)

        text_features = kwargs.get("cross_attn", None)
        if text_features is not None:
            conds['c_crossattn'] = comfy.conds.CONDRegular(text_features)

        return conds
|
||||
|
||||
class HunyuanImage21(BaseModel):
|
||||
def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
|
||||
super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan_video.model.HunyuanVideo)
|
||||
|
||||
@ -313,6 +313,10 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
|
||||
dit_config["use_x0"] = True
|
||||
else:
|
||||
dit_config["use_x0"] = False
|
||||
if "{}__sequential__".format(key_prefix) in state_dict_keys: # sequential txt_ids
|
||||
dit_config["use_sequential_txt_ids"] = True
|
||||
else:
|
||||
dit_config["use_sequential_txt_ids"] = False
|
||||
else:
|
||||
dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
|
||||
dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys
|
||||
@ -811,6 +815,13 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
|
||||
dit_config["default_ref_method"] = "negative_index"
|
||||
return dit_config
|
||||
|
||||
if '{}embed_image_indicator.weight'.format(key_prefix) in state_dict_keys: # Ideogram 4
|
||||
dit_config = {}
|
||||
dit_config["image_model"] = "ideogram4"
|
||||
dit_config["in_channels"] = state_dict['{}input_proj.weight'.format(key_prefix)].shape[1]
|
||||
dit_config["num_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
|
||||
return dit_config
|
||||
|
||||
if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5
|
||||
dit_config = {}
|
||||
model_dim = state_dict['{}visual_embeddings.in_layer.bias'.format(key_prefix)].shape[0]
|
||||
|
||||
@ -54,6 +54,8 @@ class MultiGPUThreadPool:
|
||||
try:
|
||||
result = fn(*args, **kwargs)
|
||||
result_q.put((result, None))
|
||||
except comfy.model_management.InterruptProcessingException as e:
|
||||
result_q.put((None, e))
|
||||
except Exception as e:
|
||||
result_q.put((None, e))
|
||||
|
||||
|
||||
10
comfy/sd.py
10
comfy/sd.py
@ -58,6 +58,7 @@ import comfy.text_encoders.omnigen2
|
||||
import comfy.text_encoders.qwen_image
|
||||
import comfy.text_encoders.hunyuan_image
|
||||
import comfy.text_encoders.z_image
|
||||
import comfy.text_encoders.ideogram4
|
||||
import comfy.text_encoders.ovis
|
||||
import comfy.text_encoders.kandinsky5
|
||||
import comfy.text_encoders.jina_clip_2
|
||||
@ -1298,6 +1299,7 @@ class CLIPType(Enum):
|
||||
COGVIDEOX = 27
|
||||
LENS = 28
|
||||
PIXELDIT = 29
|
||||
IDEOGRAM4 = 30
|
||||
|
||||
|
||||
|
||||
@ -1596,8 +1598,12 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
|
||||
clip_target.clip = comfy.text_encoders.ovis.te(**llama_detect(clip_data))
|
||||
clip_target.tokenizer = comfy.text_encoders.ovis.OvisTokenizer
|
||||
elif te_model == TEModel.QWEN3_8B:
|
||||
clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b")
|
||||
clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B
|
||||
if clip_type == CLIPType.IDEOGRAM4:
|
||||
clip_target.clip = comfy.text_encoders.ideogram4.te(**llama_detect(clip_data))
|
||||
clip_target.tokenizer = comfy.text_encoders.ideogram4.Ideogram4Tokenizer
|
||||
else:
|
||||
clip_target.clip = comfy.text_encoders.flux.klein_te(**llama_detect(clip_data), model_type="qwen3_8b")
|
||||
clip_target.tokenizer = comfy.text_encoders.flux.KleinTokenizer8B
|
||||
elif te_model == TEModel.JINA_CLIP_2:
|
||||
clip_target.clip = comfy.text_encoders.jina_clip_2.JinaClip2TextModelWrapper
|
||||
clip_target.tokenizer = comfy.text_encoders.jina_clip_2.JinaClip2TokenizerWrapper
|
||||
|
||||
@ -24,6 +24,7 @@ import comfy.text_encoders.qwen_image
|
||||
import comfy.text_encoders.hunyuan_image
|
||||
import comfy.text_encoders.kandinsky5
|
||||
import comfy.text_encoders.z_image
|
||||
import comfy.text_encoders.ideogram4
|
||||
import comfy.text_encoders.anima
|
||||
import comfy.text_encoders.ace15
|
||||
import comfy.text_encoders.longcat_image
|
||||
@ -1746,6 +1747,44 @@ class Omnigen2(supported_models_base.BASE):
|
||||
hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_3b.transformer.".format(pref))
|
||||
return supported_models_base.ClipTarget(comfy.text_encoders.omnigen2.Omnigen2Tokenizer, comfy.text_encoders.omnigen2.te(**hunyuan_detect))
|
||||
|
||||
class Ideogram4(supported_models_base.BASE):
    # Matched against detected configs whose "image_model" is "ideogram4".
    unet_config = {"image_model": "ideogram4"}

    sampling_settings = {"multiplier": 1.0, "shift": 1.0}

    memory_usage_factor = 11.6

    # Fixed architecture hyper-parameters for the Ideogram 4 transformer.
    unet_extra_config = {
        "num_attention_heads": 18,
        "attention_head_dim": 256,
        "intermediate_size": 12288,
        "adaln_dim": 512,
        "llm_features_dim": 53248,
        "rope_theta": 5000000,
        "mrope_section": [24, 20, 20],
        "norm_eps": 1e-5,
    }
    latent_format = latent_formats.Flux2

    supported_inference_dtypes = [torch.bfloat16, torch.float32]

    vae_key_prefix = ["vae."]
    text_encoder_key_prefix = ["text_encoders."]

    def get_model(self, state_dict, prefix="", device=None):
        """Return the ``model_base.Ideogram4`` wrapper for this config."""
        return model_base.Ideogram4(self, device=device)

    def clip_target(self, state_dict={}):
        """Return the tokenizer / text-encoder pair, detecting encoder settings under the ``qwen3vl_8b`` prefix."""
        te_prefix = "{}qwen3vl_8b.transformer.".format(self.text_encoder_key_prefix[0])
        detected = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, te_prefix)
        return supported_models_base.ClipTarget(
            comfy.text_encoders.ideogram4.Ideogram4Tokenizer,
            comfy.text_encoders.ideogram4.te(**detected),
        )
|
||||
|
||||
class QwenImage(supported_models_base.BASE):
|
||||
unet_config = {
|
||||
"image_model": "qwen_image",
|
||||
@ -2233,6 +2272,7 @@ models = [
|
||||
ACEStep15,
|
||||
Omnigen2,
|
||||
QwenImage,
|
||||
Ideogram4,
|
||||
Flux2,
|
||||
Lens,
|
||||
Kandinsky5Image,
|
||||
|
||||
77
comfy/text_encoders/ideogram4.py
Normal file
77
comfy/text_encoders/ideogram4.py
Normal file
@ -0,0 +1,77 @@
|
||||
"""Ideogram 4 text encoder: Qwen3-VL-8B language model, 13-layer tap.
|
||||
|
||||
Ideogram 4 conditions on the concatenation of hidden states from 13 layers of
|
||||
Qwen3-VL (layers 0,3,...,33,35), giving a 4096*13 = 53248-dim feature per token.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from transformers import Qwen2Tokenizer
|
||||
|
||||
import comfy.text_encoders.llama
|
||||
from comfy import sd1_clip
|
||||
|
||||
# Reference taps outputs of layers (0,3,...,35); comfy captures layer inputs, offset by +1.
|
||||
IDEOGRAM4_TAP_LAYERS = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 36]
|
||||
|
||||
|
||||
class Qwen3VLTokenizer(sd1_clip.SDTokenizer):
    """``SDTokenizer`` configured with the bundled ``qwen25_tokenizer`` files and no start/end tokens."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        module_dir = os.path.dirname(os.path.realpath(__file__))
        tokenizer_path = os.path.join(module_dir, "qwen25_tokenizer")
        super().__init__(
            tokenizer_path,
            pad_with_end=False,
            embedding_directory=embedding_directory,
            embedding_size=4096,
            embedding_key='qwen3vl_8b',
            tokenizer_class=Qwen2Tokenizer,
            has_start_token=False,
            has_end_token=False,
            pad_to_max_length=False,
            max_length=99999999,
            min_length=1,
            pad_token=151643,
            tokenizer_data=tokenizer_data,
        )
|
||||
|
||||
|
||||
class Ideogram4Tokenizer(sd1_clip.SD1Tokenizer):
    """Tokenizer wrapper that places the prompt inside a chat template before tokenizing."""

    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(
            embedding_directory=embedding_directory,
            tokenizer_data=tokenizer_data,
            name="qwen3vl_8b",
            tokenizer=Qwen3VLTokenizer,
        )
        # Default template; callers may override it per call via ``llama_template``.
        self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"

    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, **kwargs):
        """Format ``text`` into the chosen template and tokenize it with weights disabled."""
        template = self.llama_template if llama_template is None else llama_template
        wrapped = template.format(text)
        return super().tokenize_with_weights(wrapped, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
|
||||
|
||||
|
||||
# Qwen3-VL-8B = 5e6 (vs plain Qwen3-8B's 1e6)
|
||||
# final_norm/lm_head off -> Ideogram only reads raw tapped hidden states
|
||||
QWEN3VL_8B_CONFIG = {"rope_theta": 5000000.0, "final_norm": False, "lm_head": False}
|
||||
|
||||
|
||||
class Qwen3VL8BModel(sd1_clip.SDClipModel):
    """``SDClipModel`` over ``Qwen3_8B`` that always returns the ``IDEOGRAM4_TAP_LAYERS`` hidden states."""

    def __init__(self, device="cpu", layer="hidden", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
        # ``layer`` and ``layer_idx`` are accepted for signature compatibility but not used:
        # the tapped layers are always IDEOGRAM4_TAP_LAYERS.
        text_config = dict(QWEN3VL_8B_CONFIG)
        super().__init__(
            device=device,
            layer=IDEOGRAM4_TAP_LAYERS,
            layer_idx=None,
            textmodel_json_config=text_config,
            dtype=dtype,
            special_tokens={"pad": 151643},
            layer_norm_hidden_state=False,
            model_class=comfy.text_encoders.llama.Qwen3_8B,
            enable_attention_masks=attention_mask,
            return_attention_masks=attention_mask,
            model_options=model_options,
        )
|
||||
|
||||
|
||||
class Ideogram4TEModel(sd1_clip.SD1ClipModel):
    """Text-encoder wrapper that merges the tapped hidden states into one feature vector per token."""

    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(device=device, dtype=dtype, name="qwen3vl_8b", clip_model=Qwen3VL8BModel, model_options=model_options)

    def encode_token_weights(self, token_weight_pairs):
        """Encode tokens and fold the tap axis into the feature axis."""
        stacked, pooled, extra = super().encode_token_weights(token_weight_pairs)
        # (B, n_taps=13, seq, 4096) stacked in ascending layer order.
        batch, taps, seq, width = stacked.shape
        # permute -> (B, seq, H, taps), then flatten the last two axes -> (B, seq, 4096*13).
        per_token = stacked.permute(0, 2, 3, 1)
        merged = per_token.reshape(batch, seq, width * taps)
        return merged, pooled, extra
|
||||
|
||||
|
||||
def te(dtype_llama=None, llama_quantization_metadata=None):
    """Return an ``Ideogram4TEModel`` subclass with the given dtype / quantization overrides baked in.

    When ``dtype_llama`` is not ``None`` it replaces the ``dtype`` passed at
    construction. When ``llama_quantization_metadata`` is not ``None`` it is
    stored under ``"quantization_metadata"`` in a copy of ``model_options``.
    """
    class Ideogram4TEModel_(Ideogram4TEModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
            effective_dtype = dtype if dtype_llama is None else dtype_llama
            if llama_quantization_metadata is not None:
                # Build a new dict so the caller's options are not mutated.
                model_options = {**model_options, "quantization_metadata": llama_quantization_metadata}
            super().__init__(device=device, dtype=effective_dtype, model_options=model_options)

    return Ideogram4TEModel_
|
||||
@ -290,3 +290,19 @@ class IdeogramV3Request(BaseModel):
|
||||
None,
|
||||
description='Optional masks for character reference images. When provided, must match the number of character_reference_images. Each mask should be a grayscale image of the same dimensions as the corresponding character reference image. The images should be in JPEG, PNG or WebP format.'
|
||||
)
|
||||
|
||||
|
||||
class IdeogramV4Request(BaseModel):
    # Request payload for Ideogram V4 generation; every field is optional and defaults to None.
    # Per the field descriptions, exactly one of text_prompt / json_prompt should be supplied
    # (this is not enforced here).
    text_prompt: str | None = Field(
        default=None,
        description="Natural-language prompt; Magic Prompt is applied automatically. Supply exactly one of text_prompt or json_prompt.",
    )
    json_prompt: dict[str, Any] | None = Field(
        default=None,
        description="Structured V4 prompt object consumed directly (disables Magic Prompt). Supply exactly one of text_prompt or json_prompt.",
    )
    resolution: str | None = Field(
        default=None,
        description="Output resolution in WIDTHxHEIGHT (e.g. '2048x2048').",
    )
    rendering_speed: str | None = Field(
        default=None,
        description="Rendering speed: 'TURBO', 'DEFAULT', or 'QUALITY'.",
    )
    enable_copyright_detection: bool | None = Field(
        default=None,
        description="Opt into post-generation copyright detection.",
    )
|
||||
|
||||
@ -155,7 +155,7 @@ class ClaudeNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ClaudeNode",
|
||||
display_name="Anthropic Claude",
|
||||
category="text/partner/Anthropic",
|
||||
category="partner/text/Anthropic",
|
||||
essentials_category="Text Generation",
|
||||
description="Generate text responses with Anthropic's Claude models. "
|
||||
"Provide a text prompt and optionally one or more images for multimodal context.",
|
||||
|
||||
@ -206,7 +206,7 @@ class BeebleSwitchXVideoEdit(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="BeebleSwitchXVideoEdit",
|
||||
display_name="Beeble SwitchX Video Edit",
|
||||
category="video/partner/Beeble",
|
||||
category="partner/video/Beeble",
|
||||
description=(
|
||||
"Edit a video with Beeble SwitchX. Switches anything in the scene (background, "
|
||||
"lighting, costume) while preserving the original subject's pixels and motion. "
|
||||
@ -302,7 +302,7 @@ class BeebleSwitchXImageEdit(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="BeebleSwitchXImageEdit",
|
||||
display_name="Beeble SwitchX Image Edit",
|
||||
category="image/partner/Beeble",
|
||||
category="partner/image/Beeble",
|
||||
description=(
|
||||
"Edit a single image with Beeble SwitchX. Switches anything in the scene "
|
||||
"(background, lighting, costume) while preserving the original subject's pixels. "
|
||||
|
||||
@ -37,7 +37,7 @@ class FluxProUltraImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="FluxProUltraImageNode",
|
||||
display_name="Flux 1.1 [pro] Ultra Image",
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Generates images using Flux Pro 1.1 Ultra via api based on prompt and resolution.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -155,7 +155,7 @@ class FluxKontextProImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id=cls.NODE_ID,
|
||||
display_name=cls.DISPLAY_NAME,
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Edits images using Flux.1 Kontext [pro] via api based on prompt and aspect ratio.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -277,7 +277,7 @@ class FluxProExpandNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="FluxProExpandNode",
|
||||
display_name="Flux.1 Expand Image",
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Outpaints image based on prompt.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -414,7 +414,7 @@ class FluxProFillNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="FluxProFillNode",
|
||||
display_name="Flux.1 Fill Image",
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Inpaints image based on mask and prompt.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -521,7 +521,7 @@ class FluxEraseNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="FluxEraseNode",
|
||||
display_name="Flux Erase Image",
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Removes the masked object from an image and reconstructs the background. "
|
||||
"Paint the mask over what you want to erase.",
|
||||
inputs=[
|
||||
@ -597,7 +597,7 @@ class FluxVTONode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="FluxVTONode",
|
||||
display_name="Flux Virtual Try-On",
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Virtual try-on: dresses the person in the provided garment.",
|
||||
inputs=[
|
||||
IO.Image.Input("person", tooltip="Image of the person to dress."),
|
||||
@ -697,7 +697,7 @@ class Flux2ProImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id=cls.NODE_ID,
|
||||
display_name=cls.DISPLAY_NAME,
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Generates images synchronously based on prompt and resolution.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -868,7 +868,7 @@ class Flux2ImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Flux2ImageNode",
|
||||
display_name="Flux.2 Image",
|
||||
category="image/partner/BFL",
|
||||
category="partner/image/BFL",
|
||||
description="Generate images via Flux.2 [pro] or Flux.2 [max] from a prompt and optional reference images.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
|
||||
@ -1,13 +1,16 @@
|
||||
import av
|
||||
import torch
|
||||
from av.codec import CodecContext
|
||||
from typing_extensions import override
|
||||
|
||||
from comfy_api.latest import IO, ComfyExtension, Input
|
||||
from comfy_api_nodes.apis.bria import (
|
||||
BriaEditImageRequest,
|
||||
BriaImageEditResponse,
|
||||
BriaRemoveBackgroundRequest,
|
||||
BriaRemoveBackgroundResponse,
|
||||
BriaRemoveVideoBackgroundRequest,
|
||||
BriaRemoveVideoBackgroundResponse,
|
||||
BriaImageEditResponse,
|
||||
BriaStatusResponse,
|
||||
InputModerationSettings,
|
||||
)
|
||||
@ -31,7 +34,7 @@ class BriaImageEditNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="BriaImageEditNode",
|
||||
display_name="Bria FIBO Image Edit",
|
||||
category="image/partner/Bria",
|
||||
category="partner/image/Bria",
|
||||
description="Edit images using Bria latest model",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["FIBO"]),
|
||||
@ -169,7 +172,7 @@ class BriaRemoveImageBackground(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="BriaRemoveImageBackground",
|
||||
display_name="Bria Remove Image Background",
|
||||
category="image/partner/Bria",
|
||||
category="partner/image/Bria",
|
||||
description="Remove the background from an image using Bria RMBG 2.0.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -245,7 +248,7 @@ class BriaRemoveVideoBackground(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="BriaRemoveVideoBackground",
|
||||
display_name="Bria Remove Video Background",
|
||||
category="video/partner/Bria",
|
||||
category="partner/video/Bria",
|
||||
description="Remove the background from a video using Bria. ",
|
||||
inputs=[
|
||||
IO.Video.Input("video"),
|
||||
@ -316,6 +319,96 @@ class BriaRemoveVideoBackground(IO.ComfyNode):
|
||||
return IO.NodeOutput(await download_url_to_video_output(response.result.video_url))
|
||||
|
||||
|
||||
def _video_to_images_and_mask(video: Input.Video) -> tuple[Input.Image, Input.Mask]:
    """Split a transparent webm (VP9 + alpha) into RGB frames and an alpha-derived mask.

    VP9 stores its alpha in a side layer that PyAV's default vp9 decoder drops, so a
    stream whose codec is reported as "vp9" is decoded through an explicitly created
    libvpx-vp9 codec context. Any other codec is decoded by the packet itself.

    Returns:
        ``(images, mask)``: RGB images [B,H,W,3] in 0..1 and a mask [B,H,W] that follows
        the Load Image convention (1 = transparent), suitable for compositing or Save WEBM.
        If no frame is decoded, both tensors are empty along the frame axis.
    """
    color_planes: list[torch.Tensor] = []
    opacity_planes: list[torch.Tensor] = []

    with av.open(video.get_stream_source(), mode="r") as container:
        video_stream = container.streams.video[0]

        # Only VP9 needs the dedicated decoder; it is created once, before demuxing starts.
        alpha_decoder = None
        if video_stream.codec_context.name == "vp9":
            alpha_decoder = CodecContext.create("libvpx-vp9", "r")

        for packet in container.demux(video_stream):
            if alpha_decoder is None:
                decoded = packet.decode()
            else:
                decoded = alpha_decoder.decode(packet)
            for frame in decoded:
                # uint8 RGBA -> float32 in 0..1, then separate colour from alpha.
                pixels = torch.from_numpy(frame.to_ndarray(format="rgba")).float() / 255.0
                color_planes.append(pixels[..., :3])
                opacity_planes.append(pixels[..., 3])

    # Both lists grow together, so one emptiness check covers images and mask.
    if not color_planes:
        return torch.zeros(0, 0, 0, 3), torch.zeros((0, 64, 64))

    images = torch.stack(color_planes)
    # Invert alpha so that 1 marks transparent pixels.
    mask = 1.0 - torch.stack(opacity_planes)
    return images, mask
|
||||
|
||||
|
||||
class BriaTransparentVideoBackground(IO.ComfyNode):
    """API node that removes a video's background via Bria and outputs frames plus a mask."""

    @classmethod
    def define_schema(cls):
        """Declare the node: a video and a seed as inputs, an image batch and a mask as outputs."""
        # The seed is forwarded in the request; per its tooltip it mainly serves to force a re-run.
        rerun_seed = IO.Int.Input(
            "seed",
            default=0,
            min=0,
            max=2147483647,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
            tooltip="Seed controls whether the node should re-run; "
            "results are non-deterministic regardless of seed.",
        )
        usage_price = IO.PriceBadge(
            expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""",
        )
        return IO.Schema(
            node_id="BriaTransparentVideoBackground",
            display_name="Bria Remove Video Background (Transparent)",
            category="partner/video/Bria",
            description="Remove the background from a video using Bria and return the cut-out frames "
            "plus an alpha mask. Connect both to a compositing node, or feed them to Save WEBM to "
            "write a transparent video.",
            inputs=[
                IO.Video.Input("video"),
                rerun_seed,
            ],
            outputs=[
                IO.Image.Output(display_name="images"),
                IO.Mask.Output(display_name="mask"),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=usage_price,
        )

    @classmethod
    async def execute(
        cls,
        video: Input.Video,
        seed: int,
    ) -> IO.NodeOutput:
        """Upload the video, request a transparent webm_vp9 result, and decode it to tensors.

        Args:
            video: Source video; validated against a 60 second maximum duration.
            seed: Value forwarded in the removal request.

        Returns:
            Node output carrying the decoded images and the mask.
        """
        validate_video_duration(video, max_duration=60.0)

        submit_endpoint = ApiEndpoint(path="/proxy/bria/v2/video/edit/remove_background", method="POST")
        removal_request = BriaRemoveVideoBackgroundRequest(
            video=await upload_video_to_comfyapi(cls, video),
            background_color="Transparent",
            output_container_and_codec="webm_vp9",
            seed=seed,
        )
        submitted = await sync_op(
            cls,
            submit_endpoint,
            data=removal_request,
            response_model=BriaStatusResponse,
        )

        # The request id from the submission selects the status endpoint handed to poll_op.
        finished = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/bria/v2/status/{submitted.request_id}"),
            status_extractor=lambda status_reply: status_reply.status,
            response_model=BriaRemoveVideoBackgroundResponse,
        )

        transparent_video = await download_url_to_video_output(finished.result.video_url)
        frames, transparency = _video_to_images_and_mask(transparent_video)
        return IO.NodeOutput(frames, transparency)
|
||||
|
||||
|
||||
class BriaExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
@ -323,6 +416,7 @@ class BriaExtension(ComfyExtension):
|
||||
BriaImageEditNode,
|
||||
BriaRemoveImageBackground,
|
||||
BriaRemoveVideoBackground,
|
||||
BriaTransparentVideoBackground,
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -368,7 +368,7 @@ class ByteDanceImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceImageNode",
|
||||
display_name="ByteDance Image",
|
||||
category="image/partner/ByteDance",
|
||||
category="partner/image/ByteDance",
|
||||
description="Generate images using ByteDance models via api based on prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["seedream-3-0-t2i-250415"]),
|
||||
@ -492,7 +492,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceSeedreamNode",
|
||||
display_name="ByteDance Seedream 4.5 & 5.0",
|
||||
category="image/partner/ByteDance",
|
||||
category="partner/image/ByteDance",
|
||||
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -754,7 +754,7 @@ class ByteDanceSeedreamNodeV2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceSeedreamNodeV2",
|
||||
display_name="ByteDance Seedream 4.5 & 5.0",
|
||||
category="image/partner/ByteDance",
|
||||
category="partner/image/ByteDance",
|
||||
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -920,7 +920,7 @@ class ByteDanceTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceTextToVideoNode",
|
||||
display_name="ByteDance Text to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate video using ByteDance models via api based on prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -1048,7 +1048,7 @@ class ByteDanceImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceImageToVideoNode",
|
||||
display_name="ByteDance Image to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate video using ByteDance models via api based on image and prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -1185,7 +1185,7 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceFirstLastFrameNode",
|
||||
display_name="ByteDance First-Last-Frame to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate video using prompt and first and last frames.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -1333,7 +1333,7 @@ class ByteDanceImageReferenceNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceImageReferenceNode",
|
||||
display_name="ByteDance Reference Images to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate video using prompt and reference images.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -1576,7 +1576,7 @@ class ByteDance2TextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDance2TextToVideoNode",
|
||||
display_name="ByteDance Seedance 2.0 Text to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate video using Seedance 2.0 models based on a text prompt.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1677,7 +1677,7 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDance2FirstLastFrameNode",
|
||||
display_name="ByteDance Seedance 2.0 First-Last-Frame to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate video using Seedance 2.0 from a first frame image and optional last frame image.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1944,7 +1944,7 @@ class ByteDance2ReferenceNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDance2ReferenceNode",
|
||||
display_name="ByteDance Seedance 2.0 Reference to Video",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description="Generate, edit, or extend video using Seedance 2.0 with reference images, "
|
||||
"videos, and audio. Supports multimodal reference, video editing, and video extension.",
|
||||
inputs=[
|
||||
@ -2241,7 +2241,7 @@ class ByteDanceCreateImageAsset(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceCreateImageAsset",
|
||||
display_name="ByteDance Create Image Asset",
|
||||
category="image/partner/ByteDance",
|
||||
category="partner/image/ByteDance",
|
||||
description=(
|
||||
"Create a Seedance 2.0 personal image asset. Uploads the input image and "
|
||||
"registers it in the given asset group. If group_id is empty, runs a real-person "
|
||||
@ -2308,7 +2308,7 @@ class ByteDanceCreateVideoAsset(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceCreateVideoAsset",
|
||||
display_name="ByteDance Create Video Asset",
|
||||
category="video/partner/ByteDance",
|
||||
category="partner/video/ByteDance",
|
||||
description=(
|
||||
"Create a Seedance 2.0 personal video asset. Uploads the input video and "
|
||||
"registers it in the given asset group. If group_id is empty, runs a real-person "
|
||||
|
||||
@ -144,7 +144,7 @@ class ByteDanceSeedNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ByteDanceSeedNode",
|
||||
display_name="ByteDance Seed",
|
||||
category="text/partner/ByteDance",
|
||||
category="partner/text/ByteDance",
|
||||
essentials_category="Text Generation",
|
||||
description="Generate text responses with ByteDance's Seed 2.0 models. "
|
||||
"Provide a text prompt and optionally one or more images or videos for multimodal context.",
|
||||
|
||||
@ -69,7 +69,7 @@ class ElevenLabsSpeechToText(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsSpeechToText",
|
||||
display_name="ElevenLabs Speech to Text",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Transcribe audio to text. "
|
||||
"Supports automatic language detection, speaker diarization, and audio event tagging.",
|
||||
inputs=[
|
||||
@ -210,7 +210,7 @@ class ElevenLabsVoiceSelector(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsVoiceSelector",
|
||||
display_name="ElevenLabs Voice Selector",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Select a predefined ElevenLabs voice for text-to-speech generation.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -239,7 +239,7 @@ class ElevenLabsTextToSpeech(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsTextToSpeech",
|
||||
display_name="ElevenLabs Text to Speech",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Convert text to speech.",
|
||||
inputs=[
|
||||
IO.Custom(ELEVENLABS_VOICE).Input(
|
||||
@ -414,7 +414,7 @@ class ElevenLabsAudioIsolation(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsAudioIsolation",
|
||||
display_name="ElevenLabs Voice Isolation",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Remove background noise from audio, isolating vocals or speech.",
|
||||
inputs=[
|
||||
IO.Audio.Input(
|
||||
@ -459,7 +459,7 @@ class ElevenLabsTextToSoundEffects(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsTextToSoundEffects",
|
||||
display_name="ElevenLabs Text to Sound Effects",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Generate sound effects from text descriptions.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -555,7 +555,7 @@ class ElevenLabsInstantVoiceClone(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsInstantVoiceClone",
|
||||
display_name="ElevenLabs Instant Voice Clone",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Create a cloned voice from audio samples. "
|
||||
"Provide 1-8 audio recordings of the voice to clone.",
|
||||
inputs=[
|
||||
@ -658,7 +658,7 @@ class ElevenLabsSpeechToSpeech(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsSpeechToSpeech",
|
||||
display_name="ElevenLabs Speech to Speech",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Transform speech from one voice to another while preserving the original content and emotion.",
|
||||
inputs=[
|
||||
IO.Custom(ELEVENLABS_VOICE).Input(
|
||||
@ -793,7 +793,7 @@ class ElevenLabsTextToDialogue(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ElevenLabsTextToDialogue",
|
||||
display_name="ElevenLabs Text to Dialogue",
|
||||
category="audio/partner/ElevenLabs",
|
||||
category="partner/audio/ElevenLabs",
|
||||
description="Generate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.",
|
||||
inputs=[
|
||||
IO.Float.Input(
|
||||
|
||||
@ -300,7 +300,7 @@ class GeminiNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GeminiNode",
|
||||
display_name="Google Gemini",
|
||||
category="text/partner/Gemini",
|
||||
category="partner/text/Gemini",
|
||||
description="Generate text responses with Google's Gemini AI model. "
|
||||
"You can provide multiple types of inputs (text, images, audio, video) "
|
||||
"as context for generating more relevant and meaningful responses.",
|
||||
@ -541,7 +541,7 @@ class GeminiInputFiles(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GeminiInputFiles",
|
||||
display_name="Gemini Input Files",
|
||||
category="text/partner/Gemini",
|
||||
category="partner/text/Gemini",
|
||||
description="Loads and prepares input files to include as inputs for Gemini LLM nodes. "
|
||||
"The files will be read by the Gemini model when generating a response. "
|
||||
"The contents of the text file count toward the token limit. "
|
||||
@ -598,7 +598,7 @@ class GeminiImage(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GeminiImageNode",
|
||||
display_name="Nano Banana (Google Gemini Image)",
|
||||
category="image/partner/Gemini",
|
||||
category="partner/image/Gemini",
|
||||
description="Edit images synchronously via Google API.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -731,7 +731,7 @@ class GeminiImage2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GeminiImage2Node",
|
||||
display_name="Nano Banana Pro (Google Gemini Image)",
|
||||
category="image/partner/Gemini",
|
||||
category="partner/image/Gemini",
|
||||
description="Generate or edit images synchronously via Google Vertex API.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -869,7 +869,7 @@ class GeminiNanoBanana2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GeminiNanoBanana2",
|
||||
display_name="Nano Banana 2",
|
||||
category="image/partner/Gemini",
|
||||
category="partner/image/Gemini",
|
||||
description="Generate or edit images synchronously via Google Vertex API.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -1085,7 +1085,7 @@ class GeminiNanoBanana2V2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GeminiNanoBanana2V2",
|
||||
display_name="Nano Banana 2",
|
||||
category="image/partner/Gemini",
|
||||
category="partner/image/Gemini",
|
||||
description="Generate or edit images synchronously via Google Vertex API.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
|
||||
@ -54,7 +54,7 @@ class GrokImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokImageNode",
|
||||
display_name="Grok Image",
|
||||
category="image/partner/Grok",
|
||||
category="partner/image/Grok",
|
||||
description="Generate images using Grok based on a text prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -228,7 +228,7 @@ class GrokImageEditNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokImageEditNode",
|
||||
display_name="Grok Image Edit",
|
||||
category="image/partner/Grok",
|
||||
category="partner/image/Grok",
|
||||
description="Modify an existing image based on a text prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -369,7 +369,7 @@ class GrokImageEditNodeV2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokImageEditNodeV2",
|
||||
display_name="Grok Image Edit",
|
||||
category="image/partner/Grok",
|
||||
category="partner/image/Grok",
|
||||
description="Modify an existing image based on a text prompt",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -506,7 +506,7 @@ class GrokVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokVideoNode",
|
||||
display_name="Grok Video",
|
||||
category="video/partner/Grok",
|
||||
category="partner/video/Grok",
|
||||
description="Generate video from a prompt or an image",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -630,7 +630,7 @@ class GrokVideoEditNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokVideoEditNode",
|
||||
display_name="Grok Video Edit",
|
||||
category="video/partner/Grok",
|
||||
category="partner/video/Grok",
|
||||
description="Edit an existing video based on a text prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["grok-imagine-video"]),
|
||||
@ -708,7 +708,7 @@ class GrokVideoReferenceNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokVideoReferenceNode",
|
||||
display_name="Grok Reference-to-Video",
|
||||
category="video/partner/Grok",
|
||||
category="partner/video/Grok",
|
||||
description="Generate video guided by reference images as style and content references.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -841,7 +841,7 @@ class GrokVideoExtendNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="GrokVideoExtendNode",
|
||||
display_name="Grok Video Extend",
|
||||
category="video/partner/Grok",
|
||||
category="partner/video/Grok",
|
||||
description="Extend an existing video with a seamless continuation based on a text prompt.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
|
||||
@ -71,7 +71,7 @@ class HitPawGeneralImageEnhance(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="HitPawGeneralImageEnhance",
|
||||
display_name="HitPaw General Image Enhance",
|
||||
category="image/partner/HitPaw",
|
||||
category="partner/image/HitPaw",
|
||||
description="Upscale low-resolution images to super-resolution, eliminate artifacts and noise. "
|
||||
f"Maximum output: {MAX_MP_GENERATIVE} megapixels.",
|
||||
inputs=[
|
||||
@ -201,7 +201,7 @@ class HitPawVideoEnhance(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="HitPawVideoEnhance",
|
||||
display_name="HitPaw Video Enhance",
|
||||
category="video/partner/HitPaw",
|
||||
category="partner/video/HitPaw",
|
||||
description="Upscale low-resolution videos to high resolution, eliminate artifacts and noise. "
|
||||
"Prices shown are per second of video.",
|
||||
inputs=[
|
||||
|
||||
@ -123,7 +123,7 @@ class TencentTextToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TencentTextToModelNode",
|
||||
display_name="Hunyuan3D: Text to Model",
|
||||
category="3d/partner/Tencent",
|
||||
category="partner/3d/Tencent",
|
||||
essentials_category="3D",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -242,7 +242,7 @@ class TencentImageToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TencentImageToModelNode",
|
||||
display_name="Hunyuan3D: Image(s) to Model",
|
||||
category="3d/partner/Tencent",
|
||||
category="partner/3d/Tencent",
|
||||
essentials_category="3D",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -415,7 +415,7 @@ class TencentModelTo3DUVNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TencentModelTo3DUVNode",
|
||||
display_name="Hunyuan3D: Model to UV",
|
||||
category="3d/partner/Tencent",
|
||||
category="partner/3d/Tencent",
|
||||
description="Perform UV unfolding on a 3D model to generate UV texture. "
|
||||
"Input model must have less than 30000 faces.",
|
||||
inputs=[
|
||||
@ -505,7 +505,7 @@ class Tencent3DTextureEditNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Tencent3DTextureEditNode",
|
||||
display_name="Hunyuan3D: 3D Texture Edit",
|
||||
category="3d/partner/Tencent",
|
||||
category="partner/3d/Tencent",
|
||||
description="After inputting the 3D model, perform 3D model texture redrawing.",
|
||||
inputs=[
|
||||
IO.MultiType.Input(
|
||||
@ -594,7 +594,7 @@ class Tencent3DPartNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Tencent3DPartNode",
|
||||
display_name="Hunyuan3D: 3D Part",
|
||||
category="3d/partner/Tencent",
|
||||
category="partner/3d/Tencent",
|
||||
description="Automatically perform component identification and generation based on the model structure.",
|
||||
inputs=[
|
||||
IO.MultiType.Input(
|
||||
@ -666,7 +666,7 @@ class TencentSmartTopologyNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TencentSmartTopologyNode",
|
||||
display_name="Hunyuan3D: Smart Topology",
|
||||
category="3d/partner/Tencent",
|
||||
category="partner/3d/Tencent",
|
||||
description="Perform smart retopology on a 3D model. "
|
||||
"Supports GLB/OBJ formats; max 200MB; recommended for high-poly models.",
|
||||
inputs=[
|
||||
|
||||
@ -10,6 +10,7 @@ from comfy_api_nodes.apis.ideogram import (
|
||||
ImageRequest,
|
||||
IdeogramV3Request,
|
||||
IdeogramV3EditRequest,
|
||||
IdeogramV4Request,
|
||||
)
|
||||
from comfy_api_nodes.util import (
|
||||
ApiEndpoint,
|
||||
@ -17,6 +18,7 @@ from comfy_api_nodes.util import (
|
||||
download_url_as_bytesio,
|
||||
resize_mask_to_image,
|
||||
sync_op,
|
||||
validate_string,
|
||||
)
|
||||
|
||||
V1_V1_RES_MAP = {
|
||||
@ -234,7 +236,7 @@ class IdeogramV1(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="IdeogramV1",
|
||||
display_name="Ideogram V1",
|
||||
category="image/partner/Ideogram",
|
||||
category="partner/image/Ideogram",
|
||||
description="Generates images using the Ideogram V1 model.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -360,7 +362,7 @@ class IdeogramV2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="IdeogramV2",
|
||||
display_name="Ideogram V2",
|
||||
category="image/partner/Ideogram",
|
||||
category="partner/image/Ideogram",
|
||||
description="Generates images using the Ideogram V2 model.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -526,7 +528,7 @@ class IdeogramV3(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="IdeogramV3",
|
||||
display_name="Ideogram V3",
|
||||
category="image/partner/Ideogram",
|
||||
category="partner/image/Ideogram",
|
||||
description="Generates images using the Ideogram V3 model. "
|
||||
"Supports both regular image generation from text prompts and image editing with mask.",
|
||||
inputs=[
|
||||
@ -798,6 +800,119 @@ class IdeogramV3(IO.ComfyNode):
|
||||
return IO.NodeOutput(await download_and_process_images(image_urls))
|
||||
|
||||
|
||||
class IdeogramV4(IO.ComfyNode):
    """API node that generates images from a text prompt through the Ideogram V4 endpoint."""

    @classmethod
    def define_schema(cls):
        """Declare the node's inputs (prompt, resolution, rendering speed, seed) and image output."""
        # Each non-"Auto" entry is "WIDTHxHEIGHT (ratio)"; execute() sends only the first token.
        resolution_choices = [
            "Auto",
            "2048x2048 (1:1)",
            "1440x2880 (1:2)",
            "2880x1440 (2:1)",
            "1664x2496 (2:3)",
            "2496x1664 (3:2)",
            "1792x2240 (4:5)",
            "2240x1792 (5:4)",
            "1440x2560 (9:16)",
            "2560x1440 (16:9)",
            "1600x2560 (5:8)",
            "2560x1600 (8:5)",
            "1728x2304 (3:4)",
            "2304x1728 (4:3)",
            "1296x3168 (9:22)",
            "3168x1296 (22:9)",
            "1152x2944 (9:23)",
            "2944x1152 (23:9)",
            "1248x3328 (3:8)",
            "3328x1248 (8:3)",
            "1280x3072 (5:12)",
            "3072x1280 (12:5)",
        ]
        return IO.Schema(
            node_id="IdeogramV4",
            display_name="Ideogram V4",
            category="partner/image/Ideogram",
            description="Generates images using the Ideogram 4.0 model from a text prompt.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text prompt for the image generation.",
                ),
                IO.Combo.Input(
                    "resolution",
                    options=resolution_choices,
                    default="Auto",
                ),
                IO.Combo.Input(
                    "rendering_speed",
                    options=["DEFAULT", "TURBO", "QUALITY"],
                    default="DEFAULT",
                    tooltip="Controls the trade-off between generation speed and quality.",
                ),
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    control_after_generate=True,
                    display_mode=IO.NumberDisplay.number,
                ),
            ],
            outputs=[
                IO.Image.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            # Price depends only on the rendering_speed widget.
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["rendering_speed"]),
                expr="""
                (
                  $speed := widgets.rendering_speed;
                  $price :=
                    $contains($speed,"turbo") ? 0.0429 :
                    $contains($speed,"quality") ? 0.143 :
                    0.0858;
                  {"type":"usd","usd": $price}
                )
                """,
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        resolution: str,
        rendering_speed: str,
        seed: int,
    ):
        """Send the prompt to the Ideogram V4 generate endpoint and return the downloaded images.

        Args:
            prompt: Text prompt; must contain at least one character after stripping whitespace.
            resolution: One of the schema's resolution options; "Auto" sends no resolution.
            rendering_speed: Speed/quality setting passed through unchanged.
            seed: Accepted from the schema but not placed in the request built here.

        Raises:
            Exception: If the response has no data entries, or none of them carries a URL.
        """
        validate_string(prompt, strip_whitespace=True, min_length=1)

        # "Auto" means no explicit size; otherwise keep only the leading WIDTHxHEIGHT token.
        requested_size = None if resolution == "Auto" else resolution.split(" ")[0]

        generation = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/ideogram/ideogram-v4/generate", method="POST"),
            response_model=IdeogramGenerateResponse,
            data=IdeogramV4Request(
                text_prompt=prompt,
                resolution=requested_size,
                rendering_speed=rendering_speed,
            ),
            max_retries=1,
        )

        if not generation.data:
            raise Exception("No images were generated in the response")

        image_urls = [entry.url for entry in generation.data if entry.url]
        if not image_urls:
            raise Exception("No image URLs were generated in the response")

        return IO.NodeOutput(await download_and_process_images(image_urls))
|
||||
|
||||
|
||||
class IdeogramExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[IO.ComfyNode]]:
|
||||
@ -805,6 +920,7 @@ class IdeogramExtension(ComfyExtension):
|
||||
IdeogramV1,
|
||||
IdeogramV2,
|
||||
IdeogramV3,
|
||||
IdeogramV4,
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -642,7 +642,7 @@ class KlingCameraControls(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingCameraControls",
|
||||
display_name="Kling Camera Controls",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Allows specifying configuration options for Kling Camera Controls and motion control effects.",
|
||||
inputs=[
|
||||
IO.Combo.Input("camera_control_type", options=KlingCameraControlType),
|
||||
@ -762,7 +762,7 @@ class KlingTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingTextToVideoNode",
|
||||
display_name="Kling Text to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Kling Text to Video Node",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt"),
|
||||
@ -849,7 +849,7 @@ class OmniProTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingOmniProTextToVideoNode",
|
||||
display_name="Kling 3.0 Omni Text to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Use text prompts to generate videos with the latest Kling model.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
|
||||
@ -998,7 +998,7 @@ class OmniProFirstLastFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingOmniProFirstLastFrameNode",
|
||||
display_name="Kling 3.0 Omni First-Last-Frame to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Use a start frame, an optional end frame, or reference images with the latest Kling model.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
|
||||
@ -1205,7 +1205,7 @@ class OmniProImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingOmniProImageToVideoNode",
|
||||
display_name="Kling 3.0 Omni Image to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Use up to 7 reference images to generate a video with the latest Kling model.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
|
||||
@ -1374,7 +1374,7 @@ class OmniProVideoToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingOmniProVideoToVideoNode",
|
||||
display_name="Kling 3.0 Omni Video to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Use a video and up to 4 reference images to generate a video with the latest Kling model.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]),
|
||||
@ -1485,7 +1485,7 @@ class OmniProEditVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingOmniProEditVideoNode",
|
||||
display_name="Kling 3.0 Omni Edit Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
essentials_category="Video Generation",
|
||||
description="Edit an existing video with the latest model from Kling.",
|
||||
inputs=[
|
||||
@ -1593,7 +1593,7 @@ class OmniProImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingOmniProImageNode",
|
||||
display_name="Kling 3.0 Omni Image",
|
||||
category="image/partner/Kling",
|
||||
category="partner/image/Kling",
|
||||
description="Create or edit images with the latest model from Kling.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-image-o1"]),
|
||||
@ -1721,7 +1721,7 @@ class KlingCameraControlT2VNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingCameraControlT2VNode",
|
||||
display_name="Kling Text to Video (Camera Control)",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Transform text into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original text.",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt"),
|
||||
@ -1783,7 +1783,7 @@ class KlingImage2VideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingImage2VideoNode",
|
||||
display_name="Kling Image(First Frame) to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
inputs=[
|
||||
IO.Image.Input("start_frame", tooltip="The reference image used to generate the video."),
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt"),
|
||||
@ -1882,7 +1882,7 @@ class KlingCameraControlI2VNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingCameraControlI2VNode",
|
||||
display_name="Kling Image to Video (Camera Control)",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Transform still images into cinematic videos with professional camera movements that simulate real-world cinematography. Control virtual camera actions including zoom, rotation, pan, tilt, and first-person view, while maintaining focus on your original image.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -1953,7 +1953,7 @@ class KlingStartEndFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingStartEndFrameNode",
|
||||
display_name="Kling Start-End Frame to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Generate a video sequence that transitions between your provided start and end images. The node creates all frames in between, producing a smooth transformation from the first frame to the last.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -2047,7 +2047,7 @@ class KlingVideoExtendNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingVideoExtendNode",
|
||||
display_name="Kling Video Extend",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Kling Video Extend Node. Extend videos made by other Kling nodes. The video_id is created by using other Kling Nodes.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -2128,7 +2128,7 @@ class KlingDualCharacterVideoEffectNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingDualCharacterVideoEffectNode",
|
||||
display_name="Kling Dual Character Video Effects",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Achieve different special effects when generating a video based on the effect_scene. First image will be positioned on left side, second on right side of the composite.",
|
||||
inputs=[
|
||||
IO.Image.Input("image_left", tooltip="Left side image"),
|
||||
@ -2218,7 +2218,7 @@ class KlingSingleImageVideoEffectNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingSingleImageVideoEffectNode",
|
||||
display_name="Kling Video Effects",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Achieve different special effects when generating a video based on the effect_scene.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -2291,7 +2291,7 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingLipSyncAudioToVideoNode",
|
||||
display_name="Kling Lip Sync Video with Audio",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
essentials_category="Video Generation",
|
||||
description="Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.",
|
||||
inputs=[
|
||||
@ -2343,7 +2343,7 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingLipSyncTextToVideoNode",
|
||||
display_name="Kling Lip Sync Video with Text",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Kling Lip Sync Text to Video Node. Syncs mouth movements in a video file to a text prompt. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.",
|
||||
inputs=[
|
||||
IO.Video.Input("video"),
|
||||
@ -2411,7 +2411,7 @@ class KlingVirtualTryOnNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingVirtualTryOnNode",
|
||||
display_name="Kling Virtual Try On",
|
||||
category="image/partner/Kling",
|
||||
category="partner/image/Kling",
|
||||
description="Kling Virtual Try On Node. Input a human image and a cloth image to try on the cloth on the human. You can merge multiple clothing item pictures into one image with a white background.",
|
||||
inputs=[
|
||||
IO.Image.Input("human_image"),
|
||||
@ -2478,7 +2478,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingImageGenerationNode",
|
||||
display_name="Kling 3.0 Image",
|
||||
category="image/partner/Kling",
|
||||
category="partner/image/Kling",
|
||||
description="Kling Image Generation Node. Generate an image from a text prompt with an optional reference image.",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt"),
|
||||
@ -2615,7 +2615,7 @@ class TextToVideoWithAudio(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingTextToVideoWithAudio",
|
||||
display_name="Kling 2.6 Text to Video with Audio",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v2-6"]),
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
|
||||
@ -2683,7 +2683,7 @@ class ImageToVideoWithAudio(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingImageToVideoWithAudio",
|
||||
display_name="Kling 2.6 Image(First Frame) to Video with Audio",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
inputs=[
|
||||
IO.Combo.Input("model_name", options=["kling-v2-6"]),
|
||||
IO.Image.Input("start_frame"),
|
||||
@ -2753,7 +2753,7 @@ class MotionControl(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingMotionControl",
|
||||
display_name="Kling Motion Control",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True),
|
||||
IO.Image.Input("reference_image"),
|
||||
@ -2854,7 +2854,7 @@ class KlingVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingVideoNode",
|
||||
display_name="Kling 3.0 Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Generate videos with Kling V3. "
|
||||
"Supports text-to-video and image-to-video with optional storyboard multi-prompt and audio generation.",
|
||||
inputs=[
|
||||
@ -3077,7 +3077,7 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingFirstLastFrameNode",
|
||||
display_name="Kling 3.0 First-Last-Frame to Video",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Generate videos with Kling V3 using first and last frames.",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True, default=""),
|
||||
@ -3202,7 +3202,7 @@ class KlingAvatarNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="KlingAvatarNode",
|
||||
display_name="Kling Avatar 2.0",
|
||||
category="video/partner/Kling",
|
||||
category="partner/video/Kling",
|
||||
description="Generate broadcast-style digital human videos from a single photo and an audio file.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
|
||||
@ -106,7 +106,7 @@ class Krea2ImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Krea2ImageNode",
|
||||
display_name="Krea 2 Image",
|
||||
category="image/partner/Krea",
|
||||
category="partner/image/Krea",
|
||||
description=(
|
||||
"Generate images via Krea 2 — pick Medium (expressive illustrations) or "
|
||||
"Large (expressive photorealism). Supports an optional moodboard and up "
|
||||
@ -229,7 +229,7 @@ class Krea2StyleReferenceNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Krea2StyleReferenceNode",
|
||||
display_name="Krea 2 Style Reference",
|
||||
category="image/partner/Krea",
|
||||
category="partner/image/Krea",
|
||||
description=(
|
||||
"Add an image style reference to a Krea 2 generation. Chain multiple Krea 2 "
|
||||
"Style Reference nodes (max 10) and feed the final `style_reference` output "
|
||||
|
||||
@ -50,7 +50,7 @@ class TextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LtxvApiTextToVideo",
|
||||
display_name="LTXV Text To Video",
|
||||
category="video/partner/LTXV",
|
||||
category="partner/video/LTXV",
|
||||
description="Professional-quality videos with customizable duration and resolution.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=list(MODELS_MAP.keys())),
|
||||
@ -127,7 +127,7 @@ class ImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LtxvApiImageToVideo",
|
||||
display_name="LTXV Image To Video",
|
||||
category="video/partner/LTXV",
|
||||
category="partner/video/LTXV",
|
||||
description="Professional-quality videos with customizable duration and resolution based on start image.",
|
||||
inputs=[
|
||||
IO.Image.Input("image", tooltip="First frame to be used for the video."),
|
||||
|
||||
@ -46,7 +46,7 @@ class LumaReferenceNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaReferenceNode",
|
||||
display_name="Luma Reference",
|
||||
category="image/partner/Luma",
|
||||
category="partner/image/Luma",
|
||||
description="Holds an image and weight for use with Luma Generate Image node.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -85,7 +85,7 @@ class LumaConceptsNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaConceptsNode",
|
||||
display_name="Luma Concepts",
|
||||
category="video/partner/Luma",
|
||||
category="partner/video/Luma",
|
||||
description="Camera Concepts for use with Luma Text to Video and Luma Image to Video nodes.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -134,7 +134,7 @@ class LumaImageGenerationNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaImageNode",
|
||||
display_name="Luma Text to Image",
|
||||
category="image/partner/Luma",
|
||||
category="partner/image/Luma",
|
||||
description="Generates images synchronously based on prompt and aspect ratio.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -278,7 +278,7 @@ class LumaImageModifyNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaImageModifyNode",
|
||||
display_name="Luma Image to Image",
|
||||
category="image/partner/Luma",
|
||||
category="partner/image/Luma",
|
||||
description="Modifies images synchronously based on prompt and aspect ratio.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -371,7 +371,7 @@ class LumaTextToVideoGenerationNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaVideoNode",
|
||||
display_name="Luma Text to Video",
|
||||
category="video/partner/Luma",
|
||||
category="partner/video/Luma",
|
||||
description="Generates videos synchronously based on prompt and output_size.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -472,7 +472,7 @@ class LumaImageToVideoGenerationNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaImageToVideoNode",
|
||||
display_name="Luma Image to Video",
|
||||
category="video/partner/Luma",
|
||||
category="partner/video/Luma",
|
||||
description="Generates videos synchronously based on prompt, input images, and output_size.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -724,7 +724,7 @@ class LumaImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaImageNode2",
|
||||
display_name="Luma UNI-1 Image",
|
||||
category="image/partner/Luma",
|
||||
category="partner/image/Luma",
|
||||
description="Generate images from text using the Luma UNI-1 model.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -853,7 +853,7 @@ class LumaImageEditNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="LumaImageEditNode2",
|
||||
display_name="Luma UNI-1 Image Edit",
|
||||
category="image/partner/Luma",
|
||||
category="partner/image/Luma",
|
||||
description="Edit an existing image with a text prompt using the Luma UNI-1 model.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
|
||||
@ -61,7 +61,7 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MagnificImageUpscalerCreativeNode",
|
||||
display_name="Magnific Image Upscale (Creative)",
|
||||
category="image/partner/Magnific",
|
||||
category="partner/image/Magnific",
|
||||
description="Prompt‑guided enhancement, stylization, and 2x/4x/8x/16x upscaling. "
|
||||
"Maximum output: 25.3 megapixels.",
|
||||
inputs=[
|
||||
@ -240,7 +240,7 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MagnificImageUpscalerPreciseV2Node",
|
||||
display_name="Magnific Image Upscale (Precise V2)",
|
||||
category="image/partner/Magnific",
|
||||
category="partner/image/Magnific",
|
||||
description="High-fidelity upscaling with fine control over sharpness, grain, and detail. "
|
||||
"Maximum output: 10060×10060 pixels.",
|
||||
inputs=[
|
||||
@ -400,7 +400,7 @@ class MagnificImageStyleTransferNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MagnificImageStyleTransferNode",
|
||||
display_name="Magnific Image Style Transfer",
|
||||
category="image/partner/Magnific",
|
||||
category="partner/image/Magnific",
|
||||
description="Transfer the style from a reference image to your input image.",
|
||||
inputs=[
|
||||
IO.Image.Input("image", tooltip="The image to apply style transfer to."),
|
||||
@ -549,7 +549,7 @@ class MagnificImageRelightNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MagnificImageRelightNode",
|
||||
display_name="Magnific Image Relight",
|
||||
category="image/partner/Magnific",
|
||||
category="partner/image/Magnific",
|
||||
description="Relight an image with lighting adjustments and optional reference-based light transfer.",
|
||||
inputs=[
|
||||
IO.Image.Input("image", tooltip="The image to relight."),
|
||||
@ -789,7 +789,7 @@ class MagnificImageSkinEnhancerNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MagnificImageSkinEnhancerNode",
|
||||
display_name="Magnific Image Skin Enhancer",
|
||||
category="image/partner/Magnific",
|
||||
category="partner/image/Magnific",
|
||||
description="Skin enhancement for portraits with multiple processing modes.",
|
||||
inputs=[
|
||||
IO.Image.Input("image", tooltip="The portrait image to enhance."),
|
||||
|
||||
@ -33,7 +33,7 @@ class MeshyTextToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyTextToModelNode",
|
||||
display_name="Meshy: Text to Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["latest"]),
|
||||
IO.String.Input("prompt", multiline=True, default=""),
|
||||
@ -145,7 +145,7 @@ class MeshyRefineNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyRefineNode",
|
||||
display_name="Meshy: Refine Draft Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
description="Refine a previously created draft model.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["latest"]),
|
||||
@ -240,7 +240,7 @@ class MeshyImageToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyImageToModelNode",
|
||||
display_name="Meshy: Image to Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["latest"]),
|
||||
IO.Image.Input("image"),
|
||||
@ -405,7 +405,7 @@ class MeshyMultiImageToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyMultiImageToModelNode",
|
||||
display_name="Meshy: Multi-Image to Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["latest"]),
|
||||
IO.Autogrow.Input(
|
||||
@ -575,7 +575,7 @@ class MeshyRigModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyRigModelNode",
|
||||
display_name="Meshy: Rig Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
description="Provides a rigged character in standard formats. "
|
||||
"Auto-rigging is currently not suitable for untextured meshes, non-humanoid assets, "
|
||||
"or humanoid assets with unclear limb and body structure.",
|
||||
@ -656,7 +656,7 @@ class MeshyAnimateModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyAnimateModelNode",
|
||||
display_name="Meshy: Animate Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
description="Apply a specific animation action to a previously rigged character.",
|
||||
inputs=[
|
||||
IO.Custom("MESHY_RIGGED_TASK_ID").Input("rig_task_id"),
|
||||
@ -722,7 +722,7 @@ class MeshyTextureNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MeshyTextureNode",
|
||||
display_name="Meshy: Texture Model",
|
||||
category="3d/partner/Meshy",
|
||||
category="partner/3d/Meshy",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["latest"]),
|
||||
IO.Custom("MESHY_TASK_ID").Input("meshy_task_id"),
|
||||
|
||||
@ -101,7 +101,7 @@ class MinimaxTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MinimaxTextToVideoNode",
|
||||
display_name="MiniMax Text to Video",
|
||||
category="video/partner/MiniMax",
|
||||
category="partner/video/MiniMax",
|
||||
description="Generates videos synchronously based on a prompt, and optional parameters.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -163,7 +163,7 @@ class MinimaxImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MinimaxImageToVideoNode",
|
||||
display_name="MiniMax Image to Video",
|
||||
category="video/partner/MiniMax",
|
||||
category="partner/video/MiniMax",
|
||||
description="Generates videos synchronously based on an image and prompt, and optional parameters.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -230,7 +230,7 @@ class MinimaxSubjectToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MinimaxSubjectToVideoNode",
|
||||
display_name="MiniMax Subject to Video",
|
||||
category="video/partner/MiniMax",
|
||||
category="partner/video/MiniMax",
|
||||
description="Generates videos synchronously based on an image and prompt, and optional parameters.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
@ -294,7 +294,7 @@ class MinimaxHailuoVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="MinimaxHailuoVideoNode",
|
||||
display_name="MiniMax Hailuo Video",
|
||||
category="video/partner/MiniMax",
|
||||
category="partner/video/MiniMax",
|
||||
description="Generates videos from prompt, with optional start frame using the new MiniMax Hailuo-02 model.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
|
||||
@ -99,7 +99,7 @@ class OpenAIDalle2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIDalle2",
|
||||
display_name="OpenAI DALL·E 2",
|
||||
category="image/partner/OpenAI",
|
||||
category="partner/image/OpenAI",
|
||||
description="Generates images synchronously via OpenAI's DALL·E 2 endpoint.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -249,7 +249,7 @@ class OpenAIDalle3(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIDalle3",
|
||||
display_name="OpenAI DALL·E 3",
|
||||
category="image/partner/OpenAI",
|
||||
category="partner/image/OpenAI",
|
||||
description="Generates images synchronously via OpenAI's DALL·E 3 endpoint.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -371,7 +371,7 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIGPTImage1",
|
||||
display_name="OpenAI GPT Image 2",
|
||||
category="image/partner/OpenAI",
|
||||
category="partner/image/OpenAI",
|
||||
description="Generates images synchronously via OpenAI's GPT Image endpoint.",
|
||||
is_deprecated=True,
|
||||
inputs=[
|
||||
@ -695,7 +695,7 @@ class OpenAIGPTImageNodeV2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIGPTImageNodeV2",
|
||||
display_name="OpenAI GPT Image 2",
|
||||
category="image/partner/OpenAI",
|
||||
category="partner/image/OpenAI",
|
||||
description="Generates images via OpenAI's GPT Image endpoint.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -962,7 +962,7 @@ class OpenAIChatNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIChatNode",
|
||||
display_name="OpenAI ChatGPT",
|
||||
category="text/partner/OpenAI",
|
||||
category="partner/text/OpenAI",
|
||||
essentials_category="Text Generation",
|
||||
description="Generate text responses from an OpenAI model.",
|
||||
inputs=[
|
||||
@ -1201,7 +1201,7 @@ class OpenAIInputFiles(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIInputFiles",
|
||||
display_name="OpenAI ChatGPT Input Files",
|
||||
category="text/partner/OpenAI",
|
||||
category="partner/text/OpenAI",
|
||||
description="Loads and prepares input files (text, pdf, etc.) to include as inputs for the OpenAI Chat Node. The files will be read by the OpenAI model when generating a response. 🛈 TIP: Can be chained together with other OpenAI Input File nodes.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -1248,7 +1248,7 @@ class OpenAIChatConfig(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIChatConfig",
|
||||
display_name="OpenAI ChatGPT Advanced Options",
|
||||
category="text/partner/OpenAI",
|
||||
category="partner/text/OpenAI",
|
||||
description="Allows specifying advanced configuration options for the OpenAI Chat Nodes.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
|
||||
@ -265,7 +265,7 @@ class OpenRouterLLMNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenRouterLLMNode",
|
||||
display_name="OpenRouter LLM",
|
||||
category="text/partner/OpenRouter",
|
||||
category="partner/text/OpenRouter",
|
||||
essentials_category="Text Generation",
|
||||
description=(
|
||||
"Generate text responses through OpenRouter. Routes to a curated set of popular "
|
||||
|
||||
@ -53,7 +53,7 @@ class PixverseTemplateNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="PixverseTemplateNode",
|
||||
display_name="PixVerse Template",
|
||||
category="video/partner/PixVerse",
|
||||
category="partner/video/PixVerse",
|
||||
inputs=[
|
||||
IO.Combo.Input("template", options=list(pixverse_templates.keys())),
|
||||
],
|
||||
@ -74,7 +74,7 @@ class PixverseTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="PixverseTextToVideoNode",
|
||||
display_name="PixVerse Text to Video",
|
||||
category="video/partner/PixVerse",
|
||||
category="partner/video/PixVerse",
|
||||
description="Generates videos based on prompt and output_size.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -192,7 +192,7 @@ class PixverseImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="PixverseImageToVideoNode",
|
||||
display_name="PixVerse Image to Video",
|
||||
category="video/partner/PixVerse",
|
||||
category="partner/video/PixVerse",
|
||||
description="Generates videos based on prompt and output_size.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -310,7 +310,7 @@ class PixverseTransitionVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="PixverseTransitionVideoNode",
|
||||
display_name="PixVerse Transition Video",
|
||||
category="video/partner/PixVerse",
|
||||
category="partner/video/PixVerse",
|
||||
description="Generates videos based on prompt and output_size.",
|
||||
inputs=[
|
||||
IO.Image.Input("first_frame"),
|
||||
|
||||
@ -62,7 +62,7 @@ class QuiverTextToSVGNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="QuiverTextToSVGNode",
|
||||
display_name="Quiver Text to SVG",
|
||||
category="image/partner/Quiver",
|
||||
category="partner/image/Quiver",
|
||||
description="Generate an SVG from a text prompt using Quiver AI.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -177,7 +177,7 @@ class QuiverImageToSVGNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="QuiverImageToSVGNode",
|
||||
display_name="Quiver Image to SVG",
|
||||
category="image/partner/Quiver",
|
||||
category="partner/image/Quiver",
|
||||
description="Vectorize a raster image into SVG using Quiver AI.",
|
||||
inputs=[
|
||||
IO.Image.Input(
|
||||
|
||||
@ -178,7 +178,7 @@ class RecraftColorRGBNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftColorRGB",
|
||||
display_name="Recraft Color RGB",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Create Recraft Color by choosing specific RGB values.",
|
||||
inputs=[
|
||||
IO.Int.Input("r", default=0, min=0, max=255, tooltip="Red value of color."),
|
||||
@ -204,7 +204,7 @@ class RecraftControlsNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftControls",
|
||||
display_name="Recraft Controls",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Create Recraft Controls for customizing Recraft generation.",
|
||||
inputs=[
|
||||
IO.Custom(RecraftIO.COLOR).Input("colors", optional=True),
|
||||
@ -228,7 +228,7 @@ class RecraftStyleV3RealisticImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftStyleV3RealisticImage",
|
||||
display_name="Recraft Style - Realistic Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Select realistic_image style and optional substyle.",
|
||||
inputs=[
|
||||
IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE)),
|
||||
@ -253,7 +253,7 @@ class RecraftStyleV3DigitalIllustrationNode(RecraftStyleV3RealisticImageNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftStyleV3DigitalIllustration",
|
||||
display_name="Recraft Style - Digital Illustration",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Select realistic_image style and optional substyle.",
|
||||
inputs=[
|
||||
IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE)),
|
||||
@ -272,7 +272,7 @@ class RecraftStyleV3VectorIllustrationNode(RecraftStyleV3RealisticImageNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftStyleV3VectorIllustrationNode",
|
||||
display_name="Recraft Style - Realistic Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Select realistic_image style and optional substyle.",
|
||||
inputs=[
|
||||
IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE)),
|
||||
@ -291,7 +291,7 @@ class RecraftStyleV3LogoRasterNode(RecraftStyleV3RealisticImageNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftStyleV3LogoRaster",
|
||||
display_name="Recraft Style - Logo Raster",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Select realistic_image style and optional substyle.",
|
||||
inputs=[
|
||||
IO.Combo.Input("substyle", options=get_v3_substyles(cls.RECRAFT_STYLE, include_none=False)),
|
||||
@ -308,7 +308,7 @@ class RecraftStyleInfiniteStyleLibrary(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftStyleV3InfiniteStyleLibrary",
|
||||
display_name="Recraft Style - Infinite Style Library",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Choose style based on preexisting UUID from Recraft's Infinite Style Library.",
|
||||
inputs=[
|
||||
IO.String.Input("style_id", default="", tooltip="UUID of style from Infinite Style Library."),
|
||||
@ -331,7 +331,7 @@ class RecraftCreateStyleNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftCreateStyleNode",
|
||||
display_name="Recraft Create Style",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Create a custom style from reference images. "
|
||||
"Upload 1-5 images to use as style references. "
|
||||
"Total size of all images is limited to 5 MB.",
|
||||
@ -400,7 +400,7 @@ class RecraftTextToImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftTextToImageNode",
|
||||
display_name="Recraft Text to Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Generates images synchronously based on prompt and resolution.",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True, default="", tooltip="Prompt for the image generation."),
|
||||
@ -512,7 +512,7 @@ class RecraftImageToImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftImageToImageNode",
|
||||
display_name="Recraft Image to Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Modify image based on prompt and strength.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -630,7 +630,7 @@ class RecraftImageInpaintingNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftImageInpaintingNode",
|
||||
display_name="Recraft Image Inpainting",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Modify image based on prompt and mask.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -732,7 +732,7 @@ class RecraftTextToVectorNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftTextToVectorNode",
|
||||
display_name="Recraft Text to Vector",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Generates SVG synchronously based on prompt and resolution.",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", default="", tooltip="Prompt for the image generation.", multiline=True),
|
||||
@ -832,7 +832,7 @@ class RecraftVectorizeImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftVectorizeImageNode",
|
||||
display_name="Recraft Vectorize Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
essentials_category="Image Tools",
|
||||
description="Generates SVG synchronously from an input image.",
|
||||
inputs=[
|
||||
@ -876,7 +876,7 @@ class RecraftReplaceBackgroundNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftReplaceBackgroundNode",
|
||||
display_name="Recraft Replace Background",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Replace background on image, based on provided prompt.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -963,7 +963,7 @@ class RecraftRemoveBackgroundNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftRemoveBackgroundNode",
|
||||
display_name="Recraft Remove Background",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
essentials_category="Image Tools",
|
||||
description="Remove background from image, and return processed image and mask.",
|
||||
inputs=[
|
||||
@ -1012,7 +1012,7 @@ class RecraftCrispUpscaleNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftCrispUpscaleNode",
|
||||
display_name="Recraft Crisp Upscale Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Upscale image synchronously.\n"
|
||||
"Enhances a given raster image using ‘crisp upscale’ tool, "
|
||||
"increasing image resolution, making the image sharper and cleaner.",
|
||||
@ -1058,7 +1058,7 @@ class RecraftCreativeUpscaleNode(RecraftCrispUpscaleNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftCreativeUpscaleNode",
|
||||
display_name="Recraft Creative Upscale Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Upscale image synchronously.\n"
|
||||
"Enhances a given raster image using ‘creative upscale’ tool, "
|
||||
"boosting resolution with a focus on refining small details and faces.",
|
||||
@ -1086,7 +1086,7 @@ class RecraftV4TextToImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftV4TextToImageNode",
|
||||
display_name="Recraft V4 Text to Image",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Generates images using Recraft V4 or V4 Pro models.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -1210,7 +1210,7 @@ class RecraftV4TextToVectorNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RecraftV4TextToVectorNode",
|
||||
display_name="Recraft V4 Text to Vector",
|
||||
category="image/partner/Recraft",
|
||||
category="partner/image/Recraft",
|
||||
description="Generates SVG using Recraft V4 or V4 Pro models.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
|
||||
@ -109,7 +109,7 @@ class ReveImageCreateNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ReveImageCreateNode",
|
||||
display_name="Reve Image Create",
|
||||
category="image/partner/Reve",
|
||||
category="partner/image/Reve",
|
||||
description="Generate images from text descriptions using Reve.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -200,7 +200,7 @@ class ReveImageEditNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ReveImageEditNode",
|
||||
display_name="Reve Image Edit",
|
||||
category="image/partner/Reve",
|
||||
category="partner/image/Reve",
|
||||
description="Edit images using natural language instructions with Reve.",
|
||||
inputs=[
|
||||
IO.Image.Input("image", tooltip="The image to edit."),
|
||||
@ -300,7 +300,7 @@ class ReveImageRemixNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ReveImageRemixNode",
|
||||
display_name="Reve Image Remix",
|
||||
category="image/partner/Reve",
|
||||
category="partner/image/Reve",
|
||||
description="Combine reference images with text prompts to create new images using Reve.",
|
||||
inputs=[
|
||||
IO.Autogrow.Input(
|
||||
|
||||
@ -230,7 +230,7 @@ class Rodin3D_Regular(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Regular",
|
||||
display_name="Rodin 3D Generate - Regular Generate",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("Images"),
|
||||
@ -289,7 +289,7 @@ class Rodin3D_Detail(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Detail",
|
||||
display_name="Rodin 3D Generate - Detail Generate",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("Images"),
|
||||
@ -348,7 +348,7 @@ class Rodin3D_Smooth(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Smooth",
|
||||
display_name="Rodin 3D Generate - Smooth Generate",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("Images"),
|
||||
@ -406,7 +406,7 @@ class Rodin3D_Sketch(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Sketch",
|
||||
display_name="Rodin 3D Generate - Sketch Generate",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("Images"),
|
||||
@ -468,7 +468,7 @@ class Rodin3D_Gen2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Gen2",
|
||||
display_name="Rodin 3D Generate - Gen-2 Generate",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("Images"),
|
||||
@ -941,7 +941,7 @@ class Rodin3D_Gen25_Image(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Gen25_Image",
|
||||
display_name="Rodin 3D Gen-2.5 - Image to 3D",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=(
|
||||
"Generate a 3D model from 1-5 reference images via Rodin Gen-2.5. "
|
||||
"Pick a mode (Fast / Regular / Extreme-High) to tune quality vs. cost."
|
||||
@ -1035,7 +1035,7 @@ class Rodin3D_Gen25_Text(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Rodin3D_Gen25_Text",
|
||||
display_name="Rodin 3D Gen-2.5 - Text to 3D",
|
||||
category="3d/partner/Rodin",
|
||||
category="partner/3d/Rodin",
|
||||
description=(
|
||||
"Generate a 3D model from a text prompt via Rodin Gen-2.5. "
|
||||
"Pick a mode (Fast / Regular / Extreme-High) to tune quality vs. cost."
|
||||
|
||||
@ -140,7 +140,7 @@ class RunwayImageToVideoNodeGen3a(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RunwayImageToVideoNodeGen3a",
|
||||
display_name="Runway Image to Video (Gen3a Turbo)",
|
||||
category="video/partner/Runway",
|
||||
category="partner/video/Runway",
|
||||
description="Generate a video from a single starting frame using Gen3a Turbo model. "
|
||||
"Before diving in, review these best practices to ensure that "
|
||||
"your input selections will set your generation up for success: "
|
||||
@ -234,7 +234,7 @@ class RunwayImageToVideoNodeGen4(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RunwayImageToVideoNodeGen4",
|
||||
display_name="Runway Image to Video (Gen4 Turbo)",
|
||||
category="video/partner/Runway",
|
||||
category="partner/video/Runway",
|
||||
description="Generate a video from a single starting frame using Gen4 Turbo model. "
|
||||
"Before diving in, review these best practices to ensure that "
|
||||
"your input selections will set your generation up for success: "
|
||||
@ -329,7 +329,7 @@ class RunwayFirstLastFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RunwayFirstLastFrameNode",
|
||||
display_name="Runway First-Last-Frame to Video",
|
||||
category="video/partner/Runway",
|
||||
category="partner/video/Runway",
|
||||
description="Upload first and last keyframes, draft a prompt, and generate a video. "
|
||||
"More complex transitions, such as cases where the Last frame is completely different "
|
||||
"from the First frame, may benefit from the longer 10s duration. "
|
||||
@ -440,7 +440,7 @@ class RunwayTextToImageNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="RunwayTextToImageNode",
|
||||
display_name="Runway Text to Image",
|
||||
category="image/partner/Runway",
|
||||
category="partner/image/Runway",
|
||||
description="Generate an image from a text prompt using Runway's Gen 4 model. "
|
||||
"You can also include reference image to guide the generation.",
|
||||
inputs=[
|
||||
|
||||
@ -34,7 +34,7 @@ class SoniloVideoToMusic(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="SoniloVideoToMusic",
|
||||
display_name="Sonilo Video to Music",
|
||||
category="audio/partner/Sonilo",
|
||||
category="partner/audio/Sonilo",
|
||||
description="Generate music from video content using Sonilo's AI model. "
|
||||
"Analyzes the video and creates matching music.",
|
||||
inputs=[
|
||||
@ -99,7 +99,7 @@ class SoniloTextToMusic(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="SoniloTextToMusic",
|
||||
display_name="Sonilo Text to Music",
|
||||
category="audio/partner/Sonilo",
|
||||
category="partner/audio/Sonilo",
|
||||
description="Generate music from a text prompt using Sonilo's AI model. "
|
||||
"Leave duration at 0 to let the model infer it from the prompt.",
|
||||
inputs=[
|
||||
|
||||
@ -34,7 +34,7 @@ class OpenAIVideoSora2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="OpenAIVideoSora2",
|
||||
display_name="OpenAI Sora - Video (DEPRECATED)",
|
||||
category="video/partner/Sora",
|
||||
category="partner/video/Sora",
|
||||
description=(
|
||||
"OpenAI video and audio generation.\n\n"
|
||||
"DEPRECATION NOTICE: OpenAI will stop serving the Sora v2 API in September 2026. "
|
||||
|
||||
@ -62,7 +62,7 @@ class StabilityStableImageUltraNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityStableImageUltraNode",
|
||||
display_name="Stability AI Stable Image Ultra",
|
||||
category="image/partner/Stability AI",
|
||||
category="partner/image/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -197,7 +197,7 @@ class StabilityStableImageSD_3_5Node(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityStableImageSD_3_5Node",
|
||||
display_name="Stability AI Stable Diffusion 3.5 Image",
|
||||
category="image/partner/Stability AI",
|
||||
category="partner/image/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -354,7 +354,7 @@ class StabilityUpscaleConservativeNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityUpscaleConservativeNode",
|
||||
display_name="Stability AI Upscale Conservative",
|
||||
category="image/partner/Stability AI",
|
||||
category="partner/image/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -457,7 +457,7 @@ class StabilityUpscaleCreativeNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityUpscaleCreativeNode",
|
||||
display_name="Stability AI Upscale Creative",
|
||||
category="image/partner/Stability AI",
|
||||
category="partner/image/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -578,7 +578,7 @@ class StabilityUpscaleFastNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityUpscaleFastNode",
|
||||
display_name="Stability AI Upscale Fast",
|
||||
category="image/partner/Stability AI",
|
||||
category="partner/image/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -630,7 +630,7 @@ class StabilityTextToAudio(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityTextToAudio",
|
||||
display_name="Stability AI Text To Audio",
|
||||
category="audio/partner/Stability AI",
|
||||
category="partner/audio/Stability AI",
|
||||
essentials_category="Audio",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
@ -708,7 +708,7 @@ class StabilityAudioToAudio(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityAudioToAudio",
|
||||
display_name="Stability AI Audio To Audio",
|
||||
category="audio/partner/Stability AI",
|
||||
category="partner/audio/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -802,7 +802,7 @@ class StabilityAudioInpaint(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="StabilityAudioInpaint",
|
||||
display_name="Stability AI Audio Inpaint",
|
||||
category="audio/partner/Stability AI",
|
||||
category="partner/audio/Stability AI",
|
||||
description=cleandoc(cls.__doc__ or ""),
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
|
||||
@ -52,7 +52,7 @@ class TopazImageEnhance(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TopazImageEnhance",
|
||||
display_name="Topaz Image Enhance",
|
||||
category="image/partner/Topaz",
|
||||
category="partner/image/Topaz",
|
||||
description="Industry-standard upscaling and image enhancement.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["Reimagine"]),
|
||||
@ -235,7 +235,7 @@ class TopazVideoEnhance(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TopazVideoEnhance",
|
||||
display_name="Topaz Video Enhance (Legacy)",
|
||||
category="video/partner/Topaz",
|
||||
category="partner/video/Topaz",
|
||||
description="Breathe new life into video with powerful upscaling and recovery technology.",
|
||||
inputs=[
|
||||
IO.Video.Input("video"),
|
||||
@ -475,7 +475,7 @@ class TopazVideoEnhanceV2(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TopazVideoEnhanceV2",
|
||||
display_name="Topaz Video Enhance",
|
||||
category="video/partner/Topaz",
|
||||
category="partner/video/Topaz",
|
||||
description="Breathe new life into video with powerful upscaling and recovery technology.",
|
||||
inputs=[
|
||||
IO.Video.Input("video"),
|
||||
|
||||
@ -83,7 +83,7 @@ class TripoTextToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoTextToModelNode",
|
||||
display_name="Tripo: Text to Model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True),
|
||||
IO.String.Input("negative_prompt", multiline=True, optional=True),
|
||||
@ -210,7 +210,7 @@ class TripoImageToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoImageToModelNode",
|
||||
display_name="Tripo: Image to Model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
IO.Combo.Input(
|
||||
@ -358,7 +358,7 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoMultiviewToModelNode",
|
||||
display_name="Tripo: Multiview to Model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
IO.Image.Input("image_left", optional=True),
|
||||
@ -518,7 +518,7 @@ class TripoTextureNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoTextureNode",
|
||||
display_name="Tripo: Texture model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[
|
||||
IO.Custom("MODEL_TASK_ID").Input("model_task_id"),
|
||||
IO.Boolean.Input("texture", default=True, optional=True),
|
||||
@ -595,7 +595,7 @@ class TripoRefineNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoRefineNode",
|
||||
display_name="Tripo: Refine Draft model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
description="Refine a draft model created by v1.4 Tripo models only.",
|
||||
inputs=[
|
||||
IO.Custom("MODEL_TASK_ID").Input("model_task_id", tooltip="Must be a v1.4 Tripo model"),
|
||||
@ -635,7 +635,7 @@ class TripoRigNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoRigNode",
|
||||
display_name="Tripo: Rig model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[IO.Custom("MODEL_TASK_ID").Input("original_model_task_id")],
|
||||
outputs=[
|
||||
IO.String.Output(display_name="model_file"), # for backward compatibility only
|
||||
@ -672,7 +672,7 @@ class TripoRetargetNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoRetargetNode",
|
||||
display_name="Tripo: Retarget rigged model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[
|
||||
IO.Custom("RIG_TASK_ID").Input("original_model_task_id"),
|
||||
IO.Combo.Input(
|
||||
@ -737,7 +737,7 @@ class TripoConversionNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoConversionNode",
|
||||
display_name="Tripo: Convert model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
inputs=[
|
||||
IO.Custom("MODEL_TASK_ID,RIG_TASK_ID,RETARGET_TASK_ID").Input("original_model_task_id"),
|
||||
IO.Combo.Input("format", options=["GLTF", "USDZ", "FBX", "OBJ", "STL", "3MF"]),
|
||||
@ -1051,7 +1051,7 @@ class TripoP1TextToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoP1TextToModelNode",
|
||||
display_name="Tripo P1: Text to Model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
description="Tripo P1 text-to-3D. Optimized for low-poly, game-ready meshes with stable topology.",
|
||||
inputs=[
|
||||
IO.String.Input("prompt", multiline=True, tooltip="Up to 1024 characters."),
|
||||
@ -1122,7 +1122,7 @@ class TripoP1ImageToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoP1ImageToModelNode",
|
||||
display_name="Tripo P1: Image to Model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
description="Tripo P1 image-to-3D. Optimized for low-poly, game-ready meshes.",
|
||||
inputs=[
|
||||
IO.Image.Input("image"),
|
||||
@ -1202,7 +1202,7 @@ class TripoP1MultiviewToModelNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="TripoP1MultiviewToModelNode",
|
||||
display_name="Tripo P1: Multiview to Model",
|
||||
category="3d/partner/Tripo",
|
||||
category="partner/3d/Tripo",
|
||||
description="Tripo P1 multiview-to-3D from 2-4 reference images in [front, left, back, right] order. "
|
||||
"Front is required; any combination of the other three may be omitted.",
|
||||
inputs=[
|
||||
|
||||
@ -45,7 +45,7 @@ class VeoVideoGenerationNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="VeoVideoGenerationNode",
|
||||
display_name="Google Veo 2 Video Generation",
|
||||
category="video/partner/Veo",
|
||||
category="partner/video/Veo",
|
||||
description="Generates videos from text prompts using Google's Veo 2 API",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -256,7 +256,7 @@ class Veo3VideoGenerationNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Veo3VideoGenerationNode",
|
||||
display_name="Google Veo 3 Video Generation",
|
||||
category="video/partner/Veo",
|
||||
category="partner/video/Veo",
|
||||
description="Generates videos from text prompts using Google's Veo 3 API",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
@ -468,7 +468,7 @@ class Veo3FirstLastFrameNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Veo3FirstLastFrameNode",
|
||||
display_name="Google Veo 3 First-Last-Frame to Video",
|
||||
category="video/partner/Veo",
|
||||
category="partner/video/Veo",
|
||||
description="Generate video using prompt and first and last frames.",
|
||||
inputs=[
|
||||
IO.String.Input(
|
||||
|
||||
@ -71,7 +71,7 @@ class ViduTextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ViduTextToVideoNode",
|
||||
display_name="Vidu Text To Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate video from a text prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
|
||||
@ -169,7 +169,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ViduImageToVideoNode",
|
||||
display_name="Vidu Image To Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate video from image and optional prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
|
||||
@ -273,7 +273,7 @@ class ViduReferenceVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ViduReferenceVideoNode",
|
||||
display_name="Vidu Reference To Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate video from multiple images and a prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
|
||||
@ -388,7 +388,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ViduStartEndToVideoNode",
|
||||
display_name="Vidu Start End To Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video from start and end frames and a prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
|
||||
@ -492,7 +492,7 @@ class Vidu2TextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu2TextToVideoNode",
|
||||
display_name="Vidu2 Text-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate video from a text prompt",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq2"]),
|
||||
@ -584,7 +584,7 @@ class Vidu2ImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu2ImageToVideoNode",
|
||||
display_name="Vidu2 Image-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video from an image and an optional prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]),
|
||||
@ -714,7 +714,7 @@ class Vidu2ReferenceVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu2ReferenceVideoNode",
|
||||
display_name="Vidu2 Reference-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video from multiple reference images and a prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq2"]),
|
||||
@ -849,7 +849,7 @@ class Vidu2StartEndToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu2StartEndToVideoNode",
|
||||
display_name="Vidu2 Start/End Frame-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video from a start frame, an end frame, and a prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]),
|
||||
@ -969,7 +969,7 @@ class ViduExtendVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ViduExtendVideoNode",
|
||||
display_name="Vidu Video Extension",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Extend an existing video by generating additional frames.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1138,7 +1138,7 @@ class ViduMultiFrameVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="ViduMultiFrameVideoNode",
|
||||
display_name="Vidu Multi-Frame Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video with multiple keyframe transitions.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["viduq2-pro", "viduq2-turbo"]),
|
||||
@ -1284,7 +1284,7 @@ class Vidu3TextToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu3TextToVideoNode",
|
||||
display_name="Vidu Q3 Text-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate video from a text prompt.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1429,7 +1429,7 @@ class Vidu3ImageToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu3ImageToVideoNode",
|
||||
display_name="Vidu Q3 Image-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video from an image and an optional prompt.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1571,7 +1571,7 @@ class Vidu3StartEndToVideoNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Vidu3StartEndToVideoNode",
|
||||
display_name="Vidu Q3 Start/End Frame-to-Video Generation",
|
||||
category="video/partner/Vidu",
|
||||
category="partner/video/Vidu",
|
||||
description="Generate a video from a start frame, an end frame, and a prompt.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
|
||||
@ -61,7 +61,7 @@ class WanTextToImageApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WanTextToImageApi",
|
||||
display_name="Wan Text to Image",
|
||||
category="image/partner/Wan",
|
||||
category="partner/image/Wan",
|
||||
description="Generates an image based on a text prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -184,7 +184,7 @@ class WanImageToImageApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WanImageToImageApi",
|
||||
display_name="Wan Image to Image",
|
||||
category="image/partner/Wan",
|
||||
category="partner/image/Wan",
|
||||
description="Generates an image from one or two input images and a text prompt. "
|
||||
"The output image is currently fixed at 1.6 MP, and its aspect ratio matches the input image(s).",
|
||||
inputs=[
|
||||
@ -312,7 +312,7 @@ class WanTextToVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WanTextToVideoApi",
|
||||
display_name="Wan Text to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generates a video based on a text prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -495,7 +495,7 @@ class WanImageToVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WanImageToVideoApi",
|
||||
display_name="Wan Image to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generates a video from the first frame and a text prompt.",
|
||||
inputs=[
|
||||
IO.Combo.Input(
|
||||
@ -674,7 +674,7 @@ class WanReferenceVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WanReferenceVideoApi",
|
||||
display_name="Wan Reference to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Use the character and voice from input videos, combined with a prompt, "
|
||||
"to generate a new video that maintains character consistency.",
|
||||
inputs=[
|
||||
@ -828,7 +828,7 @@ class Wan2TextToVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Wan2TextToVideoApi",
|
||||
display_name="Wan 2.7 Text to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generates a video based on a text prompt using the Wan 2.7 model.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -981,7 +981,7 @@ class Wan2ImageToVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Wan2ImageToVideoApi",
|
||||
display_name="Wan 2.7 Image to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generate a video from a first-frame image, with optional last-frame image and audio.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1152,7 +1152,7 @@ class Wan2VideoContinuationApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Wan2VideoContinuationApi",
|
||||
display_name="Wan 2.7 Video Continuation",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Continue a video from where it left off, with optional last-frame control.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1319,7 +1319,7 @@ class Wan2VideoEditApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Wan2VideoEditApi",
|
||||
display_name="Wan 2.7 Video Edit",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Edit a video using text instructions, reference images, or style transfer.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1477,7 +1477,7 @@ class Wan2ReferenceVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="Wan2ReferenceVideoApi",
|
||||
display_name="Wan 2.7 Reference to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generate a video featuring a person or object from reference materials. "
|
||||
"Supports single-character performances and multi-character interactions.",
|
||||
inputs=[
|
||||
@ -1651,7 +1651,7 @@ class HappyHorseTextToVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="HappyHorseTextToVideoApi",
|
||||
display_name="HappyHorse Text to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generates a video based on a text prompt using the HappyHorse model.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1775,7 +1775,7 @@ class HappyHorseImageToVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="HappyHorseImageToVideoApi",
|
||||
display_name="HappyHorse Image to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generate a video from a first-frame image using the HappyHorse model.",
|
||||
inputs=[
|
||||
IO.DynamicCombo.Input(
|
||||
@ -1905,7 +1905,7 @@ class HappyHorseVideoEditApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="HappyHorseVideoEditApi",
|
||||
display_name="HappyHorse Video Edit",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Edit a video using text instructions or reference images with the HappyHorse model. "
|
||||
"Output duration is 3-15s and matches the input video; inputs longer than 15s are truncated.",
|
||||
inputs=[
|
||||
@ -2046,7 +2046,7 @@ class HappyHorseReferenceVideoApi(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="HappyHorseReferenceVideoApi",
|
||||
display_name="HappyHorse Reference to Video",
|
||||
category="video/partner/Wan",
|
||||
category="partner/video/Wan",
|
||||
description="Generate a video featuring a person or object from reference materials with the HappyHorse "
|
||||
"model. Supports single-character performances and multi-character interactions.",
|
||||
inputs=[
|
||||
|
||||
@ -27,7 +27,7 @@ class WavespeedFlashVSRNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WavespeedFlashVSRNode",
|
||||
display_name="FlashVSR Video Upscale",
|
||||
category="video/partner/WaveSpeed",
|
||||
category="partner/video/WaveSpeed",
|
||||
description="Fast, high-quality video upscaler that "
|
||||
"boosts resolution and restores clarity for low-resolution or blurry footage.",
|
||||
inputs=[
|
||||
@ -98,7 +98,7 @@ class WavespeedImageUpscaleNode(IO.ComfyNode):
|
||||
return IO.Schema(
|
||||
node_id="WavespeedImageUpscaleNode",
|
||||
display_name="WaveSpeed Image Upscale",
|
||||
category="image/partner/WaveSpeed",
|
||||
category="partner/image/WaveSpeed",
|
||||
description="Boost image resolution and quality, upscaling photos to 4K or 8K for sharp, detailed results.",
|
||||
inputs=[
|
||||
IO.Combo.Input("model", options=["SeedVR2", "Ultimate"]),
|
||||
|
||||
@ -65,6 +65,12 @@ class ChromaRadianceOptions(io.ComfyNode):
|
||||
tooltip="Allows overriding the default NeRF tile size. -1 means use the default (32). 0 means use non-tiling mode (may require a lot of VRAM).",
|
||||
advanced=True,
|
||||
),
|
||||
io.Boolean.Input(
|
||||
id="force_sequential_txt_ids",
|
||||
default=False,
|
||||
tooltip="Force usage of sequential text token IDs instead of zeroes. Should be used for checkpoints from 2026-05-22 to 2026-06-01 that are trained in this way but do not contain the __sequential__ key in the state dict.",
|
||||
advanced=True,
|
||||
),
|
||||
],
|
||||
outputs=[io.Model.Output()],
|
||||
)
|
||||
@ -78,11 +84,15 @@ class ChromaRadianceOptions(io.ComfyNode):
|
||||
start_sigma: float,
|
||||
end_sigma: float,
|
||||
nerf_tile_size: int,
|
||||
force_sequential_txt_ids: bool,
|
||||
) -> io.NodeOutput:
|
||||
radiance_options = {}
|
||||
if nerf_tile_size >= 0:
|
||||
radiance_options["nerf_tile_size"] = nerf_tile_size
|
||||
|
||||
if force_sequential_txt_ids:
|
||||
radiance_options["use_sequential_txt_ids"] = True
|
||||
|
||||
if not radiance_options:
|
||||
return io.NodeOutput(model)
|
||||
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
import math
|
||||
import comfy.samplers
|
||||
import comfy.sampler_helpers
|
||||
import comfy.patcher_extension
|
||||
import comfy.sample
|
||||
from comfy.k_diffusion import sampling as k_diffusion_sampling
|
||||
from comfy.k_diffusion import sa_solver
|
||||
@ -894,6 +896,84 @@ class DualCFGGuider(io.ComfyNode):
|
||||
|
||||
get_guider = execute
|
||||
|
||||
class Guider_DualModel(comfy.samplers.CFGGuider):
    """CFG guider that evaluates cond and uncond on two different models.

    The positive (cond) pass runs on the main model handled by the parent
    ``CFGGuider``; the negative (uncond) pass runs on ``uncond_model_patcher``.
    When cfg is (close to) 1.0, or no negative conds are set, the second model
    is never prepared and only the positive pass is evaluated.
    """

    def __init__(self, model_patcher, uncond_model_patcher):
        super().__init__(model_patcher)
        self.uncond_model_patcher = uncond_model_patcher
        # Per-run state. Declared here so every attribute exists before the
        # first call to outer_sample / inner_sample / predict_noise.
        self.uncond_inner = None    # prepared negative model; None => cond-only mode
        self.uncond_loaded = []     # extra models reported by prepare_sampling
        self._uncond_neg = None     # negative conds as returned by prepare_sampling
        self._uncond_conds = None   # negative conds after process_conds

    def outer_sample(self, noise, latent_image, sampler, sigmas, denoise_mask=None, callback=None, disable_pbar=False, seed=None, latent_shapes=None):
        """Prepare the negative model, run the parent's sampling, then clean up.

        Preparation and ``pre_run()`` happen inside the ``try`` so that a
        failure after the negative model has been prepared still reaches the
        cleanup in ``finally``.
        """
        self.uncond_inner = None
        self.uncond_loaded = []
        self._uncond_neg = None
        self._uncond_conds = None
        try:
            negative = self.conds.get("negative")
            # skip at cfg 1.0 (and when there is no negative to run)
            if negative is not None and not math.isclose(self.cfg, 1.0):
                # Copy each cond entry so preparing the second model does not
                # touch the entries the parent guider uses.
                uc = {"negative": [entry.copy() for entry in negative]}
                self.uncond_inner, uc, self.uncond_loaded = comfy.sampler_helpers.prepare_sampling(
                    self.uncond_model_patcher, noise.shape, uc, self.uncond_model_patcher.model_options)
                self._uncond_neg = uc["negative"]
                self.uncond_model_patcher.pre_run()
            return super().outer_sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
        finally:
            if self.uncond_inner is not None:
                self.uncond_model_patcher.cleanup()
                comfy.sampler_helpers.cleanup_models({"negative": self._uncond_neg}, self.uncond_loaded)
            # Drop per-run references so models and conds are not kept alive
            # by the guider after sampling has finished.
            self.uncond_inner = None
            self.uncond_loaded = []
            self._uncond_neg = None
            self._uncond_conds = None

    def inner_sample(self, noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=None):
        """Process the negative conds against the negative model, then defer to the parent."""
        if self.uncond_inner is not None:
            li = latent_image
            # Only run process_latent_in on a latent that has non-zero content.
            if li is not None and torch.count_nonzero(li) > 0:
                li = self.uncond_inner.process_latent_in(li)
            self._uncond_conds = comfy.samplers.process_conds(
                self.uncond_inner, noise, {"negative": self._uncond_neg}, device, li, denoise_mask, seed, latent_shapes=latent_shapes)["negative"]
        return super().inner_sample(noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)

    # NOTE: the mutable default is kept to leave the signature unchanged; this
    # method never mutates model_options.
    def predict_noise(self, x, timestep, model_options={}, seed=None):
        """Return the guided prediction for one step.

        The positive pass always runs on ``self.inner_model``. If the negative
        model was prepared, the negative pass runs on it and both results are
        combined by ``comfy.samplers.cfg_function``; otherwise the positive
        result is returned as-is.
        """
        positive = self.conds.get("positive", None)
        cond = comfy.samplers.calc_cond_batch(self.inner_model, [positive], x, timestep, model_options)[0]
        if self.uncond_inner is None:  # cfg == 1 or no negative -> single model, cond only
            return cond

        uncond_model_options = model_options
        if "multigpu_clones" in model_options:  # TODO: support multigpu instead of just running uncond on a single GPU
            uncond_model_options = {k: v for k, v in model_options.items() if k != "multigpu_clones"}
        uncond = comfy.samplers.calc_cond_batch(self.uncond_inner, [self._uncond_conds], x, timestep, uncond_model_options)[0]
        return comfy.samplers.cfg_function(self.inner_model, cond, uncond, self.cfg, x, timestep,
                                           model_options=model_options, cond=positive, uncond=self._uncond_conds)
|
||||
|
||||
class DualModelGuider(io.ComfyNode):
    """Node that builds a guider running cond and uncond on separate models.

    With ``model_negative`` connected a ``Guider_DualModel`` is created;
    without it the node falls back to a plain ``comfy.samplers.CFGGuider``.
    """

    @classmethod
    def define_schema(cls):
        """Describe the node's inputs and its single guider output."""
        node_inputs = [
            io.Model.Input("model", tooltip="Model used for the positive (conditional) pass."),
            io.Model.Input("model_negative", optional=True, tooltip="Model used for the negative (unconditional) pass. Use the same model for ordinary CFG."),
            io.Conditioning.Input("positive"),
            io.Float.Input("cfg", default=4.0, min=0.0, max=100.0, step=0.1, round=0.01),
            io.Conditioning.Input("negative", optional=True, tooltip="Negative conditioning run on the negative model. Leave unconnected for a text-free (image-only) unconditional pass."),
        ]
        return io.Schema(
            node_id="DualModelGuider",
            display_name="Dual Model CFG Guider",
            category="model/sampling/guiders",
            is_experimental=True,
            inputs=node_inputs,
            outputs=[io.Guider.Output()],
        )

    @classmethod
    def execute(cls, model, positive, cfg, model_negative=None, negative=None) -> io.NodeOutput:
        """Create the guider, attach the conds and cfg, and return it."""
        if negative is None:
            # null cond -> no cross_attn -> model runs image-only
            negative = [[None, {}]]

        if model_negative is None:
            guider = comfy.samplers.CFGGuider(model)
        else:
            guider = Guider_DualModel(model, model_negative)

        guider.set_conds(positive, negative)
        guider.set_cfg(cfg)
        return io.NodeOutput(guider)

    # Alias kept alongside execute, as on the other guider nodes in this file.
    get_guider = execute
|
||||
|
||||
class DisableNoise(io.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
@ -1054,11 +1134,53 @@ class ManualSigmas(io.ComfyNode):
|
||||
sigmas = torch.FloatTensor(sigmas)
|
||||
return io.NodeOutput(sigmas)
|
||||
|
||||
class CFGOverride(io.ComfyNode):
    """Model patch node that swaps in a fixed cfg for part of the sampling run.

    The node clones the model and registers a PREDICT_NOISE wrapper on the
    clone. For steps whose sigma lies inside the configured window, the
    wrapper temporarily replaces the guider's ``cfg`` and restores it
    afterwards.
    """

    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node: a model in, cfg value and percent window, a model out."""
        summary = (
            "Override cfg to a fixed value over a [start, end] percent slice of the steps. "
            "With multiple overrides, the one nearest the sampler wins on overlap."
        )
        node_inputs = [
            io.Model.Input("model"),
            io.Float.Input("cfg", default=1.0, min=0.0, max=100.0, step=0.1, round=0.01),
            io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001),
            io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001),
        ]
        return io.Schema(
            node_id="CFGOverride",
            display_name="CFG Override",
            description=summary,
            category="sampling/custom_sampling",
            inputs=node_inputs,
            outputs=[io.Model.Output()],
        )

    @classmethod
    def execute(cls, model, cfg, start_percent, end_percent) -> io.NodeOutput:
        """Return a clone of ``model`` carrying the cfg-override wrapper."""
        sampling = model.get_model_object("model_sampling")
        # The percent -> sigma mapping is expected to be decreasing, so the
        # start percent yields the upper sigma bound and the end percent the lower.
        upper_sigma = sampling.percent_to_sigma(start_percent)
        lower_sigma = sampling.percent_to_sigma(end_percent)

        def predict_noise_wrapper(executor, *args, **kwargs):
            # args = (x, timestep, model_options, seed); the first timestep
            # element is taken as the sigma of the current step.
            current_sigma = float(args[1].flatten()[0])
            if lower_sigma <= current_sigma <= upper_sigma:
                active_guider = executor.class_obj  # guider.cfg feeds cond_scale
                previous_cfg = active_guider.cfg
                active_guider.cfg = cfg
                try:
                    return executor(*args, **kwargs)
                finally:
                    # restore for other steps/overrides
                    active_guider.cfg = previous_cfg
            # Outside the window: run the step untouched.
            return executor(*args, **kwargs)

        patched = model.clone()
        patched.add_wrapper(comfy.patcher_extension.WrappersMP.PREDICT_NOISE, predict_noise_wrapper)
        return io.NodeOutput(patched)
|
||||
|
||||
|
||||
class CustomSamplersExtension(ComfyExtension):
|
||||
@override
|
||||
async def get_node_list(self) -> list[type[io.ComfyNode]]:
|
||||
return [
|
||||
SamplerCustom,
|
||||
CFGOverride,
|
||||
BasicScheduler,
|
||||
KarrasScheduler,
|
||||
ExponentialScheduler,
|
||||
@ -1087,6 +1209,7 @@ class CustomSamplersExtension(ComfyExtension):
|
||||
SamplingPercentToSigma,
|
||||
CFGGuider,
|
||||
DualCFGGuider,
|
||||
DualModelGuider,
|
||||
BasicGuider,
|
||||
RandomNoise,
|
||||
DisableNoise,
|
||||
|
||||
@ -411,6 +411,21 @@ class ImageProcessingNode(io.ComfyNode):
|
||||
|
||||
return has_group
|
||||
|
||||
@classmethod
|
||||
def _ensure_image_list(cls, images):
|
||||
"""Normalize to a flat list of [1, H, W, C] tensors."""
|
||||
if isinstance(images, torch.Tensor):
|
||||
if images.ndim != 4:
|
||||
raise ValueError(f"Expected 4D image tensor, got shape {tuple(images.shape)}")
|
||||
return [images[i:i+1] for i in range(images.shape[0])]
|
||||
|
||||
flat = []
|
||||
for item in images:
|
||||
if not isinstance(item, torch.Tensor) or item.ndim != 4:
|
||||
raise ValueError(f"Expected 4D image tensor, got {type(item).__name__} shape {getattr(item, 'shape', None)}")
|
||||
flat.extend([item[i:i+1] for i in range(item.shape[0])])
|
||||
return flat
|
||||
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
if cls.node_id is None:
|
||||
@ -458,6 +473,9 @@ class ImageProcessingNode(io.ComfyNode):
|
||||
"""Execute the node. Routes to _process or _group_process based on mode."""
|
||||
is_group = cls._detect_processing_mode()
|
||||
|
||||
if is_group:
|
||||
images = cls._ensure_image_list(images)
|
||||
|
||||
# Extract scalar values from lists for parameters
|
||||
params = {}
|
||||
for k, v in kwargs.items():
|
||||
|
||||
64
comfy_extras/nodes_ideogram4.py
Normal file
64
comfy_extras/nodes_ideogram4.py
Normal file
@ -0,0 +1,64 @@
|
||||
"""Ideogram 4 sampling helper
|
||||
"""
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
from typing_extensions import override
|
||||
from comfy_api.latest import ComfyExtension, io
|
||||
|
||||
# Bounds (named as logSNR values) that define the clamp range of the schedule.
_LOGSNR_MIN = -15.0
_LOGSNR_MAX = 18.0


def _logit_normal_schedule(u, mean, std):
    """Map quantiles ``u`` to clamped reference times.

    The quantiles go through the probit function (``ndtri``), are scaled by
    ``std`` and shifted by ``mean``, squashed with a sigmoid, and mirrored
    with ``1 - x``. The original author describes the result as a reference
    time with 0 = noise and 1 = clean. It is clamped to the range derived from
    ``_LOGSNR_MAX`` / ``_LOGSNR_MIN`` and returned as a float64 tensor.
    """
    quantiles = torch.as_tensor(u, dtype=torch.float64)
    logits = mean + std * torch.special.ndtri(quantiles)
    times = 1.0 - torch.special.expit(logits)
    lower = 1.0 / (1.0 + math.exp(0.5 * _LOGSNR_MAX))
    upper = 1.0 / (1.0 + math.exp(0.5 * _LOGSNR_MIN))
    return torch.clamp(times, min=lower, max=upper)


def ideogram4_sigmas(num_steps, width, height, mu, std):
    """Descending sigmas (len num_steps+1) for the reference schedule.

    ``mu`` plus a resolution term (half the log of the pixel count relative
    to 512x512) forms the shift of the curve; ``std`` is the spread. The
    result is a float32 tensor whose last entry is exactly 0.
    """
    resolution_ratio = (width * height) / (512 * 512)
    shifted_mean = mu + 0.5 * math.log(resolution_ratio)
    quantiles = torch.linspace(0.0, 1.0, num_steps + 1, dtype=torch.float64)
    times = _logit_normal_schedule(quantiles, shifted_mean, std)
    schedule = torch.flip(1.0 - times, dims=(0,))
    # The clamp leaves a small positive tail (~6e-4); force a full denoise.
    schedule[-1] = 0.0
    return schedule.float()
|
||||
|
||||
|
||||
class Ideogram4Scheduler(io.ComfyNode):
    """Node that outputs the sigma schedule computed by ``ideogram4_sigmas``."""

    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node: step count, resolution, and curve parameters in; sigmas out."""
        node_inputs = [
            io.Int.Input("steps", default=20, min=1, max=200),
            io.Int.Input("width", default=1024, min=256, max=8192, step=16),
            io.Int.Input("height", default=1024, min=256, max=8192, step=16),
            io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.05),
            io.Float.Input("std", default=1.75, min=0.1, max=5.0, step=0.05),
        ]
        return io.Schema(
            node_id="Ideogram4Scheduler",
            display_name="Ideogram 4 Scheduler",
            category="sampling/custom_sampling/schedulers",
            inputs=node_inputs,
            outputs=[io.Sigmas.Output()],
        )

    @classmethod
    def execute(cls, steps, width, height, mu, std) -> io.NodeOutput:
        """Compute the schedule for the given settings and wrap it as the node output."""
        sigmas = ideogram4_sigmas(steps, width, height, mu, std)
        return io.NodeOutput(sigmas)
|
||||
|
||||
|
||||
class Ideogram4Extension(ComfyExtension):
    """Extension object exposing the nodes defined in this module."""

    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        node_classes = [Ideogram4Scheduler]
        return node_classes
|
||||
|
||||
|
||||
async def comfy_entrypoint() -> Ideogram4Extension:
    """Create and return this module's extension instance."""
    extension = Ideogram4Extension()
    return extension
|
||||
@ -21,8 +21,8 @@ class PiDConditioning(io.ComfyNode):
|
||||
inputs=[
|
||||
io.Conditioning.Input("positive"),
|
||||
io.Latent.Input("latent", tooltip="latent (from VAEEncode or a KSampler)."),
|
||||
io.Combo.Input("latent_format", options=["flux", "sd3"], default="flux",
|
||||
tooltip="Flux1 and Flux2 latents auto-detected from channel dim, sd3 has to be selected manually."),
|
||||
io.Combo.Input("latent_format", options=["flux", "sd3", "sdxl", "qwenimage"], default="flux",
|
||||
tooltip="Flux1 (16-ch) and Flux2 (128-ch) latents are auto-detected from channel dim under 'flux'. For SD3 (16-ch), SDXL (4-ch), or QwenImage (16-ch), select manually."),
|
||||
io.Float.Input(
|
||||
"degrade_sigma", default=0.0, min=0.0, max=1.0, step=0.01,
|
||||
tooltip="0 = clean latent. Increase to denoise corrupted latent outputs.",
|
||||
@ -36,9 +36,17 @@ class PiDConditioning(io.ComfyNode):
|
||||
samples = latent["samples"]
|
||||
if latent_format == "flux":
|
||||
fmt_cls = comfy.latent_formats.Flux2 if samples.shape[1] == 128 else comfy.latent_formats.Flux
|
||||
else:
|
||||
elif latent_format == "sd3":
|
||||
fmt_cls = comfy.latent_formats.SD3
|
||||
elif latent_format == "sdxl":
|
||||
fmt_cls = comfy.latent_formats.SDXL
|
||||
elif latent_format == "qwenimage":
|
||||
fmt_cls = comfy.latent_formats.Wan21
|
||||
else:
|
||||
raise ValueError(f"Unknown latent_format: {latent_format}")
|
||||
lq_latent = fmt_cls().process_in(samples)
|
||||
if lq_latent.ndim == 5:
|
||||
lq_latent = lq_latent[:, :, 0]
|
||||
sigma_t = torch.tensor([float(degrade_sigma)], dtype=torch.float32)
|
||||
return io.NodeOutput(node_helpers.conditioning_set_values(
|
||||
positive, {"lq_latent": lq_latent, "degrade_sigma": sigma_t},
|
||||
|
||||
@ -19,7 +19,7 @@ class SaveWEBM(io.ComfyNode):
|
||||
category="video",
|
||||
is_experimental=True,
|
||||
inputs=[
|
||||
io.Image.Input("images"),
|
||||
io.Image.Input("images", tooltip="RGBA images are saved with their alpha channel as transparency (vp9 codec only)."),
|
||||
io.String.Input("filename_prefix", default="ComfyUI"),
|
||||
io.Combo.Input("codec", options=["vp9", "av1"]),
|
||||
io.Float.Input("fps", default=24.0, min=0.01, max=1000.0, step=0.01),
|
||||
@ -45,18 +45,25 @@ class SaveWEBM(io.ComfyNode):
|
||||
for x in cls.hidden.extra_pnginfo:
|
||||
container.metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x])
|
||||
|
||||
# Save transparency when the images carry an alpha channel (RGBA) and the codec supports it.
|
||||
# vp9 -> yuva420p; other codecs have no usable alpha path, so the alpha is ignored.
|
||||
save_alpha = images.shape[-1] == 4 and codec == "vp9"
|
||||
|
||||
codec_map = {"vp9": "libvpx-vp9", "av1": "libsvtav1"}
|
||||
stream = container.add_stream(codec_map[codec], rate=Fraction(round(fps * 1000), 1000))
|
||||
stream.width = images.shape[-2]
|
||||
stream.height = images.shape[-3]
|
||||
stream.pix_fmt = "yuv420p10le" if codec == "av1" else "yuv420p"
|
||||
stream.pix_fmt = "yuva420p" if save_alpha else ("yuv420p10le" if codec == "av1" else "yuv420p")
|
||||
stream.bit_rate = 0
|
||||
stream.options = {'crf': str(crf)}
|
||||
if codec == "av1":
|
||||
stream.options["preset"] = "6"
|
||||
|
||||
for frame in images:
|
||||
frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :3] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgb24")
|
||||
if save_alpha:
|
||||
frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :4] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgba")
|
||||
else:
|
||||
frame = av.VideoFrame.from_ndarray(torch.clamp(frame[..., :3] * 255, min=0, max=255).to(device=torch.device("cpu"), dtype=torch.uint8).numpy(), format="rgb24")
|
||||
for packet in stream.encode(frame):
|
||||
container.mux(packet)
|
||||
container.mux(stream.encode())
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
# This file is automatically generated by the build process when version is
|
||||
# updated in pyproject.toml.
|
||||
__version__ = "0.23.0"
|
||||
__version__ = "0.24.0"
|
||||
|
||||
3
nodes.py
3
nodes.py
@ -970,7 +970,7 @@ class CLIPLoader:
|
||||
@classmethod
|
||||
def INPUT_TYPES(s):
|
||||
return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
|
||||
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit"], ),
|
||||
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image", "cogvideox", "lens", "pixeldit", "ideogram4"], ),
|
||||
},
|
||||
"optional": {
|
||||
"device": (["default", "cpu"], {"advanced": True}),
|
||||
@ -2364,6 +2364,7 @@ async def init_builtin_extra_nodes():
|
||||
"nodes_model_downscale.py",
|
||||
"nodes_images.py",
|
||||
"nodes_video_model.py",
|
||||
"nodes_ideogram4.py",
|
||||
"nodes_train.py",
|
||||
"nodes_dataset.py",
|
||||
"nodes_sag.py",
|
||||
|
||||
16661
openapi.yaml
16661
openapi.yaml
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "ComfyUI"
|
||||
version = "0.23.0"
|
||||
version = "0.24.0"
|
||||
readme = "README.md"
|
||||
license = { file = "LICENSE" }
|
||||
requires-python = ">=3.10"
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
comfyui-frontend-package==1.44.19
|
||||
comfyui-workflow-templates==0.9.92
|
||||
comfyui-workflow-templates==0.9.94
|
||||
comfyui-embedded-docs==0.5.2
|
||||
torch
|
||||
torchsde
|
||||
@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0
|
||||
filelock
|
||||
av>=16.0.0
|
||||
comfy-kitchen==0.2.10
|
||||
comfy-aimdo==0.4.7
|
||||
comfy-aimdo==0.4.8
|
||||
requests
|
||||
simpleeval>=1.0.0
|
||||
blake3
|
||||
|
||||
Loading…
Reference in New Issue
Block a user