Merge branch 'master' of github.com:comfyanonymous/ComfyUI

commit b183b27a87
@@ -1,6 +1,6 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.4.0"
__version__ = "0.5.0"

# This deals with workspace issues
from comfy_compatibility.workspace import auto_patch_workspace_and_restart

@@ -17,6 +17,13 @@ class LatentPreviewMethod(enum.Enum):
    Latent2RGB = "latent2rgb"
    TAESD = "taesd"

    @classmethod
    def from_string(cls, value: str):
        for member in cls:
            if member.value == value:
                return member
        return None


ConfigObserver = Callable[[str, Any], None]
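Note: from_string is a forgiving lookup for CLI-style values, returning None instead of raising on unknown input:

    assert LatentPreviewMethod.from_string("taesd") is LatentPreviewMethod.TAESD
    assert LatentPreviewMethod.from_string("unknown") is None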
@@ -1,27 +1,28 @@
from __future__ import annotations
from .main_pre import tracer

from typing_extensions import NotRequired, TypedDict, NamedTuple
import asyncio
import copy
import heapq
import inspect
import json
import logging
import sys
import threading
import time
import traceback
import typing
from contextlib import nullcontext
from enum import Enum
from os import PathLike
from typing import List, Optional, Tuple, Literal
from typing import List, Optional, Literal

import sys
import time
import torch
from opentelemetry.trace import get_current_span, StatusCode, Status
from typing_extensions import NotRequired, TypedDict, NamedTuple

from comfy_api.internal import _ComfyNodeInternal, _NodeOutputInternal, first_real_override, is_class, make_locked_method_func
from comfy_api.internal import _ComfyNodeInternal, _NodeOutputInternal, first_real_override, is_class, \
    make_locked_method_func
from comfy_api.latest import io, _io
from comfy_compatibility.vanilla import vanilla_environment_node_execution_hooks
from comfy_execution.caching import (

@@ -46,12 +47,14 @@ from comfy_execution.progress import get_progress_state, reset_progress_state, a
    ProgressRegistry
from comfy_execution.utils import CurrentNodeContext
from comfy_execution.validation import validate_node_input
from .latent_preview import set_preview_method
from .. import interruption
from .. import model_management
from ..component_model.abstract_prompt_queue import AbstractPromptQueue
from ..component_model.executor_types import ExecutorToClientProgress, ValidationTuple, ValidateInputsTuple, \
    ValidationErrorDict, NodeErrorsDictValue, ValidationErrorExtraInfoDict, FormattedValue, RecursiveExecutionTuple, \
    RecursiveExecutionErrorDetails, RecursiveExecutionErrorDetailsInterrupted, ExecutionResult, HistoryResultDict, ExecutionErrorMessage, ExecutionInterruptedMessage, ComboOptions
    RecursiveExecutionErrorDetails, RecursiveExecutionErrorDetailsInterrupted, ExecutionResult, HistoryResultDict, \
    ExecutionErrorMessage, ExecutionInterruptedMessage, ComboOptions
from ..component_model.files import canonicalize_path
from ..component_model.module_property import create_module_properties
from ..component_model.queue_types import QueueTuple, HistoryEntry, QueueItem, MAXIMUM_HISTORY_SIZE, ExecutionStatus, \
@@ -824,6 +827,9 @@ class PromptExecutor:
        if extra_data is None:
            extra_data = {}

        extra_data_preview_method = extra_data.get("preview_method", None)
        if extra_data_preview_method is not None:
            set_preview_method(extra_data_preview_method)
        interruption.interrupt_current_processing(False)

        if "client_id" in extra_data:
@@ -18,6 +18,8 @@ from ..taesd.taesd import TAESD
from ..sd import VAE
from ..utils import load_torch_file

default_preview_method = args.preview_method

MAX_PREVIEW_RESOLUTION = args.preview_size
VIDEO_TAES = ["taehv", "lighttaew2_2", "lighttaew2_1", "lighttaehy1_5"]

@@ -145,3 +147,17 @@ def prepare_callback(model, steps, x0_output_dict=None):
        pbar.update_absolute(step + 1, total_steps, preview_bytes)

    return callback

def set_preview_method(override: str = None):
    # todo: this should set a context var where it is called, which is exactly one place
    return
    # if override and override != "default":
    #     method = LatentPreviewMethod.from_string(override)
    #     if method is not None:
    #         args.preview_method = method
    #         return
    #
    #
    # args.preview_method = default_preview_method
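Note: the TODO says the override should be scoped to the caller via a context variable instead of mutating the global args. A sketch of that shape, with hypothetical names (_preview_method_var, get_preview_method), not code from this commit:

    import contextvars

    _preview_method_var = contextvars.ContextVar("preview_method", default=None)

    def set_preview_method(override: str = None):
        method = None
        if override and override != "default":
            method = LatentPreviewMethod.from_string(override)
        _preview_method_var.set(method or default_preview_method)

    def get_preview_method():
        return _preview_method_var.get() or default_preview_method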
@@ -92,6 +92,7 @@ class IndexListCallbacks:
    COMBINE_CONTEXT_WINDOW_RESULTS = "combine_context_window_results"
    EXECUTE_START = "execute_start"
    EXECUTE_CLEANUP = "execute_cleanup"
    RESIZE_COND_ITEM = "resize_cond_item"

    def init_callbacks(self):
        return {}

@@ -175,6 +176,18 @@ class IndexListContextHandler(ContextHandlerABC):
            new_cond_item = cond_item.copy()
            # when in dictionary, look for tensors and CONDCrossAttn [comfy/conds.py] (has cond attr that is a tensor)
            for cond_key, cond_value in new_cond_item.items():
                # Allow callbacks to handle custom conditioning items
                handled = False
                for callback in comfy.patcher_extension.get_all_callbacks(
                    IndexListCallbacks.RESIZE_COND_ITEM, self.callbacks
                ):
                    result = callback(cond_key, cond_value, window, x_in, device, new_cond_item)
                    if result is not None:
                        new_cond_item[cond_key] = result
                        handled = True
                        break
                if handled:
                    continue
                if isinstance(cond_value, torch.Tensor):
                    if (self.dim < cond_value.ndim and cond_value.size(self.dim) == x_in.size(self.dim)) or \
                            (cond_value.ndim < self.dim and cond_value.size(0) == x_in.size(self.dim)):
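Note: the new RESIZE_COND_ITEM hook lets extensions take over resizing of conditioning entries the handler does not recognize. A sketch of a conforming callback, matching the call signature above; the key name is hypothetical:

    def resize_my_cond(cond_key, cond_value, window, x_in, device, new_cond_item):
        if cond_key != "my_custom_cond":  # only claim keys this extension owns
            return None                   # None defers to the default tensor handling
        return cond_value                 # return the (possibly resized) replacement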
@@ -1600,10 +1600,13 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None


@torch.no_grad()
def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5):
def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=0.5, solver_type="phi_1"):
    """SEEDS-2 - Stochastic Explicit Exponential Derivative-free Solvers (VP Data Prediction) stage 2.
    arXiv: https://arxiv.org/abs/2305.14267 (NeurIPS 2023)
    """
    if solver_type not in {"phi_1", "phi_2"}:
        raise ValueError("solver_type must be 'phi_1' or 'phi_2'")

    extra_args = {} if extra_args is None else extra_args
    seed = extra_args.get("seed", None)
    noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler

@@ -1643,8 +1646,14 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
        denoised_2 = model(x_2, sigma_s_1 * s_in, **extra_args)

        # Step 2
        denoised_d = torch.lerp(denoised, denoised_2, fac)
        x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
        if solver_type == "phi_1":
            denoised_d = torch.lerp(denoised, denoised_2, fac)
            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * ei_h_phi_1(-h_eta) * denoised_d
        elif solver_type == "phi_2":
            b2 = ei_h_phi_2(-h_eta) / r
            b1 = ei_h_phi_1(-h_eta) - b2
            x = sigmas[i + 1] / sigmas[i] * (-h * eta).exp() * x - alpha_t * (b1 * denoised + b2 * denoised_2)

        if inject_noise:
            segment_factor = (r - 1) * h * eta
            sde_noise = sde_noise * segment_factor.exp()

@@ -1652,6 +1661,17 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non
            x = x + sde_noise * sigmas[i + 1] * s_noise
    return x

@torch.no_grad()
def sample_exp_heun_2_x0(model, x, sigmas, extra_args=None, callback=None, disable=None, solver_type="phi_2"):
    """Deterministic exponential Heun second order method in data prediction (x0) and logSNR time."""
    return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None, r=1.0, solver_type=solver_type)


@torch.no_grad()
def sample_exp_heun_2_x0_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type="phi_2"):
    """Stochastic exponential Heun second order method in data prediction (x0) and logSNR time."""
    return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=1.0, solver_type=solver_type)


@torch.no_grad()
def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1. / 3, r_2=2. / 3):
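Note: the solver_type branch weights the two model evaluations with exponential-integrator phi functions. The standard definitions, which the ei_h_phi_1/ei_h_phi_2 helpers presumably implement (a sketch, not this file's code):

    import torch

    def phi_1(z):
        return torch.expm1(z) / z      # (e^z - 1) / z

    def phi_2(z):
        return (phi_1(z) - 1.0) / z    # (phi_1(z) - 1) / z

With r = 1.0 this gives b2 = phi_2(-h_eta) and b1 = phi_1(-h_eta) - b2, the classical second-order exponential Heun weights that the new exp_heun_2_x0 wrappers rely on.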
@@ -41,6 +41,11 @@ class ZImage_Control(torch.nn.Module):
        ffn_dim_multiplier: float = (8.0 / 3.0),
        norm_eps: float = 1e-5,
        qk_norm: bool = True,
        n_control_layers=6,
        control_in_dim=16,
        additional_in_dim=0,
        broken=False,
        refiner_control=False,
        dtype=None,
        device=None,
        operations=None,

@@ -49,10 +54,11 @@ class ZImage_Control(torch.nn.Module):
        super().__init__()
        operation_settings = {"operations": operations, "device": device, "dtype": dtype}

        self.additional_in_dim = 0
        self.control_in_dim = 16
        self.broken = broken
        self.additional_in_dim = additional_in_dim
        self.control_in_dim = control_in_dim
        n_refiner_layers = 2
        self.n_control_layers = 6
        self.n_control_layers = n_control_layers
        self.control_layers = nn.ModuleList(
            [
                ZImageControlTransformerBlock(
@@ -74,28 +80,49 @@ class ZImage_Control(torch.nn.Module):
        all_x_embedder = {}
        patch_size = 2
        f_patch_size = 1
        x_embedder = operations.Linear(f_patch_size * patch_size * patch_size * self.control_in_dim, dim, bias=True, device=device, dtype=dtype)
        x_embedder = operations.Linear(f_patch_size * patch_size * patch_size * (self.control_in_dim + self.additional_in_dim), dim, bias=True, device=device, dtype=dtype)
        all_x_embedder[f"{patch_size}-{f_patch_size}"] = x_embedder

        self.refiner_control = refiner_control

        self.control_all_x_embedder = nn.ModuleDict(all_x_embedder)
        self.control_noise_refiner = nn.ModuleList(
            [
                JointTransformerBlock(
                    layer_id,
                    dim,
                    n_heads,
                    n_kv_heads,
                    multiple_of,
                    ffn_dim_multiplier,
                    norm_eps,
                    qk_norm,
                    modulation=True,
                    z_image_modulation=True,
                    operation_settings=operation_settings,
                )
                for layer_id in range(n_refiner_layers)
            ]
        )
        if self.refiner_control:
            self.control_noise_refiner = nn.ModuleList(
                [
                    ZImageControlTransformerBlock(
                        layer_id,
                        dim,
                        n_heads,
                        n_kv_heads,
                        multiple_of,
                        ffn_dim_multiplier,
                        norm_eps,
                        qk_norm,
                        block_id=layer_id,
                        operation_settings=operation_settings,
                    )
                    for layer_id in range(n_refiner_layers)
                ]
            )
        else:
            self.control_noise_refiner = nn.ModuleList(
                [
                    JointTransformerBlock(
                        layer_id,
                        dim,
                        n_heads,
                        n_kv_heads,
                        multiple_of,
                        ffn_dim_multiplier,
                        norm_eps,
                        qk_norm,
                        modulation=True,
                        z_image_modulation=True,
                        operation_settings=operation_settings,
                    )
                    for layer_id in range(n_refiner_layers)
                ]
            )

    def forward(self, cap_feats, control_context, x_freqs_cis, adaln_input):
        patch_size = 2

@@ -105,9 +132,29 @@ class ZImage_Control(torch.nn.Module):
        control_context = self.control_all_x_embedder[f"{patch_size}-{f_patch_size}"](control_context.view(B, C, H // pH, pH, W // pW, pW).permute(0, 2, 4, 3, 5, 1).flatten(3).flatten(1, 2))

        x_attn_mask = None
        for layer in self.control_noise_refiner:
            control_context = layer(control_context, x_attn_mask, x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input)
        if not self.refiner_control:
            for layer in self.control_noise_refiner:
                control_context = layer(control_context, x_attn_mask, x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input)

        return control_context

    def forward_noise_refiner_block(self, layer_id, control_context, x, x_attn_mask, x_freqs_cis, adaln_input):
        if self.refiner_control:
            if self.broken:
                if layer_id == 0:
                    return self.control_layers[layer_id](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
                if layer_id > 0:
                    out = None
                    for i in range(1, len(self.control_layers)):
                        o, control_context = self.control_layers[i](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
                        if out is None:
                            out = o

                    return (out, control_context)
            else:
                return self.control_noise_refiner[layer_id](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
        else:
            return (None, control_context)

    def forward_control_block(self, layer_id, control_context, x, x_attn_mask, x_freqs_cis, adaln_input):
        return self.control_layers[layer_id](control_context, x, x_mask=x_attn_mask, freqs_cis=x_freqs_cis[:control_context.shape[0], :control_context.shape[1]], adaln_input=adaln_input)
@@ -552,6 +552,7 @@ class NextDiT(nn.Module):
        bsz = len(x)
        pH = pW = self.patch_size
        device = x[0].device
        orig_x = x

        if self.pad_tokens_multiple is not None:
            pad_extra = (-cap_feats.shape[1]) % self.pad_tokens_multiple

@@ -588,13 +589,21 @@ class NextDiT(nn.Module):

        freqs_cis = self.rope_embedder(torch.cat((cap_pos_ids, x_pos_ids), dim=1)).movedim(1, 2)

        patches = transformer_options.get("patches", {})

        # refine context
        for layer in self.context_refiner:
            cap_feats = layer(cap_feats, cap_mask, freqs_cis[:, :cap_pos_ids.shape[1]], transformer_options=transformer_options)

        padded_img_mask = None
        for layer in self.noise_refiner:
        x_input = x
        for i, layer in enumerate(self.noise_refiner):
            x = layer(x, padded_img_mask, freqs_cis[:, cap_pos_ids.shape[1]:], t, transformer_options=transformer_options)
            if "noise_refiner" in patches:
                for p in patches["noise_refiner"]:
                    out = p({"img": x, "img_input": x_input, "txt": cap_feats, "pe": freqs_cis[:, cap_pos_ids.shape[1]:], "vec": t, "x": orig_x, "block_index": i, "transformer_options": transformer_options, "block_type": "noise_refiner"})
                    if "img" in out:
                        x = out["img"]

        padded_full_embed = torch.cat((cap_feats, x), dim=1)
        mask = None

@@ -640,14 +649,18 @@ class NextDiT(nn.Module):

        patches = transformer_options.get("patches", {})
        x_is_tensor = isinstance(x, torch.Tensor)
        img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, t, num_tokens, transformer_options=transformer_options)
        img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, transformer_options=transformer_options)
        freqs_cis = freqs_cis.to(img.device)

        transformer_options["total_blocks"] = len(self.layers)
        transformer_options["block_type"] = "double"
        img_input = img
        for i, layer in enumerate(self.layers):
            transformer_options["block_index"] = i
            img = layer(img, mask, freqs_cis, adaln_input, transformer_options=transformer_options)
            if "double_block" in patches:
                for p in patches["double_block"]:
                    out = p({"img": img[:, cap_size[0]:], "txt": img[:, :cap_size[0]], "pe": freqs_cis[:, cap_size[0]:], "vec": adaln_input, "x": x, "block_index": i, "transformer_options": transformer_options})
                    out = p({"img": img[:, cap_size[0]:], "img_input": img_input[:, cap_size[0]:], "txt": img[:, :cap_size[0]], "pe": freqs_cis[:, cap_size[0]:], "vec": adaln_input, "x": x, "block_index": i, "transformer_options": transformer_options})
                    if "img" in out:
                        img[:, cap_size[0]:] = out["img"]
                    if "txt" in out:
@@ -221,9 +221,24 @@ class QwenImageTransformerBlock(nn.Module):
            operations=operations,
        )

    def _modulate(self, x: torch.Tensor, mod_params: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    def _apply_gate(self, x, y, gate, timestep_zero_index=None):
        if timestep_zero_index is not None:
            return y + torch.cat((x[:, :timestep_zero_index] * gate[0], x[:, timestep_zero_index:] * gate[1]), dim=1)
        else:
            return torch.addcmul(y, gate, x)

    def _modulate(self, x: torch.Tensor, mod_params: torch.Tensor, timestep_zero_index=None) -> Tuple[torch.Tensor, torch.Tensor]:
        shift, scale, gate = torch.chunk(mod_params, 3, dim=-1)
        return torch.addcmul(shift.unsqueeze(1), x, 1 + scale.unsqueeze(1)), gate.unsqueeze(1)
        if timestep_zero_index is not None:
            actual_batch = shift.size(0) // 2
            shift, shift_0 = shift[:actual_batch], shift[actual_batch:]
            scale, scale_0 = scale[:actual_batch], scale[actual_batch:]
            gate, gate_0 = gate[:actual_batch], gate[actual_batch:]
            reg = torch.addcmul(shift.unsqueeze(1), x[:, :timestep_zero_index], 1 + scale.unsqueeze(1))
            zero = torch.addcmul(shift_0.unsqueeze(1), x[:, timestep_zero_index:], 1 + scale_0.unsqueeze(1))
            return torch.cat((reg, zero), dim=1), (gate.unsqueeze(1), gate_0.unsqueeze(1))
        else:
            return torch.addcmul(shift.unsqueeze(1), x, 1 + scale.unsqueeze(1)), gate.unsqueeze(1)

    def forward(
        self,

@@ -232,16 +247,21 @@ class QwenImageTransformerBlock(nn.Module):
        encoder_hidden_states_mask: torch.Tensor,
        temb: torch.Tensor,
        image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        transformer_options=None,
        timestep_zero_index=None,
        transformer_options=None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        if transformer_options is None:
            transformer_options = {}
        img_mod_params = self.img_mod(temb)

        if timestep_zero_index is not None:
            temb = temb.chunk(2, dim=0)[0]

        txt_mod_params = self.txt_mod(temb)
        img_mod1, img_mod2 = img_mod_params.chunk(2, dim=-1)
        txt_mod1, txt_mod2 = txt_mod_params.chunk(2, dim=-1)

        img_modulated, img_gate1 = self._modulate(self.img_norm1(hidden_states), img_mod1)
        img_modulated, img_gate1 = self._modulate(self.img_norm1(hidden_states), img_mod1, timestep_zero_index)
        del img_mod1
        txt_modulated, txt_gate1 = self._modulate(self.txt_norm1(encoder_hidden_states), txt_mod1)
        del txt_mod1

@@ -256,15 +276,15 @@ class QwenImageTransformerBlock(nn.Module):
        del img_modulated
        del txt_modulated

        hidden_states = hidden_states + img_gate1 * img_attn_output
        hidden_states = self._apply_gate(img_attn_output, hidden_states, img_gate1, timestep_zero_index)
        encoder_hidden_states = encoder_hidden_states + txt_gate1 * txt_attn_output
        del img_attn_output
        del txt_attn_output
        del img_gate1
        del txt_gate1

        img_modulated2, img_gate2 = self._modulate(self.img_norm2(hidden_states), img_mod2)
        hidden_states = torch.addcmul(hidden_states, img_gate2, self.img_mlp(img_modulated2))
        img_modulated2, img_gate2 = self._modulate(self.img_norm2(hidden_states), img_mod2, timestep_zero_index)
        hidden_states = self._apply_gate(self.img_mlp(img_modulated2), hidden_states, img_gate2, timestep_zero_index)

        txt_modulated2, txt_gate2 = self._modulate(self.txt_norm2(encoder_hidden_states), txt_mod2)
        encoder_hidden_states = torch.addcmul(encoder_hidden_states, txt_gate2, self.txt_mlp(txt_modulated2))
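Note: with timestep_zero_index set, _modulate returns a pair of gates, one computed from the real timestep for the first timestep_zero_index tokens and one from the zero timestep for the appended reference tokens, and _apply_gate applies them to the matching slices. A toy check of that split (shapes illustrative):

    import torch

    x = torch.ones(1, 6, 8)                  # 6 tokens, split at index 4
    y = torch.zeros(1, 6, 8)
    gate = (torch.full((1, 1, 8), 2.0),      # gate for tokens [:4]
            torch.full((1, 1, 8), 3.0))      # gate for tokens [4:]
    out = y + torch.cat((x[:, :4] * gate[0], x[:, 4:] * gate[1]), dim=1)
    assert out[:, :4].eq(2.0).all() and out[:, 4:].eq(3.0).all()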
@@ -307,7 +327,7 @@ class QwenImageTransformer2DModel(nn.Module):
        pooled_projection_dim: int = 768,
        guidance_embeds: bool = False,
        axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
        image_model=None,
        default_ref_method="index", image_model=None,
        final_layer=True, dtype=None,
        device=None,
        operations=None,

@@ -318,6 +338,7 @@ class QwenImageTransformer2DModel(nn.Module):
        self.in_channels = in_channels
        self.out_channels = out_channels or in_channels
        self.inner_dim = num_attention_heads * attention_head_dim
        self.default_ref_method = default_ref_method

        self.pe_embedder = EmbedND(dim=attention_head_dim, theta=10000, axes_dim=list(axes_dims_rope))

@@ -345,6 +366,9 @@ class QwenImageTransformer2DModel(nn.Module):
            for _ in range(num_layers)
        ])

        if self.default_ref_method == "index_timestep_zero":
            self.register_buffer("__index_timestep_zero__", torch.tensor([]))

        if final_layer:
            self.norm_out = LastLayer(self.inner_dim, self.inner_dim, dtype=dtype, device=device, operations=operations)
            self.proj_out = operations.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True, dtype=dtype, device=device)

@@ -399,11 +423,14 @@ class QwenImageTransformer2DModel(nn.Module):
        hidden_states, img_ids, orig_shape = self.process_img(x)
        num_embeds = hidden_states.shape[1]

        timestep_zero_index = None
        if ref_latents is not None:
            h = 0
            w = 0
            index = 0
            index_ref_method = kwargs.get("ref_latents_method", "index") == "index"
            ref_method = kwargs.get("ref_latents_method", self.default_ref_method)
            index_ref_method = (ref_method == "index") or (ref_method == "index_timestep_zero")
            timestep_zero = ref_method == "index_timestep_zero"
            for ref in ref_latents:
                if index_ref_method:
                    index += 1

@@ -423,6 +450,10 @@ class QwenImageTransformer2DModel(nn.Module):
                kontext, kontext_ids, _ = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset)
                hidden_states = torch.cat([hidden_states, kontext], dim=1)
                img_ids = torch.cat([img_ids, kontext_ids], dim=1)
            if timestep_zero:
                if index > 0:
                    timestep = torch.cat([timestep, timestep * 0], dim=0)
                    timestep_zero_index = num_embeds

        txt_start = round(max(((x.shape[-1] + (self.patch_size // 2)) // self.patch_size) // 2, ((x.shape[-2] + (self.patch_size // 2)) // self.patch_size) // 2))
        txt_ids = torch.arange(txt_start, txt_start + context.shape[1], device=x.device).reshape(1, -1, 1).repeat(x.shape[0], 1, 3)

@@ -454,7 +485,7 @@ class QwenImageTransformer2DModel(nn.Module):
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}
                    out["txt"], out["img"] = block(hidden_states=args["img"], encoder_hidden_states=args["txt"], encoder_hidden_states_mask=encoder_hidden_states_mask, temb=args["vec"], image_rotary_emb=args["pe"], transformer_options=args["transformer_options"])
                    out["txt"], out["img"] = block(hidden_states=args["img"], encoder_hidden_states=args["txt"], encoder_hidden_states_mask=encoder_hidden_states_mask, temb=args["vec"], image_rotary_emb=args["pe"], timestep_zero_index=timestep_zero_index, transformer_options=args["transformer_options"])
                    return out

                out = blocks_replace[("double_block", i)]({"img": hidden_states, "txt": encoder_hidden_states, "vec": temb, "pe": image_rotary_emb, "transformer_options": transformer_options}, {"original_block": block_wrap})

@@ -467,6 +498,7 @@ class QwenImageTransformer2DModel(nn.Module):
                    encoder_hidden_states_mask=encoder_hidden_states_mask,
                    temb=temb,
                    image_rotary_emb=image_rotary_emb,
                    timestep_zero_index=timestep_zero_index,
                    transformer_options=transformer_options,
                )

@@ -483,6 +515,9 @@ class QwenImageTransformer2DModel(nn.Module):
            if add is not None:
                hidden_states[:, :add.shape[1]] += add

        if timestep_zero_index is not None:
            temb = temb.chunk(2, dim=0)[0]

        hidden_states = self.norm_out(hidden_states, temb)
        hidden_states = self.proj_out(hidden_states)
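Note: "index_timestep_zero" appends reference latents after the num_embeds main image tokens and doubles the timestep batch with zeros, so the reference tokens are modulated as if already fully denoised. A toy trace of the two lines above (values illustrative):

    import torch

    num_embeds = 4                     # main image tokens
    timestep = torch.tensor([0.7])
    timestep = torch.cat([timestep, timestep * 0], dim=0)  # tensor([0.7, 0.0])
    timestep_zero_index = num_embeds   # tokens at index >= 4 get the zero timestep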
@@ -594,7 +594,10 @@ class WanModel(torch.nn.Module):

        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.blocks)
        transformer_options["block_type"] = "double"
        for i, block in enumerate(self.blocks):
            transformer_options["block_index"] = i
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}

@@ -796,7 +799,10 @@ class VaceWanModel(WanModel):

        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.blocks)
        transformer_options["block_type"] = "double"
        for i, block in enumerate(self.blocks):
            transformer_options["block_index"] = i
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}

@@ -898,7 +904,10 @@ class CameraWanModel(WanModel):

        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.blocks)
        transformer_options["block_type"] = "double"
        for i, block in enumerate(self.blocks):
            transformer_options["block_index"] = i
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}

@@ -1372,17 +1381,20 @@ class WanModel_S2V(WanModel):

        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.blocks)
        transformer_options["block_type"] = "double"
        for i, block in enumerate(self.blocks):
            transformer_options["block_index"] = i
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}
                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"])
                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"], transformer_options=args["transformer_options"])
                    return out

                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs}, {"original_block": block_wrap})
                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs, "transformer_options": transformer_options}, {"original_block": block_wrap})
                x = out["img"]
            else:
                x = block(x, e=e0, freqs=freqs, context=context)
                x = block(x, e=e0, freqs=freqs, context=context, transformer_options=transformer_options)
            if audio_emb is not None and audio_emb_global is not None:
                x = self.audio_injector(x, i, audio_emb, audio_emb_global, seq_len)
        # head

@@ -1634,7 +1646,10 @@ class HumoWanModel(WanModel):

        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.blocks)
        transformer_options["block_type"] = "double"
        for i, block in enumerate(self.blocks):
            transformer_options["block_index"] = i
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}

@@ -534,7 +534,10 @@ class AnimateWanModel(WanModel):

        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
        transformer_options["total_blocks"] = len(self.blocks)
        transformer_options["block_type"] = "double"
        for i, block in enumerate(self.blocks):
            transformer_options["block_index"] = i
            if ("double_block", i) in blocks_replace:
                def block_wrap(args):
                    out = {}
@@ -268,7 +268,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["nerf_tile_size"] = 512
        dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
        dit_config["nerf_embedder_dtype"] = torch.float32
        if "__x0__" in state_dict_keys:  # x0 pred
        if "{}__x0__".format(key_prefix) in state_dict_keys:  # x0 pred
            dit_config["use_x0"] = True
        else:
            dit_config["use_x0"] = False

@@ -627,6 +627,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["image_model"] = "qwen_image"
        dit_config["in_channels"] = state_dict['{}img_in.weight'.format(key_prefix)].shape[1]
        dit_config["num_layers"] = count_blocks(state_dict_keys, '{}transformer_blocks.'.format(key_prefix) + '{}.')
        if "{}__index_timestep_zero__".format(key_prefix) in state_dict_keys:  # 2511
            dit_config["default_ref_method"] = "index_timestep_zero"
        return dit_config

    if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys:  # Kandinsky 5
@@ -522,6 +522,9 @@ class ModelPatcher(ModelManageable, PatchSupport):
    def set_model_post_input_patch(self, patch):
        self.set_model_patch(patch, "post_input")

    def set_model_noise_refiner_patch(self, patch):
        self.set_model_patch(patch, "noise_refiner")

    def set_model_rope_options(self, scale_x, shift_x, scale_y, shift_y, scale_t, shift_t, **kwargs):
        rope_options = self.model_options["transformer_options"].get("rope_options", {})
        rope_options["scale_x"] = scale_x
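Note: set_model_noise_refiner_patch feeds the "noise_refiner" patch list used in the NextDiT hunk above. A sketch of a conforming patch callable; the 1.05 scaling is purely illustrative, and patcher stands for any ModelPatcher instance:

    def my_noise_refiner_patch(args):
        img = args["img"]  # output of the current noise_refiner block
        if args["block_index"] == 0:
            img = img * 1.05  # illustrative tweak only
        return {"img": img}

    patcher.set_model_noise_refiner_patch(my_noise_refiner_patch)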
@@ -681,7 +681,7 @@ def mixed_precision_ops(quant_config=None, compute_dtype=torch.bfloat16, full_pr
            quant_conf = {"format": self.quant_format}
            if self._full_precision_mm:
                quant_conf["full_precision_matrix_mult"] = True
            sd["{}comfy_quant".format(prefix)] = torch.frombuffer(json.dumps(quant_conf).encode('utf-8'), dtype=torch.uint8)  # pylint: disable=unsupported-assignment-operation
            sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)  # pylint: disable=unsupported-assignment-operation
            return sd

        def _forward(self, input, weight, bias):
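Note: torch.frombuffer shares memory with the underlying Python buffer and warns when that buffer is read-only (as bytes objects are), while torch.tensor(list(...)) makes an owned copy; presumably that is the motivation here and in the convert_old_quants hunk below. Both encode the same bytes:

    import json
    import torch

    payload = json.dumps({"format": "example"}).encode("utf-8")  # illustrative config
    t = torch.tensor(list(payload), dtype=torch.uint8)
    assert bytes(t.tolist()) == payload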
@@ -1,4 +1,4 @@
KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2", "dpm_2", "dpm_2_ancestral",
KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2", "exp_heun_2_x0", "exp_heun_2_x0_sde", "dpm_2", "dpm_2_ancestral",
                  "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu",
                  "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_2m_sde_heun", "dpmpp_2m_sde_heun_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm",
                  "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp",
@@ -10,6 +10,7 @@ from . import sd1_clip
from . import sdxl_clip
from . import supported_models_base
from . import utils
from .model_management import extended_fp16_support
from .text_encoders import ace
from .text_encoders import aura_t5
from .text_encoders import cosmos

@@ -1110,7 +1111,13 @@ class ZImage(Lumina2):

    memory_usage_factor = 2.0

    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
    supported_inference_dtypes = [torch.bfloat16, torch.float32]

    def __init__(self, unet_config):
        super().__init__(unet_config)
        if extended_fp16_support():
            self.supported_inference_dtypes = self.supported_inference_dtypes.copy()
            self.supported_inference_dtypes.insert(1, torch.float16)

    def clip_target(self, state_dict={}):
        pref = self.text_encoder_key_prefix[0]
@@ -1539,6 +1539,5 @@ def convert_old_quants(state_dict, model_prefix="", metadata={}):
    if quant_metadata is not None:
        layers = quant_metadata["layers"]
        for k, v in layers.items():
            state_dict["{}.comfy_quant".format(k)] = torch.frombuffer(bytearray(json.dumps(v).encode('utf-8')), dtype=torch.uint8)
            state_dict["{}.comfy_quant".format(k)] = torch.tensor(list(json.dumps(v).encode('utf-8')), dtype=torch.uint8)
    return state_dict, metadata
@@ -5,12 +5,12 @@ This module handles capability negotiation between frontend and backend,
allowing graceful protocol evolution while maintaining backward compatibility.
"""

from typing import Any, Dict
from typing import Any

from comfy.cli_args import args

# Default server capabilities
SERVER_FEATURE_FLAGS: Dict[str, Any] = {
SERVER_FEATURE_FLAGS: dict[str, Any] = {
    "supports_preview_metadata": True,
    "max_upload_size": args.max_upload_size * 1024 * 1024,  # Convert MB to bytes
    "extension": {"manager": {"supports_v4": True}},

@@ -18,7 +18,7 @@ SERVER_FEATURE_FLAGS: Dict[str, Any] = {


def get_connection_feature(
    sockets_metadata: Dict[str, Dict[str, Any]],
    sockets_metadata: dict[str, dict[str, Any]],
    sid: str,
    feature_name: str,
    default: Any = False

@@ -42,7 +42,7 @@ def get_connection_feature(


def supports_feature(
    sockets_metadata: Dict[str, Dict[str, Any]],
    sockets_metadata: dict[str, dict[str, Any]],
    sid: str,
    feature_name: str,
    force=True,

@@ -64,7 +64,7 @@ def supports_feature(
    return get_connection_feature(sockets_metadata, sid, feature_name, False) is True


def get_server_features() -> Dict[str, Any]:
def get_server_features() -> dict[str, Any]:
    """
    Get the server's feature flags.
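Note: the typing changes in this and the following hunks (Dict -> dict, List -> list, Type -> type) adopt the builtin generics standardized by PEP 585 for Python 3.9+; the two spellings are equivalent at runtime:

    from typing import Dict

    old_style: Dict[str, int] = {"a": 1}
    new_style: dict[str, int] = {"a": 1}
    assert old_style == new_style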
@@ -1,4 +1,4 @@
from typing import Type, List, NamedTuple
from typing import NamedTuple
from comfy_api.internal.singleton import ProxiedSingleton
from packaging import version as packaging_version

@@ -10,7 +10,7 @@ class ComfyAPIBase(ProxiedSingleton):

class ComfyAPIWithVersion(NamedTuple):
    version: str
    api_class: Type[ComfyAPIBase]
    api_class: type[ComfyAPIBase]


def parse_version(version_str: str) -> packaging_version.Version:

@@ -23,16 +23,16 @@ def parse_version(version_str: str) -> packaging_version.Version:
    return packaging_version.parse(version_str)


registered_versions: List[ComfyAPIWithVersion] = []
registered_versions: list[ComfyAPIWithVersion] = []


def register_versions(versions: List[ComfyAPIWithVersion]):
def register_versions(versions: list[ComfyAPIWithVersion]):
    versions.sort(key=lambda x: parse_version(x.version))
    global registered_versions
    registered_versions = versions


def get_all_versions() -> List[ComfyAPIWithVersion]:
def get_all_versions() -> list[ComfyAPIWithVersion]:
    """
    Returns a list of all registered ComfyAPI versions.
    """
@@ -8,7 +8,7 @@ import os
import textwrap
import threading
from enum import Enum
from typing import Optional, Type, get_origin, get_args, get_type_hints
from typing import Optional, get_origin, get_args, get_type_hints


class TypeTracker:

@@ -198,7 +198,7 @@ class AsyncToSyncConverter:
        return result_container["result"]

    @classmethod
    def create_sync_class(cls, async_class: Type, thread_pool_size=10) -> Type:
    def create_sync_class(cls, async_class: type, thread_pool_size=10) -> type:
        """
        Creates a new class with synchronous versions of all async methods.

@@ -568,7 +568,7 @@ class AsyncToSyncConverter:

    @classmethod
    def _generate_imports(
        cls, async_class: Type, type_tracker: TypeTracker
        cls, async_class: type, type_tracker: TypeTracker
    ) -> list[str]:
        """Generate import statements for the stub file."""
        imports = []

@@ -633,7 +633,7 @@ class AsyncToSyncConverter:
        return imports

    @classmethod
    def _get_class_attributes(cls, async_class: Type) -> list[tuple[str, Type]]:
    def _get_class_attributes(cls, async_class: type) -> list[tuple[str, type]]:
        """Extract class attributes that are classes themselves."""
        class_attributes = []

@@ -659,7 +659,7 @@ class AsyncToSyncConverter:
    def _generate_inner_class_stub(
        cls,
        name: str,
        attr: Type,
        attr: type,
        indent: str = "    ",
        type_tracker: Optional[TypeTracker] = None,
    ) -> list[str]:

@@ -787,7 +787,7 @@ class AsyncToSyncConverter:
        return processed

    @classmethod
    def generate_stub_file(cls, async_class: Type, sync_class: Type) -> None:
    def generate_stub_file(cls, async_class: type, sync_class: type) -> None:
        """
        Generate a .pyi stub file for the sync class to help IDEs with type checking.
        """

@@ -993,7 +993,7 @@ class AsyncToSyncConverter:
        logging.error(traceback.format_exc())


def create_sync_class(async_class: Type, thread_pool_size=10) -> Type:
def create_sync_class(async_class: type, thread_pool_size=10) -> type:
    """
    Creates a sync version of an async class
    """
@@ -1,4 +1,4 @@
from typing import Type, TypeVar
from typing import TypeVar

class SingletonMetaclass(type):
    T = TypeVar("T", bound="SingletonMetaclass")

@@ -11,13 +11,13 @@ class SingletonMetaclass(type):
            )
        return cls._instances[cls]

    def inject_instance(cls: Type[T], instance: T) -> None:
    def inject_instance(cls: type[T], instance: T) -> None:
        assert cls not in SingletonMetaclass._instances, (
            "Cannot inject instance after first instantiation"
        )
        SingletonMetaclass._instances[cls] = instance

    def get_instance(cls: Type[T], *args, **kwargs) -> T:
    def get_instance(cls: type[T], *args, **kwargs) -> T:
        """
        Gets the singleton instance of the class, creating it if it doesn't exist.
        """
@@ -1,7 +1,7 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Type, TYPE_CHECKING
from typing import TYPE_CHECKING
from comfy_api.internal import ComfyAPIBase
from comfy_api.internal.singleton import ProxiedSingleton
from comfy_api.internal.async_to_sync import create_sync_class

@@ -113,7 +113,7 @@ ComfyAPI = ComfyAPI_latest
if TYPE_CHECKING:
    import comfy_api.latest.generated.ComfyAPISyncStub  # type: ignore

    ComfyAPISync: Type[comfy_api.latest.generated.ComfyAPISyncStub.ComfyAPISyncStub]
    ComfyAPISync: type[comfy_api.latest.generated.ComfyAPISyncStub.ComfyAPISyncStub]
ComfyAPISync = create_sync_class(ComfyAPI_latest)

# create new aliases for io and ui
@@ -1,5 +1,5 @@
import torch
from typing import TypedDict, List, Optional
from typing import TypedDict, Optional

ImageInput = torch.Tensor
"""

@@ -39,4 +39,4 @@ class LatentInput(TypedDict):
    Optional noise mask tensor in the same format as samples.
    """

    batch_index: Optional[List[int]]
    batch_index: Optional[list[int]]
@@ -6,7 +6,6 @@ import os
import random
import uuid
from io import BytesIO
from typing import Type

import av
import numpy as np

@@ -87,7 +86,7 @@ class ImageSaveHelper:
        return PILImage.fromarray(np.clip(255.0 * image_tensor.cpu().numpy(), 0, 255).astype(np.uint8))

    @staticmethod
    def _create_png_metadata(cls: Type[ComfyNode] | None) -> PngInfo | None:
    def _create_png_metadata(cls: type[ComfyNode] | None) -> PngInfo | None:
        """Creates a PngInfo object with prompt and extra_pnginfo."""
        if args.disable_metadata or cls is None or not cls.hidden:
            return None

@@ -100,7 +99,7 @@ class ImageSaveHelper:
        return metadata

    @staticmethod
    def _create_animated_png_metadata(cls: Type[ComfyNode] | None) -> PngInfo | None:
    def _create_animated_png_metadata(cls: type[ComfyNode] | None) -> PngInfo | None:
        """Creates a PngInfo object with prompt and extra_pnginfo for animated PNGs (APNG)."""
        if args.disable_metadata or cls is None or not cls.hidden:
            return None

@@ -125,7 +124,7 @@ class ImageSaveHelper:
        return metadata

    @staticmethod
    def _create_webp_metadata(pil_image: PILImage.Image, cls: Type[ComfyNode] | None) -> PILImage.Exif:
    def _create_webp_metadata(pil_image: PILImage.Image, cls: type[ComfyNode] | None) -> PILImage.Exif:
        """Creates EXIF metadata bytes for WebP images."""
        exif_data = pil_image.getexif()
        if args.disable_metadata or cls is None or cls.hidden is None:

@@ -141,7 +140,7 @@ class ImageSaveHelper:

    @staticmethod
    def save_images(
        images, filename_prefix: str, folder_type: FolderType, cls: Type[ComfyNode] | None, compress_level=4,
        images, filename_prefix: str, folder_type: FolderType, cls: type[ComfyNode] | None, compress_level=4,
    ) -> list[SavedResult]:
        """Saves a batch of images as individual PNG files."""
        full_output_folder, filename, counter, subfolder, _ = folder_paths.get_save_image_path(

@@ -159,7 +158,7 @@ class ImageSaveHelper:
        return results

    @staticmethod
    def get_save_images_ui(images, filename_prefix: str, cls: Type[ComfyNode] | None, compress_level=4) -> SavedImages:
    def get_save_images_ui(images, filename_prefix: str, cls: type[ComfyNode] | None, compress_level=4) -> SavedImages:
        """Saves a batch of images and returns a UI object for the node output."""
        return SavedImages(
            ImageSaveHelper.save_images(

@@ -173,7 +172,7 @@ class ImageSaveHelper:

    @staticmethod
    def save_animated_png(
        images, filename_prefix: str, folder_type: FolderType, cls: Type[ComfyNode] | None, fps: float, compress_level: int
        images, filename_prefix: str, folder_type: FolderType, cls: type[ComfyNode] | None, fps: float, compress_level: int
    ) -> SavedResult:
        """Saves a batch of images as a single animated PNG."""
        full_output_folder, filename, counter, subfolder, _ = folder_paths.get_save_image_path(

@@ -195,7 +194,7 @@ class ImageSaveHelper:

    @staticmethod
    def get_save_animated_png_ui(
        images, filename_prefix: str, cls: Type[ComfyNode] | None, fps: float, compress_level: int
        images, filename_prefix: str, cls: type[ComfyNode] | None, fps: float, compress_level: int
    ) -> SavedImages:
        """Saves an animated PNG and returns a UI object for the node output."""
        result = ImageSaveHelper.save_animated_png(

@@ -213,7 +212,7 @@ class ImageSaveHelper:
        images,
        filename_prefix: str,
        folder_type: FolderType,
        cls: Type[ComfyNode] | None,
        cls: type[ComfyNode] | None,
        fps: float,
        lossless: bool,
        quality: int,

@@ -242,7 +241,7 @@ class ImageSaveHelper:
    def get_save_animated_webp_ui(
        images,
        filename_prefix: str,
        cls: Type[ComfyNode] | None,
        cls: type[ComfyNode] | None,
        fps: float,
        lossless: bool,
        quality: int,

@@ -271,7 +270,7 @@ class AudioSaveHelper:
        audio: dict,
        filename_prefix: str,
        folder_type: FolderType,
        cls: Type[ComfyNode] | None,
        cls: type[ComfyNode] | None,
        format: str = "flac",
        quality: str = "128k",
    ) -> list[SavedResult]:

@@ -378,7 +377,7 @@ class AudioSaveHelper:

    @staticmethod
    def get_save_audio_ui(
        audio, filename_prefix: str, cls: Type[ComfyNode] | None, format: str = "flac", quality: str = "128k",
        audio, filename_prefix: str, cls: type[ComfyNode] | None, format: str = "flac", quality: str = "128k",
    ) -> SavedAudios:
        """Save and instantly wrap for UI."""
        return SavedAudios(

@@ -394,7 +393,7 @@ class AudioSaveHelper:


class PreviewImage(_UIOutput):
    def __init__(self, image: Image.Type, animated: bool = False, cls: Type[ComfyNode] = None, **kwargs):
    def __init__(self, image: Image.Type, animated: bool = False, cls: type[ComfyNode] = None, **kwargs):
        self.values = ImageSaveHelper.save_images(
            image,
            filename_prefix="ComfyUI_temp_" + ''.join(random.choice("abcdefghijklmnopqrstupvxyz") for _ in range(5)),

@@ -418,7 +417,7 @@ class PreviewMask(PreviewImage):


class PreviewAudio(_UIOutput):
    def __init__(self, audio: dict, cls: Type[ComfyNode] = None, **kwargs):
    def __init__(self, audio: dict, cls: type[ComfyNode] = None, **kwargs):
        self.values = AudioSaveHelper.save_audio(
            audio,
            filename_prefix="ComfyUI_temp_" + "".join(random.choice("abcdefghijklmnopqrstuvwxyz") for _ in range(5)),
||||
@ -37,7 +37,7 @@ def apply_torch_compile_factory(compiled_module_dict: dict[str, Callable]) -> Ca
|
||||
|
||||
def set_torch_compile_wrapper(model: ModelPatcher, backend: str, options: Optional[dict[str, str]] = None,
|
||||
mode: Optional[str] = None, fullgraph=False, dynamic: Optional[bool] = None,
|
||||
keys: list[str] = ["diffusion_model"], *args, **kwargs):
|
||||
keys: list[str] = None, *args, **kwargs):
|
||||
'''
|
||||
Perform torch.compile that will be applied at sample time for either the whole model or specific params of the BaseModel instance.
|
||||
|
||||
|
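Note: the old signature used a mutable list as a default argument, which Python evaluates once at definition time and shares across calls. With the default moved to None, the body presumably falls back at call time, along these lines (sketch, not this file's code):

    def set_torch_compile_wrapper(model, backend, keys=None, **kwargs):
        keys = ["diffusion_model"] if keys is None else keys  # fresh list per call
        ...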
||||
@ -2,9 +2,8 @@ from comfy_api.latest import ComfyAPI_latest
|
||||
from comfy_api.v0_0_2 import ComfyAPIAdapter_v0_0_2
|
||||
from comfy_api.v0_0_1 import ComfyAPIAdapter_v0_0_1
|
||||
from comfy_api.internal import ComfyAPIBase
|
||||
from typing import List, Type
|
||||
|
||||
supported_versions: List[Type[ComfyAPIBase]] = [
|
||||
supported_versions: list[type[ComfyAPIBase]] = [
|
||||
ComfyAPI_latest,
|
||||
ComfyAPIAdapter_v0_0_2,
|
||||
ComfyAPIAdapter_v0_0_1,
|
||||
|
||||
@ -1,100 +0,0 @@
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class Pikaffect(str, Enum):
|
||||
Cake_ify = "Cake-ify"
|
||||
Crumble = "Crumble"
|
||||
Crush = "Crush"
|
||||
Decapitate = "Decapitate"
|
||||
Deflate = "Deflate"
|
||||
Dissolve = "Dissolve"
|
||||
Explode = "Explode"
|
||||
Eye_pop = "Eye-pop"
|
||||
Inflate = "Inflate"
|
||||
Levitate = "Levitate"
|
||||
Melt = "Melt"
|
||||
Peel = "Peel"
|
||||
Poke = "Poke"
|
||||
Squish = "Squish"
|
||||
Ta_da = "Ta-da"
|
||||
Tear = "Tear"
|
||||
|
||||
|
||||
class PikaBodyGenerate22C2vGenerate22PikascenesPost(BaseModel):
|
||||
aspectRatio: Optional[float] = Field(None, description='Aspect ratio (width / height)')
|
||||
duration: Optional[int] = Field(5)
|
||||
ingredientsMode: str = Field(...)
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
promptText: Optional[str] = Field(None)
|
||||
resolution: Optional[str] = Field('1080p')
|
||||
seed: Optional[int] = Field(None)
|
||||
|
||||
|
||||
class PikaGenerateResponse(BaseModel):
|
||||
video_id: str = Field(...)
|
||||
|
||||
|
||||
class PikaBodyGenerate22I2vGenerate22I2vPost(BaseModel):
|
||||
duration: Optional[int] = 5
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
promptText: Optional[str] = Field(None)
|
||||
resolution: Optional[str] = '1080p'
|
||||
seed: Optional[int] = Field(None)
|
||||
|
||||
|
||||
class PikaBodyGenerate22KeyframeGenerate22PikaframesPost(BaseModel):
|
||||
duration: Optional[int] = Field(None, ge=5, le=10)
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
promptText: str = Field(...)
|
||||
resolution: Optional[str] = '1080p'
|
||||
seed: Optional[int] = Field(None)
|
||||
|
||||
|
||||
class PikaBodyGenerate22T2vGenerate22T2vPost(BaseModel):
|
||||
aspectRatio: Optional[float] = Field(
|
||||
1.7777777777777777,
|
||||
description='Aspect ratio (width / height)',
|
||||
ge=0.4,
|
||||
le=2.5,
|
||||
)
|
||||
duration: Optional[int] = 5
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
promptText: str = Field(...)
|
||||
resolution: Optional[str] = '1080p'
|
||||
seed: Optional[int] = Field(None)
|
||||
|
||||
|
||||
class PikaBodyGeneratePikadditionsGeneratePikadditionsPost(BaseModel):
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
promptText: Optional[str] = Field(None)
|
||||
seed: Optional[int] = Field(None)
|
||||
|
||||
|
||||
class PikaBodyGeneratePikaffectsGeneratePikaffectsPost(BaseModel):
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
pikaffect: Optional[str] = None
|
||||
promptText: Optional[str] = Field(None)
|
||||
seed: Optional[int] = Field(None)
|
||||
|
||||
|
||||
class PikaBodyGeneratePikaswapsGeneratePikaswapsPost(BaseModel):
|
||||
negativePrompt: Optional[str] = Field(None)
|
||||
promptText: Optional[str] = Field(None)
|
||||
seed: Optional[int] = Field(None)
|
||||
modifyRegionRoi: Optional[str] = Field(None)
|
||||
|
||||
|
||||
class PikaStatusEnum(str, Enum):
|
||||
queued = "queued"
|
||||
started = "started"
|
||||
finished = "finished"
|
||||
failed = "failed"
|
||||
|
||||
|
||||
class PikaVideoResponse(BaseModel):
|
||||
id: str = Field(...)
|
||||
progress: Optional[int] = Field(None)
|
||||
status: PikaStatusEnum
|
||||
url: Optional[str] = Field(None)
|
||||
@ -5,11 +5,17 @@ from typing import Optional, List, Dict, Any, Union
|
||||
from pydantic import BaseModel, Field, RootModel
|
||||
|
||||
class TripoModelVersion(str, Enum):
|
||||
v3_0_20250812 = 'v3.0-20250812'
|
||||
v2_5_20250123 = 'v2.5-20250123'
|
||||
v2_0_20240919 = 'v2.0-20240919'
|
||||
v1_4_20240625 = 'v1.4-20240625'
|
||||
|
||||
|
||||
class TripoGeometryQuality(str, Enum):
|
||||
standard = 'standard'
|
||||
detailed = 'detailed'
|
||||
|
||||
|
||||
class TripoTextureQuality(str, Enum):
|
||||
standard = 'standard'
|
||||
detailed = 'detailed'
|
||||
@ -61,14 +67,20 @@ class TripoSpec(str, Enum):
|
||||
class TripoAnimation(str, Enum):
|
||||
IDLE = "preset:idle"
|
||||
WALK = "preset:walk"
|
||||
RUN = "preset:run"
|
||||
DIVE = "preset:dive"
|
||||
CLIMB = "preset:climb"
|
||||
JUMP = "preset:jump"
|
||||
RUN = "preset:run"
|
||||
SLASH = "preset:slash"
|
||||
SHOOT = "preset:shoot"
|
||||
HURT = "preset:hurt"
|
||||
FALL = "preset:fall"
|
||||
TURN = "preset:turn"
|
||||
QUADRUPED_WALK = "preset:quadruped:walk"
|
||||
HEXAPOD_WALK = "preset:hexapod:walk"
|
||||
OCTOPOD_WALK = "preset:octopod:walk"
|
||||
SERPENTINE_MARCH = "preset:serpentine:march"
|
||||
AQUATIC_MARCH = "preset:aquatic:march"
|
||||
|
||||
class TripoStylizeStyle(str, Enum):
|
||||
LEGO = "lego"
|
||||
@ -105,6 +117,11 @@ class TripoTaskStatus(str, Enum):
|
||||
BANNED = "banned"
|
||||
EXPIRED = "expired"
|
||||
|
||||
class TripoFbxPreset(str, Enum):
|
||||
BLENDER = "blender"
|
||||
MIXAMO = "mixamo"
|
||||
_3DSMAX = "3dsmax"
|
||||
|
||||
class TripoFileTokenReference(BaseModel):
|
||||
type: Optional[str] = Field(None, description='The type of the reference')
|
||||
file_token: str
|
||||
@ -142,6 +159,7 @@ class TripoTextToModelRequest(BaseModel):
|
||||
model_seed: Optional[int] = Field(None, description='The seed for the model')
|
||||
texture_seed: Optional[int] = Field(None, description='The seed for the texture')
|
||||
texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard
|
||||
    geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard
    style: Optional[TripoStyle] = None
    auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model')
    quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model')
@ -156,6 +174,7 @@ class TripoImageToModelRequest(BaseModel):
    model_seed: Optional[int] = Field(None, description='The seed for the model')
    texture_seed: Optional[int] = Field(None, description='The seed for the texture')
    texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard
    geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard
    texture_alignment: Optional[TripoTextureAlignment] = Field(TripoTextureAlignment.ORIGINAL_IMAGE, description='The texture alignment method')
    style: Optional[TripoStyle] = Field(None, description='The style to apply to the generated model')
    auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model')
@ -173,6 +192,7 @@ class TripoMultiviewToModelRequest(BaseModel):
    model_seed: Optional[int] = Field(None, description='The seed for the model')
    texture_seed: Optional[int] = Field(None, description='The seed for the texture')
    texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard
    geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard
    texture_alignment: Optional[TripoTextureAlignment] = TripoTextureAlignment.ORIGINAL_IMAGE
    auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model')
    orientation: Optional[TripoOrientation] = Field(TripoOrientation.DEFAULT, description='The orientation for the model')
@ -219,14 +239,24 @@ class TripoConvertModelRequest(BaseModel):
    type: TripoTaskType = Field(TripoTaskType.CONVERT_MODEL, description='Type of task')
    format: TripoConvertFormat = Field(..., description='The format to convert to')
    original_model_task_id: str = Field(..., description='The task ID of the original model')
    quad: Optional[bool] = Field(False, description='Whether to apply quad to the model')
    force_symmetry: Optional[bool] = Field(False, description='Whether to force symmetry')
    face_limit: Optional[int] = Field(10000, description='The number of faces to limit the conversion to')
    flatten_bottom: Optional[bool] = Field(False, description='Whether to flatten the bottom of the model')
    flatten_bottom_threshold: Optional[float] = Field(0.01, description='The threshold for flattening the bottom')
    texture_size: Optional[int] = Field(4096, description='The size of the texture')
    quad: Optional[bool] = Field(None, description='Whether to apply quad to the model')
    force_symmetry: Optional[bool] = Field(None, description='Whether to force symmetry')
    face_limit: Optional[int] = Field(None, description='The number of faces to limit the conversion to')
    flatten_bottom: Optional[bool] = Field(None, description='Whether to flatten the bottom of the model')
    flatten_bottom_threshold: Optional[float] = Field(None, description='The threshold for flattening the bottom')
    texture_size: Optional[int] = Field(None, description='The size of the texture')
    texture_format: Optional[TripoTextureFormat] = Field(TripoTextureFormat.JPEG, description='The format of the texture')
    pivot_to_center_bottom: Optional[bool] = Field(False, description='Whether to pivot to the center bottom')
    pivot_to_center_bottom: Optional[bool] = Field(None, description='Whether to pivot to the center bottom')
    scale_factor: Optional[float] = Field(None, description='The scale factor for the model')
    with_animation: Optional[bool] = Field(None, description='Whether to include animations')
    pack_uv: Optional[bool] = Field(None, description='Whether to pack the UVs')
    bake: Optional[bool] = Field(None, description='Whether to bake the model')
    part_names: Optional[List[str]] = Field(None, description='The names of the parts to include')
    fbx_preset: Optional[TripoFbxPreset] = Field(None, description='The preset for the FBX export')
    export_vertex_colors: Optional[bool] = Field(None, description='Whether to export the vertex colors')
    export_orientation: Optional[TripoOrientation] = Field(None, description='The orientation for the export')
    animate_in_place: Optional[bool] = Field(None, description='Whether to animate in place')


class TripoTaskRequest(RootModel):
    root: Union[

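# Editor's note (not part of the diff): the defaults above move from concrete values
# (10000, 4096, False, ...) to None, presumably so unset fields can be dropped from the
# request payload and the API's server-side defaults apply instead. A minimal sketch of
# that idea, assuming the request models are serialized with exclude_none:
#   req = TripoConvertModelRequest(format=fmt, original_model_task_id="task-123")
#   payload = req.model_dump(exclude_none=True)  # omits quad, face_limit, etc.
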
@ -105,10 +105,6 @@ AVERAGE_DURATION_VIDEO_EXTEND = 320


MODE_TEXT2VIDEO = {
    "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
    "standard mode / 10s duration / kling-v1": ("std", "10", "kling-v1"),
    "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
    "pro mode / 10s duration / kling-v1": ("pro", "10", "kling-v1"),
    "standard mode / 5s duration / kling-v1-6": ("std", "5", "kling-v1-6"),
    "standard mode / 10s duration / kling-v1-6": ("std", "10", "kling-v1-6"),
    "pro mode / 5s duration / kling-v2-master": ("pro", "5", "kling-v2-master"),
@ -129,8 +125,6 @@ See: [Kling API Docs Capability Map](https://app.klingai.com/global/dev/document


MODE_START_END_FRAME = {
    "standard mode / 5s duration / kling-v1": ("std", "5", "kling-v1"),
    "pro mode / 5s duration / kling-v1": ("pro", "5", "kling-v1"),
    "pro mode / 5s duration / kling-v1-5": ("pro", "5", "kling-v1-5"),
    "pro mode / 10s duration / kling-v1-5": ("pro", "10", "kling-v1-5"),
    "pro mode / 5s duration / kling-v1-6": ("pro", "5", "kling-v1-6"),
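# Editor's sketch of how these mode tables are consumed (illustrative only, not part
# of the diff): each combo string unpacks into the API's (mode, duration, model) triple.
#   mode, duration, model_name = MODE_TEXT2VIDEO["pro mode / 5s duration / kling-v2-master"]
#   # mode == "pro", duration == "5", model_name == "kling-v2-master"
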
@ -754,7 +748,7 @@ class KlingTextToVideoNode(IO.ComfyNode):
                IO.Combo.Input(
                    "mode",
                    options=modes,
                    default=modes[4],
                    default=modes[8],
                    tooltip="The configuration to use for the video generation following the format: mode / duration / model_name.",
                ),
            ],
@ -1489,7 +1483,7 @@ class KlingStartEndFrameNode(IO.ComfyNode):
                IO.Combo.Input(
                    "mode",
                    options=modes,
                    default=modes[8],
                    default=modes[6],
                    tooltip="The configuration to use for the video generation following the format: mode / duration / model_name.",
                ),
            ],
@ -1952,7 +1946,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
                IO.Combo.Input(
                    "model_name",
                    options=[i.value for i in KlingImageGenModelName],
                    default="kling-v1",
                    default="kling-v2",
                ),
                IO.Combo.Input(
                    "aspect_ratio",

@ -1,575 +0,0 @@
"""
Pika x ComfyUI API Nodes

Pika API docs: https://pika-827374fb.mintlify.app/api-reference
"""
from __future__ import annotations

from io import BytesIO
import logging
from typing import Optional

import torch

from typing_extensions import override
from comfy_api.latest import ComfyExtension, IO
from comfy_api.input_impl.video_types import VideoCodec, VideoContainer, VideoInput
from comfy_api_nodes.apis import pika_api as pika_defs
from comfy_api_nodes.util import (
    validate_string,
    download_url_to_video_output,
    tensor_to_bytesio,
    ApiEndpoint,
    sync_op,
    poll_op,
)


PATH_PIKADDITIONS = "/proxy/pika/generate/pikadditions"
PATH_PIKASWAPS = "/proxy/pika/generate/pikaswaps"
PATH_PIKAFFECTS = "/proxy/pika/generate/pikaffects"

PIKA_API_VERSION = "2.2"
PATH_TEXT_TO_VIDEO = f"/proxy/pika/generate/{PIKA_API_VERSION}/t2v"
PATH_IMAGE_TO_VIDEO = f"/proxy/pika/generate/{PIKA_API_VERSION}/i2v"
PATH_PIKAFRAMES = f"/proxy/pika/generate/{PIKA_API_VERSION}/pikaframes"
PATH_PIKASCENES = f"/proxy/pika/generate/{PIKA_API_VERSION}/pikascenes"

PATH_VIDEO_GET = "/proxy/pika/videos"


async def execute_task(
    task_id: str,
    cls: type[IO.ComfyNode],
) -> IO.NodeOutput:
    final_response: pika_defs.PikaVideoResponse = await poll_op(
        cls,
        ApiEndpoint(path=f"{PATH_VIDEO_GET}/{task_id}"),
        response_model=pika_defs.PikaVideoResponse,
        status_extractor=lambda response: (response.status.value if response.status else None),
        progress_extractor=lambda response: (response.progress if hasattr(response, "progress") else None),
        estimated_duration=60,
        max_poll_attempts=240,
    )
    if not final_response.url:
        error_msg = f"Pika task {task_id} succeeded but no video data found in response:\n{final_response}"
        logging.error(error_msg)
        raise Exception(error_msg)
    video_url = final_response.url
    logging.info("Pika task %s succeeded. Video URL: %s", task_id, video_url)
    return IO.NodeOutput(await download_url_to_video_output(video_url))


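# Editor's note (added for orientation, not part of the removed file): every Pika node
# below follows the same submit-then-poll shape, schematically:
#   initial = await sync_op(cls, ApiEndpoint(path=..., method="POST"), ...)
#   return await execute_task(initial.video_id, cls)
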
def get_base_inputs_types() -> list[IO.Input]:
    """Get the base required inputs types common to all Pika nodes."""
    return [
        IO.String.Input("prompt_text", multiline=True),
        IO.String.Input("negative_prompt", multiline=True),
        IO.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
        IO.Combo.Input("resolution", options=["1080p", "720p"], default="1080p"),
        IO.Combo.Input("duration", options=[5, 10], default=5),
    ]


class PikaImageToVideo(IO.ComfyNode):
    """Pika 2.2 Image to Video Node."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="PikaImageToVideoNode2_2",
            display_name="Pika Image to Video",
            description="Sends an image and prompt to the Pika API v2.2 to generate a video.",
            category="api node/video/Pika",
            inputs=[
                IO.Image.Input("image", tooltip="The image to convert to video"),
                *get_base_inputs_types(),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        image: torch.Tensor,
        prompt_text: str,
        negative_prompt: str,
        seed: int,
        resolution: str,
        duration: int,
    ) -> IO.NodeOutput:
        image_bytes_io = tensor_to_bytesio(image)
        pika_files = {"image": ("image.png", image_bytes_io, "image/png")}
        pika_request_data = pika_defs.PikaBodyGenerate22I2vGenerate22I2vPost(
            promptText=prompt_text,
            negativePrompt=negative_prompt,
            seed=seed,
            resolution=resolution,
            duration=duration,
        )
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_IMAGE_TO_VIDEO, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_request_data,
            files=pika_files,
            content_type="multipart/form-data",
        )
        return await execute_task(initial_operation.video_id, cls)


class PikaTextToVideoNode(IO.ComfyNode):
    """Pika Text2Video v2.2 Node."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="PikaTextToVideoNode2_2",
            display_name="Pika Text to Video",
            description="Sends a text prompt to the Pika API v2.2 to generate a video.",
            category="api node/video/Pika",
            inputs=[
                *get_base_inputs_types(),
                IO.Float.Input(
                    "aspect_ratio",
                    step=0.001,
                    min=0.4,
                    max=2.5,
                    default=1.7777777777777777,
                    tooltip="Aspect ratio (width / height)",
                )
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt_text: str,
        negative_prompt: str,
        seed: int,
        resolution: str,
        duration: int,
        aspect_ratio: float,
    ) -> IO.NodeOutput:
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_TEXT_TO_VIDEO, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_defs.PikaBodyGenerate22T2vGenerate22T2vPost(
                promptText=prompt_text,
                negativePrompt=negative_prompt,
                seed=seed,
                resolution=resolution,
                duration=duration,
                aspectRatio=aspect_ratio,
            ),
            content_type="application/x-www-form-urlencoded",
        )
        return await execute_task(initial_operation.video_id, cls)


class PikaScenes(IO.ComfyNode):
    """PikaScenes v2.2 Node."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="PikaScenesV2_2",
            display_name="Pika Scenes (Video Image Composition)",
            description="Combine your images to create a video with the objects in them. Upload multiple images as ingredients and generate a high-quality video that incorporates all of them.",
            category="api node/video/Pika",
            inputs=[
                *get_base_inputs_types(),
                IO.Combo.Input(
                    "ingredients_mode",
                    options=["creative", "precise"],
                    default="creative",
                ),
                IO.Float.Input(
                    "aspect_ratio",
                    step=0.001,
                    min=0.4,
                    max=2.5,
                    default=1.7777777777777777,
                    tooltip="Aspect ratio (width / height)",
                ),
                IO.Image.Input(
                    "image_ingredient_1",
                    optional=True,
                    tooltip="Image that will be used as ingredient to create a video.",
                ),
                IO.Image.Input(
                    "image_ingredient_2",
                    optional=True,
                    tooltip="Image that will be used as ingredient to create a video.",
                ),
                IO.Image.Input(
                    "image_ingredient_3",
                    optional=True,
                    tooltip="Image that will be used as ingredient to create a video.",
                ),
                IO.Image.Input(
                    "image_ingredient_4",
                    optional=True,
                    tooltip="Image that will be used as ingredient to create a video.",
                ),
                IO.Image.Input(
                    "image_ingredient_5",
                    optional=True,
                    tooltip="Image that will be used as ingredient to create a video.",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        prompt_text: str,
        negative_prompt: str,
        seed: int,
        resolution: str,
        duration: int,
        ingredients_mode: str,
        aspect_ratio: float,
        image_ingredient_1: Optional[torch.Tensor] = None,
        image_ingredient_2: Optional[torch.Tensor] = None,
        image_ingredient_3: Optional[torch.Tensor] = None,
        image_ingredient_4: Optional[torch.Tensor] = None,
        image_ingredient_5: Optional[torch.Tensor] = None,
    ) -> IO.NodeOutput:
        all_image_bytes_io = []
        for image in [
            image_ingredient_1,
            image_ingredient_2,
            image_ingredient_3,
            image_ingredient_4,
            image_ingredient_5,
        ]:
            if image is not None:
                all_image_bytes_io.append(tensor_to_bytesio(image))

        pika_files = [
            ("images", (f"image_{i}.png", image_bytes_io, "image/png"))
            for i, image_bytes_io in enumerate(all_image_bytes_io)
        ]

        pika_request_data = pika_defs.PikaBodyGenerate22C2vGenerate22PikascenesPost(
            ingredientsMode=ingredients_mode,
            promptText=prompt_text,
            negativePrompt=negative_prompt,
            seed=seed,
            resolution=resolution,
            duration=duration,
            aspectRatio=aspect_ratio,
        )
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_PIKASCENES, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_request_data,
            files=pika_files,
            content_type="multipart/form-data",
        )

        return await execute_task(initial_operation.video_id, cls)


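# Editor's note (not part of the diff): pika_files in PikaScenes above is a list of
# ("images", (...)) tuples rather than a dict because multipart encoders accept repeated
# tuples to send several files under one form field name:
#   [("images", ("image_0.png", io0, "image/png")),
#    ("images", ("image_1.png", io1, "image/png"))]
# A dict could carry only a single "images" key, so the list form is required here.
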
class PikAdditionsNode(IO.ComfyNode):
    """Pika Pikadditions Node. Add an image into a video."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="Pikadditions",
            display_name="Pikadditions (Video Object Insertion)",
            description="Add any object or image into your video. Upload a video and specify what you'd like to add to create a seamlessly integrated result.",
            category="api node/video/Pika",
            inputs=[
                IO.Video.Input("video", tooltip="The video to add an image to."),
                IO.Image.Input("image", tooltip="The image to add to the video."),
                IO.String.Input("prompt_text", multiline=True),
                IO.String.Input("negative_prompt", multiline=True),
                IO.Int.Input(
                    "seed",
                    min=0,
                    max=0xFFFFFFFF,
                    control_after_generate=True,
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        video: VideoInput,
        image: torch.Tensor,
        prompt_text: str,
        negative_prompt: str,
        seed: int,
    ) -> IO.NodeOutput:
        video_bytes_io = BytesIO()
        video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
        video_bytes_io.seek(0)

        image_bytes_io = tensor_to_bytesio(image)
        pika_files = {
            "video": ("video.mp4", video_bytes_io, "video/mp4"),
            "image": ("image.png", image_bytes_io, "image/png"),
        }
        pika_request_data = pika_defs.PikaBodyGeneratePikadditionsGeneratePikadditionsPost(
            promptText=prompt_text,
            negativePrompt=negative_prompt,
            seed=seed,
        )
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_PIKADDITIONS, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_request_data,
            files=pika_files,
            content_type="multipart/form-data",
        )

        return await execute_task(initial_operation.video_id, cls)


class PikaSwapsNode(IO.ComfyNode):
    """Pika Pikaswaps Node."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="Pikaswaps",
            display_name="Pika Swaps (Video Object Replacement)",
            description="Swap out any object or region of your video with a new image or object. Define areas to replace either with a mask or coordinates.",
            category="api node/video/Pika",
            inputs=[
                IO.Video.Input("video", tooltip="The video to swap an object in."),
                IO.Image.Input(
                    "image",
                    tooltip="The image used to replace the masked object in the video.",
                    optional=True,
                ),
                IO.Mask.Input(
                    "mask",
                    tooltip="Use the mask to define areas in the video to replace.",
                    optional=True,
                ),
                IO.String.Input("prompt_text", multiline=True, optional=True),
                IO.String.Input("negative_prompt", multiline=True, optional=True),
                IO.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True, optional=True),
                IO.String.Input(
                    "region_to_modify",
                    multiline=True,
                    optional=True,
                    tooltip="Plaintext description of the object / region to modify.",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        video: VideoInput,
        image: Optional[torch.Tensor] = None,
        mask: Optional[torch.Tensor] = None,
        prompt_text: str = "",
        negative_prompt: str = "",
        seed: int = 0,
        region_to_modify: str = "",
    ) -> IO.NodeOutput:
        video_bytes_io = BytesIO()
        video.save_to(video_bytes_io, format=VideoContainer.MP4, codec=VideoCodec.H264)
        video_bytes_io.seek(0)
        pika_files = {
            "video": ("video.mp4", video_bytes_io, "video/mp4"),
        }
        if mask is not None:
            pika_files["modifyRegionMask"] = ("mask.png", tensor_to_bytesio(mask), "image/png")
        if image is not None:
            pika_files["image"] = ("image.png", tensor_to_bytesio(image), "image/png")

        pika_request_data = pika_defs.PikaBodyGeneratePikaswapsGeneratePikaswapsPost(
            promptText=prompt_text,
            negativePrompt=negative_prompt,
            seed=seed,
            modifyRegionRoi=region_to_modify if region_to_modify else None,
        )
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_PIKASWAPS, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_request_data,
            files=pika_files,
            content_type="multipart/form-data",
        )
        return await execute_task(initial_operation.video_id, cls)


class PikaffectsNode(IO.ComfyNode):
    """Pika Pikaffects Node."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="Pikaffects",
            display_name="Pikaffects (Video Effects)",
            description="Generate a video with a specific Pikaffect. Supported Pikaffects: Cake-ify, Crumble, Crush, Decapitate, Deflate, Dissolve, Explode, Eye-pop, Inflate, Levitate, Melt, Peel, Poke, Squish, Ta-da, Tear",
            category="api node/video/Pika",
            inputs=[
                IO.Image.Input("image", tooltip="The reference image to apply the Pikaffect to."),
                IO.Combo.Input(
                    "pikaffect", options=pika_defs.Pikaffect, default="Cake-ify"
                ),
                IO.String.Input("prompt_text", multiline=True),
                IO.String.Input("negative_prompt", multiline=True),
                IO.Int.Input("seed", min=0, max=0xFFFFFFFF, control_after_generate=True),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        image: torch.Tensor,
        pikaffect: str,
        prompt_text: str,
        negative_prompt: str,
        seed: int,
    ) -> IO.NodeOutput:
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_PIKAFFECTS, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_defs.PikaBodyGeneratePikaffectsGeneratePikaffectsPost(
                pikaffect=pikaffect,
                promptText=prompt_text,
                negativePrompt=negative_prompt,
                seed=seed,
            ),
            files={"image": ("image.png", tensor_to_bytesio(image), "image/png")},
            content_type="multipart/form-data",
        )
        return await execute_task(initial_operation.video_id, cls)


class PikaStartEndFrameNode(IO.ComfyNode):
    """PikaFrames v2.2 Node."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        return IO.Schema(
            node_id="PikaStartEndFrameNode2_2",
            display_name="Pika Start and End Frame to Video",
            description="Generate a video by combining your first and last frame. Upload two images to define the start and end points, and let the AI create a smooth transition between them.",
            category="api node/video/Pika",
            inputs=[
                IO.Image.Input("image_start", tooltip="The first image to combine."),
                IO.Image.Input("image_end", tooltip="The last image to combine."),
                *get_base_inputs_types(),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            is_deprecated=True,
        )

    @classmethod
    async def execute(
        cls,
        image_start: torch.Tensor,
        image_end: torch.Tensor,
        prompt_text: str,
        negative_prompt: str,
        seed: int,
        resolution: str,
        duration: int,
    ) -> IO.NodeOutput:
        validate_string(prompt_text, field_name="prompt_text", min_length=1)
        pika_files = [
            ("keyFrames", ("image_start.png", tensor_to_bytesio(image_start), "image/png")),
            ("keyFrames", ("image_end.png", tensor_to_bytesio(image_end), "image/png")),
        ]
        initial_operation = await sync_op(
            cls,
            ApiEndpoint(path=PATH_PIKAFRAMES, method="POST"),
            response_model=pika_defs.PikaGenerateResponse,
            data=pika_defs.PikaBodyGenerate22KeyframeGenerate22PikaframesPost(
                promptText=prompt_text,
                negativePrompt=negative_prompt,
                seed=seed,
                resolution=resolution,
                duration=duration,
            ),
            files=pika_files,
            content_type="multipart/form-data",
        )
        return await execute_task(initial_operation.video_id, cls)


class PikaApiNodesExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
            PikaImageToVideo,
            PikaTextToVideoNode,
            PikaScenes,
            PikAdditionsNode,
            PikaSwapsNode,
            PikaffectsNode,
            PikaStartEndFrameNode,
        ]


async def comfy_entrypoint() -> PikaApiNodesExtension:
    return PikaApiNodesExtension()
@ -102,8 +102,9 @@ class TripoTextToModelNode(IO.ComfyNode):
                IO.Int.Input("model_seed", default=42, optional=True),
                IO.Int.Input("texture_seed", default=42, optional=True),
                IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True),
                IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True),
                IO.Int.Input("face_limit", default=-1, min=-1, max=2000000, optional=True),
                IO.Boolean.Input("quad", default=False, optional=True),
                IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True),
            ],
            outputs=[
                IO.String.Output(display_name="model_file"),
@ -131,7 +132,7 @@ class TripoTextToModelNode(IO.ComfyNode):
        model_seed: Optional[int] = None,
        texture_seed: Optional[int] = None,
        texture_quality: Optional[str] = None,
        face_limit: Optional[int] = None,
        geometry_quality: Optional[str] = None,
        face_limit: Optional[int] = None,
        quad: Optional[bool] = None,
    ) -> IO.NodeOutput:
        style_enum = None if style == "None" else style
@ -154,6 +155,7 @@ class TripoTextToModelNode(IO.ComfyNode):
                texture_seed=texture_seed,
                texture_quality=texture_quality,
                face_limit=face_limit,
                geometry_quality=geometry_quality,
                auto_size=True,
                quad=quad,
            ),
@ -194,6 +196,7 @@ class TripoImageToModelNode(IO.ComfyNode):
                ),
                IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True),
                IO.Boolean.Input("quad", default=False, optional=True),
                IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True),
            ],
            outputs=[
                IO.String.Output(display_name="model_file"),
@ -220,7 +223,7 @@ class TripoImageToModelNode(IO.ComfyNode):
        orientation=None,
        texture_seed: Optional[int] = None,
        texture_quality: Optional[str] = None,
        texture_alignment: Optional[str] = None,
        geometry_quality: Optional[str] = None,
        texture_alignment: Optional[str] = None,
        face_limit: Optional[int] = None,
        quad: Optional[bool] = None,
    ) -> IO.NodeOutput:
@ -246,6 +249,7 @@ class TripoImageToModelNode(IO.ComfyNode):
                pbr=pbr,
                model_seed=model_seed,
                orientation=orientation,
                geometry_quality=geometry_quality,
                texture_alignment=texture_alignment,
                texture_seed=texture_seed,
                texture_quality=texture_quality,
@ -295,6 +299,7 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
                ),
                IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True),
                IO.Boolean.Input("quad", default=False, optional=True),
                IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True),
            ],
            outputs=[
                IO.String.Output(display_name="model_file"),
@ -323,7 +328,7 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
        model_seed: Optional[int] = None,
        texture_seed: Optional[int] = None,
        texture_quality: Optional[str] = None,
        texture_alignment: Optional[str] = None,
        geometry_quality: Optional[str] = None,
        texture_alignment: Optional[str] = None,
        face_limit: Optional[int] = None,
        quad: Optional[bool] = None,
    ) -> IO.NodeOutput:
@ -359,6 +364,7 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
                model_seed=model_seed,
                texture_seed=texture_seed,
                texture_quality=texture_quality,
                geometry_quality=geometry_quality,
                texture_alignment=texture_alignment,
                face_limit=face_limit,
                quad=quad,
@ -508,6 +514,8 @@ class TripoRetargetNode(IO.ComfyNode):
                    options=[
                        "preset:idle",
                        "preset:walk",
                        "preset:run",
                        "preset:dive",
                        "preset:climb",
                        "preset:jump",
                        "preset:slash",
@ -515,6 +523,11 @@ class TripoRetargetNode(IO.ComfyNode):
                        "preset:hurt",
                        "preset:fall",
                        "preset:turn",
                        "preset:quadruped:walk",
                        "preset:hexapod:walk",
                        "preset:octopod:walk",
                        "preset:serpentine:march",
                        "preset:aquatic:march"
                    ],
                ),
            ],
@ -563,7 +576,7 @@ class TripoConversionNode(IO.ComfyNode):
                    "face_limit",
                    default=-1,
                    min=-1,
                    max=500000,
                    max=2000000,
                    optional=True,
                ),
                IO.Int.Input(
@ -579,6 +592,40 @@ class TripoConversionNode(IO.ComfyNode):
                    default="JPEG",
                    optional=True,
                ),
                IO.Boolean.Input("force_symmetry", default=False, optional=True),
                IO.Boolean.Input("flatten_bottom", default=False, optional=True),
                IO.Float.Input(
                    "flatten_bottom_threshold",
                    default=0.0,
                    min=0.0,
                    max=1.0,
                    optional=True,
                ),
                IO.Boolean.Input("pivot_to_center_bottom", default=False, optional=True),
                IO.Float.Input(
                    "scale_factor",
                    default=1.0,
                    min=0.0,
                    optional=True,
                ),
                IO.Boolean.Input("with_animation", default=False, optional=True),
                IO.Boolean.Input("pack_uv", default=False, optional=True),
                IO.Boolean.Input("bake", default=False, optional=True),
                IO.String.Input("part_names", default="", optional=True),  # comma-separated list
                IO.Combo.Input(
                    "fbx_preset",
                    options=["blender", "mixamo", "3dsmax"],
                    default="blender",
                    optional=True,
                ),
                IO.Boolean.Input("export_vertex_colors", default=False, optional=True),
                IO.Combo.Input(
                    "export_orientation",
                    options=["align_image", "default"],
                    default="default",
                    optional=True,
                ),
                IO.Boolean.Input("animate_in_place", default=False, optional=True),
            ],
            outputs=[],
            hidden=[
@ -604,12 +651,30 @@ class TripoConversionNode(IO.ComfyNode):
        original_model_task_id,
        format: str,
        quad: bool,
        face_limit: int,
        force_symmetry: bool,
        face_limit: int,
        flatten_bottom: bool,
        flatten_bottom_threshold: float,
        texture_size: int,
        texture_format: str,
        texture_format: str,
        pivot_to_center_bottom: bool,
        scale_factor: float,
        with_animation: bool,
        pack_uv: bool,
        bake: bool,
        part_names: str,
        fbx_preset: str,
        export_vertex_colors: bool,
        export_orientation: str,
        animate_in_place: bool,
    ) -> IO.NodeOutput:
        if not original_model_task_id:
            raise RuntimeError("original_model_task_id is required")

        # Parse part_names from comma-separated string to list
        part_names_list = None
        if part_names and part_names.strip():
            part_names_list = [name.strip() for name in part_names.split(',') if name.strip()]

        response = await sync_op(
            cls,
            endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/task", method="POST"),
@ -618,9 +683,22 @@ class TripoConversionNode(IO.ComfyNode):
                original_model_task_id=original_model_task_id,
                format=format,
                quad=quad if quad else None,
                force_symmetry=force_symmetry if force_symmetry else None,
                face_limit=face_limit if face_limit != -1 else None,
                flatten_bottom=flatten_bottom if flatten_bottom else None,
                flatten_bottom_threshold=flatten_bottom_threshold if flatten_bottom_threshold != 0.0 else None,
                texture_size=texture_size if texture_size != 4096 else None,
                texture_format=texture_format if texture_format != "JPEG" else None,
                pivot_to_center_bottom=pivot_to_center_bottom if pivot_to_center_bottom else None,
                scale_factor=scale_factor if scale_factor != 1.0 else None,
                with_animation=with_animation if with_animation else None,
                pack_uv=pack_uv if pack_uv else None,
                bake=bake if bake else None,
                part_names=part_names_list,
                fbx_preset=fbx_preset if fbx_preset != "blender" else None,
                export_vertex_colors=export_vertex_colors if export_vertex_colors else None,
                export_orientation=export_orientation if export_orientation != "default" else None,
                animate_in_place=animate_in_place if animate_in_place else None,
            ),
        )
        return await poll_until_finished(cls, response, average_duration=30)

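# Editor's sketch (hypothetical helper, not part of the diff): TripoConversionNode above
# sends a field only when it differs from the node default, so the API's own defaults win
# otherwise, and part_names is parsed from a comma-separated string into a clean list:
#   "head, body, ,tail" -> ["head", "body", "tail"]
def _parse_part_names(part_names: str):
    if part_names and part_names.strip():
        return [name.strip() for name in part_names.split(',') if name.strip()]
    return None  # None is dropped from the request entirely
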
@ -1,7 +1,5 @@
import re
from typing import Optional

import torch
from pydantic import BaseModel, Field
from typing_extensions import override

@ -21,26 +19,26 @@ from comfy_api_nodes.util import (

class Text2ImageInputField(BaseModel):
    prompt: str = Field(...)
    negative_prompt: Optional[str] = Field(None)
    negative_prompt: str | None = Field(None)


class Image2ImageInputField(BaseModel):
    prompt: str = Field(...)
    negative_prompt: Optional[str] = Field(None)
    negative_prompt: str | None = Field(None)
    images: list[str] = Field(..., min_length=1, max_length=2)


class Text2VideoInputField(BaseModel):
    prompt: str = Field(...)
    negative_prompt: Optional[str] = Field(None)
    audio_url: Optional[str] = Field(None)
    negative_prompt: str | None = Field(None)
    audio_url: str | None = Field(None)


class Image2VideoInputField(BaseModel):
    prompt: str = Field(...)
    negative_prompt: Optional[str] = Field(None)
    negative_prompt: str | None = Field(None)
    img_url: str = Field(...)
    audio_url: Optional[str] = Field(None)
    audio_url: str | None = Field(None)


class Txt2ImageParametersField(BaseModel):
@ -52,7 +50,7 @@ class Txt2ImageParametersField(BaseModel):


class Image2ImageParametersField(BaseModel):
    size: Optional[str] = Field(None)
    size: str | None = Field(None)
    n: int = Field(1, description="Number of images to generate.")  # we support only value=1
    seed: int = Field(..., ge=0, le=2147483647)
    watermark: bool = Field(True)
@ -61,19 +59,21 @@ class Image2ImageParametersField(BaseModel):

class Text2VideoParametersField(BaseModel):
    size: str = Field(...)
    seed: int = Field(..., ge=0, le=2147483647)
    duration: int = Field(5, ge=5, le=10)
    duration: int = Field(5, ge=5, le=15)
    prompt_extend: bool = Field(True)
    watermark: bool = Field(True)
    audio: bool = Field(False, description="Should be audio generated automatically")
    audio: bool = Field(False, description="Whether to generate audio automatically.")
    shot_type: str = Field("single")


class Image2VideoParametersField(BaseModel):
    resolution: str = Field(...)
    seed: int = Field(..., ge=0, le=2147483647)
    duration: int = Field(5, ge=5, le=10)
    duration: int = Field(5, ge=5, le=15)
    prompt_extend: bool = Field(True)
    watermark: bool = Field(True)
    audio: bool = Field(False, description="Should be audio generated automatically")
    audio: bool = Field(False, description="Whether to generate audio automatically.")
    shot_type: str = Field("single")


class Text2ImageTaskCreationRequest(BaseModel):
@ -106,39 +106,39 @@ class TaskCreationOutputField(BaseModel):


class TaskCreationResponse(BaseModel):
    output: Optional[TaskCreationOutputField] = Field(None)
    output: TaskCreationOutputField | None = Field(None)
    request_id: str = Field(...)
    code: Optional[str] = Field(None, description="The error code of the failed request.")
    message: Optional[str] = Field(None, description="Details of the failed request.")
    code: str | None = Field(None, description="Error code for the failed request.")
    message: str | None = Field(None, description="Details about the failed request.")


class TaskResult(BaseModel):
    url: Optional[str] = Field(None)
    code: Optional[str] = Field(None)
    message: Optional[str] = Field(None)
    url: str | None = Field(None)
    code: str | None = Field(None)
    message: str | None = Field(None)


class ImageTaskStatusOutputField(TaskCreationOutputField):
    task_id: str = Field(...)
    task_status: str = Field(...)
    results: Optional[list[TaskResult]] = Field(None)
    results: list[TaskResult] | None = Field(None)


class VideoTaskStatusOutputField(TaskCreationOutputField):
    task_id: str = Field(...)
    task_status: str = Field(...)
    video_url: Optional[str] = Field(None)
    code: Optional[str] = Field(None)
    message: Optional[str] = Field(None)
    video_url: str | None = Field(None)
    code: str | None = Field(None)
    message: str | None = Field(None)


class ImageTaskStatusResponse(BaseModel):
    output: Optional[ImageTaskStatusOutputField] = Field(None)
    output: ImageTaskStatusOutputField | None = Field(None)
    request_id: str = Field(...)


class VideoTaskStatusResponse(BaseModel):
    output: Optional[VideoTaskStatusOutputField] = Field(None)
    output: VideoTaskStatusOutputField | None = Field(None)
    request_id: str = Field(...)


@ -152,7 +152,7 @@ class WanTextToImageApi(IO.ComfyNode):
            node_id="WanTextToImageApi",
            display_name="Wan Text to Image",
            category="api node/image/Wan",
            description="Generates image based on text prompt.",
            description="Generates an image based on a text prompt.",
            inputs=[
                IO.Combo.Input(
                    "model",
@ -164,13 +164,13 @@ class WanTextToImageApi(IO.ComfyNode):
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    tooltip="Negative prompt describing what to avoid.",
                    optional=True,
                ),
                IO.Int.Input(
@ -209,7 +209,7 @@ class WanTextToImageApi(IO.ComfyNode):
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    tooltip="Whether to add an AI-generated watermark to the result.",
                    optional=True,
                ),
            ],
@ -252,7 +252,7 @@ class WanTextToImageApi(IO.ComfyNode):
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
@ -272,7 +272,7 @@ class WanImageToImageApi(IO.ComfyNode):
            display_name="Wan Image to Image",
            category="api node/image/Wan",
            description="Generates an image from one or two input images and a text prompt. "
            "The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).",
            "The output image is currently fixed at 1.6 MP, and its aspect ratio matches the input image(s).",
            inputs=[
                IO.Combo.Input(
                    "model",
@ -282,19 +282,19 @@ class WanImageToImageApi(IO.ComfyNode):
                ),
                IO.Image.Input(
                    "image",
                    tooltip="Single-image editing or multi-image fusion, maximum 2 images.",
                    tooltip="Single-image editing or multi-image fusion. Maximum 2 images.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    tooltip="Negative prompt describing what to avoid.",
                    optional=True,
                ),
                # redo this later as an optional combo of recommended resolutions
@ -328,7 +328,7 @@ class WanImageToImageApi(IO.ComfyNode):
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    tooltip="Whether to add an AI-generated watermark to the result.",
                    optional=True,
                ),
            ],
@ -347,7 +347,7 @@ class WanImageToImageApi(IO.ComfyNode):
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        image: Input.Image,
        prompt: str,
        negative_prompt: str = "",
        # width: int = 1024,
@ -357,7 +357,7 @@ class WanImageToImageApi(IO.ComfyNode):
    ):
        n_images = get_number_of_images(image)
        if n_images not in (1, 2):
            raise ValueError(f"Expected 1 or 2 input images, got {n_images}.")
            raise ValueError(f"Expected 1 or 2 input images, but got {n_images}.")
        images = []
        for i in image:
            images.append("data:image/png;base64," + tensor_to_base64_string(i, total_pixels=4096 * 4096))
@ -376,7 +376,7 @@ class WanImageToImageApi(IO.ComfyNode):
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
@ -395,25 +395,25 @@ class WanTextToVideoApi(IO.ComfyNode):
            node_id="WanTextToVideoApi",
            display_name="Wan Text to Video",
            category="api node/video/Wan",
            description="Generates video based on text prompt.",
            description="Generates a video based on a text prompt.",
            inputs=[
                IO.Combo.Input(
                    "model",
                    options=["wan2.5-t2v-preview"],
                    default="wan2.5-t2v-preview",
                    options=["wan2.5-t2v-preview", "wan2.6-t2v"],
                    default="wan2.6-t2v",
                    tooltip="Model to use.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    tooltip="Negative prompt describing what to avoid.",
                    optional=True,
                ),
                IO.Combo.Input(
@ -433,23 +433,23 @@ class WanTextToVideoApi(IO.ComfyNode):
                        "1080p: 4:3 (1632x1248)",
                        "1080p: 3:4 (1248x1632)",
                    ],
                    default="480p: 1:1 (624x624)",
                    default="720p: 1:1 (960x960)",
                    optional=True,
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=10,
                    max=15,
                    step=5,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Available durations: 5 and 10 seconds",
                    tooltip="A 15-second duration is available only for the Wan 2.6 model.",
                    optional=True,
                ),
                IO.Audio.Input(
                    "audio",
                    optional=True,
                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
                    tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
                ),
                IO.Int.Input(
                    "seed",
@ -466,7 +466,7 @@ class WanTextToVideoApi(IO.ComfyNode):
                    "generate_audio",
                    default=False,
                    optional=True,
                    tooltip="If there is no audio input, generate audio automatically.",
                    tooltip="If no audio input is provided, generate audio automatically.",
                ),
                IO.Boolean.Input(
                    "prompt_extend",
@ -477,7 +477,15 @@ class WanTextToVideoApi(IO.ComfyNode):
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    tooltip="Whether to add an AI-generated watermark to the result.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "shot_type",
                    options=["single", "multi"],
                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
                    "single continuous shot or multiple shots with cuts. "
                    "This parameter takes effect only when prompt_extend is True.",
                    optional=True,
                ),
            ],
@ -498,14 +506,19 @@ class WanTextToVideoApi(IO.ComfyNode):
        model: str,
        prompt: str,
        negative_prompt: str = "",
        size: str = "480p: 1:1 (624x624)",
        size: str = "720p: 1:1 (960x960)",
        duration: int = 5,
        audio: Optional[Input.Audio] = None,
        audio: Input.Audio | None = None,
        seed: int = 0,
        generate_audio: bool = False,
        prompt_extend: bool = True,
        watermark: bool = True,
        shot_type: str = "single",
    ):
        if "480p" in size and model == "wan2.6-t2v":
            raise ValueError("The Wan 2.6 model does not support 480p.")
        if duration == 15 and model == "wan2.5-t2v-preview":
            raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")
        width, height = RES_IN_PARENS.search(size).groups()
        audio_url = None
        if audio is not None:
@ -526,11 +539,12 @@ class WanTextToVideoApi(IO.ComfyNode):
                    audio=generate_audio,
                    prompt_extend=prompt_extend,
                    watermark=watermark,
                    shot_type=shot_type,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
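# Editor's note: RES_IN_PARENS is defined elsewhere in this module (note the `import re`
# at the top of the file). Judging from its use on the size strings, it is presumably
# along the lines of the pattern below; the exact definition is an assumption:
#   RES_IN_PARENS = re.compile(r"\((\d+)x(\d+)\)")
#   RES_IN_PARENS.search("720p: 1:1 (960x960)").groups()  # -> ("960", "960")
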
@ -549,12 +563,12 @@ class WanImageToVideoApi(IO.ComfyNode):
            node_id="WanImageToVideoApi",
            display_name="Wan Image to Video",
            category="api node/video/Wan",
            description="Generates video based on the first frame and text prompt.",
            description="Generates a video from the first frame and a text prompt.",
            inputs=[
                IO.Combo.Input(
                    "model",
                    options=["wan2.5-i2v-preview"],
                    default="wan2.5-i2v-preview",
                    options=["wan2.5-i2v-preview", "wan2.6-i2v"],
                    default="wan2.6-i2v",
                    tooltip="Model to use.",
                ),
                IO.Image.Input(
@ -564,13 +578,13 @@ class WanImageToVideoApi(IO.ComfyNode):
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                ),
                IO.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid.",
                    tooltip="Negative prompt describing what to avoid.",
                    optional=True,
                ),
                IO.Combo.Input(
@ -580,23 +594,23 @@ class WanImageToVideoApi(IO.ComfyNode):
                        "720P",
                        "1080P",
                    ],
                    default="480P",
                    default="720P",
                    optional=True,
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=5,
                    max=10,
                    max=15,
                    step=5,
                    display_mode=IO.NumberDisplay.number,
                    tooltip="Available durations: 5 and 10 seconds",
tooltip="Duration 15 available only for WAN2.6 model.",
|
||||
                    optional=True,
                ),
                IO.Audio.Input(
                    "audio",
                    optional=True,
                    tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
                ),
                IO.Int.Input(
                    "seed",
@ -613,7 +627,7 @@ class WanImageToVideoApi(IO.ComfyNode):
                    "generate_audio",
                    default=False,
                    optional=True,
                    tooltip="If there is no audio input, generate audio automatically.",
                    tooltip="If no audio input is provided, generate audio automatically.",
                ),
                IO.Boolean.Input(
                    "prompt_extend",
@ -624,7 +638,15 @@ class WanImageToVideoApi(IO.ComfyNode):
                IO.Boolean.Input(
                    "watermark",
                    default=True,
                    tooltip='Whether to add an "AI generated" watermark to the result.',
                    tooltip="Whether to add an AI-generated watermark to the result.",
                    optional=True,
                ),
                IO.Combo.Input(
                    "shot_type",
                    options=["single", "multi"],
                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
                    "single continuous shot or multiple shots with cuts. "
                    "This parameter takes effect only when prompt_extend is True.",
                    optional=True,
                ),
            ],
@ -643,19 +665,24 @@ class WanImageToVideoApi(IO.ComfyNode):
    async def execute(
        cls,
        model: str,
        image: torch.Tensor,
        image: Input.Image,
        prompt: str,
        negative_prompt: str = "",
        resolution: str = "480P",
        resolution: str = "720P",
        duration: int = 5,
        audio: Optional[Input.Audio] = None,
        audio: Input.Audio | None = None,
        seed: int = 0,
        generate_audio: bool = False,
        prompt_extend: bool = True,
        watermark: bool = True,
        shot_type: str = "single",
    ):
        if get_number_of_images(image) != 1:
            raise ValueError("Exactly one input image is required.")
        if "480P" in resolution and model == "wan2.6-i2v":
            raise ValueError("The Wan 2.6 model does not support 480P.")
        if duration == 15 and model == "wan2.5-i2v-preview":
            raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")
        image_url = "data:image/png;base64," + tensor_to_base64_string(image, total_pixels=2000 * 2000)
        audio_url = None
        if audio is not None:
@ -677,11 +704,12 @@ class WanImageToVideoApi(IO.ComfyNode):
                    audio=generate_audio,
                    prompt_extend=prompt_extend,
                    watermark=watermark,
                    shot_type=shot_type,
                ),
            ),
        )
        if not initial_response.output:
            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
        response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),

@ -661,6 +661,40 @@ class SamplerSASolver(io.ComfyNode):
    get_sampler = execute


class SamplerSEEDS2(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerSEEDS2",
            category="sampling/custom_sampling/samplers",
            inputs=[
                io.Combo.Input("solver_type", options=["phi_1", "phi_2"]),
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength"),
                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier"),
                io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)"),
            ],
            outputs=[io.Sampler.Output()],
            description=(
                "This sampler node can represent multiple samplers:\n\n"
                "seeds_2\n"
                "- default setting\n\n"
                "exp_heun_2_x0\n"
                "- solver_type=phi_2, r=1.0, eta=0.0\n\n"
                "exp_heun_2_x0_sde\n"
                "- solver_type=phi_2, r=1.0, eta=1.0, s_noise=1.0"
            )
        )

    @classmethod
    def execute(cls, solver_type, eta, s_noise, r) -> io.NodeOutput:
        sampler_name = "seeds_2"
        sampler = comfy.samplers.ksampler(
            sampler_name,
            {"eta": eta, "s_noise": s_noise, "r": r, "solver_type": solver_type},
        )
        return io.NodeOutput(sampler)


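# Editor's sketch (hypothetical usage, not part of the diff): the variants named in the
# SamplerSEEDS2 description above correspond to these parameter settings:
#   seeds_2           -> solver_type="phi_1", eta=1.0, s_noise=1.0, r=0.5  (the defaults)
#   exp_heun_2_x0     -> solver_type="phi_2", eta=0.0, r=1.0
#   exp_heun_2_x0_sde -> solver_type="phi_2", eta=1.0, s_noise=1.0, r=1.0
# Each configuration yields a sampler object that plugs into the custom-sampling nodes.
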
class Noise_EmptyNoise:
|
||||
def __init__(self):
|
||||
self.seed = 0
|
||||
@ -998,6 +1032,7 @@ class CustomSamplersExtension(ComfyExtension):
|
||||
SamplerDPMAdaptative,
|
||||
SamplerER_SDE,
|
||||
SamplerSASolver,
|
||||
SamplerSEEDS2,
|
||||
SplitSigmas,
|
||||
SplitSigmasDenoise,
|
||||
FlipSigmas,
|
||||
|
||||
@ -138,12 +138,13 @@ class FluxKontextMultiReferenceLatentMethod(io.ComfyNode):
|
||||
def define_schema(cls):
|
||||
return io.Schema(
|
||||
node_id="FluxKontextMultiReferenceLatentMethod",
|
||||
display_name="Edit Model Reference Method",
|
||||
category="advanced/conditioning/flux",
|
||||
inputs=[
|
||||
io.Conditioning.Input("conditioning"),
|
||||
io.Combo.Input(
|
||||
"reference_latents_method",
|
||||
options=["offset", "index", "uxo/uno"],
|
||||
options=["offset", "index", "uxo/uno", "index_timestep_zero"],
|
||||
),
|
||||
],
|
||||
outputs=[
|
||||
|
||||
@ -248,7 +248,16 @@ class ModelPatchLoader:
|
||||
model = SigLIPMultiFeatProjModel(device=comfy.model_management.unet_offload_device(), dtype=dtype, operations=comfy.ops.manual_cast)
|
||||
elif 'control_all_x_embedder.2-1.weight' in sd: # alipai z image fun controlnet
|
||||
sd = z_image_convert(sd)
|
||||
model = comfy.ldm.lumina.controlnet.ZImage_Control(device=comfy.model_management.unet_offload_device(), dtype=dtype, operations=comfy.ops.manual_cast)
|
||||
config = {}
|
||||
if 'control_layers.14.adaLN_modulation.0.weight' in sd:
|
||||
config['n_control_layers'] = 15
|
||||
config['additional_in_dim'] = 17
|
||||
config['refiner_control'] = True
|
||||
ref_weight = sd.get("control_noise_refiner.0.after_proj.weight", None)
|
||||
if ref_weight is not None:
|
||||
if torch.count_nonzero(ref_weight) == 0:
|
||||
config['broken'] = True
|
||||
model = comfy.ldm.lumina.controlnet.ZImage_Control(device=comfy.model_management.unet_offload_device(), dtype=dtype, operations=comfy.ops.manual_cast, **config)
|
||||
|
||||
model.load_state_dict(sd)
|
||||
model = ModelPatcher(model, load_device=comfy.model_management.get_torch_device(), offload_device=comfy.model_management.unet_offload_device())
|
||||
@ -303,62 +312,122 @@ class DiffSynthCnetPatch:

class ZImageControlPatch:
    def __init__(self, model_patch, vae, image, strength):
    def __init__(self, model_patch, vae, image, strength, inpaint_image=None, mask=None):
        self.model_patch = model_patch
        self.vae = vae
        self.image = image
        self.inpaint_image = inpaint_image
        self.mask = mask
        self.strength = strength
        self.encoded_image = self.encode_latent_cond(image)
        self.encoded_image_size = (image.shape[1], image.shape[2])
        self.is_inpaint = self.model_patch.model.additional_in_dim > 0

        skip_encoding = False
        if self.image is not None and self.inpaint_image is not None:
            if self.image.shape != self.inpaint_image.shape:
                skip_encoding = True

        if skip_encoding:
            self.encoded_image = None
        else:
            self.encoded_image = self.encode_latent_cond(self.image, self.inpaint_image)
        if self.image is None:
            self.encoded_image_size = (self.inpaint_image.shape[1], self.inpaint_image.shape[2])
        else:
            self.encoded_image_size = (self.image.shape[1], self.image.shape[2])
        self.temp_data = None

    def encode_latent_cond(self, image):
        latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(image))
        return latent_image
    def encode_latent_cond(self, control_image=None, inpaint_image=None):
        latent_image = None
        if control_image is not None:
            latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))

        if self.is_inpaint:
            if inpaint_image is None:
                inpaint_image = torch.ones_like(control_image) * 0.5

            if self.mask is not None:
                mask_inpaint = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image.shape[-2], inpaint_image.shape[-3], "bilinear", "center")
                inpaint_image = ((inpaint_image - 0.5) * mask_inpaint.movedim(1, -1).round()) + 0.5

            inpaint_image_latent = comfy.latent_formats.Flux().process_in(self.vae.encode(inpaint_image))

            if self.mask is None:
                mask_ = torch.zeros_like(inpaint_image_latent)[:, :1]
            else:
                mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image_latent.shape[-1], inpaint_image_latent.shape[-2], "nearest", "center")

            if latent_image is None:
                latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(torch.ones_like(inpaint_image) * 0.5))

            return torch.cat([latent_image, mask_, inpaint_image_latent], dim=1)
        else:
            return latent_image

    def __call__(self, kwargs):
        x = kwargs.get("x")
        img = kwargs.get("img")
        img_input = kwargs.get("img_input")
        txt = kwargs.get("txt")
        pe = kwargs.get("pe")
        vec = kwargs.get("vec")
        block_index = kwargs.get("block_index")
        block_type = kwargs.get("block_type", "")
        spacial_compression = self.vae.spacial_compression_encode()
        if self.encoded_image is None or self.encoded_image_size != (x.shape[-2] * spacial_compression, x.shape[-1] * spacial_compression):
            image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
            image_scaled = None
            if self.image is not None:
                image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
                self.encoded_image_size = (image_scaled.shape[-3], image_scaled.shape[-2])

            inpaint_scaled = None
            if self.inpaint_image is not None:
                inpaint_scaled = comfy.utils.common_upscale(self.inpaint_image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
                self.encoded_image_size = (inpaint_scaled.shape[-3], inpaint_scaled.shape[-2])

            loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
            self.encoded_image = self.encode_latent_cond(image_scaled.movedim(1, -1))
            self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1])
            self.encoded_image = self.encode_latent_cond(image_scaled, inpaint_scaled)
            comfy.model_management.load_models_gpu(loaded_models)

        cnet_index = (block_index // 5)
        cnet_index_float = (block_index / 5)
        cnet_blocks = self.model_patch.model.n_control_layers
        div = round(30 / cnet_blocks)

        cnet_index = (block_index // div)
        cnet_index_float = (block_index / div)

        kwargs.pop("img")  # we do ops in place
        kwargs.pop("txt")

        cnet_blocks = self.model_patch.model.n_control_layers
        if cnet_index_float > (cnet_blocks - 1):
            self.temp_data = None
            return kwargs

        if self.temp_data is None or self.temp_data[0] > cnet_index:
            self.temp_data = (-1, (None, self.model_patch.model(txt, self.encoded_image.to(img.dtype), pe, vec)))
            if block_type == "noise_refiner":
                self.temp_data = (-3, (None, self.model_patch.model(txt, self.encoded_image.to(img.dtype), pe, vec)))
            else:
                self.temp_data = (-1, (None, self.model_patch.model(txt, self.encoded_image.to(img.dtype), pe, vec)))

        while self.temp_data[0] < cnet_index and (self.temp_data[0] + 1) < cnet_blocks:
        if block_type == "noise_refiner":
            next_layer = self.temp_data[0] + 1
            self.temp_data = (next_layer, self.model_patch.model.forward_control_block(next_layer, self.temp_data[1][1], img[:, :self.temp_data[1][1].shape[1]], None, pe, vec))
            self.temp_data = (next_layer, self.model_patch.model.forward_noise_refiner_block(block_index, self.temp_data[1][1], img_input[:, :self.temp_data[1][1].shape[1]], None, pe, vec))
            if self.temp_data[1][0] is not None:
                img[:, :self.temp_data[1][0].shape[1]] += (self.temp_data[1][0] * self.strength)
        else:
            while self.temp_data[0] < cnet_index and (self.temp_data[0] + 1) < cnet_blocks:
                next_layer = self.temp_data[0] + 1
                self.temp_data = (next_layer, self.model_patch.model.forward_control_block(next_layer, self.temp_data[1][1], img_input[:, :self.temp_data[1][1].shape[1]], None, pe, vec))

        if cnet_index_float == self.temp_data[0]:
            img[:, :self.temp_data[1][0].shape[1]] += (self.temp_data[1][0] * self.strength)
        if cnet_blocks == self.temp_data[0] + 1:
            self.temp_data = None
            if cnet_index_float == self.temp_data[0]:
                img[:, :self.temp_data[1][0].shape[1]] += (self.temp_data[1][0] * self.strength)
            if cnet_blocks == self.temp_data[0] + 1:
                self.temp_data = None

        return kwargs

    def to(self, device_or_dtype):
        if isinstance(device_or_dtype, torch.device):
            self.encoded_image = self.encoded_image.to(device_or_dtype)
            if self.encoded_image is not None:
                self.encoded_image = self.encoded_image.to(device_or_dtype)
            self.temp_data = None
        return self

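To make the conditioning layout above easier to follow: in the inpaint branch, encode_latent_cond concatenates the control latent, a one-channel mask, and the inpaint-image latent along the channel dimension. A minimal standalone sketch with illustrative (assumed) sizes, not the real model's shapes:

import torch

latent_image = torch.zeros(1, 16, 64, 64)          # control latent (or gray fill); sizes assumed
mask_ = torch.zeros(1, 1, 64, 64)                  # single-channel mask
inpaint_image_latent = torch.zeros(1, 16, 64, 64)  # inpaint-image latent

cond = torch.cat([latent_image, mask_, inpaint_image_latent], dim=1)
assert cond.shape == (1, 33, 64, 64)  # 16 + 1 + 16 channels
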
@ -383,9 +452,12 @@ class QwenImageDiffsynthControlnet:

    CATEGORY = "advanced/loaders/qwen"

    def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=None):
    def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None):
        model_patched = model.clone()
        image = image[:, :, :, :3]
        if image is not None:
            image = image[:, :, :, :3]
        if inpaint_image is not None:
            inpaint_image = inpaint_image[:, :, :, :3]
        if mask is not None:
            if mask.ndim == 3:
                mask = mask.unsqueeze(1)
@ -394,11 +466,24 @@ class QwenImageDiffsynthControlnet:
            mask = 1.0 - mask

        if isinstance(model_patch.model, comfy.ldm.lumina.controlnet.ZImage_Control):
            model_patched.set_model_double_block_patch(ZImageControlPatch(model_patch, vae, image, strength))
            patch = ZImageControlPatch(model_patch, vae, image, strength, inpaint_image=inpaint_image, mask=mask)
            model_patched.set_model_noise_refiner_patch(patch)
            model_patched.set_model_double_block_patch(patch)
        else:
            model_patched.set_model_double_block_patch(DiffSynthCnetPatch(model_patch, vae, image, strength, mask))
        return (model_patched,)

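The mask handling in diffsynth_controlnet above follows the usual ComfyUI shape convention. A minimal sketch of just that preprocessing, using an assumed 512x512 mask:

import torch

mask = torch.rand(1, 512, 512)   # ComfyUI MASK: (batch, height, width)
if mask.ndim == 3:
    mask = mask.unsqueeze(1)     # add a channel dim -> (batch, 1, height, width)
mask = 1.0 - mask                # flip the mask's sense, as the node does
assert mask.shape == (1, 1, 512, 512)
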
class ZImageFunControlnet(QwenImageDiffsynthControlnet):
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"model": ("MODEL",),
                             "model_patch": ("MODEL_PATCH",),
                             "vae": ("VAE",),
                             "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
                             },
                "optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}}

    CATEGORY = "advanced/loaders/zimage"

class UsoStyleProjectorPatch:
    def __init__(self, model_patch, encoded_image):
@ -447,5 +532,6 @@ class USOStyleReference:
NODE_CLASS_MAPPINGS = {
    "ModelPatchLoader": ModelPatchLoader,
    "QwenImageDiffsynthControlnet": QwenImageDiffsynthControlnet,
    "ZImageFunControlnet": ZImageFunControlnet,
    "USOStyleReference": USOStyleReference,
}

@ -51,6 +51,8 @@ torch._inductor.codecache.write_atomic = write_atomic

# torch._inductor.utils.is_big_gpu = lambda *args: True

def skip_torch_compile_dict(guard_entries):
    return [("transformer_options" not in entry.name) for entry in guard_entries]

class TorchCompileModel(CustomNode):
    @classmethod
@ -113,7 +115,7 @@ class TorchCompileModel(CustomNode):
            patcher = to_return
            if object_patch is None or len(object_patches) == 0 or len(object_patches) == 1 and object_patches[0].strip() == "":
                object_patches = [DIFFUSION_MODEL]
            set_torch_compile_wrapper(patcher, keys=object_patches, **compile_kwargs)
            set_torch_compile_wrapper(patcher, keys=object_patches, options={"guard_filter_fn": skip_torch_compile_dict}, **compile_kwargs)
            return to_return,
        elif isinstance(model, torch.nn.Module):
            model_management.unload_all_models()

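The guard filter registered above stops torch.compile from recompiling whenever the mutable transformer_options dict changes between calls. A standalone sketch of the same pattern, assuming a PyTorch build recent enough to accept the experimental guard_filter_fn compile option (the function and its argument below are illustrative):

import torch

def skip_dict_guards(guard_entries):
    # Returning True keeps a guard, False drops it; drop any guard whose
    # name mentions "transformer_options" so edits to that dict between
    # calls do not trigger a recompile.
    return [("transformer_options" not in entry.name) for entry in guard_entries]

def denoise_step(x, transformer_options=None):
    return x * 2.0

compiled_step = torch.compile(denoise_step, options={"guard_filter_fn": skip_dict_guards})
print(compiled_step(torch.ones(4)))
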
@ -1,6 +1,6 @@
[project]
name = "comfyui"
version = "0.4.0"
version = "0.5.0"
description = "An installable version of ComfyUI"
readme = "README.md"
authors = [

358  tests/execution/test_preview_method.py  Normal file
@ -0,0 +1,358 @@
"""
E2E tests for Queue-specific Preview Method Override feature.

Tests actual execution with different preview_method values.
Requires a running ComfyUI server with models.

Usage:
    COMFYUI_SERVER=http://localhost:8988 pytest tests/execution/test_preview_method.py -v -m preview_method

Note:
    These tests execute actual image generation and wait for completion.
    Tests verify preview image transmission based on preview_method setting.
"""
import os
import json
import pytest
import uuid
import time
import random
import websocket
import urllib.request
from pathlib import Path


# Server configuration
SERVER_URL = os.environ.get("COMFYUI_SERVER", "http://localhost:8988")
SERVER_HOST = SERVER_URL.replace("http://", "").replace("https://", "")

# Use existing inference graph fixture
GRAPH_FILE = Path(__file__).parent.parent / "inference" / "graphs" / "default_graph_sdxl1_0.json"


def is_server_running() -> bool:
    """Check if ComfyUI server is running."""
    try:
        request = urllib.request.Request(f"{SERVER_URL}/system_stats")
        with urllib.request.urlopen(request, timeout=2.0):
            return True
    except Exception:
        return False


def prepare_graph_for_test(graph: dict, steps: int = 5) -> dict:
    """Prepare graph for testing: randomize seeds and reduce steps."""
    adapted = json.loads(json.dumps(graph))  # Deep copy
    for node_id, node in adapted.items():
        inputs = node.get("inputs", {})
        # Handle both "seed" and "noise_seed" (used by KSamplerAdvanced)
        if "seed" in inputs:
            inputs["seed"] = random.randint(0, 2**32 - 1)
        if "noise_seed" in inputs:
            inputs["noise_seed"] = random.randint(0, 2**32 - 1)
        # Reduce steps for faster testing (default 20 -> 5)
        if "steps" in inputs:
            inputs["steps"] = steps
    return adapted


# Alias for backward compatibility
randomize_seed = prepare_graph_for_test


class PreviewMethodClient:
    """Client for testing preview_method with WebSocket execution tracking."""

    def __init__(self, server_address: str):
        self.server_address = server_address
        self.client_id = str(uuid.uuid4())
        self.ws = None

    def connect(self):
        """Connect to WebSocket."""
        self.ws = websocket.WebSocket()
        self.ws.settimeout(120)  # 2 minute timeout for sampling
        self.ws.connect(f"ws://{self.server_address}/ws?clientId={self.client_id}")

    def close(self):
        """Close WebSocket connection."""
        if self.ws:
            self.ws.close()

    def queue_prompt(self, prompt: dict, extra_data: dict = None) -> dict:
        """Queue a prompt and return response with prompt_id."""
        data = {
            "prompt": prompt,
            "client_id": self.client_id,
            "extra_data": extra_data or {}
        }
        req = urllib.request.Request(
            f"http://{self.server_address}/prompt",
            data=json.dumps(data).encode("utf-8"),
            headers={"Content-Type": "application/json"}
        )
        return json.loads(urllib.request.urlopen(req).read())

    def wait_for_execution(self, prompt_id: str, timeout: float = 120.0) -> dict:
        """
        Wait for execution to complete via WebSocket.

        Returns:
            dict with keys: completed, error, preview_count, execution_time
        """
        result = {
            "completed": False,
            "error": None,
            "preview_count": 0,
            "execution_time": 0.0
        }

        start_time = time.time()
        self.ws.settimeout(timeout)

        try:
            while True:
                out = self.ws.recv()
                elapsed = time.time() - start_time

                if isinstance(out, str):
                    message = json.loads(out)
                    msg_type = message.get("type")
                    data = message.get("data", {})

                    if data.get("prompt_id") != prompt_id:
                        continue

                    if msg_type == "executing":
                        if data.get("node") is None:
                            # Execution complete
                            result["completed"] = True
                            result["execution_time"] = elapsed
                            break

                    elif msg_type == "execution_error":
                        result["error"] = data
                        result["execution_time"] = elapsed
                        break

                    elif msg_type == "progress":
                        # Progress update during sampling
                        pass

                elif isinstance(out, bytes):
                    # Binary data = preview image
                    result["preview_count"] += 1

        except websocket.WebSocketTimeoutException:
            result["error"] = "Timeout waiting for execution"
            result["execution_time"] = time.time() - start_time

        return result


def load_graph() -> dict:
    """Load the SDXL graph fixture with randomized seed."""
    with open(GRAPH_FILE) as f:
        graph = json.load(f)
    return randomize_seed(graph)  # Avoid caching


# Skip all tests if server is not running
pytestmark = [
    pytest.mark.skipif(
        not is_server_running(),
        reason=f"ComfyUI server not running at {SERVER_URL}"
    ),
    pytest.mark.preview_method,
    pytest.mark.execution,
]


@pytest.fixture
def client():
    """Create and connect a test client."""
    c = PreviewMethodClient(SERVER_HOST)
    c.connect()
    yield c
    c.close()


@pytest.fixture
def graph():
    """Load the test graph."""
    return load_graph()


class TestPreviewMethodExecution:
    """Test actual execution with different preview methods."""

    def test_execution_with_latent2rgb(self, client, graph):
        """
        Execute with preview_method=latent2rgb.
        Should complete and potentially receive preview images.
        """
        extra_data = {"preview_method": "latent2rgb"}

        response = client.queue_prompt(graph, extra_data)
        assert "prompt_id" in response

        result = client.wait_for_execution(response["prompt_id"])

        # Should complete (may error if model missing, but that's separate)
        assert result["completed"] or result["error"] is not None
        # Execution should take some time (sampling)
        if result["completed"]:
            assert result["execution_time"] > 0.5, "Execution too fast - likely didn't run"
            # latent2rgb should produce previews
            print(f"latent2rgb: {result['preview_count']} previews in {result['execution_time']:.2f}s")  # noqa: T201

    def test_execution_with_taesd(self, client, graph):
        """
        Execute with preview_method=taesd.
        TAESD provides higher quality previews.
        """
        extra_data = {"preview_method": "taesd"}

        response = client.queue_prompt(graph, extra_data)
        assert "prompt_id" in response

        result = client.wait_for_execution(response["prompt_id"])

        assert result["completed"] or result["error"] is not None
        if result["completed"]:
            assert result["execution_time"] > 0.5
            # taesd should also produce previews
            print(f"taesd: {result['preview_count']} previews in {result['execution_time']:.2f}s")  # noqa: T201

    def test_execution_with_none_preview(self, client, graph):
        """
        Execute with preview_method=none.
        No preview images should be generated.
        """
        extra_data = {"preview_method": "none"}

        response = client.queue_prompt(graph, extra_data)
        assert "prompt_id" in response

        result = client.wait_for_execution(response["prompt_id"])

        assert result["completed"] or result["error"] is not None
        if result["completed"]:
            # With "none", should receive no preview images
            assert result["preview_count"] == 0, \
                f"Expected no previews with 'none', got {result['preview_count']}"
            print(f"none: {result['preview_count']} previews in {result['execution_time']:.2f}s")  # noqa: T201

    def test_execution_with_default(self, client, graph):
        """
        Execute with preview_method=default.
        Should use server's CLI default setting.
        """
        extra_data = {"preview_method": "default"}

        response = client.queue_prompt(graph, extra_data)
        assert "prompt_id" in response

        result = client.wait_for_execution(response["prompt_id"])

        assert result["completed"] or result["error"] is not None
        if result["completed"]:
            print(f"default: {result['preview_count']} previews in {result['execution_time']:.2f}s")  # noqa: T201

    def test_execution_without_preview_method(self, client, graph):
        """
        Execute without preview_method in extra_data.
        Should use server's default preview method.
        """
        extra_data = {}  # No preview_method

        response = client.queue_prompt(graph, extra_data)
        assert "prompt_id" in response

        result = client.wait_for_execution(response["prompt_id"])

        assert result["completed"] or result["error"] is not None
        if result["completed"]:
            print(f"(no override): {result['preview_count']} previews in {result['execution_time']:.2f}s")  # noqa: T201


class TestPreviewMethodComparison:
    """Compare preview behavior between different methods."""

    def test_none_vs_latent2rgb_preview_count(self, client, graph):
        """
        Compare preview counts: 'none' should have 0, others should have >0.
        This is the key verification that preview_method actually works.
        """
        results = {}

        # Run with none (randomize seed to avoid caching)
        graph_none = randomize_seed(graph)
        extra_data_none = {"preview_method": "none"}
        response = client.queue_prompt(graph_none, extra_data_none)
        results["none"] = client.wait_for_execution(response["prompt_id"])

        # Run with latent2rgb (randomize seed again)
        graph_rgb = randomize_seed(graph)
        extra_data_rgb = {"preview_method": "latent2rgb"}
        response = client.queue_prompt(graph_rgb, extra_data_rgb)
        results["latent2rgb"] = client.wait_for_execution(response["prompt_id"])

        # Verify both completed
        assert results["none"]["completed"], f"'none' execution failed: {results['none']['error']}"
        assert results["latent2rgb"]["completed"], f"'latent2rgb' execution failed: {results['latent2rgb']['error']}"

        # Key assertion: 'none' should have 0 previews
        assert results["none"]["preview_count"] == 0, \
            f"'none' should have 0 previews, got {results['none']['preview_count']}"

        # 'latent2rgb' should have at least 1 preview (depends on steps)
        assert results["latent2rgb"]["preview_count"] > 0, \
            f"'latent2rgb' should have >0 previews, got {results['latent2rgb']['preview_count']}"

        print("\nPreview count comparison:")  # noqa: T201
        print(f"  none: {results['none']['preview_count']} previews")  # noqa: T201
        print(f"  latent2rgb: {results['latent2rgb']['preview_count']} previews")  # noqa: T201


class TestPreviewMethodSequential:
    """Test sequential execution with different preview methods."""

    def test_sequential_different_methods(self, client, graph):
        """
        Execute multiple prompts sequentially with different preview methods.
        Each should complete independently with correct preview behavior.
        """
        methods = ["latent2rgb", "none", "default"]
        results = []

        for method in methods:
            # Randomize seed for each execution to avoid caching
            graph_run = randomize_seed(graph)
            extra_data = {"preview_method": method}
            response = client.queue_prompt(graph_run, extra_data)

            result = client.wait_for_execution(response["prompt_id"])
            results.append({
                "method": method,
                "completed": result["completed"],
                "preview_count": result["preview_count"],
                "execution_time": result["execution_time"],
                "error": result["error"]
            })

        # All should complete or have clear errors
        for r in results:
            assert r["completed"] or r["error"] is not None, \
                f"Method {r['method']} neither completed nor errored"

        # "none" should have zero previews if completed
        none_result = next(r for r in results if r["method"] == "none")
        if none_result["completed"]:
            assert none_result["preview_count"] == 0, \
                f"'none' should have 0 previews, got {none_result['preview_count']}"

        print("\nSequential execution results:")  # noqa: T201
        for r in results:
            status = "✓" if r["completed"] else f"✗ ({r['error']})"
            print(f"  {r['method']}: {status}, {r['preview_count']} previews, {r['execution_time']:.2f}s")  # noqa: T201
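For reference, the client-side contract the E2E tests above rely on: the override travels in extra_data when POSTing to /prompt. A minimal sketch with an assumed local server and an empty placeholder graph:

import json
import urllib.request

payload = {
    "prompt": {},  # a real ComfyUI graph dict would go here
    "client_id": "example-client",
    "extra_data": {"preview_method": "none"},  # per-queue preview override
}
req = urllib.request.Request(
    "http://localhost:8988/prompt",  # assumed server address
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
# urllib.request.urlopen(req)  # uncomment with a running server
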
352  tests/unit/preview_method_override_test.py  Normal file
@ -0,0 +1,352 @@
"""
Unit tests for Queue-specific Preview Method Override feature.

Tests the preview method override functionality:
- LatentPreviewMethod.from_string() method
- set_preview_method() function in latent_preview.py
- default_preview_method variable
- Integration with args.preview_method
"""
import pytest

from comfy.cli_args import args, LatentPreviewMethod
from latent_preview import set_preview_method, default_preview_method


class TestLatentPreviewMethodFromString:
    """Test LatentPreviewMethod.from_string() classmethod."""

    @pytest.mark.parametrize("value,expected", [
        ("auto", LatentPreviewMethod.Auto),
        ("latent2rgb", LatentPreviewMethod.Latent2RGB),
        ("taesd", LatentPreviewMethod.TAESD),
        ("none", LatentPreviewMethod.NoPreviews),
    ])
    def test_valid_values_return_enum(self, value, expected):
        """Valid string values should return corresponding enum."""
        assert LatentPreviewMethod.from_string(value) == expected

    @pytest.mark.parametrize("invalid", [
        "invalid",
        "TAESD",  # Case sensitive
        "AUTO",  # Case sensitive
        "Latent2RGB",  # Case sensitive
        "latent",
        "",
        "default",  # default is special, not a method
    ])
    def test_invalid_values_return_none(self, invalid):
        """Invalid string values should return None."""
        assert LatentPreviewMethod.from_string(invalid) is None


class TestLatentPreviewMethodEnumValues:
    """Test LatentPreviewMethod enum has expected values."""

    def test_enum_values(self):
        """Verify enum values match expected strings."""
        assert LatentPreviewMethod.NoPreviews.value == "none"
        assert LatentPreviewMethod.Auto.value == "auto"
        assert LatentPreviewMethod.Latent2RGB.value == "latent2rgb"
        assert LatentPreviewMethod.TAESD.value == "taesd"

    def test_enum_count(self):
        """Verify exactly 4 preview methods exist."""
        assert len(LatentPreviewMethod) == 4


class TestSetPreviewMethod:
    """Test set_preview_method() function from latent_preview.py."""

    def setup_method(self):
        """Store original value before each test."""
        self.original = args.preview_method

    def teardown_method(self):
        """Restore original value after each test."""
        args.preview_method = self.original

    def test_override_with_taesd(self):
        """'taesd' should set args.preview_method to TAESD."""
        set_preview_method("taesd")
        assert args.preview_method == LatentPreviewMethod.TAESD

    def test_override_with_latent2rgb(self):
        """'latent2rgb' should set args.preview_method to Latent2RGB."""
        set_preview_method("latent2rgb")
        assert args.preview_method == LatentPreviewMethod.Latent2RGB

    def test_override_with_auto(self):
        """'auto' should set args.preview_method to Auto."""
        set_preview_method("auto")
        assert args.preview_method == LatentPreviewMethod.Auto

    def test_override_with_none_value(self):
        """'none' should set args.preview_method to NoPreviews."""
        set_preview_method("none")
        assert args.preview_method == LatentPreviewMethod.NoPreviews

    def test_default_restores_original(self):
        """'default' should restore to default_preview_method."""
        # First override to something else
        set_preview_method("taesd")
        assert args.preview_method == LatentPreviewMethod.TAESD

        # Then use 'default' to restore
        set_preview_method("default")
        assert args.preview_method == default_preview_method

    def test_none_param_restores_original(self):
        """None parameter should restore to default_preview_method."""
        # First override to something else
        set_preview_method("taesd")
        assert args.preview_method == LatentPreviewMethod.TAESD

        # Then use None to restore
        set_preview_method(None)
        assert args.preview_method == default_preview_method

    def test_empty_string_restores_original(self):
        """Empty string should restore to default_preview_method."""
        set_preview_method("taesd")
        set_preview_method("")
        assert args.preview_method == default_preview_method

    def test_invalid_value_restores_original(self):
        """Invalid value should restore to default_preview_method."""
        set_preview_method("taesd")
        set_preview_method("invalid_method")
        assert args.preview_method == default_preview_method

    def test_case_sensitive_invalid_restores(self):
        """Case-mismatched values should restore to default."""
        set_preview_method("taesd")
        set_preview_method("TAESD")  # Wrong case
        assert args.preview_method == default_preview_method


class TestDefaultPreviewMethod:
    """Test default_preview_method module variable."""

    def test_default_is_not_none(self):
        """default_preview_method should not be None."""
        assert default_preview_method is not None

    def test_default_is_enum_member(self):
        """default_preview_method should be a LatentPreviewMethod enum."""
        assert isinstance(default_preview_method, LatentPreviewMethod)

    def test_default_matches_args_initial(self):
        """default_preview_method should match CLI default or user setting."""
        # This tests that default_preview_method was captured at module load
        # After set_preview_method(None), args should equal default
        original = args.preview_method
        set_preview_method("taesd")
        set_preview_method(None)
        assert args.preview_method == default_preview_method
        args.preview_method = original


class TestArgsPreviewMethodModification:
    """Test args.preview_method can be modified correctly."""

    def setup_method(self):
        """Store original value before each test."""
        self.original = args.preview_method

    def teardown_method(self):
        """Restore original value after each test."""
        args.preview_method = self.original

    def test_args_accepts_all_enum_values(self):
        """args.preview_method should accept all LatentPreviewMethod values."""
        for method in LatentPreviewMethod:
            args.preview_method = method
            assert args.preview_method == method

    def test_args_modification_and_restoration(self):
        """args.preview_method should be modifiable and restorable."""
        original = args.preview_method

        args.preview_method = LatentPreviewMethod.TAESD
        assert args.preview_method == LatentPreviewMethod.TAESD

        args.preview_method = original
        assert args.preview_method == original


class TestExecutionFlow:
    """Test the execution flow pattern used in execution.py."""

    def setup_method(self):
        """Store original value before each test."""
        self.original = args.preview_method

    def teardown_method(self):
        """Restore original value after each test."""
        args.preview_method = self.original

    def test_sequential_executions_with_different_methods(self):
        """Simulate multiple queue executions with different preview methods."""
        # Execution 1: taesd
        set_preview_method("taesd")
        assert args.preview_method == LatentPreviewMethod.TAESD

        # Execution 2: none
        set_preview_method("none")
        assert args.preview_method == LatentPreviewMethod.NoPreviews

        # Execution 3: default (restore)
        set_preview_method("default")
        assert args.preview_method == default_preview_method

        # Execution 4: auto
        set_preview_method("auto")
        assert args.preview_method == LatentPreviewMethod.Auto

        # Execution 5: no override (None)
        set_preview_method(None)
        assert args.preview_method == default_preview_method

    def test_override_then_default_pattern(self):
        """Test the pattern: override -> execute -> next call restores."""
        # First execution with override
        set_preview_method("latent2rgb")
        assert args.preview_method == LatentPreviewMethod.Latent2RGB

        # Second execution without override restores default
        set_preview_method(None)
        assert args.preview_method == default_preview_method

    def test_extra_data_simulation(self):
        """Simulate extra_data.get('preview_method') patterns."""
        # Simulate: extra_data = {"preview_method": "taesd"}
        extra_data = {"preview_method": "taesd"}
        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.TAESD

        # Simulate: extra_data = {}
        extra_data = {}
        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == default_preview_method

        # Simulate: extra_data = {"preview_method": "default"}
        extra_data = {"preview_method": "default"}
        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == default_preview_method


class TestRealWorldScenarios:
    """Tests using real-world prompt data patterns."""

    def setup_method(self):
        """Store original value before each test."""
        self.original = args.preview_method

    def teardown_method(self):
        """Restore original value after each test."""
        args.preview_method = self.original

    def test_captured_prompt_without_preview_method(self):
        """
        Test with captured prompt that has no preview_method.
        Based on: tests-unit/execution_test/fixtures/default_prompt.json
        """
        # Real captured extra_data structure (preview_method absent)
        extra_data = {
            "extra_pnginfo": {"workflow": {}},
            "client_id": "271314f0dabd48e5aaa488ed7a4ceb0d",
            "create_time": 1765416558179
        }

        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == default_preview_method

    def test_captured_prompt_with_preview_method_taesd(self):
        """Test captured prompt with preview_method: taesd."""
        extra_data = {
            "extra_pnginfo": {"workflow": {}},
            "client_id": "271314f0dabd48e5aaa488ed7a4ceb0d",
            "preview_method": "taesd"
        }

        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.TAESD

    def test_captured_prompt_with_preview_method_none(self):
        """Test captured prompt with preview_method: none (disable preview)."""
        extra_data = {
            "extra_pnginfo": {"workflow": {}},
            "client_id": "test-client",
            "preview_method": "none"
        }

        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.NoPreviews

    def test_captured_prompt_with_preview_method_latent2rgb(self):
        """Test captured prompt with preview_method: latent2rgb."""
        extra_data = {
            "extra_pnginfo": {"workflow": {}},
            "client_id": "test-client",
            "preview_method": "latent2rgb"
        }

        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.Latent2RGB

    def test_captured_prompt_with_preview_method_auto(self):
        """Test captured prompt with preview_method: auto."""
        extra_data = {
            "extra_pnginfo": {"workflow": {}},
            "client_id": "test-client",
            "preview_method": "auto"
        }

        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.Auto

    def test_captured_prompt_with_preview_method_default(self):
        """Test captured prompt with preview_method: default (use CLI setting)."""
        # First set to something else
        set_preview_method("taesd")
        assert args.preview_method == LatentPreviewMethod.TAESD

        # Then simulate a prompt with "default"
        extra_data = {
            "extra_pnginfo": {"workflow": {}},
            "client_id": "test-client",
            "preview_method": "default"
        }

        set_preview_method(extra_data.get("preview_method"))
        assert args.preview_method == default_preview_method

    def test_sequential_queue_with_different_preview_methods(self):
        """
        Simulate real queue scenario: multiple prompts with different settings.
        This tests the actual usage pattern in ComfyUI.
        """
        # Queue 1: User wants TAESD preview
        extra_data_1 = {"client_id": "client-1", "preview_method": "taesd"}
        set_preview_method(extra_data_1.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.TAESD

        # Queue 2: User wants no preview (faster execution)
        extra_data_2 = {"client_id": "client-2", "preview_method": "none"}
        set_preview_method(extra_data_2.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.NoPreviews

        # Queue 3: User doesn't specify (use server default)
        extra_data_3 = {"client_id": "client-3"}
        set_preview_method(extra_data_3.get("preview_method"))
        assert args.preview_method == default_preview_method

        # Queue 4: User explicitly wants default
        extra_data_4 = {"client_id": "client-4", "preview_method": "default"}
        set_preview_method(extra_data_4.get("preview_method"))
        assert args.preview_method == default_preview_method

        # Queue 5: User wants latent2rgb
        extra_data_5 = {"client_id": "client-5", "preview_method": "latent2rgb"}
        set_preview_method(extra_data_5.get("preview_method"))
        assert args.preview_method == LatentPreviewMethod.Latent2RGB
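The behaviors these unit tests pin down imply a small implementation. A minimal sketch of what latent_preview.set_preview_method is assumed to look like, derived only from the assertions above (the real code may differ):

from comfy.cli_args import args, LatentPreviewMethod

default_preview_method = args.preview_method  # assumed captured once at import time

def set_preview_method(value=None):
    # Unknown strings, "", None, and "default" all fall back to the default;
    # valid method names override args.preview_method for the next execution.
    method = LatentPreviewMethod.from_string(value) if value else None
    args.preview_method = method if method is not None else default_preview_method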