Merge branch 'master' into master

This commit is contained in:
iChristGit 2026-02-25 03:36:39 +02:00 committed by GitHub
commit caf67e1f03
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 395 additions and 37 deletions

View File

@ -46,6 +46,8 @@ class NodeReplaceManager:
connections: dict[str, list[tuple[str, str, int]]] = {} connections: dict[str, list[tuple[str, str, int]]] = {}
need_replacement: set[str] = set() need_replacement: set[str] = set()
for node_number, node_struct in prompt.items(): for node_number, node_struct in prompt.items():
if "class_type" not in node_struct or "inputs" not in node_struct:
continue
class_type = node_struct["class_type"] class_type = node_struct["class_type"]
# need replacement if not in NODE_CLASS_MAPPINGS and has replacement # need replacement if not in NODE_CLASS_MAPPINGS and has replacement
if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type): if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type):

View File

@ -157,11 +157,9 @@ class Embeddings1DConnector(nn.Module):
self.num_learnable_registers = num_learnable_registers self.num_learnable_registers = num_learnable_registers
if self.num_learnable_registers: if self.num_learnable_registers:
self.learnable_registers = nn.Parameter( self.learnable_registers = nn.Parameter(
torch.rand( torch.empty(
self.num_learnable_registers, inner_dim, dtype=dtype, device=device self.num_learnable_registers, inner_dim, dtype=dtype, device=device
) )
* 2.0
- 1.0
) )
def get_fractional_positions(self, indices_grid): def get_fractional_positions(self, indices_grid):

View File

@ -271,6 +271,7 @@ class ModelPatcher:
self.is_clip = False self.is_clip = False
self.hook_mode = comfy.hooks.EnumHookMode.MaxSpeed self.hook_mode = comfy.hooks.EnumHookMode.MaxSpeed
self.cached_patcher_init: tuple[Callable, tuple] | None = None
if not hasattr(self.model, 'model_loaded_weight_memory'): if not hasattr(self.model, 'model_loaded_weight_memory'):
self.model.model_loaded_weight_memory = 0 self.model.model_loaded_weight_memory = 0
@ -307,8 +308,15 @@ class ModelPatcher:
def get_free_memory(self, device): def get_free_memory(self, device):
return comfy.model_management.get_free_memory(device) return comfy.model_management.get_free_memory(device)
def clone(self): def clone(self, disable_dynamic=False):
n = self.__class__(self.model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update) class_ = self.__class__
model = self.model
if self.is_dynamic() and disable_dynamic:
class_ = ModelPatcher
temp_model_patcher = self.cached_patcher_init[0](*self.cached_patcher_init[1], disable_dynamic=True)
model = temp_model_patcher.model
n = class_(model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
n.patches = {} n.patches = {}
for k in self.patches: for k in self.patches:
n.patches[k] = self.patches[k][:] n.patches[k] = self.patches[k][:]
@ -362,6 +370,8 @@ class ModelPatcher:
n.is_clip = self.is_clip n.is_clip = self.is_clip
n.hook_mode = self.hook_mode n.hook_mode = self.hook_mode
n.cached_patcher_init = self.cached_patcher_init
for callback in self.get_all_callbacks(CallbacksMP.ON_CLONE): for callback in self.get_all_callbacks(CallbacksMP.ON_CLONE):
callback(self, n) callback(self, n)
return n return n

View File

@ -827,6 +827,10 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
else: else:
sd = {} sd = {}
if not hasattr(self, 'weight'):
logging.warning("Warning: state dict on uninitialized op {}".format(prefix))
return sd
if self.bias is not None: if self.bias is not None:
sd["{}bias".format(prefix)] = self.bias sd["{}bias".format(prefix)] = self.bias

View File

@ -1530,14 +1530,24 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
return (model, clip, vae) return (model, clip, vae)
def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}): def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, disable_dynamic=False):
sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True) sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True)
out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata) out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata, disable_dynamic=disable_dynamic)
if out is None: if out is None:
raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd))) raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd)))
if output_model:
out[0].cached_patcher_init = (load_checkpoint_guess_config_model_only, (ckpt_path, embedding_directory, model_options, te_model_options))
return out return out
def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None): def load_checkpoint_guess_config_model_only(ckpt_path, embedding_directory=None, model_options={}, te_model_options={}, disable_dynamic=False):
model, *_ = load_checkpoint_guess_config(ckpt_path, False, False, False,
embedding_directory=embedding_directory,
model_options=model_options,
te_model_options=te_model_options,
disable_dynamic=disable_dynamic)
return model
def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None, disable_dynamic=False):
clip = None clip = None
clipvision = None clipvision = None
vae = None vae = None
@ -1586,7 +1596,8 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
if output_model: if output_model:
inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype) inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype)
model = model_config.get_model(sd, diffusion_model_prefix, device=inital_load_device) model = model_config.get_model(sd, diffusion_model_prefix, device=inital_load_device)
model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device()) ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher
model_patcher = ModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device())
model.load_model_weights(sd, diffusion_model_prefix, assign=model_patcher.is_dynamic()) model.load_model_weights(sd, diffusion_model_prefix, assign=model_patcher.is_dynamic())
if output_vae: if output_vae:
@ -1637,7 +1648,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
return (model_patcher, clip, vae, clipvision) return (model_patcher, clip, vae, clipvision)
def load_diffusion_model_state_dict(sd, model_options={}, metadata=None): def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable_dynamic=False):
""" """
Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats. Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats.
@ -1721,7 +1732,8 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
model_config.optimizations["fp8"] = True model_config.optimizations["fp8"] = True
model = model_config.get_model(new_sd, "") model = model_config.get_model(new_sd, "")
model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=offload_device) ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher
model_patcher = ModelPatcher(model, load_device=load_device, offload_device=offload_device)
if not model_management.is_device_cpu(offload_device): if not model_management.is_device_cpu(offload_device):
model.to(offload_device) model.to(offload_device)
model.load_model_weights(new_sd, "", assign=model_patcher.is_dynamic()) model.load_model_weights(new_sd, "", assign=model_patcher.is_dynamic())
@ -1730,12 +1742,13 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
logging.info("left over keys in diffusion model: {}".format(left_over)) logging.info("left over keys in diffusion model: {}".format(left_over))
return model_patcher return model_patcher
def load_diffusion_model(unet_path, model_options={}): def load_diffusion_model(unet_path, model_options={}, disable_dynamic=False):
sd, metadata = comfy.utils.load_torch_file(unet_path, return_metadata=True) sd, metadata = comfy.utils.load_torch_file(unet_path, return_metadata=True)
model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata) model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata, disable_dynamic=disable_dynamic)
if model is None: if model is None:
logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path)) logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd))) raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
model.cached_patcher_init = (load_diffusion_model, (unet_path, model_options))
return model return model
def load_unet(unet_path, dtype=None): def load_unet(unet_path, dtype=None):

View File

@ -101,6 +101,7 @@ class LTXAVTEModel(torch.nn.Module):
super().__init__() super().__init__()
self.dtypes = set() self.dtypes = set()
self.dtypes.add(dtype) self.dtypes.add(dtype)
self.compat_mode = False
self.gemma3_12b = Gemma3_12BModel(device=device, dtype=dtype_llama, model_options=model_options, layer="all", layer_idx=None) self.gemma3_12b = Gemma3_12BModel(device=device, dtype=dtype_llama, model_options=model_options, layer="all", layer_idx=None)
self.dtypes.add(dtype_llama) self.dtypes.add(dtype_llama)
@ -108,6 +109,28 @@ class LTXAVTEModel(torch.nn.Module):
operations = self.gemma3_12b.operations # TODO operations = self.gemma3_12b.operations # TODO
self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device) self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device)
def enable_compat_mode(self):  # TODO: remove
    """Attach audio/video embeddings connectors and switch on compat mode.

    Both connectors are built with the same options and take their dtype
    and device from the weight of ``text_embedding_projection``. Setting
    ``compat_mode`` makes the encode path route its output through them.
    """
    # Imported here rather than at module level, as in the original.
    from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector

    ops = self.gemma3_12b.operations
    projection_weight = self.text_embedding_projection.weight
    connector_kwargs = dict(
        split_rope=True,
        double_precision_rope=True,
        dtype=projection_weight.dtype,
        device=projection_weight.device,
        operations=ops,
    )

    # Audio is assigned before video, matching the original attribute order.
    self.audio_embeddings_connector = Embeddings1DConnector(**connector_kwargs)
    self.video_embeddings_connector = Embeddings1DConnector(**connector_kwargs)
    self.compat_mode = True
def set_clip_options(self, options): def set_clip_options(self, options):
self.execution_device = options.get("execution_device", self.execution_device) self.execution_device = options.get("execution_device", self.execution_device)
self.gemma3_12b.set_clip_options(options) self.gemma3_12b.set_clip_options(options)
@ -129,6 +152,12 @@ class LTXAVTEModel(torch.nn.Module):
out = out.reshape((out.shape[0], out.shape[1], -1)) out = out.reshape((out.shape[0], out.shape[1], -1))
out = self.text_embedding_projection(out) out = self.text_embedding_projection(out)
out = out.float() out = out.float()
if self.compat_mode:
out_vid = self.video_embeddings_connector(out)[0]
out_audio = self.audio_embeddings_connector(out)[0]
out = torch.concat((out_vid, out_audio), dim=-1)
return out.to(out_device), pooled return out.to(out_device), pooled
def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed): def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
@ -152,6 +181,16 @@ class LTXAVTEModel(torch.nn.Module):
missing_all.extend([f"{prefix}{k}" for k in missing]) missing_all.extend([f"{prefix}{k}" for k in missing])
unexpected_all.extend([f"{prefix}{k}" for k in unexpected]) unexpected_all.extend([f"{prefix}{k}" for k in unexpected])
if "model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.2.attn1.to_q.bias" not in sd: # TODO: remove
ww = sd.get("model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.0.attn1.to_q.bias", None)
if ww is not None:
if ww.shape[0] == 3840:
self.enable_compat_mode()
sdv = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.video_embeddings_connector.": ""}, filter_keys=True)
self.video_embeddings_connector.load_state_dict(sdv, strict=False, assign=getattr(self, "can_assign_sd", False))
sda = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.audio_embeddings_connector.": ""}, filter_keys=True)
self.audio_embeddings_connector.load_state_dict(sda, strict=False, assign=getattr(self, "can_assign_sd", False))
return (missing_all, unexpected_all) return (missing_all, unexpected_all)
def memory_estimation_function(self, token_weight_pairs, device=None): def memory_estimation_function(self, token_weight_pairs, device=None):

View File

@ -27,6 +27,7 @@ class Seedream4TaskCreationRequest(BaseModel):
sequential_image_generation: str = Field("disabled") sequential_image_generation: str = Field("disabled")
sequential_image_generation_options: Seedream4Options = Field(Seedream4Options(max_images=15)) sequential_image_generation_options: Seedream4Options = Field(Seedream4Options(max_images=15))
watermark: bool = Field(False) watermark: bool = Field(False)
output_format: str | None = None
class ImageTaskCreationResponse(BaseModel): class ImageTaskCreationResponse(BaseModel):
@ -106,6 +107,7 @@ RECOMMENDED_PRESETS_SEEDREAM_4 = [
("2496x1664 (3:2)", 2496, 1664), ("2496x1664 (3:2)", 2496, 1664),
("1664x2496 (2:3)", 1664, 2496), ("1664x2496 (2:3)", 1664, 2496),
("3024x1296 (21:9)", 3024, 1296), ("3024x1296 (21:9)", 3024, 1296),
("3072x3072 (1:1)", 3072, 3072),
("4096x4096 (1:1)", 4096, 4096), ("4096x4096 (1:1)", 4096, 4096),
("Custom", None, None), ("Custom", None, None),
] ]

View File

@ -134,6 +134,13 @@ class ImageToVideoWithAudioRequest(BaseModel):
shot_type: str | None = Field(None) shot_type: str | None = Field(None)
class KlingAvatarRequest(BaseModel):
    """Request body for creating a Kling avatar video task."""

    # Required string reference to the avatar image.
    image: str
    # Required string reference to the driving audio.
    sound_file: str
    # Optional free-text prompt; None when no prompt is supplied.
    prompt: str | None = None
    # Required generation mode string.
    mode: str
class MotionControlRequest(BaseModel): class MotionControlRequest(BaseModel):
prompt: str = Field(...) prompt: str = Field(...)
image_url: str = Field(...) image_url: str = Field(...)

View File

@ -37,6 +37,12 @@ from comfy_api_nodes.util import (
BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations" BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations"
# Maps the label offered in the Seedream node's "model" combo (the keys) to the
# model ID that `execute` works with after the lookup (the values).
# The two 4.x entries map to themselves, so the label and the ID are the same string.
SEEDREAM_MODELS = {
    "seedream 5.0 lite": "seedream-5-0-260128",
    "seedream-4-5-251128": "seedream-4-5-251128",
    "seedream-4-0-250828": "seedream-4-0-250828",
}
# Long-running tasks endpoints(e.g., video) # Long-running tasks endpoints(e.g., video)
BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"
BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" # + /{task_id} BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" # + /{task_id}
@ -180,14 +186,13 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
def define_schema(cls): def define_schema(cls):
return IO.Schema( return IO.Schema(
node_id="ByteDanceSeedreamNode", node_id="ByteDanceSeedreamNode",
display_name="ByteDance Seedream 4.5", display_name="ByteDance Seedream 5.0",
category="api node/image/ByteDance", category="api node/image/ByteDance",
description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.", description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
inputs=[ inputs=[
IO.Combo.Input( IO.Combo.Input(
"model", "model",
options=["seedream-4-5-251128", "seedream-4-0-250828"], options=list(SEEDREAM_MODELS.keys()),
tooltip="Model name",
), ),
IO.String.Input( IO.String.Input(
"prompt", "prompt",
@ -198,7 +203,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
IO.Image.Input( IO.Image.Input(
"image", "image",
tooltip="Input image(s) for image-to-image generation. " tooltip="Input image(s) for image-to-image generation. "
"List of 1-10 images for single or multi-reference generation.", "Reference image(s) for single or multi-reference generation.",
optional=True, optional=True,
), ),
IO.Combo.Input( IO.Combo.Input(
@ -210,8 +215,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
"width", "width",
default=2048, default=2048,
min=1024, min=1024,
max=4096, max=6240,
step=8, step=2,
tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`", tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
optional=True, optional=True,
), ),
@ -219,8 +224,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
"height", "height",
default=2048, default=2048,
min=1024, min=1024,
max=4096, max=4992,
step=8, step=2,
tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`", tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
optional=True, optional=True,
), ),
@ -283,7 +288,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
depends_on=IO.PriceBadgeDepends(widgets=["model"]), depends_on=IO.PriceBadgeDepends(widgets=["model"]),
expr=""" expr="""
( (
$price := $contains(widgets.model, "seedream-4-5-251128") ? 0.04 : 0.03; $price := $contains(widgets.model, "5.0 lite") ? 0.035 :
$contains(widgets.model, "4-5") ? 0.04 : 0.03;
{ {
"type":"usd", "type":"usd",
"usd": $price, "usd": $price,
@ -309,6 +315,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
watermark: bool = False, watermark: bool = False,
fail_on_partial: bool = True, fail_on_partial: bool = True,
) -> IO.NodeOutput: ) -> IO.NodeOutput:
model = SEEDREAM_MODELS[model]
validate_string(prompt, strip_whitespace=True, min_length=1) validate_string(prompt, strip_whitespace=True, min_length=1)
w = h = None w = h = None
for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4: for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4:
@ -318,15 +325,12 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
if w is None or h is None: if w is None or h is None:
w, h = width, height w, h = width, height
if not (1024 <= w <= 4096) or not (1024 <= h <= 4096):
raise ValueError(
f"Custom size out of range: {w}x{h}. " "Both width and height must be between 1024 and 4096 pixels."
)
out_num_pixels = w * h out_num_pixels = w * h
mp_provided = out_num_pixels / 1_000_000.0 mp_provided = out_num_pixels / 1_000_000.0
if "seedream-4-5" in model and out_num_pixels < 3686400: if ("seedream-4-5" in model or "seedream-5-0" in model) and out_num_pixels < 3686400:
raise ValueError( raise ValueError(
f"Minimum image resolution that Seedream 4.5 can generate is 3.68MP, " f"Minimum image resolution for the selected model is 3.68MP, "
f"but {mp_provided:.2f}MP provided." f"but {mp_provided:.2f}MP provided."
) )
if "seedream-4-0" in model and out_num_pixels < 921600: if "seedream-4-0" in model and out_num_pixels < 921600:
@ -334,9 +338,18 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
f"Minimum image resolution that the selected model can generate is 0.92MP, " f"Minimum image resolution that the selected model can generate is 0.92MP, "
f"but {mp_provided:.2f}MP provided." f"but {mp_provided:.2f}MP provided."
) )
max_pixels = 10_404_496 if "seedream-5-0" in model else 16_777_216
if out_num_pixels > max_pixels:
raise ValueError(
f"Maximum image resolution for the selected model is {max_pixels / 1_000_000:.2f}MP, "
f"but {mp_provided:.2f}MP provided."
)
n_input_images = get_number_of_images(image) if image is not None else 0 n_input_images = get_number_of_images(image) if image is not None else 0
if n_input_images > 10: max_num_of_images = 14 if model == "seedream-5-0-260128" else 10
raise ValueError(f"Maximum of 10 reference images are supported, but {n_input_images} received.") if n_input_images > max_num_of_images:
raise ValueError(
f"Maximum of {max_num_of_images} reference images are supported, but {n_input_images} received."
)
if sequential_image_generation == "auto" and n_input_images + max_images > 15: if sequential_image_generation == "auto" and n_input_images + max_images > 15:
raise ValueError( raise ValueError(
"The maximum number of generated images plus the number of reference images cannot exceed 15." "The maximum number of generated images plus the number of reference images cannot exceed 15."
@ -364,6 +377,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
sequential_image_generation=sequential_image_generation, sequential_image_generation=sequential_image_generation,
sequential_image_generation_options=Seedream4Options(max_images=max_images), sequential_image_generation_options=Seedream4Options(max_images=max_images),
watermark=watermark, watermark=watermark,
output_format="png" if model == "seedream-5-0-260128" else None,
), ),
) )
if len(response.data) == 1: if len(response.data) == 1:

View File

@ -50,6 +50,7 @@ from comfy_api_nodes.apis import (
) )
from comfy_api_nodes.apis.kling import ( from comfy_api_nodes.apis.kling import (
ImageToVideoWithAudioRequest, ImageToVideoWithAudioRequest,
KlingAvatarRequest,
MotionControlRequest, MotionControlRequest,
MultiPromptEntry, MultiPromptEntry,
OmniImageParamImage, OmniImageParamImage,
@ -74,6 +75,7 @@ from comfy_api_nodes.util import (
upload_image_to_comfyapi, upload_image_to_comfyapi,
upload_images_to_comfyapi, upload_images_to_comfyapi,
upload_video_to_comfyapi, upload_video_to_comfyapi,
validate_audio_duration,
validate_image_aspect_ratio, validate_image_aspect_ratio,
validate_image_dimensions, validate_image_dimensions,
validate_string, validate_string,
@ -3139,6 +3141,103 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
class KlingAvatarNode(IO.ComfyNode):
    """API node that builds a Kling avatar video from one image and one audio clip."""

    @classmethod
    def define_schema(cls) -> IO.Schema:
        """Describe the node: inputs, video output, hidden auth fields and price badge."""
        image_input = IO.Image.Input(
            "image",
            tooltip="Avatar reference image. "
            "Width and height must be at least 300px. Aspect ratio must be between 1:2.5 and 2.5:1.",
        )
        audio_input = IO.Audio.Input(
            "sound_file",
            tooltip="Audio input. Must be between 2 and 300 seconds in duration.",
        )
        mode_input = IO.Combo.Input("mode", options=["std", "pro"])
        prompt_input = IO.String.Input(
            "prompt",
            multiline=True,
            default="",
            optional=True,
            tooltip="Optional prompt to define avatar actions, emotions, and camera movements.",
        )
        seed_input = IO.Int.Input(
            "seed",
            default=0,
            min=0,
            max=2147483647,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
            tooltip="Seed controls whether the node should re-run; "
            "results are non-deterministic regardless of seed.",
        )
        # Price is looked up by the selected mode; the expression text is kept verbatim.
        price_badge = IO.PriceBadge(
            depends_on=IO.PriceBadgeDepends(widgets=["mode"]),
            expr="""
(
$prices := {"std": 0.056, "pro": 0.112};
{"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}}
)
""",
        )
        return IO.Schema(
            node_id="KlingAvatarNode",
            display_name="Kling Avatar 2.0",
            category="api node/video/Kling",
            description="Generate broadcast-style digital human videos from a single photo and an audio file.",
            inputs=[image_input, audio_input, mode_input, prompt_input, seed_input],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            price_badge=price_badge,
        )

    @classmethod
    async def execute(
        cls,
        image: Input.Image,
        sound_file: Input.Audio,
        mode: str,
        seed: int,
        prompt: str = "",
    ) -> IO.NodeOutput:
        """Validate the inputs, submit the avatar task, poll it and return the video.

        ``seed`` is accepted but never read in this method.

        Raises:
            RuntimeError: if the creation response carries a truthy ``code``.
        """
        # Local checks run first, so invalid inputs fail before any upload.
        validate_image_dimensions(image, min_width=300, min_height=300)
        validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1))
        validate_audio_duration(sound_file, min_duration=2, max_duration=300)

        # Image is uploaded before audio, same order as the original call.
        uploaded_image = await upload_image_to_comfyapi(cls, image)
        uploaded_audio = await upload_audio_to_comfyapi(
            cls, sound_file, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg"
        )
        request = KlingAvatarRequest(
            image=uploaded_image,
            sound_file=uploaded_audio,
            prompt=prompt or None,  # an empty prompt is sent as None
            mode=mode,
        )

        response = await sync_op(
            cls,
            ApiEndpoint(path="/proxy/kling/v1/videos/avatar/image2video", method="POST"),
            response_model=TaskStatusResponse,
            data=request,
        )
        if response.code:
            raise RuntimeError(
                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
            )

        def task_status(r):
            # The status response may have no data yet; report None in that case.
            return r.data.task_status if r.data else None

        final_response = await poll_op(
            cls,
            ApiEndpoint(path=f"/proxy/kling/v1/videos/avatar/image2video/{response.data.task_id}"),
            response_model=TaskStatusResponse,
            status_extractor=task_status,
            max_poll_attempts=800,
        )
        video_url = final_response.data.task_result.videos[0].url
        return IO.NodeOutput(await download_url_to_video_output(video_url))
class KlingExtension(ComfyExtension): class KlingExtension(ComfyExtension):
@override @override
async def get_node_list(self) -> list[type[IO.ComfyNode]]: async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -3167,6 +3266,7 @@ class KlingExtension(ComfyExtension):
MotionControl, MotionControl,
KlingVideoNode, KlingVideoNode,
KlingFirstLastFrameNode, KlingFirstLastFrameNode,
KlingAvatarNode,
] ]

View File

@ -6,6 +6,7 @@ import folder_paths
import json import json
import os import os
import re import re
import math
import torch import torch
import comfy.utils import comfy.utils
@ -682,6 +683,172 @@ class ImageScaleToMaxDimension(IO.ComfyNode):
upscale = execute # TODO: remove upscale = execute # TODO: remove
def _tile_grid_coords(width, height, tile_width, tile_height, overlap):
    """Return the box of every tile as (x_start, y_start, x_end, y_end), row by row.

    Shared by the split and merge nodes so both always walk the same grid.
    Along each axis tiles advance by ``tile - overlap`` (at least 1). The
    last tile on an axis is pulled back so it ends on the image edge,
    clamped at 0 when the image is smaller than the tile.
    """

    def axis_spans(extent, tile, stride):
        # (start, end) pairs along one axis, stopping once the edge is reached.
        spans = []
        pos = 0
        while pos < extent:
            end = min(pos + tile, extent)
            spans.append((max(0, end - tile), end))
            if end >= extent:
                break
            pos += stride
        return spans

    # max(1, ...) keeps the walk moving forward when overlap >= tile size.
    x_spans = axis_spans(width, tile_width, max(1, tile_width - overlap))
    y_spans = axis_spans(height, tile_height, max(1, tile_height - overlap))
    # Rows outermost, columns innermost.
    return [(x0, y0, x1, y1) for (y0, y1) in y_spans for (x0, x1) in x_spans]


class SplitImageToTileList(IO.ComfyNode):
    """Node that cuts an image into a list of overlapping tiles."""

    @classmethod
    def define_schema(cls):
        """Declare the image input, tile size / overlap inputs and the list output."""
        return IO.Schema(
            node_id="SplitImageToTileList",
            category="image/batch",
            search_aliases=["split image", "tile image", "slice image"],
            display_name="Split Image into List of Tiles",
            description="Splits an image into a batched list of tiles with a specified overlap.",
            inputs=[
                IO.Image.Input("image"),
                IO.Int.Input("tile_width", default=1024, min=64, max=MAX_RESOLUTION),
                IO.Int.Input("tile_height", default=1024, min=64, max=MAX_RESOLUTION),
                IO.Int.Input("overlap", default=128, min=0, max=4096),
            ],
            outputs=[
                IO.Image.Output(is_output_list=True),
            ],
        )

    @staticmethod
    def get_grid_coords(width, height, tile_width, tile_height, overlap):
        """Return tile boxes (x_start, y_start, x_end, y_end) covering width x height."""
        return _tile_grid_coords(width, height, tile_width, tile_height, overlap)

    @classmethod
    def execute(cls, image, tile_width, tile_height, overlap):
        """Slice ``image`` into tiles along axes 1 (height) and 2 (width).

        The shape is unpacked into four values, so ``image`` must be 4-D.
        Returns a node output wrapping the list of tile slices.
        """
        _, height, width, _ = image.shape
        boxes = cls.get_grid_coords(width, height, tile_width, tile_height, overlap)
        tiles = [image[:, y0:y1, x0:x1, :] for (x0, y0, x1, y1) in boxes]
        return IO.NodeOutput(tiles)


class ImageMergeTileList(IO.ComfyNode):
    """Node that blends a list of tiles back into one image of a given size."""

    @classmethod
    def define_schema(cls):
        """Declare the tile-list input, target size / overlap inputs and the image output."""
        return IO.Schema(
            node_id="ImageMergeTileList",
            display_name="Merge List of Tiles to Image",
            category="image/batch",
            search_aliases=["split image", "tile image", "slice image"],
            is_input_list=True,
            inputs=[
                IO.Image.Input("image_list"),
                IO.Int.Input("final_width", default=1024, min=64, max=32768),
                IO.Int.Input("final_height", default=1024, min=64, max=32768),
                IO.Int.Input("overlap", default=128, min=0, max=4096),
            ],
            outputs=[
                IO.Image.Output(is_output_list=False),
            ],
        )

    @staticmethod
    def get_grid_coords(width, height, tile_width, tile_height, overlap):
        """Return tile boxes (x_start, y_start, x_end, y_end) covering width x height."""
        return _tile_grid_coords(width, height, tile_width, tile_height, overlap)

    @classmethod
    def execute(cls, image_list, final_width, final_height, overlap):
        """Paste tiles onto a canvas and normalise by the accumulated weights.

        The schema sets ``is_input_list=True``, so every argument is indexed
        as a list and the scalar settings are read from element 0. Tile size,
        device and dtype are taken from the first tile. Tiles are matched to
        grid boxes in order; surplus tiles or boxes are ignored.

        Raises:
            ValueError: if ``image_list`` is empty.
        """
        if len(image_list) == 0:
            raise ValueError("ImageMergeTileList requires at least one tile, but the tile list is empty.")

        w = final_width[0]
        h = final_height[0]
        ovlp = overlap[0]
        feather_str = 1.0

        first_tile = image_list[0]
        b, t_h, t_w, c = first_tile.shape
        device = first_tile.device
        dtype = first_tile.dtype

        coords = cls.get_grid_coords(w, h, t_w, t_h, ovlp)

        canvas = torch.zeros((b, h, w, c), device=device, dtype=dtype)
        weights = torch.zeros((b, h, w, 1), device=device, dtype=dtype)

        if ovlp > 0:
            # Half-sine window per axis: highest at the tile centre, lowest at its edges.
            y_w = torch.sin(math.pi * torch.linspace(0, 1, t_h, device=device, dtype=dtype))
            x_w = torch.sin(math.pi * torch.linspace(0, 1, t_w, device=device, dtype=dtype))
            # Clamp so no tile pixel gets an exactly-zero weight.
            y_w = torch.clamp(y_w, min=1e-5)
            x_w = torch.clamp(x_w, min=1e-5)

            sine_mask = (y_w.unsqueeze(1) * x_w.unsqueeze(0)).unsqueeze(0).unsqueeze(-1)
            flat_mask = torch.ones_like(sine_mask)
            weight_mask = torch.lerp(flat_mask, sine_mask, feather_str)
        else:
            weight_mask = torch.ones((1, t_h, t_w, 1), device=device, dtype=dtype)

        # zip stops at the shorter of the two sequences.
        for tile, (x_start, y_start, x_end, y_end) in zip(image_list, coords):
            # A tile may be smaller than its box; only paste what it provides.
            real_h = min(y_end - y_start, tile.shape[1])
            real_w = min(x_end - x_start, tile.shape[2])
            y_stop = y_start + real_h
            x_stop = x_start + real_w

            mask_crop = weight_mask[:, :real_h, :real_w, :]
            canvas[:, y_start:y_stop, x_start:x_stop, :] += tile[:, :real_h, :real_w, :] * mask_crop
            weights[:, y_start:y_stop, x_start:x_stop, :] += mask_crop

        # Pixels no tile touched keep weight 0; use 1 there to avoid dividing by zero.
        weights[weights == 0] = 1.0
        merged_image = canvas / weights

        return IO.NodeOutput(merged_image)
class ImagesExtension(ComfyExtension): class ImagesExtension(ComfyExtension):
@override @override
async def get_node_list(self) -> list[type[IO.ComfyNode]]: async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@ -701,6 +868,8 @@ class ImagesExtension(ComfyExtension):
ImageRotate, ImageRotate,
ImageFlip, ImageFlip,
ImageScaleToMaxDimension, ImageScaleToMaxDimension,
SplitImageToTileList,
ImageMergeTileList,
] ]

View File

@ -25,7 +25,7 @@ class TorchCompileModel(io.ComfyNode):
@classmethod @classmethod
def execute(cls, model, backend) -> io.NodeOutput: def execute(cls, model, backend) -> io.NodeOutput:
m = model.clone() m = model.clone(disable_dynamic=True)
set_torch_compile_wrapper(model=m, backend=backend, options={"guard_filter_fn": skip_torch_compile_dict}) set_torch_compile_wrapper(model=m, backend=backend, options={"guard_filter_fn": skip_torch_compile_dict})
return io.NodeOutput(m) return io.NodeOutput(m)

View File

@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is # This file is automatically generated by the build process when version is
# updated in pyproject.toml. # updated in pyproject.toml.
__version__ = "0.14.1" __version__ = "0.15.0"

View File

@ -1,6 +1,6 @@
[project] [project]
name = "ComfyUI" name = "ComfyUI"
version = "0.14.1" version = "0.15.0"
readme = "README.md" readme = "README.md"
license = { file = "LICENSE" } license = { file = "LICENSE" }
requires-python = ">=3.10" requires-python = ">=3.10"

View File

@ -1,6 +1,6 @@
comfyui-frontend-package==1.39.14 comfyui-frontend-package==1.39.16
comfyui-workflow-templates==0.8.43 comfyui-workflow-templates==0.9.3
comfyui-embedded-docs==0.4.1 comfyui-embedded-docs==0.4.3
torch torch
torchsde torchsde
torchvision torchvision
@ -22,7 +22,7 @@ alembic
SQLAlchemy SQLAlchemy
av>=14.2.0 av>=14.2.0
comfy-kitchen>=0.2.7 comfy-kitchen>=0.2.7
comfy-aimdo>=0.2.0 comfy-aimdo>=0.2.1
requests requests
#non essential dependencies: #non essential dependencies: