Merge branch 'comfyanonymous:master' into master

commit e5ba0a4f58
Author: patientx (committed via GitHub)
Date: 2025-12-17 12:48:51 +03:00
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)

8 changed files with 125 additions and 72 deletions

View File

@@ -1618,6 +1618,17 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None
             x = x + sde_noise * sigmas[i + 1] * s_noise
     return x
 
+@torch.no_grad()
+def sample_exp_heun_2_x0(model, x, sigmas, extra_args=None, callback=None, disable=None, solver_type="phi_2"):
+    """Deterministic exponential Heun second order method in data prediction (x0) and logSNR time."""
+    return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None, r=1.0, solver_type=solver_type)
+
+@torch.no_grad()
+def sample_exp_heun_2_x0_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type="phi_2"):
+    """Stochastic exponential Heun second order method in data prediction (x0) and logSNR time."""
+    return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=1.0, solver_type=solver_type)
+
 @torch.no_grad()
 def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3):
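The two new entry points are thin wrappers that pin sample_seeds_2 to fixed parameters. A minimal sketch of calling them, assuming ComfyUI's comfy.k_diffusion.sampling module layout (the denoiser/latents/sigmas names are placeholders, not part of this commit):

from comfy.k_diffusion import sampling as k_sampling

def run_exp_heun_2_x0(denoiser, latents, sigmas):
    # Deterministic variant: same as sample_seeds_2 with eta=0.0, s_noise=0.0, r=1.0, solver_type="phi_2".
    return k_sampling.sample_exp_heun_2_x0(denoiser, latents, sigmas)

def run_exp_heun_2_x0_sde(denoiser, latents, sigmas):
    # Stochastic variant: keeps eta=1.0 and s_noise=1.0 but still fixes r=1.0 and solver_type="phi_2".
    return k_sampling.sample_exp_heun_2_x0_sde(denoiser, latents, sigmas)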

View File

@@ -363,6 +363,9 @@ class QwenImageTransformer2DModel(nn.Module):
                 for _ in range(num_layers)
         ])
 
+        if self.default_ref_method == "index_timestep_zero":
+            self.register_buffer("__index_timestep_zero__", torch.tensor([]))
+
         if final_layer:
             self.norm_out = LastLayer(self.inner_dim, self.inner_dim, dtype=dtype, device=device, operations=operations)
             self.proj_out = operations.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True, dtype=dtype, device=device)
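The empty buffer is only a marker: registering it puts a "__index_timestep_zero__" key into the checkpoint's state dict, which the model-detection change further down keys off. A standalone sketch of that mechanism (the Marked class is illustrative, not from this commit):

import torch
import torch.nn as nn

class Marked(nn.Module):
    def __init__(self):
        super().__init__()
        # A zero-element persistent buffer still shows up as a state_dict entry.
        self.register_buffer("__index_timestep_zero__", torch.tensor([]))

print("__index_timestep_zero__" in Marked().state_dict())  # True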

View File

@@ -259,7 +259,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["nerf_tile_size"] = 512
         dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
         dit_config["nerf_embedder_dtype"] = torch.float32
-        if "__x0__" in state_dict_keys: # x0 pred
+        if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
             dit_config["use_x0"] = True
         else:
             dit_config["use_x0"] = False
@@ -618,6 +618,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["image_model"] = "qwen_image"
         dit_config["in_channels"] = state_dict['{}img_in.weight'.format(key_prefix)].shape[1]
         dit_config["num_layers"] = count_blocks(state_dict_keys, '{}transformer_blocks.'.format(key_prefix) + '{}.')
+        if "{}__index_timestep_zero__".format(key_prefix) in state_dict_keys: # 2511
+            dit_config["default_ref_method"] = "index_timestep_zero"
         return dit_config
 
     if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5
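The "__x0__" fix matters because checkpoint keys carry the model prefix, so a bare membership test never matches. A small illustration with made-up keys (the prefix and key names are examples, not from this commit):

state_dict_keys = ["model.diffusion_model.__x0__", "model.diffusion_model.img_in.weight"]
key_prefix = "model.diffusion_model."

old_check = "__x0__" in state_dict_keys                       # False: never matches prefixed keys
new_check = "{}__x0__".format(key_prefix) in state_dict_keys  # True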

View File

@@ -720,7 +720,7 @@ class Sampler:
         sigma = float(sigmas[0])
         return math.isclose(max_sigma, sigma, rel_tol=1e-05) or sigma > max_sigma
 
-KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2","dpm_2", "dpm_2_ancestral",
+KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2", "exp_heun_2_x0", "exp_heun_2_x0_sde", "dpm_2", "dpm_2_ancestral",
                   "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu",
                   "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_2m_sde_heun", "dpmpp_2m_sde_heun_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm",
                   "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp",

View File

@@ -1,7 +1,5 @@
 import re
-from typing import Optional
 
-import torch
 from pydantic import BaseModel, Field
 from typing_extensions import override
@@ -21,26 +19,26 @@ from comfy_api_nodes.util import (
 class Text2ImageInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: Optional[str] = Field(None)
+    negative_prompt: str | None = Field(None)
 
 
 class Image2ImageInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: Optional[str] = Field(None)
+    negative_prompt: str | None = Field(None)
     images: list[str] = Field(..., min_length=1, max_length=2)
 
 
 class Text2VideoInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: Optional[str] = Field(None)
-    audio_url: Optional[str] = Field(None)
+    negative_prompt: str | None = Field(None)
+    audio_url: str | None = Field(None)
 
 
 class Image2VideoInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: Optional[str] = Field(None)
+    negative_prompt: str | None = Field(None)
     img_url: str = Field(...)
-    audio_url: Optional[str] = Field(None)
+    audio_url: str | None = Field(None)
 
 
 class Txt2ImageParametersField(BaseModel):
@@ -52,7 +50,7 @@ class Txt2ImageParametersField(BaseModel):
 class Image2ImageParametersField(BaseModel):
-    size: Optional[str] = Field(None)
+    size: str | None = Field(None)
     n: int = Field(1, description="Number of images to generate.") # we support only value=1
     seed: int = Field(..., ge=0, le=2147483647)
     watermark: bool = Field(True)
@@ -61,19 +59,21 @@ class Image2ImageParametersField(BaseModel):
 class Text2VideoParametersField(BaseModel):
     size: str = Field(...)
     seed: int = Field(..., ge=0, le=2147483647)
-    duration: int = Field(5, ge=5, le=10)
+    duration: int = Field(5, ge=5, le=15)
     prompt_extend: bool = Field(True)
     watermark: bool = Field(True)
-    audio: bool = Field(False, description="Should be audio generated automatically")
+    audio: bool = Field(False, description="Whether to generate audio automatically.")
+    shot_type: str = Field("single")
 
 
 class Image2VideoParametersField(BaseModel):
     resolution: str = Field(...)
     seed: int = Field(..., ge=0, le=2147483647)
-    duration: int = Field(5, ge=5, le=10)
+    duration: int = Field(5, ge=5, le=15)
     prompt_extend: bool = Field(True)
     watermark: bool = Field(True)
-    audio: bool = Field(False, description="Should be audio generated automatically")
+    audio: bool = Field(False, description="Whether to generate audio automatically.")
+    shot_type: str = Field("single")
 
 
 class Text2ImageTaskCreationRequest(BaseModel):
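With the widened duration bound and the new shot_type field, a parameters block can now be built like this (values are illustrative; only the field names and limits come from the models above — fields with defaults are omitted):

params = Text2VideoParametersField(
    size="720p: 1:1 (960x960)",
    seed=42,
    duration=15,        # le=15 now; previously capped at 10
    shot_type="multi",  # new field, defaults to "single"
)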
@@ -106,39 +106,39 @@ class TaskCreationOutputField(BaseModel):
 class TaskCreationResponse(BaseModel):
-    output: Optional[TaskCreationOutputField] = Field(None)
+    output: TaskCreationOutputField | None = Field(None)
     request_id: str = Field(...)
-    code: Optional[str] = Field(None, description="The error code of the failed request.")
-    message: Optional[str] = Field(None, description="Details of the failed request.")
+    code: str | None = Field(None, description="Error code for the failed request.")
+    message: str | None = Field(None, description="Details about the failed request.")
 
 
 class TaskResult(BaseModel):
-    url: Optional[str] = Field(None)
-    code: Optional[str] = Field(None)
-    message: Optional[str] = Field(None)
+    url: str | None = Field(None)
+    code: str | None = Field(None)
+    message: str | None = Field(None)
 
 
 class ImageTaskStatusOutputField(TaskCreationOutputField):
     task_id: str = Field(...)
     task_status: str = Field(...)
-    results: Optional[list[TaskResult]] = Field(None)
+    results: list[TaskResult] | None = Field(None)
 
 
 class VideoTaskStatusOutputField(TaskCreationOutputField):
     task_id: str = Field(...)
     task_status: str = Field(...)
-    video_url: Optional[str] = Field(None)
-    code: Optional[str] = Field(None)
-    message: Optional[str] = Field(None)
+    video_url: str | None = Field(None)
+    code: str | None = Field(None)
+    message: str | None = Field(None)
 
 
 class ImageTaskStatusResponse(BaseModel):
-    output: Optional[ImageTaskStatusOutputField] = Field(None)
+    output: ImageTaskStatusOutputField | None = Field(None)
     request_id: str = Field(...)
 
 
 class VideoTaskStatusResponse(BaseModel):
-    output: Optional[VideoTaskStatusOutputField] = Field(None)
+    output: VideoTaskStatusOutputField | None = Field(None)
     request_id: str = Field(...)
@@ -152,7 +152,7 @@ class WanTextToImageApi(IO.ComfyNode):
             node_id="WanTextToImageApi",
             display_name="Wan Text to Image",
             category="api node/image/Wan",
-            description="Generates image based on text prompt.",
+            description="Generates an image based on a text prompt.",
             inputs=[
                 IO.Combo.Input(
                     "model",
@@ -164,13 +164,13 @@ class WanTextToImageApi(IO.ComfyNode):
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
+                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative text prompt to guide what to avoid.",
+                    tooltip="Negative prompt describing what to avoid.",
                     optional=True,
                 ),
                 IO.Int.Input(
@@ -209,7 +209,7 @@ class WanTextToImageApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip='Whether to add an "AI generated" watermark to the result.',
+                    tooltip="Whether to add an AI-generated watermark to the result.",
                     optional=True,
                 ),
             ],
@@ -252,7 +252,7 @@ class WanTextToImageApi(IO.ComfyNode):
             ),
         )
         if not initial_response.output:
-            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
@@ -272,7 +272,7 @@ class WanImageToImageApi(IO.ComfyNode):
             display_name="Wan Image to Image",
             category="api node/image/Wan",
             description="Generates an image from one or two input images and a text prompt. "
-            "The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).",
+            "The output image is currently fixed at 1.6 MP, and its aspect ratio matches the input image(s).",
             inputs=[
                 IO.Combo.Input(
                     "model",
@@ -282,19 +282,19 @@ class WanImageToImageApi(IO.ComfyNode):
                 ),
                 IO.Image.Input(
                     "image",
-                    tooltip="Single-image editing or multi-image fusion, maximum 2 images.",
+                    tooltip="Single-image editing or multi-image fusion. Maximum 2 images.",
                 ),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
+                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative text prompt to guide what to avoid.",
+                    tooltip="Negative prompt describing what to avoid.",
                     optional=True,
                 ),
                 # redo this later as an optional combo of recommended resolutions
@@ -328,7 +328,7 @@ class WanImageToImageApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip='Whether to add an "AI generated" watermark to the result.',
+                    tooltip="Whether to add an AI-generated watermark to the result.",
                     optional=True,
                 ),
             ],
@@ -347,7 +347,7 @@ class WanImageToImageApi(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        image: torch.Tensor,
+        image: Input.Image,
        prompt: str,
         negative_prompt: str = "",
         # width: int = 1024,
@@ -357,7 +357,7 @@ class WanImageToImageApi(IO.ComfyNode):
     ):
         n_images = get_number_of_images(image)
         if n_images not in (1, 2):
-            raise ValueError(f"Expected 1 or 2 input images, got {n_images}.")
+            raise ValueError(f"Expected 1 or 2 input images, but got {n_images}.")
         images = []
         for i in image:
             images.append("data:image/png;base64," + tensor_to_base64_string(i, total_pixels=4096 * 4096))
@@ -376,7 +376,7 @@ class WanImageToImageApi(IO.ComfyNode):
             ),
         )
         if not initial_response.output:
-            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
@@ -395,25 +395,25 @@ class WanTextToVideoApi(IO.ComfyNode):
             node_id="WanTextToVideoApi",
             display_name="Wan Text to Video",
             category="api node/video/Wan",
-            description="Generates video based on text prompt.",
+            description="Generates a video based on a text prompt.",
             inputs=[
                 IO.Combo.Input(
                     "model",
-                    options=["wan2.5-t2v-preview"],
-                    default="wan2.5-t2v-preview",
+                    options=["wan2.5-t2v-preview", "wan2.6-t2v"],
+                    default="wan2.6-t2v",
                     tooltip="Model to use.",
                 ),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
+                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative text prompt to guide what to avoid.",
+                    tooltip="Negative prompt describing what to avoid.",
                     optional=True,
                 ),
                 IO.Combo.Input(
@@ -433,23 +433,23 @@ class WanTextToVideoApi(IO.ComfyNode):
                         "1080p: 4:3 (1632x1248)",
                         "1080p: 3:4 (1248x1632)",
                     ],
-                    default="480p: 1:1 (624x624)",
+                    default="720p: 1:1 (960x960)",
                     optional=True,
                 ),
                 IO.Int.Input(
                     "duration",
                     default=5,
                     min=5,
-                    max=10,
+                    max=15,
                     step=5,
                     display_mode=IO.NumberDisplay.number,
-                    tooltip="Available durations: 5 and 10 seconds",
+                    tooltip="A 15-second duration is available only for the Wan 2.6 model.",
                     optional=True,
                 ),
                 IO.Audio.Input(
                     "audio",
                     optional=True,
-                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
+                    tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
                 ),
                 IO.Int.Input(
                     "seed",
@@ -466,7 +466,7 @@ class WanTextToVideoApi(IO.ComfyNode):
                     "generate_audio",
                     default=False,
                     optional=True,
-                    tooltip="If there is no audio input, generate audio automatically.",
+                    tooltip="If no audio input is provided, generate audio automatically.",
                 ),
                 IO.Boolean.Input(
                     "prompt_extend",
@@ -477,7 +477,15 @@ class WanTextToVideoApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip='Whether to add an "AI generated" watermark to the result.',
+                    tooltip="Whether to add an AI-generated watermark to the result.",
+                    optional=True,
+                ),
+                IO.Combo.Input(
+                    "shot_type",
+                    options=["single", "multi"],
+                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
+                    "single continuous shot or multiple shots with cuts. "
+                    "This parameter takes effect only when prompt_extend is True.",
                     optional=True,
                 ),
             ],
@@ -498,14 +506,19 @@ class WanTextToVideoApi(IO.ComfyNode):
         model: str,
         prompt: str,
         negative_prompt: str = "",
-        size: str = "480p: 1:1 (624x624)",
+        size: str = "720p: 1:1 (960x960)",
         duration: int = 5,
-        audio: Optional[Input.Audio] = None,
+        audio: Input.Audio | None = None,
         seed: int = 0,
         generate_audio: bool = False,
         prompt_extend: bool = True,
         watermark: bool = True,
+        shot_type: str = "single",
     ):
+        if "480p" in size and model == "wan2.6-t2v":
+            raise ValueError("The Wan 2.6 model does not support 480p.")
+        if duration == 15 and model == "wan2.5-t2v-preview":
+            raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")
         width, height = RES_IN_PARENS.search(size).groups()
         audio_url = None
         if audio is not None:
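RES_IN_PARENS itself is not shown in this diff; it presumably captures the WxH pair in parentheses from size strings such as "720p: 1:1 (960x960)". A sketch of how such a pattern would behave (the regex below is an assumption, not the module's actual definition):

import re

RES_IN_PARENS = re.compile(r"\((\d+)[x*](\d+)\)")  # assumed shape of the real pattern

width, height = RES_IN_PARENS.search("720p: 1:1 (960x960)").groups()
print(width, height)  # 960 960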
@@ -526,11 +539,12 @@ class WanTextToVideoApi(IO.ComfyNode):
                     audio=generate_audio,
                     prompt_extend=prompt_extend,
                     watermark=watermark,
+                    shot_type=shot_type,
                 ),
             ),
         )
         if not initial_response.output:
-            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
@@ -549,12 +563,12 @@ class WanImageToVideoApi(IO.ComfyNode):
             node_id="WanImageToVideoApi",
             display_name="Wan Image to Video",
             category="api node/video/Wan",
-            description="Generates video based on the first frame and text prompt.",
+            description="Generates a video from the first frame and a text prompt.",
             inputs=[
                 IO.Combo.Input(
                     "model",
-                    options=["wan2.5-i2v-preview"],
-                    default="wan2.5-i2v-preview",
+                    options=["wan2.5-i2v-preview", "wan2.6-i2v"],
+                    default="wan2.6-i2v",
                     tooltip="Model to use.",
                 ),
                 IO.Image.Input(
@@ -564,13 +578,13 @@ class WanImageToVideoApi(IO.ComfyNode):
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
+                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative text prompt to guide what to avoid.",
+                    tooltip="Negative prompt describing what to avoid.",
                     optional=True,
                 ),
                 IO.Combo.Input(
@@ -580,23 +594,23 @@ class WanImageToVideoApi(IO.ComfyNode):
                         "720P",
                         "1080P",
                     ],
-                    default="480P",
+                    default="720P",
                     optional=True,
                 ),
                 IO.Int.Input(
                     "duration",
                     default=5,
                     min=5,
-                    max=10,
+                    max=15,
                     step=5,
                     display_mode=IO.NumberDisplay.number,
-                    tooltip="Available durations: 5 and 10 seconds",
+                    tooltip="Duration 15 available only for WAN2.6 model.",
                     optional=True,
                 ),
                 IO.Audio.Input(
                     "audio",
                     optional=True,
-                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
+                    tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
                 ),
                 IO.Int.Input(
                     "seed",
@@ -613,7 +627,7 @@ class WanImageToVideoApi(IO.ComfyNode):
                     "generate_audio",
                     default=False,
                     optional=True,
-                    tooltip="If there is no audio input, generate audio automatically.",
+                    tooltip="If no audio input is provided, generate audio automatically.",
                 ),
                 IO.Boolean.Input(
                     "prompt_extend",
@@ -624,7 +638,15 @@ class WanImageToVideoApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip='Whether to add an "AI generated" watermark to the result.',
+                    tooltip="Whether to add an AI-generated watermark to the result.",
+                    optional=True,
+                ),
+                IO.Combo.Input(
+                    "shot_type",
+                    options=["single", "multi"],
+                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
+                    "single continuous shot or multiple shots with cuts. "
+                    "This parameter takes effect only when prompt_extend is True.",
                     optional=True,
                 ),
             ],
@@ -643,19 +665,24 @@ class WanImageToVideoApi(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        image: torch.Tensor,
+        image: Input.Image,
         prompt: str,
         negative_prompt: str = "",
-        resolution: str = "480P",
+        resolution: str = "720P",
         duration: int = 5,
-        audio: Optional[Input.Audio] = None,
+        audio: Input.Audio | None = None,
         seed: int = 0,
         generate_audio: bool = False,
         prompt_extend: bool = True,
         watermark: bool = True,
+        shot_type: str = "single",
     ):
         if get_number_of_images(image) != 1:
             raise ValueError("Exactly one input image is required.")
+        if "480P" in resolution and model == "wan2.6-i2v":
+            raise ValueError("The Wan 2.6 model does not support 480P.")
+        if duration == 15 and model == "wan2.5-i2v-preview":
+            raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")
         image_url = "data:image/png;base64," + tensor_to_base64_string(image, total_pixels=2000 * 2000)
         audio_url = None
         if audio is not None:
@@ -677,11 +704,12 @@ class WanImageToVideoApi(IO.ComfyNode):
                     audio=generate_audio,
                     prompt_extend=prompt_extend,
                     watermark=watermark,
+                    shot_type=shot_type,
                 ),
             ),
         )
         if not initial_response.output:
-            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
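Both video nodes now enforce the same two Wan 2.5 vs Wan 2.6 rules. A small consolidation of them, purely for reference (the helper name is made up; the rules come from the validation added above):

def check_wan_video_args(model: str, resolution: str, duration: int) -> None:
    # Wan 2.6 drops the 480p/480P tier, and only Wan 2.6 allows 15-second clips.
    if model.startswith("wan2.6") and "480" in resolution.lower():
        raise ValueError("The Wan 2.6 model does not support 480p.")
    if duration == 15 and model.startswith("wan2.5"):
        raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")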

View File

@@ -671,7 +671,16 @@ class SamplerSEEDS2(io.ComfyNode):
                 io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier"),
                 io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)"),
             ],
-            outputs=[io.Sampler.Output()]
+            outputs=[io.Sampler.Output()],
+            description=(
+                "This sampler node can represent multiple samplers:\n\n"
+                "seeds_2\n"
+                "- default setting\n\n"
+                "exp_heun_2_x0\n"
+                "- solver_type=phi_2, r=1.0, eta=0.0\n\n"
+                "exp_heun_2_x0_sde\n"
+                "- solver_type=phi_2, r=1.0, eta=1.0, s_noise=1.0"
+            )
         )
 
     @classmethod

View File

@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.4.0"
+__version__ = "0.5.0"

View File

@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.4.0"
+version = "0.5.0"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"