Compare commits

...

3 Commits

Author SHA1 Message Date
Alexander Piskun
393d2880dd
feat(api-nodes): added nodes for Vidu2 (#11760)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
2026-01-09 12:59:38 -08:00
Alexander Piskun
4484b93d61
fix(api-nodes): do not downscale the input image for Topaz Enhance (#11768) 2026-01-09 12:25:56 -08:00
comfyanonymous
bd0e6825e8
Be less strict when loading mixed ops weights. (#11769) 2026-01-09 14:21:06 -05:00
5 changed files with 490 additions and 152 deletions

View File

@ -546,7 +546,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
weight_key = f"{prefix}weight"
weight = state_dict.pop(weight_key, None)
if weight is None:
raise ValueError(f"Missing weight for layer {layer_name}")
logging.warning(f"Missing weight for layer {layer_name}")
return
manually_loaded_keys = [weight_key]

View File

@ -0,0 +1,41 @@
from pydantic import BaseModel, Field
class SubjectReference(BaseModel):
    """One named subject plus its reference images, used by Vidu reference-to-video requests."""
    # Subject identifier; prompts refer to the subject as @subject{id}.
    id: str = Field(...)
    # Reference images for this subject — download URLs (as produced by
    # upload_images_to_comfyapi); presumably base64 strings are also accepted,
    # mirroring TaskCreationRequest.images — TODO confirm against the Vidu API.
    images: list[str] = Field(...)
class TaskCreationRequest(BaseModel):
    """Request body for creating a Vidu generation task (text/image/reference/start-end to video)."""
    # Model identifier, e.g. "viduq2" or "viduq2-pro" — see the node Combo options.
    model: str = Field(...)
    prompt: str = Field(..., max_length=2000)
    # Requested clip duration; node sliders use 1-10 (seconds presumably — confirm with API docs).
    duration: int = Field(...)
    seed: int = Field(..., ge=0, le=2147483647)
    # e.g. "16:9"; only set by the text- and reference-to-video nodes.
    aspect_ratio: str | None = Field(None)
    # e.g. "720p" or "1080p".
    resolution: str | None = Field(None)
    # One of "auto" | "small" | "medium" | "large".
    movement_amplitude: str | None = Field(None)
    images: list[str] | None = Field(None, description="Base64 encoded string or image URL")
    # Per-subject reference images (reference-to-video only).
    subjects: list[SubjectReference] | None = Field(None)
    # Add background music (text-to-video only).
    bgm: bool | None = Field(None)
    # Generate speech/background music from the prompt (reference-to-video only).
    audio: bool | None = Field(None)
class TaskCreationResponse(BaseModel):
    """Response returned when a Vidu task is created."""
    # Task identifier, interpolated into the /tasks/{id}/creations polling path.
    task_id: str = Field(...)
    # Initial task state; "failed" aborts the client-side wait.
    state: str = Field(...)
    created_at: str = Field(...)
    code: int | None = Field(None, description="Error code")
class TaskResult(BaseModel):
    """A single generated creation (one output video)."""
    id: str = Field(..., description="Creation id")
    url: str = Field(..., description="The URL of the generated results, valid for one hour")
    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")
class TaskStatusResponse(BaseModel):
    """Polling response for a Vidu task; `creations` is populated once the task succeeds."""
    # Current task state, read by the poller's status_extractor.
    state: str = Field(...)
    err_code: str | None = Field(None)
    # Completion fraction, surfaced through the poller's progress_extractor.
    progress: float | None = Field(None)
    credits: int | None = Field(None)
    creations: list[TaskResult] = Field(..., description="Generated results")

View File

@ -2,7 +2,6 @@ import builtins
from io import BytesIO
import aiohttp
import torch
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension, Input
@ -138,7 +137,7 @@ class TopazImageEnhance(IO.ComfyNode):
async def execute(
cls,
model: str,
image: torch.Tensor,
image: Input.Image,
prompt: str = "",
subject_detection: str = "All",
face_enhancement: bool = True,
@ -153,7 +152,9 @@ class TopazImageEnhance(IO.ComfyNode):
) -> IO.NodeOutput:
if get_number_of_images(image) != 1:
raise ValueError("Only one input image is supported.")
download_url = await upload_images_to_comfyapi(cls, image, max_images=1, mime_type="image/png")
download_url = await upload_images_to_comfyapi(
cls, image, max_images=1, mime_type="image/png", total_pixels=4096*4096
)
initial_response = await sync_op(
cls,
ApiEndpoint(path="/proxy/topaz/image/v1/enhance-gen/async", method="POST"),

View File

@ -1,12 +1,13 @@
import logging
from enum import Enum
from typing import Literal, Optional, TypeVar
import torch
from pydantic import BaseModel, Field
from typing_extensions import override
from comfy_api.latest import IO, ComfyExtension
from comfy_api.latest import IO, ComfyExtension, Input
from comfy_api_nodes.apis.vidu import (
SubjectReference,
TaskCreationRequest,
TaskCreationResponse,
TaskResult,
TaskStatusResponse,
)
from comfy_api_nodes.util import (
ApiEndpoint,
download_url_to_video_output,
@ -17,6 +18,7 @@ from comfy_api_nodes.util import (
validate_image_aspect_ratio,
validate_image_dimensions,
validate_images_aspect_ratio_closeness,
validate_string,
)
VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
@ -25,98 +27,33 @@ VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"
R = TypeVar("R")
class VideoModelName(str, Enum):
vidu_q1 = "viduq1"
class AspectRatio(str, Enum):
r_16_9 = "16:9"
r_9_16 = "9:16"
r_1_1 = "1:1"
class Resolution(str, Enum):
r_1080p = "1080p"
class MovementAmplitude(str, Enum):
auto = "auto"
small = "small"
medium = "medium"
large = "large"
class TaskCreationRequest(BaseModel):
model: VideoModelName = VideoModelName.vidu_q1
prompt: Optional[str] = Field(None, max_length=1500)
duration: Optional[Literal[5]] = 5
seed: Optional[int] = Field(0, ge=0, le=2147483647)
aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
resolution: Optional[Resolution] = Resolution.r_1080p
movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
images: Optional[list[str]] = Field(None, description="Base64 encoded string or image URL")
class TaskCreationResponse(BaseModel):
task_id: str = Field(...)
state: str = Field(...)
created_at: str = Field(...)
code: Optional[int] = Field(None, description="Error code")
class TaskResult(BaseModel):
id: str = Field(..., description="Creation id")
url: str = Field(..., description="The URL of the generated results, valid for one hour")
cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")
class TaskStatusResponse(BaseModel):
state: str = Field(...)
err_code: Optional[str] = Field(None)
creations: list[TaskResult] = Field(..., description="Generated results")
def get_video_url_from_response(response) -> Optional[str]:
if response.creations:
return response.creations[0].url
return None
def get_video_from_response(response) -> TaskResult:
if not response.creations:
error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
logging.info(error_msg)
raise RuntimeError(error_msg)
logging.info("Vidu task %s succeeded. Video URL: %s", response.creations[0].id, response.creations[0].url)
return response.creations[0]
async def execute_task(
cls: type[IO.ComfyNode],
vidu_endpoint: str,
payload: TaskCreationRequest,
estimated_duration: int,
) -> R:
response = await sync_op(
) -> list[TaskResult]:
task_creation_response = await sync_op(
cls,
endpoint=ApiEndpoint(path=vidu_endpoint, method="POST"),
response_model=TaskCreationResponse,
data=payload,
)
if response.state == "failed":
error_msg = f"Vidu request failed. Code: {response.code}"
logging.error(error_msg)
raise RuntimeError(error_msg)
return await poll_op(
if task_creation_response.state == "failed":
raise RuntimeError(f"Vidu request failed. Code: {task_creation_response.code}")
response = await poll_op(
cls,
ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % response.task_id),
ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % task_creation_response.task_id),
response_model=TaskStatusResponse,
status_extractor=lambda r: r.state,
estimated_duration=estimated_duration,
progress_extractor=lambda r: r.progress,
max_poll_attempts=320,
)
if not response.creations:
raise RuntimeError(
f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
)
return response.creations
class ViduTextToVideoNode(IO.ComfyNode):
@ -127,14 +64,9 @@ class ViduTextToVideoNode(IO.ComfyNode):
node_id="ViduTextToVideoNode",
display_name="Vidu Text To Video Generation",
category="api node/video/Vidu",
description="Generate video from text prompt",
description="Generate video from a text prompt",
inputs=[
IO.Combo.Input(
"model",
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
IO.String.Input(
"prompt",
multiline=True,
@ -163,22 +95,19 @@ class ViduTextToVideoNode(IO.ComfyNode):
),
IO.Combo.Input(
"aspect_ratio",
options=AspectRatio,
default=AspectRatio.r_16_9,
options=["16:9", "9:16", "1:1"],
tooltip="The aspect ratio of the output video",
optional=True,
),
IO.Combo.Input(
"resolution",
options=Resolution,
default=Resolution.r_1080p,
options=["1080p"],
tooltip="Supported values may vary by model & duration",
optional=True,
),
IO.Combo.Input(
"movement_amplitude",
options=MovementAmplitude,
default=MovementAmplitude.auto,
options=["auto", "small", "medium", "large"],
tooltip="The movement amplitude of objects in the frame",
optional=True,
),
@ -208,7 +137,7 @@ class ViduTextToVideoNode(IO.ComfyNode):
if not prompt:
raise ValueError("The prompt field is required and cannot be empty.")
payload = TaskCreationRequest(
model_name=model,
model=model,
prompt=prompt,
duration=duration,
seed=seed,
@ -216,8 +145,8 @@ class ViduTextToVideoNode(IO.ComfyNode):
resolution=resolution,
movement_amplitude=movement_amplitude,
)
results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload, 320)
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload)
return IO.NodeOutput(await download_url_to_video_output(results[0].url))
class ViduImageToVideoNode(IO.ComfyNode):
@ -230,12 +159,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
category="api node/video/Vidu",
description="Generate video from image and optional prompt",
inputs=[
IO.Combo.Input(
"model",
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
IO.Image.Input(
"image",
tooltip="An image to be used as the start frame of the generated video",
@ -270,15 +194,13 @@ class ViduImageToVideoNode(IO.ComfyNode):
),
IO.Combo.Input(
"resolution",
options=Resolution,
default=Resolution.r_1080p,
options=["1080p"],
tooltip="Supported values may vary by model & duration",
optional=True,
),
IO.Combo.Input(
"movement_amplitude",
options=MovementAmplitude,
default=MovementAmplitude.auto.value,
options=["auto", "small", "medium", "large"],
tooltip="The movement amplitude of objects in the frame",
optional=True,
),
@ -298,7 +220,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
async def execute(
cls,
model: str,
image: torch.Tensor,
image: Input.Image,
prompt: str,
duration: int,
seed: int,
@ -309,7 +231,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
raise ValueError("Only one input image is allowed.")
validate_image_aspect_ratio(image, (1, 4), (4, 1))
payload = TaskCreationRequest(
model_name=model,
model=model,
prompt=prompt,
duration=duration,
seed=seed,
@ -322,8 +244,8 @@ class ViduImageToVideoNode(IO.ComfyNode):
max_images=1,
mime_type="image/png",
)
results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload, 120)
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload)
return IO.NodeOutput(await download_url_to_video_output(results[0].url))
class ViduReferenceVideoNode(IO.ComfyNode):
@ -334,14 +256,9 @@ class ViduReferenceVideoNode(IO.ComfyNode):
node_id="ViduReferenceVideoNode",
display_name="Vidu Reference To Video Generation",
category="api node/video/Vidu",
description="Generate video from multiple images and prompt",
description="Generate video from multiple images and a prompt",
inputs=[
IO.Combo.Input(
"model",
options=VideoModelName,
default=VideoModelName.vidu_q1,
tooltip="Model name",
),
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
IO.Image.Input(
"images",
tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
@ -374,22 +291,19 @@ class ViduReferenceVideoNode(IO.ComfyNode):
),
IO.Combo.Input(
"aspect_ratio",
options=AspectRatio,
default=AspectRatio.r_16_9,
options=["16:9", "9:16", "1:1"],
tooltip="The aspect ratio of the output video",
optional=True,
),
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
options=["1080p"],
tooltip="Supported values may vary by model & duration",
optional=True,
),
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
options=["auto", "small", "medium", "large"],
tooltip="The movement amplitude of objects in the frame",
optional=True,
),
@ -409,7 +323,7 @@ class ViduReferenceVideoNode(IO.ComfyNode):
async def execute(
cls,
model: str,
images: torch.Tensor,
images: Input.Image,
prompt: str,
duration: int,
seed: int,
@ -426,7 +340,7 @@ class ViduReferenceVideoNode(IO.ComfyNode):
validate_image_aspect_ratio(image, (1, 4), (4, 1))
validate_image_dimensions(image, min_width=128, min_height=128)
payload = TaskCreationRequest(
model_name=model,
model=model,
prompt=prompt,
duration=duration,
seed=seed,
@ -440,8 +354,8 @@ class ViduReferenceVideoNode(IO.ComfyNode):
max_images=7,
mime_type="image/png",
)
results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload, 120)
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload)
return IO.NodeOutput(await download_url_to_video_output(results[0].url))
class ViduStartEndToVideoNode(IO.ComfyNode):
@ -454,12 +368,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
category="api node/video/Vidu",
description="Generate a video from start and end frames and a prompt",
inputs=[
IO.Combo.Input(
"model",
options=[model.value for model in VideoModelName],
default=VideoModelName.vidu_q1.value,
tooltip="Model name",
),
IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
IO.Image.Input(
"first_frame",
tooltip="Start frame",
@ -497,15 +406,13 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
),
IO.Combo.Input(
"resolution",
options=[model.value for model in Resolution],
default=Resolution.r_1080p.value,
options=["1080p"],
tooltip="Supported values may vary by model & duration",
optional=True,
),
IO.Combo.Input(
"movement_amplitude",
options=[model.value for model in MovementAmplitude],
default=MovementAmplitude.auto.value,
options=["auto", "small", "medium", "large"],
tooltip="The movement amplitude of objects in the frame",
optional=True,
),
@ -525,8 +432,8 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
async def execute(
cls,
model: str,
first_frame: torch.Tensor,
end_frame: torch.Tensor,
first_frame: Input.Image,
end_frame: Input.Image,
prompt: str,
duration: int,
seed: int,
@ -535,7 +442,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
) -> IO.NodeOutput:
validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
payload = TaskCreationRequest(
model_name=model,
model=model,
prompt=prompt,
duration=duration,
seed=seed,
@ -546,8 +453,391 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
(await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0]
for frame in (first_frame, end_frame)
]
results = await execute_task(cls, VIDU_START_END_VIDEO, payload, 96)
return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
results = await execute_task(cls, VIDU_START_END_VIDEO, payload)
return IO.NodeOutput(await download_url_to_video_output(results[0].url))
class Vidu2TextToVideoNode(IO.ComfyNode):
    """API node that turns a text prompt into a video via the Vidu2 text-to-video endpoint."""

    @classmethod
    def define_schema(cls):
        """Declare inputs/outputs for the Vidu2 text-to-video node."""
        prompt_input = IO.String.Input(
            "prompt",
            multiline=True,
            tooltip="A textual description for video generation, with a maximum length of 2000 characters.",
        )
        duration_input = IO.Int.Input(
            "duration",
            default=5,
            min=1,
            max=10,
            step=1,
            display_mode=IO.NumberDisplay.slider,
        )
        seed_input = IO.Int.Input(
            "seed",
            default=1,
            min=0,
            max=2147483647,
            step=1,
            display_mode=IO.NumberDisplay.number,
            control_after_generate=True,
        )
        bgm_input = IO.Boolean.Input(
            "background_music",
            default=False,
            tooltip="Whether to add background music to the generated video.",
        )
        return IO.Schema(
            node_id="Vidu2TextToVideoNode",
            display_name="Vidu2 Text-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate video from a text prompt",
            inputs=[
                IO.Combo.Input("model", options=["viduq2"]),
                prompt_input,
                duration_input,
                seed_input,
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "3:4", "4:3", "1:1"]),
                IO.Combo.Input("resolution", options=["720p", "1080p"]),
                bgm_input,
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        background_music: bool,
    ) -> IO.NodeOutput:
        """Validate the prompt, run the generation task, and return the downloaded video."""
        validate_string(prompt, min_length=1, max_length=2000)
        request = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            bgm=background_music,
        )
        creations = await execute_task(cls, VIDU_TEXT_TO_VIDEO, request)
        return IO.NodeOutput(await download_url_to_video_output(creations[0].url))
class Vidu2ImageToVideoNode(IO.ComfyNode):
    """API node that animates a single start-frame image into a video via Vidu2."""

    @classmethod
    def define_schema(cls):
        """Declare inputs/outputs for the Vidu2 image-to-video node."""
        model_input = IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"])
        image_input = IO.Image.Input(
            "image",
            tooltip="An image to be used as the start frame of the generated video.",
        )
        prompt_input = IO.String.Input(
            "prompt",
            multiline=True,
            default="",
            tooltip="An optional text prompt for video generation (max 2000 characters).",
        )
        return IO.Schema(
            node_id="Vidu2ImageToVideoNode",
            display_name="Vidu2 Image-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from an image and an optional prompt.",
            inputs=[
                model_input,
                image_input,
                prompt_input,
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=1,
                    max=10,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input("resolution", options=["720p", "1080p"]),
                IO.Combo.Input(
                    "movement_amplitude",
                    options=["auto", "small", "medium", "large"],
                    tooltip="The movement amplitude of objects in the frame.",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: Input.Image,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> IO.NodeOutput:
        """Validate inputs, upload the start frame, run the task, and return the video."""
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        validate_image_aspect_ratio(image, (1, 4), (4, 1))
        validate_string(prompt, max_length=2000)
        # Upload the single start frame and reference it by URL in the request.
        start_frame_urls = await upload_images_to_comfyapi(
            cls,
            image,
            max_images=1,
            mime_type="image/png",
        )
        request = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
            images=start_frame_urls,
        )
        creations = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, request)
        return IO.NodeOutput(await download_url_to_video_output(creations[0].url))
class Vidu2ReferenceVideoNode(IO.ComfyNode):
    """API node that generates a video from per-subject reference images and a prompt (Vidu2)."""

    @classmethod
    def define_schema(cls):
        """Declare inputs/outputs for the Vidu2 reference-to-video node."""
        return IO.Schema(
            node_id="Vidu2ReferenceVideoNode",
            display_name="Vidu2 Reference-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from multiple reference images and a prompt.",
            inputs=[
                IO.Combo.Input("model", options=["viduq2"]),
                IO.Autogrow.Input(
                    "subjects",
                    template=IO.Autogrow.TemplateNames(
                        IO.Image.Input("reference_images"),
                        names=["subject1", "subject2", "subject3"],
                        min=1,
                    ),
                    tooltip="For each subject, provide up to 3 reference images (7 images total across all subjects). "
                    "Reference them in prompts via @subject{subject_id}.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    # Fixed: this tooltip was a copy-paste of the `audio` input's tooltip.
                    tooltip="A textual description for video generation, with a maximum length of 2000 characters. "
                    "Reference subjects via @subject{subject_id}.",
                ),
                IO.Boolean.Input(
                    "audio",
                    default=False,
                    tooltip="When enabled video will contain generated speech and background music based on the prompt.",
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=1,
                    max=10,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "4:3", "3:4", "1:1"]),
                IO.Combo.Input("resolution", options=["720p"]),
                IO.Combo.Input(
                    "movement_amplitude",
                    options=["auto", "small", "medium", "large"],
                    tooltip="The movement amplitude of objects in the frame.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        subjects: IO.Autogrow.Type,
        prompt: str,
        audio: bool,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> IO.NodeOutput:
        """Validate the prompt and reference images, upload them per subject, run the task, and return the video.

        Raises:
            ValueError: if any subject has more than 3 images, or more than 7 images are supplied in total.
        """
        validate_string(prompt, min_length=1, max_length=2000)
        # Validate every reference image before any upload so we fail fast and cheaply.
        total_images = 0
        for subject_name in subjects:
            batch = subjects[subject_name]
            if get_number_of_images(batch) > 3:
                raise ValueError("Maximum number of images per subject is 3.")
            total_images += get_number_of_images(batch)
            for im in batch:
                validate_image_aspect_ratio(im, (1, 4), (4, 1))
                validate_image_dimensions(im, min_width=128, min_height=128)
        if total_images > 7:
            raise ValueError("Too many reference images; the maximum allowed is 7.")
        # Upload each subject's batch and pair the resulting URLs with the subject id,
        # which the prompt references as @subject{subject_id}.
        subjects_param: list[SubjectReference] = []
        for subject_name in subjects:
            subjects_param.append(
                SubjectReference(
                    id=subject_name,
                    images=await upload_images_to_comfyapi(
                        cls,
                        subjects[subject_name],
                        max_images=3,
                        mime_type="image/png",
                        wait_label=f"Uploading reference images for {subject_name}",
                    ),
                ),
            )
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            audio=audio,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
            subjects=subjects_param,
        )
        results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload)
        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
class Vidu2StartEndToVideoNode(IO.ComfyNode):
    """API node that generates a video interpolating between a start frame and an end frame via Vidu2."""

    @classmethod
    def define_schema(cls):
        """Declare inputs/outputs for the Vidu2 start/end-frame-to-video node."""
        frame_inputs = [
            IO.Image.Input("first_frame"),
            IO.Image.Input("end_frame"),
        ]
        return IO.Schema(
            node_id="Vidu2StartEndToVideoNode",
            display_name="Vidu2 Start/End Frame-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from a start frame, an end frame, and a prompt.",
            inputs=[
                IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]),
                *frame_inputs,
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="Prompt description (max 2000 characters).",
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=2,
                    max=8,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input("resolution", options=["720p", "1080p"]),
                IO.Combo.Input(
                    "movement_amplitude",
                    options=["auto", "small", "medium", "large"],
                    tooltip="The movement amplitude of objects in the frame.",
                ),
            ],
            outputs=[IO.Video.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: Input.Image,
        end_frame: Input.Image,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> IO.NodeOutput:
        """Validate both frames, upload them in order, run the task, and return the video."""
        validate_string(prompt, max_length=2000)
        for frame_name, frame in (("first_frame", first_frame), ("end_frame", end_frame)):
            if get_number_of_images(frame) > 1:
                raise ValueError(f"Only one input image is allowed for `{frame_name}`.")
        validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
        # Upload the frames sequentially; order matters (start frame first, end frame second).
        frame_urls = []
        for frame in (first_frame, end_frame):
            uploaded = await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png")
            frame_urls.append(uploaded[0])
        request = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
            images=frame_urls,
        )
        creations = await execute_task(cls, VIDU_START_END_VIDEO, request)
        return IO.NodeOutput(await download_url_to_video_output(creations[0].url))
class ViduExtension(ComfyExtension):
@ -558,6 +848,10 @@ class ViduExtension(ComfyExtension):
ViduImageToVideoNode,
ViduReferenceVideoNode,
ViduStartEndToVideoNode,
Vidu2TextToVideoNode,
Vidu2ImageToVideoNode,
Vidu2ReferenceVideoNode,
Vidu2StartEndToVideoNode,
]

View File

@ -49,6 +49,7 @@ async def upload_images_to_comfyapi(
mime_type: str | None = None,
wait_label: str | None = "Uploading",
show_batch_index: bool = True,
total_pixels: int = 2048 * 2048,
) -> list[str]:
"""
Uploads images to ComfyUI API and returns download URLs.
@ -63,7 +64,7 @@ async def upload_images_to_comfyapi(
for idx in range(num_to_upload):
tensor = image[idx] if is_batch else image
img_io = tensor_to_bytesio(tensor, mime_type=mime_type)
img_io = tensor_to_bytesio(tensor, total_pixels=total_pixels, mime_type=mime_type)
effective_label = wait_label
if wait_label and show_batch_index and num_to_upload > 1: