From 393d2880ddc6e57c0fa3b878bb50fa2901bd57e6 Mon Sep 17 00:00:00 2001
From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com>
Date: Fri, 9 Jan 2026 22:59:38 +0200
Subject: [PATCH] feat(api-nodes): added nodes for Vidu2 (#11760)

---
 comfy_api_nodes/apis/vidu.py  |  41 +++
 comfy_api_nodes/nodes_vidu.py | 588 +++++++++++++++++++++++++---------
 2 files changed, 482 insertions(+), 147 deletions(-)
 create mode 100644 comfy_api_nodes/apis/vidu.py

diff --git a/comfy_api_nodes/apis/vidu.py b/comfy_api_nodes/apis/vidu.py
new file mode 100644
index 000000000..a9bb6f7ce
--- /dev/null
+++ b/comfy_api_nodes/apis/vidu.py
@@ -0,0 +1,41 @@
+from pydantic import BaseModel, Field
+
+
+class SubjectReference(BaseModel):
+    id: str = Field(...)
+    images: list[str] = Field(...)
+
+
+class TaskCreationRequest(BaseModel):
+    model: str = Field(...)
+    prompt: str = Field(..., max_length=2000)
+    duration: int = Field(...)
+    seed: int = Field(..., ge=0, le=2147483647)
+    aspect_ratio: str | None = Field(None)
+    resolution: str | None = Field(None)
+    movement_amplitude: str | None = Field(None)
+    images: list[str] | None = Field(None, description="Base64 encoded string or image URL")
+    subjects: list[SubjectReference] | None = Field(None)
+    bgm: bool | None = Field(None)
+    audio: bool | None = Field(None)
+
+
+class TaskCreationResponse(BaseModel):
+    task_id: str = Field(...)
+    state: str = Field(...)
+    created_at: str = Field(...)
+    code: int | None = Field(None, description="Error code")
+
+
+class TaskResult(BaseModel):
+    id: str = Field(..., description="Creation id")
+    url: str = Field(..., description="The URL of the generated results, valid for one hour")
+    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")
+
+
+class TaskStatusResponse(BaseModel):
+    state: str = Field(...)
+    err_code: str | None = Field(None)
+    progress: float | None = Field(None)
+    credits: int | None = Field(None)
+    creations: list[TaskResult] = Field(..., description="Generated results")

diff --git a/comfy_api_nodes/nodes_vidu.py b/comfy_api_nodes/nodes_vidu.py
index 7a679f0d9..9d94ae7ad 100644
--- a/comfy_api_nodes/nodes_vidu.py
+++ b/comfy_api_nodes/nodes_vidu.py
@@ -1,12 +1,13 @@
-import logging
-from enum import Enum
-from typing import Literal, Optional, TypeVar
-
-import torch
-from pydantic import BaseModel, Field
 from typing_extensions import override

-from comfy_api.latest import IO, ComfyExtension
+from comfy_api.latest import IO, ComfyExtension, Input
+from comfy_api_nodes.apis.vidu import (
+    SubjectReference,
+    TaskCreationRequest,
+    TaskCreationResponse,
+    TaskResult,
+    TaskStatusResponse,
+)
 from comfy_api_nodes.util import (
     ApiEndpoint,
     download_url_to_video_output,
@@ -17,6 +18,7 @@ from comfy_api_nodes.util import (
     validate_image_aspect_ratio,
     validate_image_dimensions,
     validate_images_aspect_ratio_closeness,
+    validate_string,
 )

 VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
@@ -25,98 +27,33 @@ VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
 VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
 VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"


-R = TypeVar("R")
-
-
-class VideoModelName(str, Enum):
-    vidu_q1 = "viduq1"
-
-
-class AspectRatio(str, Enum):
-    r_16_9 = "16:9"
-    r_9_16 = "9:16"
-    r_1_1 = "1:1"
-
-
-class Resolution(str, Enum):
-    r_1080p = "1080p"
-
-
-class MovementAmplitude(str, Enum):
-    auto = "auto"
-    small = "small"
-    medium = "medium"
-    large = "large"
-
-
-class TaskCreationRequest(BaseModel):
-    model: VideoModelName = VideoModelName.vidu_q1
-    prompt: Optional[str] = Field(None, max_length=1500)
-    duration: Optional[Literal[5]] = 5
-    seed: Optional[int] = Field(0, ge=0, le=2147483647)
-    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
-    resolution: Optional[Resolution] = Resolution.r_1080p
-    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
-    images: Optional[list[str]] = Field(None, description="Base64 encoded string or image URL")
-
-
-class TaskCreationResponse(BaseModel):
-    task_id: str = Field(...)
-    state: str = Field(...)
-    created_at: str = Field(...)
-    code: Optional[int] = Field(None, description="Error code")
-
-
-class TaskResult(BaseModel):
-    id: str = Field(..., description="Creation id")
-    url: str = Field(..., description="The URL of the generated results, valid for one hour")
-    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")
-
-
-class TaskStatusResponse(BaseModel):
-    state: str = Field(...)
-    err_code: Optional[str] = Field(None)
-    creations: list[TaskResult] = Field(..., description="Generated results")
-
-
-def get_video_url_from_response(response) -> Optional[str]:
-    if response.creations:
-        return response.creations[0].url
-    return None
-
-
-def get_video_from_response(response) -> TaskResult:
-    if not response.creations:
-        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
-        logging.info(error_msg)
-        raise RuntimeError(error_msg)
-    logging.info("Vidu task %s succeeded. Video URL: %s", response.creations[0].id, response.creations[0].url)
Video URL: %s", response.creations[0].id, response.creations[0].url) - return response.creations[0] - async def execute_task( cls: type[IO.ComfyNode], vidu_endpoint: str, payload: TaskCreationRequest, - estimated_duration: int, -) -> R: - response = await sync_op( +) -> list[TaskResult]: + task_creation_response = await sync_op( cls, endpoint=ApiEndpoint(path=vidu_endpoint, method="POST"), response_model=TaskCreationResponse, data=payload, ) - if response.state == "failed": - error_msg = f"Vidu request failed. Code: {response.code}" - logging.error(error_msg) - raise RuntimeError(error_msg) - return await poll_op( + if task_creation_response.state == "failed": + raise RuntimeError(f"Vidu request failed. Code: {task_creation_response.code}") + response = await poll_op( cls, - ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % response.task_id), + ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % task_creation_response.task_id), response_model=TaskStatusResponse, status_extractor=lambda r: r.state, - estimated_duration=estimated_duration, + progress_extractor=lambda r: r.progress, + max_poll_attempts=320, ) + if not response.creations: + raise RuntimeError( + f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}" + ) + return response.creations class ViduTextToVideoNode(IO.ComfyNode): @@ -127,14 +64,9 @@ class ViduTextToVideoNode(IO.ComfyNode): node_id="ViduTextToVideoNode", display_name="Vidu Text To Video Generation", category="api node/video/Vidu", - description="Generate video from text prompt", + description="Generate video from a text prompt", inputs=[ - IO.Combo.Input( - "model", - options=VideoModelName, - default=VideoModelName.vidu_q1, - tooltip="Model name", - ), + IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"), IO.String.Input( "prompt", multiline=True, @@ -163,22 +95,19 @@ class ViduTextToVideoNode(IO.ComfyNode): ), IO.Combo.Input( "aspect_ratio", - options=AspectRatio, - default=AspectRatio.r_16_9, + options=["16:9", "9:16", "1:1"], tooltip="The aspect ratio of the output video", optional=True, ), IO.Combo.Input( "resolution", - options=Resolution, - default=Resolution.r_1080p, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( "movement_amplitude", - options=MovementAmplitude, - default=MovementAmplitude.auto, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -208,7 +137,7 @@ class ViduTextToVideoNode(IO.ComfyNode): if not prompt: raise ValueError("The prompt field is required and cannot be empty.") payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -216,8 +145,8 @@ class ViduTextToVideoNode(IO.ComfyNode): resolution=resolution, movement_amplitude=movement_amplitude, ) - results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload, 320) - return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url)) + results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduImageToVideoNode(IO.ComfyNode): @@ -230,12 +159,7 @@ class ViduImageToVideoNode(IO.ComfyNode): category="api node/video/Vidu", description="Generate video from image and optional prompt", inputs=[ - IO.Combo.Input( - "model", - options=VideoModelName, - default=VideoModelName.vidu_q1, - tooltip="Model name", - ), + IO.Combo.Input("model", 
options=["viduq1"], tooltip="Model name"), IO.Image.Input( "image", tooltip="An image to be used as the start frame of the generated video", @@ -270,15 +194,13 @@ class ViduImageToVideoNode(IO.ComfyNode): ), IO.Combo.Input( "resolution", - options=Resolution, - default=Resolution.r_1080p, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( "movement_amplitude", - options=MovementAmplitude, - default=MovementAmplitude.auto.value, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -298,7 +220,7 @@ class ViduImageToVideoNode(IO.ComfyNode): async def execute( cls, model: str, - image: torch.Tensor, + image: Input.Image, prompt: str, duration: int, seed: int, @@ -309,7 +231,7 @@ class ViduImageToVideoNode(IO.ComfyNode): raise ValueError("Only one input image is allowed.") validate_image_aspect_ratio(image, (1, 4), (4, 1)) payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -322,8 +244,8 @@ class ViduImageToVideoNode(IO.ComfyNode): max_images=1, mime_type="image/png", ) - results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload, 120) - return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url)) + results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduReferenceVideoNode(IO.ComfyNode): @@ -334,14 +256,9 @@ class ViduReferenceVideoNode(IO.ComfyNode): node_id="ViduReferenceVideoNode", display_name="Vidu Reference To Video Generation", category="api node/video/Vidu", - description="Generate video from multiple images and prompt", + description="Generate video from multiple images and a prompt", inputs=[ - IO.Combo.Input( - "model", - options=VideoModelName, - default=VideoModelName.vidu_q1, - tooltip="Model name", - ), + IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"), IO.Image.Input( "images", tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).", @@ -374,22 +291,19 @@ class ViduReferenceVideoNode(IO.ComfyNode): ), IO.Combo.Input( "aspect_ratio", - options=AspectRatio, - default=AspectRatio.r_16_9, + options=["16:9", "9:16", "1:1"], tooltip="The aspect ratio of the output video", optional=True, ), IO.Combo.Input( "resolution", - options=[model.value for model in Resolution], - default=Resolution.r_1080p.value, + options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, ), IO.Combo.Input( "movement_amplitude", - options=[model.value for model in MovementAmplitude], - default=MovementAmplitude.auto.value, + options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, ), @@ -409,7 +323,7 @@ class ViduReferenceVideoNode(IO.ComfyNode): async def execute( cls, model: str, - images: torch.Tensor, + images: Input.Image, prompt: str, duration: int, seed: int, @@ -426,7 +340,7 @@ class ViduReferenceVideoNode(IO.ComfyNode): validate_image_aspect_ratio(image, (1, 4), (4, 1)) validate_image_dimensions(image, min_width=128, min_height=128) payload = TaskCreationRequest( - model_name=model, + model=model, prompt=prompt, duration=duration, seed=seed, @@ -440,8 +354,8 @@ class ViduReferenceVideoNode(IO.ComfyNode): max_images=7, mime_type="image/png", ) - results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload, 120) - return 
+        results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload)
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))


 class ViduStartEndToVideoNode(IO.ComfyNode):
@@ -454,12 +368,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
             category="api node/video/Vidu",
             description="Generate a video from start and end frames and a prompt",
             inputs=[
-                IO.Combo.Input(
-                    "model",
-                    options=[model.value for model in VideoModelName],
-                    default=VideoModelName.vidu_q1.value,
-                    tooltip="Model name",
-                ),
+                IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
                 IO.Image.Input(
                     "first_frame",
                     tooltip="Start frame",
@@ -497,15 +406,13 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
                 ),
                 IO.Combo.Input(
                     "resolution",
-                    options=[model.value for model in Resolution],
-                    default=Resolution.r_1080p.value,
+                    options=["1080p"],
                     tooltip="Supported values may vary by model & duration",
                     optional=True,
                 ),
                 IO.Combo.Input(
                     "movement_amplitude",
-                    options=[model.value for model in MovementAmplitude],
-                    default=MovementAmplitude.auto.value,
+                    options=["auto", "small", "medium", "large"],
                     tooltip="The movement amplitude of objects in the frame",
                     optional=True,
                 ),
@@ -525,8 +432,8 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        first_frame: torch.Tensor,
-        end_frame: torch.Tensor,
+        first_frame: Input.Image,
+        end_frame: Input.Image,
         prompt: str,
         duration: int,
         seed: int,
@@ -535,7 +442,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
     ) -> IO.NodeOutput:
         validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
         payload = TaskCreationRequest(
-            model_name=model,
+            model=model,
             prompt=prompt,
             duration=duration,
             seed=seed,
@@ -546,8 +453,391 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
             (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0]
             for frame in (first_frame, end_frame)
         ]
-        results = await execute_task(cls, VIDU_START_END_VIDEO, payload, 96)
-        return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
+        results = await execute_task(cls, VIDU_START_END_VIDEO, payload)
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
+
+
+class Vidu2TextToVideoNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="Vidu2TextToVideoNode",
+            display_name="Vidu2 Text-to-Video Generation",
+            category="api node/video/Vidu",
+            description="Generate video from a text prompt",
+            inputs=[
+                IO.Combo.Input("model", options=["viduq2"]),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    tooltip="A textual description for video generation, with a maximum length of 2000 characters.",
+                ),
+                IO.Int.Input(
+                    "duration",
+                    default=5,
+                    min=1,
+                    max=10,
+                    step=1,
+                    display_mode=IO.NumberDisplay.slider,
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=1,
+                    min=0,
+                    max=2147483647,
+                    step=1,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                ),
+                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "3:4", "4:3", "1:1"]),
+                IO.Combo.Input("resolution", options=["720p", "1080p"]),
+                IO.Boolean.Input(
+                    "background_music",
+                    default=False,
+                    tooltip="Whether to add background music to the generated video.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        model: str,
+        prompt: str,
+        duration: int,
+        seed: int,
+        aspect_ratio: str,
+        resolution: str,
+        background_music: bool,
+    ) -> IO.NodeOutput:
+        validate_string(prompt, min_length=1, max_length=2000)
+        results = await execute_task(
+            cls,
+            VIDU_TEXT_TO_VIDEO,
+            TaskCreationRequest(
+                model=model,
+                prompt=prompt,
+                duration=duration,
+                seed=seed,
+                aspect_ratio=aspect_ratio,
+                resolution=resolution,
+                bgm=background_music,
+            ),
+        )
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
+
+
+class Vidu2ImageToVideoNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="Vidu2ImageToVideoNode",
+            display_name="Vidu2 Image-to-Video Generation",
+            category="api node/video/Vidu",
+            description="Generate a video from an image and an optional prompt.",
+            inputs=[
+                IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]),
+                IO.Image.Input(
+                    "image",
+                    tooltip="An image to be used as the start frame of the generated video.",
+                ),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="An optional text prompt for video generation (max 2000 characters).",
+                ),
+                IO.Int.Input(
+                    "duration",
+                    default=5,
+                    min=1,
+                    max=10,
+                    step=1,
+                    display_mode=IO.NumberDisplay.slider,
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=1,
+                    min=0,
+                    max=2147483647,
+                    step=1,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                ),
+                IO.Combo.Input(
+                    "resolution",
+                    options=["720p", "1080p"],
+                ),
+                IO.Combo.Input(
+                    "movement_amplitude",
+                    options=["auto", "small", "medium", "large"],
+                    tooltip="The movement amplitude of objects in the frame.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        model: str,
+        image: Input.Image,
+        prompt: str,
+        duration: int,
+        seed: int,
+        resolution: str,
+        movement_amplitude: str,
+    ) -> IO.NodeOutput:
+        if get_number_of_images(image) > 1:
+            raise ValueError("Only one input image is allowed.")
+        validate_image_aspect_ratio(image, (1, 4), (4, 1))
+        validate_string(prompt, max_length=2000)
+        results = await execute_task(
+            cls,
+            VIDU_IMAGE_TO_VIDEO,
+            TaskCreationRequest(
+                model=model,
+                prompt=prompt,
+                duration=duration,
+                seed=seed,
+                resolution=resolution,
+                movement_amplitude=movement_amplitude,
+                images=await upload_images_to_comfyapi(
+                    cls,
+                    image,
+                    max_images=1,
+                    mime_type="image/png",
+                ),
+            ),
+        )
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
+
+
+class Vidu2ReferenceVideoNode(IO.ComfyNode):
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="Vidu2ReferenceVideoNode",
+            display_name="Vidu2 Reference-to-Video Generation",
+            category="api node/video/Vidu",
+            description="Generate a video from multiple reference images and a prompt.",
+            inputs=[
+                IO.Combo.Input("model", options=["viduq2"]),
+                IO.Autogrow.Input(
+                    "subjects",
+                    template=IO.Autogrow.TemplateNames(
+                        IO.Image.Input("reference_images"),
+                        names=["subject1", "subject2", "subject3"],
+                        min=1,
+                    ),
+                    tooltip="For each subject, provide up to 3 reference images (7 images total across all subjects). "
" + "Reference them in prompts via @subject{subject_id}.", + ), + IO.String.Input( + "prompt", + multiline=True, + tooltip="When enabled, the video will include generated speech and background music " + "based on the prompt.", + ), + IO.Boolean.Input( + "audio", + default=False, + tooltip="When enabled video will contain generated speech and background music based on the prompt.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=10, + step=1, + display_mode=IO.NumberDisplay.slider, + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "4:3", "3:4", "1:1"]), + IO.Combo.Input("resolution", options=["720p"]), + IO.Combo.Input( + "movement_amplitude", + options=["auto", "small", "medium", "large"], + tooltip="The movement amplitude of objects in the frame.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + ) + + @classmethod + async def execute( + cls, + model: str, + subjects: IO.Autogrow.Type, + prompt: str, + audio: bool, + duration: int, + seed: int, + aspect_ratio: str, + resolution: str, + movement_amplitude: str, + ) -> IO.NodeOutput: + validate_string(prompt, min_length=1, max_length=2000) + total_images = 0 + for i in subjects: + if get_number_of_images(subjects[i]) > 3: + raise ValueError("Maximum number of images per subject is 3.") + for im in subjects[i]: + total_images += 1 + validate_image_aspect_ratio(im, (1, 4), (4, 1)) + validate_image_dimensions(im, min_width=128, min_height=128) + if total_images > 7: + raise ValueError("Too many reference images; the maximum allowed is 7.") + subjects_param: list[SubjectReference] = [] + for i in subjects: + subjects_param.append( + SubjectReference( + id=i, + images=await upload_images_to_comfyapi( + cls, + subjects[i], + max_images=3, + mime_type="image/png", + wait_label=f"Uploading reference images for {i}", + ), + ), + ) + payload = TaskCreationRequest( + model=model, + prompt=prompt, + audio=audio, + duration=duration, + seed=seed, + aspect_ratio=aspect_ratio, + resolution=resolution, + movement_amplitude=movement_amplitude, + subjects=subjects_param, + ) + results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) + + +class Vidu2StartEndToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Vidu2StartEndToVideoNode", + display_name="Vidu2 Start/End Frame-to-Video Generation", + category="api node/video/Vidu", + description="Generate a video from a start frame, an end frame, and a prompt.", + inputs=[ + IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]), + IO.Image.Input("first_frame"), + IO.Image.Input("end_frame"), + IO.String.Input( + "prompt", + multiline=True, + tooltip="Prompt description (max 2000 characters).", + ), + IO.Int.Input( + "duration", + default=5, + min=2, + max=8, + step=1, + display_mode=IO.NumberDisplay.slider, + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Combo.Input( + "movement_amplitude", + options=["auto", "small", "medium", "large"], + tooltip="The movement amplitude of objects in 
the frame.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + ) + + @classmethod + async def execute( + cls, + model: str, + first_frame: Input.Image, + end_frame: Input.Image, + prompt: str, + duration: int, + seed: int, + resolution: str, + movement_amplitude: str, + ) -> IO.NodeOutput: + validate_string(prompt, max_length=2000) + if get_number_of_images(first_frame) > 1: + raise ValueError("Only one input image is allowed for `first_frame`.") + if get_number_of_images(end_frame) > 1: + raise ValueError("Only one input image is allowed for `end_frame`.") + validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False) + payload = TaskCreationRequest( + model=model, + prompt=prompt, + duration=duration, + seed=seed, + resolution=resolution, + movement_amplitude=movement_amplitude, + images=[ + (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0] + for frame in (first_frame, end_frame) + ], + ) + results = await execute_task(cls, VIDU_START_END_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) class ViduExtension(ComfyExtension): @@ -558,6 +848,10 @@ class ViduExtension(ComfyExtension): ViduImageToVideoNode, ViduReferenceVideoNode, ViduStartEndToVideoNode, + Vidu2TextToVideoNode, + Vidu2ImageToVideoNode, + Vidu2ReferenceVideoNode, + Vidu2StartEndToVideoNode, ]