mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-27 23:00:20 +08:00
Compare commits
10 commits: 1eb39fe852 ... 6effbc8eae
| SHA1 |
|---|
| 6effbc8eae |
| c6238047ee |
| 5cd1113236 |
| 2f642d5d9b |
| cd912963f1 |
| 6e4b1f9d00 |
| dc202a2e51 |
| 153bc524bf |
| 393d2880dd |
| 3988f37386 |
@@ -183,7 +183,7 @@ Simply download, extract with [7-Zip](https://7-zip.org) or with the windows exp
 If you have trouble extracting it, right click the file -> properties -> unblock
 
-Update your Nvidia drivers if it doesn't start.
+The portable above currently comes with python 3.13 and pytorch cuda 13.0. Update your Nvidia drivers if it doesn't start.
 
 #### Alternative Downloads:
@@ -212,7 +212,7 @@ Python 3.14 works but you may encounter issues with the torch compile node. The
 
 Python 3.13 is very well supported. If you have trouble with some custom node dependencies on 3.13 you can try 3.12
 
-torch 2.4 and above is supported but some features might only work on newer versions. We generally recommend using the latest major version of pytorch unless it is less than 2 weeks old.
+torch 2.4 and above is supported but some features might only work on newer versions. We generally recommend using the latest major version of pytorch with the latest cuda version unless it is less than 2 weeks old.
 
 ### Instructions:
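To confirm which pytorch and cuda build is actually active in your environment, a quick check like the following can help (a generic snippet, not part of this diff):

import torch

print(torch.__version__)          # e.g. "2.9.0+cu130" -> pytorch 2.9 built for cuda 13.0
print(torch.version.cuda)         # CUDA version the wheel was built against; None on CPU-only builds
print(torch.cuda.is_available())  # True when an NVIDIA GPU and a recent enough driver are present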
@@ -237,6 +237,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        else:
            dit_config["vec_in_dim"] = None

        dit_config["num_heads"] = dit_config["hidden_size"] // sum(dit_config["axes_dim"])

        dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.')
        dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.')
        if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
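Here `count_blocks` derives the transformer depth by counting how many numbered blocks share a key prefix in the checkpoint; a rough re-implementation sketch of the idea (not the actual ComfyUI helper):

def count_blocks_sketch(state_dict_keys, prefix_format):
    """Count numbered blocks, e.g. prefix_format = 'double_blocks.{}.'."""
    count = 0
    # Keep probing "double_blocks.0.", "double_blocks.1.", ... until no key matches.
    while any(k.startswith(prefix_format.format(count)) for k in state_dict_keys):
        count += 1
    return count

keys = ["double_blocks.0.img_attn.qkv.weight", "double_blocks.1.img_attn.qkv.weight"]
assert count_blocks_sketch(keys, "double_blocks.{}.") == 2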
@@ -368,7 +368,7 @@ try:
         if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
             ENABLE_PYTORCH_ATTENTION = True
     if rocm_version >= (7, 0):
-        if any((a in arch) for a in ["gfx1201"]):
+        if any((a in arch) for a in ["gfx1200", "gfx1201"]):
             ENABLE_PYTORCH_ATTENTION = True
     if torch_version_numeric >= (2, 7) and rocm_version >= (6, 4):
         if any((a in arch) for a in ["gfx1200", "gfx1201", "gfx950"]):  # TODO: more arches, "gfx942" gives error on pytorch nightly 2.10 1013 rocm7.0
@@ -1252,7 +1252,7 @@ def pytorch_attention_enabled():
     return ENABLE_PYTORCH_ATTENTION
 
 def pytorch_attention_enabled_vae():
-    if is_amd():
+    if is_amd() and not amd_min_version(device=None, min_rdna_version=4):
         return False # enabling pytorch attention on AMD currently causes crash when doing high res
     return pytorch_attention_enabled()
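The new `amd_min_version(device=None, min_rdna_version=4)` guard keeps pytorch attention enabled for the VAE on RDNA4 parts while still disabling it on older AMD hardware. Conceptually the check boils down to mapping the reported gfx architecture name to an RDNA generation; a sketch of that mapping (not the actual implementation):

def rdna_generation_sketch(arch_name: str) -> int | None:
    # Rough mapping of AMD gfx names to RDNA generations:
    # gfx101x -> RDNA1, gfx103x -> RDNA2, gfx11xx -> RDNA3, gfx12xx -> RDNA4
    if arch_name.startswith("gfx101"):
        return 1
    if arch_name.startswith("gfx103"):
        return 2
    if arch_name.startswith("gfx11"):
        return 3
    if arch_name.startswith("gfx12"):
        return 4
    return None

# With the diff above, pytorch_attention_enabled_vae() now returns False only
# for AMD devices below RDNA4, e.g. "gfx1100" (RDNA3), instead of all AMD devices.
assert rdna_generation_sketch("gfx1201") == 4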
comfy/ops.py (26 lines changed)
@@ -625,21 +625,29 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                 missing_keys.remove(key)
 
         def state_dict(self, *args, destination=None, prefix="", **kwargs):
-            sd = super().state_dict(*args, destination=destination, prefix=prefix, **kwargs)
-            if isinstance(self.weight, QuantizedTensor):
-                layout_cls = self.weight._layout_cls
+            if destination is not None:
+                sd = destination
+            else:
+                sd = {}
 
-                # Check if it's any FP8 variant (E4M3 or E5M2)
-                if layout_cls in ("TensorCoreFP8E4M3Layout", "TensorCoreFP8E5M2Layout", "TensorCoreFP8Layout"):
-                    sd["{}weight_scale".format(prefix)] = self.weight._params.scale
-                elif layout_cls == "TensorCoreNVFP4Layout":
-                    sd["{}weight_scale_2".format(prefix)] = self.weight._params.scale
-                    sd["{}weight_scale".format(prefix)] = self.weight._params.block_scale
+            if self.bias is not None:
+                sd["{}bias".format(prefix)] = self.bias
+
+            if isinstance(self.weight, QuantizedTensor):
+                sd_out = self.weight.state_dict("{}weight".format(prefix))
+                for k in sd_out:
+                    sd[k] = sd_out[k]
+
+                quant_conf = {"format": self.quant_format}
+                if self._full_precision_mm_config:
+                    quant_conf["full_precision_matrix_mult"] = True
+                sd["{}comfy_quant".format(prefix)] = torch.tensor(list(json.dumps(quant_conf).encode('utf-8')), dtype=torch.uint8)
 
                 input_scale = getattr(self, 'input_scale', None)
                 if input_scale is not None:
                     sd["{}input_scale".format(prefix)] = input_scale
+            else:
+                sd["{}weight".format(prefix)] = self.weight
             return sd
 
         def _forward(self, input, weight, bias):
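After this change, the quantized payload, its scales, and a JSON quantization config all end up as flat keys in the saved state dict. An illustrative sketch of the resulting layout for an FP8 layer (key names follow the diff above; the shapes, values, and format string are made up):

import json
import torch

prefix = "layer1."
sd = {
    prefix + "weight": torch.zeros(128, 64, dtype=torch.float8_e4m3fn),  # quantized payload
    prefix + "weight_scale": torch.tensor(3.0),                          # dequantization scale
    prefix + "bias": torch.zeros(128),
    # quantization config serialized as a uint8 tensor holding UTF-8 JSON:
    prefix + "comfy_quant": torch.tensor(
        list(json.dumps({"format": "float8_e4m3fn"}).encode("utf-8")), dtype=torch.uint8
    ),
}
# Recovering the config on load:
quant_conf = json.loads(bytes(sd[prefix + "comfy_quant"].tolist()).decode("utf-8"))
print(quant_conf["format"])  # -> "float8_e4m3fn"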
@@ -1059,9 +1059,9 @@ def detect_te_model(sd):
         return TEModel.JINA_CLIP_2
     if "encoder.block.23.layer.1.DenseReluDense.wi_1.weight" in sd:
         weight = sd["encoder.block.23.layer.1.DenseReluDense.wi_1.weight"]
-        if weight.shape[-1] == 4096:
+        if weight.shape[0] == 10240:
             return TEModel.T5_XXL
-        elif weight.shape[-1] == 2048:
+        elif weight.shape[0] == 5120:
             return TEModel.T5_XL
     if 'encoder.block.23.layer.1.DenseReluDense.wi.weight' in sd:
         return TEModel.T5_XXL_OLD
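The updated checks key off the feed-forward width (the first axis of the `wi_1` linear weight) rather than the model width; both pairs of numbers describe the same models, since T5-XXL uses d_ff=10240 with d_model=4096 and T5-XL uses d_ff=5120 with d_model=2048. A small standalone illustration (not from the diff):

import torch

# nn.Linear stores weight as [out_features, in_features] = [d_ff, d_model] for wi_1.
t5_xxl_wi_1 = torch.empty(10240, 4096)  # T5-XXL: d_ff=10240, d_model=4096
t5_xl_wi_1 = torch.empty(5120, 2048)    # T5-XL:  d_ff=5120,  d_model=2048

assert t5_xxl_wi_1.shape[0] == 10240 and t5_xxl_wi_1.shape[-1] == 4096
assert t5_xl_wi_1.shape[0] == 5120 and t5_xl_wi_1.shape[-1] == 2048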
@@ -36,7 +36,7 @@ def te(dtype_t5=None, t5_quantization_metadata=None):
             if t5_quantization_metadata is not None:
                 model_options = model_options.copy()
                 model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata
-            if dtype is None:
+            if dtype_t5 is not None:
                 dtype = dtype_t5
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return CosmosTEModel_
@@ -32,7 +32,7 @@ def mochi_te(dtype_t5=None, t5_quantization_metadata=None):
             if t5_quantization_metadata is not None:
                 model_options = model_options.copy()
                 model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata
-            if dtype is None:
+            if dtype_t5 is not None:
                 dtype = dtype_t5
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return MochiTEModel_
@@ -36,7 +36,7 @@ def pixart_te(dtype_t5=None, t5_quantization_metadata=None):
             if t5_quantization_metadata is not None:
                 model_options = model_options.copy()
                 model_options["t5xxl_quantization_metadata"] = t5_quantization_metadata
-            if dtype is None:
+            if dtype_t5 is not None:
                 dtype = dtype_t5
             super().__init__(device=device, dtype=dtype, model_options=model_options)
     return PixArtTEModel_
comfy_api_nodes/apis/vidu.py (new file, 41 lines)
@@ -0,0 +1,41 @@
from pydantic import BaseModel, Field


class SubjectReference(BaseModel):
    id: str = Field(...)
    images: list[str] = Field(...)


class TaskCreationRequest(BaseModel):
    model: str = Field(...)
    prompt: str = Field(..., max_length=2000)
    duration: int = Field(...)
    seed: int = Field(..., ge=0, le=2147483647)
    aspect_ratio: str | None = Field(None)
    resolution: str | None = Field(None)
    movement_amplitude: str | None = Field(None)
    images: list[str] | None = Field(None, description="Base64 encoded string or image URL")
    subjects: list[SubjectReference] | None = Field(None)
    bgm: bool | None = Field(None)
    audio: bool | None = Field(None)


class TaskCreationResponse(BaseModel):
    task_id: str = Field(...)
    state: str = Field(...)
    created_at: str = Field(...)
    code: int | None = Field(None, description="Error code")


class TaskResult(BaseModel):
    id: str = Field(..., description="Creation id")
    url: str = Field(..., description="The URL of the generated results, valid for one hour")
    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")


class TaskStatusResponse(BaseModel):
    state: str = Field(...)
    err_code: str | None = Field(None)
    progress: float | None = Field(None)
    credits: int | None = Field(None)
    creations: list[TaskResult] = Field(..., description="Generated results")
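As a quick illustration of how these request models serialize (a hypothetical sketch; the field values are made up, and `model_dump_json` is the pydantic v2 API):

from comfy_api_nodes.apis.vidu import SubjectReference, TaskCreationRequest

req = TaskCreationRequest(
    model="viduq2",
    prompt="A red fox running through fresh snow",  # made-up prompt
    duration=5,
    seed=42,
    aspect_ratio="16:9",
    resolution="1080p",
    subjects=[SubjectReference(id="subject1", images=["https://example.com/fox.png"])],
)
# exclude_none keeps the unset optional fields out of the JSON request body.
print(req.model_dump_json(exclude_none=True))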
@@ -567,7 +567,7 @@ async def execute_lipsync(
     # Upload the audio file to Comfy API and get download URL
     if audio:
         audio_url = await upload_audio_to_comfyapi(
-            cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg", filename="output.mp3"
+            cls, audio, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg"
         )
         logging.info("Uploaded audio to Comfy API. URL: %s", audio_url)
     else:
@@ -1,12 +1,13 @@
 import logging
-from enum import Enum
-from typing import Literal, Optional, TypeVar
 
-import torch
-from pydantic import BaseModel, Field
 from typing_extensions import override
 
-from comfy_api.latest import IO, ComfyExtension
+from comfy_api.latest import IO, ComfyExtension, Input
+from comfy_api_nodes.apis.vidu import (
+    SubjectReference,
+    TaskCreationRequest,
+    TaskCreationResponse,
+    TaskResult,
+    TaskStatusResponse,
+)
 from comfy_api_nodes.util import (
     ApiEndpoint,
     download_url_to_video_output,
@@ -17,6 +18,7 @@ from comfy_api_nodes.util import (
     validate_image_aspect_ratio,
     validate_image_dimensions,
     validate_images_aspect_ratio_closeness,
     validate_string,
 )
 
 VIDU_TEXT_TO_VIDEO = "/proxy/vidu/text2video"
@@ -25,98 +27,33 @@ VIDU_REFERENCE_VIDEO = "/proxy/vidu/reference2video"
 VIDU_START_END_VIDEO = "/proxy/vidu/start-end2video"
 VIDU_GET_GENERATION_STATUS = "/proxy/vidu/tasks/%s/creations"
 
-R = TypeVar("R")
-
-
-class VideoModelName(str, Enum):
-    vidu_q1 = "viduq1"
-
-
-class AspectRatio(str, Enum):
-    r_16_9 = "16:9"
-    r_9_16 = "9:16"
-    r_1_1 = "1:1"
-
-
-class Resolution(str, Enum):
-    r_1080p = "1080p"
-
-
-class MovementAmplitude(str, Enum):
-    auto = "auto"
-    small = "small"
-    medium = "medium"
-    large = "large"
-
-
-class TaskCreationRequest(BaseModel):
-    model: VideoModelName = VideoModelName.vidu_q1
-    prompt: Optional[str] = Field(None, max_length=1500)
-    duration: Optional[Literal[5]] = 5
-    seed: Optional[int] = Field(0, ge=0, le=2147483647)
-    aspect_ratio: Optional[AspectRatio] = AspectRatio.r_16_9
-    resolution: Optional[Resolution] = Resolution.r_1080p
-    movement_amplitude: Optional[MovementAmplitude] = MovementAmplitude.auto
-    images: Optional[list[str]] = Field(None, description="Base64 encoded string or image URL")
-
-
-class TaskCreationResponse(BaseModel):
-    task_id: str = Field(...)
-    state: str = Field(...)
-    created_at: str = Field(...)
-    code: Optional[int] = Field(None, description="Error code")
-
-
-class TaskResult(BaseModel):
-    id: str = Field(..., description="Creation id")
-    url: str = Field(..., description="The URL of the generated results, valid for one hour")
-    cover_url: str = Field(..., description="The cover URL of the generated results, valid for one hour")
-
-
-class TaskStatusResponse(BaseModel):
-    state: str = Field(...)
-    err_code: Optional[str] = Field(None)
-    creations: list[TaskResult] = Field(..., description="Generated results")
-
-
-def get_video_url_from_response(response) -> Optional[str]:
-    if response.creations:
-        return response.creations[0].url
-    return None
-
-
-def get_video_from_response(response) -> TaskResult:
-    if not response.creations:
-        error_msg = f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
-        logging.info(error_msg)
-        raise RuntimeError(error_msg)
-    logging.info("Vidu task %s succeeded. Video URL: %s", response.creations[0].id, response.creations[0].url)
-    return response.creations[0]
-
-
 async def execute_task(
     cls: type[IO.ComfyNode],
     vidu_endpoint: str,
     payload: TaskCreationRequest,
     estimated_duration: int,
-) -> R:
-    response = await sync_op(
+) -> list[TaskResult]:
+    task_creation_response = await sync_op(
         cls,
         endpoint=ApiEndpoint(path=vidu_endpoint, method="POST"),
         response_model=TaskCreationResponse,
         data=payload,
     )
-    if response.state == "failed":
-        error_msg = f"Vidu request failed. Code: {response.code}"
-        logging.error(error_msg)
-        raise RuntimeError(error_msg)
-    return await poll_op(
+    if task_creation_response.state == "failed":
+        raise RuntimeError(f"Vidu request failed. Code: {task_creation_response.code}")
+    response = await poll_op(
         cls,
-        ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % response.task_id),
+        ApiEndpoint(path=VIDU_GET_GENERATION_STATUS % task_creation_response.task_id),
         response_model=TaskStatusResponse,
         status_extractor=lambda r: r.state,
         estimated_duration=estimated_duration,
+        progress_extractor=lambda r: r.progress,
+        max_poll_attempts=320,
     )
+    if not response.creations:
+        raise RuntimeError(
+            f"Vidu request does not contain results. State: {response.state}, Error Code: {response.err_code}"
+        )
+    return response.creations
 
 
 class ViduTextToVideoNode(IO.ComfyNode):
@@ -127,14 +64,9 @@ class ViduTextToVideoNode(IO.ComfyNode):
             node_id="ViduTextToVideoNode",
             display_name="Vidu Text To Video Generation",
             category="api node/video/Vidu",
-            description="Generate video from text prompt",
+            description="Generate video from a text prompt",
             inputs=[
-                IO.Combo.Input(
-                    "model",
-                    options=VideoModelName,
-                    default=VideoModelName.vidu_q1,
-                    tooltip="Model name",
-                ),
+                IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
@@ -163,22 +95,19 @@ class ViduTextToVideoNode(IO.ComfyNode):
             ),
             IO.Combo.Input(
                 "aspect_ratio",
-                options=AspectRatio,
-                default=AspectRatio.r_16_9,
+                options=["16:9", "9:16", "1:1"],
                 tooltip="The aspect ratio of the output video",
                 optional=True,
             ),
             IO.Combo.Input(
                 "resolution",
-                options=Resolution,
-                default=Resolution.r_1080p,
+                options=["1080p"],
                 tooltip="Supported values may vary by model & duration",
                 optional=True,
             ),
             IO.Combo.Input(
                 "movement_amplitude",
-                options=MovementAmplitude,
-                default=MovementAmplitude.auto,
+                options=["auto", "small", "medium", "large"],
                 tooltip="The movement amplitude of objects in the frame",
                 optional=True,
             ),
@@ -208,7 +137,7 @@ class ViduTextToVideoNode(IO.ComfyNode):
         if not prompt:
             raise ValueError("The prompt field is required and cannot be empty.")
         payload = TaskCreationRequest(
-            model_name=model,
+            model=model,
             prompt=prompt,
             duration=duration,
             seed=seed,
@@ -216,8 +145,8 @@ class ViduTextToVideoNode(IO.ComfyNode):
             resolution=resolution,
             movement_amplitude=movement_amplitude,
         )
-        results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload, 320)
-        return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
+        results = await execute_task(cls, VIDU_TEXT_TO_VIDEO, payload)
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
 
 
 class ViduImageToVideoNode(IO.ComfyNode):
@@ -230,12 +159,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
             category="api node/video/Vidu",
             description="Generate video from image and optional prompt",
             inputs=[
-                IO.Combo.Input(
-                    "model",
-                    options=VideoModelName,
-                    default=VideoModelName.vidu_q1,
-                    tooltip="Model name",
-                ),
+                IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
                 IO.Image.Input(
                     "image",
                     tooltip="An image to be used as the start frame of the generated video",
@@ -270,15 +194,13 @@ class ViduImageToVideoNode(IO.ComfyNode):
             ),
             IO.Combo.Input(
                 "resolution",
-                options=Resolution,
-                default=Resolution.r_1080p,
+                options=["1080p"],
                 tooltip="Supported values may vary by model & duration",
                 optional=True,
             ),
             IO.Combo.Input(
                 "movement_amplitude",
-                options=MovementAmplitude,
-                default=MovementAmplitude.auto.value,
+                options=["auto", "small", "medium", "large"],
                 tooltip="The movement amplitude of objects in the frame",
                 optional=True,
             ),
@@ -298,7 +220,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        image: torch.Tensor,
+        image: Input.Image,
         prompt: str,
         duration: int,
         seed: int,
@@ -309,7 +231,7 @@ class ViduImageToVideoNode(IO.ComfyNode):
             raise ValueError("Only one input image is allowed.")
         validate_image_aspect_ratio(image, (1, 4), (4, 1))
         payload = TaskCreationRequest(
-            model_name=model,
+            model=model,
             prompt=prompt,
             duration=duration,
             seed=seed,
@@ -322,8 +244,8 @@ class ViduImageToVideoNode(IO.ComfyNode):
             max_images=1,
             mime_type="image/png",
         )
-        results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload, 120)
-        return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
+        results = await execute_task(cls, VIDU_IMAGE_TO_VIDEO, payload)
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
 
 
 class ViduReferenceVideoNode(IO.ComfyNode):
@@ -334,14 +256,9 @@ class ViduReferenceVideoNode(IO.ComfyNode):
             node_id="ViduReferenceVideoNode",
             display_name="Vidu Reference To Video Generation",
             category="api node/video/Vidu",
-            description="Generate video from multiple images and prompt",
+            description="Generate video from multiple images and a prompt",
             inputs=[
-                IO.Combo.Input(
-                    "model",
-                    options=VideoModelName,
-                    default=VideoModelName.vidu_q1,
-                    tooltip="Model name",
-                ),
+                IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
                 IO.Image.Input(
                     "images",
                     tooltip="Images to use as references to generate a video with consistent subjects (max 7 images).",
@@ -374,22 +291,19 @@ class ViduReferenceVideoNode(IO.ComfyNode):
             ),
             IO.Combo.Input(
                 "aspect_ratio",
-                options=AspectRatio,
-                default=AspectRatio.r_16_9,
+                options=["16:9", "9:16", "1:1"],
                 tooltip="The aspect ratio of the output video",
                 optional=True,
             ),
             IO.Combo.Input(
                 "resolution",
-                options=[model.value for model in Resolution],
-                default=Resolution.r_1080p.value,
+                options=["1080p"],
                 tooltip="Supported values may vary by model & duration",
                 optional=True,
             ),
             IO.Combo.Input(
                 "movement_amplitude",
-                options=[model.value for model in MovementAmplitude],
-                default=MovementAmplitude.auto.value,
+                options=["auto", "small", "medium", "large"],
                 tooltip="The movement amplitude of objects in the frame",
                 optional=True,
             ),
@@ -409,7 +323,7 @@ class ViduReferenceVideoNode(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        images: torch.Tensor,
+        images: Input.Image,
         prompt: str,
         duration: int,
         seed: int,
@@ -426,7 +340,7 @@ class ViduReferenceVideoNode(IO.ComfyNode):
             validate_image_aspect_ratio(image, (1, 4), (4, 1))
             validate_image_dimensions(image, min_width=128, min_height=128)
         payload = TaskCreationRequest(
-            model_name=model,
+            model=model,
             prompt=prompt,
             duration=duration,
             seed=seed,
@@ -440,8 +354,8 @@ class ViduReferenceVideoNode(IO.ComfyNode):
             max_images=7,
             mime_type="image/png",
         )
-        results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload, 120)
-        return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
+        results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload)
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))
 
 
 class ViduStartEndToVideoNode(IO.ComfyNode):
@@ -454,12 +368,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
             category="api node/video/Vidu",
             description="Generate a video from start and end frames and a prompt",
             inputs=[
-                IO.Combo.Input(
-                    "model",
-                    options=[model.value for model in VideoModelName],
-                    default=VideoModelName.vidu_q1.value,
-                    tooltip="Model name",
-                ),
+                IO.Combo.Input("model", options=["viduq1"], tooltip="Model name"),
                 IO.Image.Input(
                     "first_frame",
                     tooltip="Start frame",
@@ -497,15 +406,13 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
             ),
             IO.Combo.Input(
                 "resolution",
-                options=[model.value for model in Resolution],
-                default=Resolution.r_1080p.value,
+                options=["1080p"],
                 tooltip="Supported values may vary by model & duration",
                 optional=True,
             ),
             IO.Combo.Input(
                 "movement_amplitude",
-                options=[model.value for model in MovementAmplitude],
-                default=MovementAmplitude.auto.value,
+                options=["auto", "small", "medium", "large"],
                 tooltip="The movement amplitude of objects in the frame",
                 optional=True,
             ),
@@ -525,8 +432,8 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        first_frame: torch.Tensor,
-        end_frame: torch.Tensor,
+        first_frame: Input.Image,
+        end_frame: Input.Image,
         prompt: str,
         duration: int,
         seed: int,
@@ -535,7 +442,7 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
     ) -> IO.NodeOutput:
         validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
         payload = TaskCreationRequest(
-            model_name=model,
+            model=model,
             prompt=prompt,
             duration=duration,
             seed=seed,
@@ -546,8 +453,391 @@ class ViduStartEndToVideoNode(IO.ComfyNode):
             (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0]
             for frame in (first_frame, end_frame)
         ]
-        results = await execute_task(cls, VIDU_START_END_VIDEO, payload, 96)
-        return IO.NodeOutput(await download_url_to_video_output(get_video_from_response(results).url))
+        results = await execute_task(cls, VIDU_START_END_VIDEO, payload)
+        return IO.NodeOutput(await download_url_to_video_output(results[0].url))


class Vidu2TextToVideoNode(IO.ComfyNode):

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="Vidu2TextToVideoNode",
            display_name="Vidu2 Text-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate video from a text prompt",
            inputs=[
                IO.Combo.Input("model", options=["viduq2"]),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation, with a maximum length of 2000 characters.",
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=1,
                    max=10,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "3:4", "4:3", "1:1"]),
                IO.Combo.Input("resolution", options=["720p", "1080p"]),
                IO.Boolean.Input(
                    "background_music",
                    default=False,
                    tooltip="Whether to add background music to the generated video.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        prompt: str,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        background_music: bool,
    ) -> IO.NodeOutput:
        validate_string(prompt, min_length=1, max_length=2000)
        results = await execute_task(
            cls,
            VIDU_TEXT_TO_VIDEO,
            TaskCreationRequest(
                model=model,
                prompt=prompt,
                duration=duration,
                seed=seed,
                aspect_ratio=aspect_ratio,
                resolution=resolution,
                bgm=background_music,
            ),
        )
        return IO.NodeOutput(await download_url_to_video_output(results[0].url))


class Vidu2ImageToVideoNode(IO.ComfyNode):

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="Vidu2ImageToVideoNode",
            display_name="Vidu2 Image-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from an image and an optional prompt.",
            inputs=[
                IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]),
                IO.Image.Input(
                    "image",
                    tooltip="An image to be used as the start frame of the generated video.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="An optional text prompt for video generation (max 2000 characters).",
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=1,
                    max=10,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input(
                    "resolution",
                    options=["720p", "1080p"],
                ),
                IO.Combo.Input(
                    "movement_amplitude",
                    options=["auto", "small", "medium", "large"],
                    tooltip="The movement amplitude of objects in the frame.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        image: Input.Image,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> IO.NodeOutput:
        if get_number_of_images(image) > 1:
            raise ValueError("Only one input image is allowed.")
        validate_image_aspect_ratio(image, (1, 4), (4, 1))
        validate_string(prompt, max_length=2000)
        results = await execute_task(
            cls,
            VIDU_IMAGE_TO_VIDEO,
            TaskCreationRequest(
                model=model,
                prompt=prompt,
                duration=duration,
                seed=seed,
                resolution=resolution,
                movement_amplitude=movement_amplitude,
                images=await upload_images_to_comfyapi(
                    cls,
                    image,
                    max_images=1,
                    mime_type="image/png",
                ),
            ),
        )
        return IO.NodeOutput(await download_url_to_video_output(results[0].url))


class Vidu2ReferenceVideoNode(IO.ComfyNode):

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="Vidu2ReferenceVideoNode",
            display_name="Vidu2 Reference-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from multiple reference images and a prompt.",
            inputs=[
                IO.Combo.Input("model", options=["viduq2"]),
                IO.Autogrow.Input(
                    "subjects",
                    template=IO.Autogrow.TemplateNames(
                        IO.Image.Input("reference_images"),
                        names=["subject1", "subject2", "subject3"],
                        min=1,
                    ),
                    tooltip="For each subject, provide up to 3 reference images (7 images total across all subjects). "
                    "Reference them in prompts via @subject{subject_id}.",
                ),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="A textual description for video generation, with a maximum length of 2000 characters. "
                    "Reference subjects via @subject{subject_id}.",
                ),
                IO.Boolean.Input(
                    "audio",
                    default=False,
                    tooltip="When enabled video will contain generated speech and background music based on the prompt.",
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=1,
                    max=10,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "4:3", "3:4", "1:1"]),
                IO.Combo.Input("resolution", options=["720p"]),
                IO.Combo.Input(
                    "movement_amplitude",
                    options=["auto", "small", "medium", "large"],
                    tooltip="The movement amplitude of objects in the frame.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        subjects: IO.Autogrow.Type,
        prompt: str,
        audio: bool,
        duration: int,
        seed: int,
        aspect_ratio: str,
        resolution: str,
        movement_amplitude: str,
    ) -> IO.NodeOutput:
        validate_string(prompt, min_length=1, max_length=2000)
        total_images = 0
        for i in subjects:
            if get_number_of_images(subjects[i]) > 3:
                raise ValueError("Maximum number of images per subject is 3.")
            for im in subjects[i]:
                total_images += 1
                validate_image_aspect_ratio(im, (1, 4), (4, 1))
                validate_image_dimensions(im, min_width=128, min_height=128)
        if total_images > 7:
            raise ValueError("Too many reference images; the maximum allowed is 7.")
        subjects_param: list[SubjectReference] = []
        for i in subjects:
            subjects_param.append(
                SubjectReference(
                    id=i,
                    images=await upload_images_to_comfyapi(
                        cls,
                        subjects[i],
                        max_images=3,
                        mime_type="image/png",
                        wait_label=f"Uploading reference images for {i}",
                    ),
                ),
            )
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            audio=audio,
            duration=duration,
            seed=seed,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
            subjects=subjects_param,
        )
        results = await execute_task(cls, VIDU_REFERENCE_VIDEO, payload)
        return IO.NodeOutput(await download_url_to_video_output(results[0].url))


class Vidu2StartEndToVideoNode(IO.ComfyNode):

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="Vidu2StartEndToVideoNode",
            display_name="Vidu2 Start/End Frame-to-Video Generation",
            category="api node/video/Vidu",
            description="Generate a video from a start frame, an end frame, and a prompt.",
            inputs=[
                IO.Combo.Input("model", options=["viduq2-pro-fast", "viduq2-pro", "viduq2-turbo"]),
                IO.Image.Input("first_frame"),
                IO.Image.Input("end_frame"),
                IO.String.Input(
                    "prompt",
                    multiline=True,
                    tooltip="Prompt description (max 2000 characters).",
                ),
                IO.Int.Input(
                    "duration",
                    default=5,
                    min=2,
                    max=8,
                    step=1,
                    display_mode=IO.NumberDisplay.slider,
                ),
                IO.Int.Input(
                    "seed",
                    default=1,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                ),
                IO.Combo.Input("resolution", options=["720p", "1080p"]),
                IO.Combo.Input(
                    "movement_amplitude",
                    options=["auto", "small", "medium", "large"],
                    tooltip="The movement amplitude of objects in the frame.",
                ),
            ],
            outputs=[
                IO.Video.Output(),
            ],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @classmethod
    async def execute(
        cls,
        model: str,
        first_frame: Input.Image,
        end_frame: Input.Image,
        prompt: str,
        duration: int,
        seed: int,
        resolution: str,
        movement_amplitude: str,
    ) -> IO.NodeOutput:
        validate_string(prompt, max_length=2000)
        if get_number_of_images(first_frame) > 1:
            raise ValueError("Only one input image is allowed for `first_frame`.")
        if get_number_of_images(end_frame) > 1:
            raise ValueError("Only one input image is allowed for `end_frame`.")
        validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False)
        payload = TaskCreationRequest(
            model=model,
            prompt=prompt,
            duration=duration,
            seed=seed,
            resolution=resolution,
            movement_amplitude=movement_amplitude,
            images=[
                (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0]
                for frame in (first_frame, end_frame)
            ],
        )
        results = await execute_task(cls, VIDU_START_END_VIDEO, payload)
        return IO.NodeOutput(await download_url_to_video_output(results[0].url))


class ViduExtension(ComfyExtension):
@@ -558,6 +848,10 @@ class ViduExtension(ComfyExtension):
             ViduImageToVideoNode,
             ViduReferenceVideoNode,
             ViduStartEndToVideoNode,
+            Vidu2TextToVideoNode,
+            Vidu2ImageToVideoNode,
+            Vidu2ReferenceVideoNode,
+            Vidu2StartEndToVideoNode,
         ]
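For context, extensions in `comfy_api.latest` expose their nodes through an async `get_node_list`, and the module is picked up through an entrypoint coroutine; the surrounding (unchanged) boilerplate looks roughly like this sketch:

class ViduExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        # The hunk above appends the four new Vidu2 nodes to this list.
        return [
            ViduTextToVideoNode,
            ViduImageToVideoNode,
            ViduReferenceVideoNode,
            ViduStartEndToVideoNode,
            Vidu2TextToVideoNode,
            Vidu2ImageToVideoNode,
            Vidu2ReferenceVideoNode,
            Vidu2StartEndToVideoNode,
        ]


async def comfy_entrypoint() -> ViduExtension:
    # ComfyUI imports the module and awaits comfy_entrypoint() to register it.
    return ViduExtension()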
@@ -55,7 +55,7 @@ def image_tensor_pair_to_batch(image1: torch.Tensor, image2: torch.Tensor) -> to
 
 def tensor_to_bytesio(
     image: torch.Tensor,
     name: str | None = None,
     *,
     total_pixels: int = 2048 * 2048,
     mime_type: str = "image/png",
 ) -> BytesIO:
@@ -75,7 +75,7 @@ def tensor_to_bytesio(
 
     pil_image = tensor_to_pil(image, total_pixels=total_pixels)
     img_binary = pil_to_bytesio(pil_image, mime_type=mime_type)
-    img_binary.name = f"{name if name else uuid.uuid4()}.{mimetype_to_extension(mime_type)}"
+    img_binary.name = f"{uuid.uuid4()}.{mimetype_to_extension(mime_type)}"
     return img_binary
@@ -82,7 +82,6 @@ async def upload_audio_to_comfyapi(
     container_format: str = "mp4",
     codec_name: str = "aac",
     mime_type: str = "audio/mp4",
-    filename: str = "uploaded_audio.mp4",
 ) -> str:
     """
     Uploads a single audio input to ComfyUI API and returns its download URL.
@@ -92,7 +91,7 @@ async def upload_audio_to_comfyapi(
     waveform: torch.Tensor = audio["waveform"]
     audio_data_np = audio_tensor_to_contiguous_ndarray(waveform)
     audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, sample_rate, container_format, codec_name)
-    return await upload_file_to_comfyapi(cls, audio_bytes_io, filename, mime_type)
+    return await upload_file_to_comfyapi(cls, audio_bytes_io, f"{uuid.uuid4()}.{container_format}", mime_type)
 
 
 async def upload_video_to_comfyapi(
@@ -1,6 +1,6 @@
 comfyui-frontend-package==1.36.13
 comfyui-workflow-templates==0.7.69
-comfyui-embedded-docs==0.3.1
+comfyui-embedded-docs==0.4.0
 torch
 torchsde
 torchvision
@@ -153,9 +153,9 @@ class TestMixedPrecisionOps(unittest.TestCase):
         state_dict2 = model.state_dict()
 
         # Verify layer1.weight is a QuantizedTensor with scale preserved
-        self.assertIsInstance(state_dict2["layer1.weight"], QuantizedTensor)
-        self.assertEqual(state_dict2["layer1.weight"]._params.scale.item(), 3.0)
-        self.assertEqual(state_dict2["layer1.weight"]._layout_cls, "TensorCoreFP8E4M3Layout")
+        self.assertTrue(torch.equal(state_dict2["layer1.weight"].view(torch.uint8), fp8_weight.view(torch.uint8)))
+        self.assertEqual(state_dict2["layer1.weight_scale"].item(), 3.0)
+        self.assertEqual(model.layer1.weight._layout_cls, "TensorCoreFP8E4M3Layout")
 
         # Verify non-quantized layers are standard tensors
         self.assertNotIsInstance(state_dict2["layer2.weight"], QuantizedTensor)