From 971932346ac6e6e02c1e1e8cfe34df2f0e1cea3e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sun, 12 Apr 2026 20:27:38 -0700 Subject: [PATCH 01/14] Update quant doc so it's not completely wrong. (#13381) There is still more that needs to be fixed. --- QUANTIZATION.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/QUANTIZATION.md b/QUANTIZATION.md index 1693e13f3..300822029 100644 --- a/QUANTIZATION.md +++ b/QUANTIZATION.md @@ -139,9 +139,9 @@ Example: "_quantization_metadata": { "format_version": "1.0", "layers": { - "model.layers.0.mlp.up_proj": "float8_e4m3fn", - "model.layers.0.mlp.down_proj": "float8_e4m3fn", - "model.layers.1.mlp.up_proj": "float8_e4m3fn" + "model.layers.0.mlp.up_proj": {"format": "float8_e4m3fn"}, + "model.layers.0.mlp.down_proj": {"format": "float8_e4m3fn"}, + "model.layers.1.mlp.up_proj": {"format": "float8_e4m3fn"} } } } @@ -165,4 +165,4 @@ Activation quantization (e.g., for FP8 Tensor Core operations) requires `input_s 3. **Compute scales**: Derive `input_scale` from collected statistics 4. **Store in checkpoint**: Save `input_scale` parameters alongside weights -The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. \ No newline at end of file +The calibration dataset should be representative of your target use case. For diffusion models, this typically means a diverse set of prompts and generation parameters. From c2657d5fb9ccfae150c8e5d0e1b39780a0cc33e9 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Sun, 12 Apr 2026 20:37:13 -0700 Subject: [PATCH 02/14] Fix typo. (#13382) --- comfy/text_encoders/ernie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/text_encoders/ernie.py b/comfy/text_encoders/ernie.py index 8c56c1c11..2c7df78fe 100644 --- a/comfy/text_encoders/ernie.py +++ b/comfy/text_encoders/ernie.py @@ -3,7 +3,7 @@ from comfy import sd1_clip import comfy.text_encoders.llama class Ministral3_3BTokenizer(Mistral3Tokenizer): - def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_data={}): + def __init__(self, embedding_directory=None, embedding_size=5120, embedding_key='ministral3_3b', tokenizer_data={}): return super().__init__(embedding_directory=embedding_directory, embedding_size=embedding_size, embedding_key=embedding_key, tokenizer_data=tokenizer_data) class ErnieTokenizer(sd1_clip.SD1Tokenizer): From ee2db7488df047b0a6f3de98f21bd7902c85a707 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Mon, 13 Apr 2026 08:26:19 +0300 Subject: [PATCH 03/14] feat(api-nodes): add SeeDance 2.0 nodes (#13364) Signed-off-by: bigcat88 --- comfy_api_nodes/apis/bytedance.py | 52 +++ comfy_api_nodes/nodes_bytedance.py | 611 +++++++++++++++++++++++++++-- 2 files changed, 634 insertions(+), 29 deletions(-) diff --git a/comfy_api_nodes/apis/bytedance.py b/comfy_api_nodes/apis/bytedance.py index 18455396d..3755323ac 100644 --- a/comfy_api_nodes/apis/bytedance.py +++ b/comfy_api_nodes/apis/bytedance.py @@ -52,6 +52,26 @@ class TaskImageContent(BaseModel): role: Literal["first_frame", "last_frame", "reference_image"] | None = Field(None) +class TaskVideoContentUrl(BaseModel): + url: str = Field(...) + + +class TaskVideoContent(BaseModel): + type: str = Field("video_url") + video_url: TaskVideoContentUrl = Field(...) + role: str = Field("reference_video") + + +class TaskAudioContentUrl(BaseModel): + url: str = Field(...) + + +class TaskAudioContent(BaseModel): + type: str = Field("audio_url") + audio_url: TaskAudioContentUrl = Field(...) + role: str = Field("reference_audio") + + class Text2VideoTaskCreationRequest(BaseModel): model: str = Field(...) content: list[TaskTextContent] = Field(..., min_length=1) @@ -64,6 +84,17 @@ class Image2VideoTaskCreationRequest(BaseModel): generate_audio: bool | None = Field(...) +class Seedance2TaskCreationRequest(BaseModel): + model: str = Field(...) + content: list[TaskTextContent | TaskImageContent | TaskVideoContent | TaskAudioContent] = Field(..., min_length=1) + generate_audio: bool | None = Field(None) + resolution: str | None = Field(None) + ratio: str | None = Field(None) + duration: int | None = Field(None, ge=4, le=15) + seed: int | None = Field(None, ge=0, le=2147483647) + watermark: bool | None = Field(None) + + class TaskCreationResponse(BaseModel): id: str = Field(...) @@ -77,12 +108,27 @@ class TaskStatusResult(BaseModel): video_url: str = Field(...) +class TaskStatusUsage(BaseModel): + completion_tokens: int = Field(0) + total_tokens: int = Field(0) + + class TaskStatusResponse(BaseModel): id: str = Field(...) model: str = Field(...) status: Literal["queued", "running", "cancelled", "succeeded", "failed"] = Field(...) error: TaskStatusError | None = Field(None) content: TaskStatusResult | None = Field(None) + usage: TaskStatusUsage | None = Field(None) + + +# Dollars per 1K tokens, keyed by (model_id, has_video_input). +SEEDANCE2_PRICE_PER_1K_TOKENS = { + ("dreamina-seedance-2-0-260128", False): 0.007, + ("dreamina-seedance-2-0-260128", True): 0.0043, + ("dreamina-seedance-2-0-fast-260128", False): 0.0056, + ("dreamina-seedance-2-0-fast-260128", True): 0.0033, +} RECOMMENDED_PRESETS = [ @@ -112,6 +158,12 @@ RECOMMENDED_PRESETS_SEEDREAM_4 = [ ("Custom", None, None), ] +# Seedance 2.0 reference video pixel count limits per model. +SEEDANCE2_REF_VIDEO_PIXEL_LIMITS = { + "dreamina-seedance-2-0-260128": {"min": 409_600, "max": 927_408}, + "dreamina-seedance-2-0-fast-260128": {"min": 409_600, "max": 927_408}, +} + # The time in this dictionary are given for 10 seconds duration. VIDEO_TASKS_EXECUTION_TIME = { "seedance-1-0-lite-t2v-250428": { diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index de0c22e70..1cca72f6e 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -8,16 +8,23 @@ from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.bytedance import ( RECOMMENDED_PRESETS, RECOMMENDED_PRESETS_SEEDREAM_4, + SEEDANCE2_PRICE_PER_1K_TOKENS, + SEEDANCE2_REF_VIDEO_PIXEL_LIMITS, VIDEO_TASKS_EXECUTION_TIME, Image2VideoTaskCreationRequest, ImageTaskCreationResponse, + Seedance2TaskCreationRequest, Seedream4Options, Seedream4TaskCreationRequest, + TaskAudioContent, + TaskAudioContentUrl, TaskCreationResponse, TaskImageContent, TaskImageContentUrl, TaskStatusResponse, TaskTextContent, + TaskVideoContent, + TaskVideoContentUrl, Text2ImageTaskCreationRequest, Text2VideoTaskCreationRequest, ) @@ -29,7 +36,10 @@ from comfy_api_nodes.util import ( image_tensor_pair_to_batch, poll_op, sync_op, + upload_audio_to_comfyapi, + upload_image_to_comfyapi, upload_images_to_comfyapi, + upload_video_to_comfyapi, validate_image_aspect_ratio, validate_image_dimensions, validate_string, @@ -46,12 +56,56 @@ SEEDREAM_MODELS = { # Long-running tasks endpoints(e.g., video) BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" # + /{task_id} +BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT = "/proxy/byteplus-seedance2/api/v3/contents/generations/tasks" # + /{task_id} + +SEEDANCE_MODELS = { + "Seedance 2.0": "dreamina-seedance-2-0-260128", + "Seedance 2.0 Fast": "dreamina-seedance-2-0-fast-260128", +} DEPRECATED_MODELS = {"seedance-1-0-lite-t2v-250428", "seedance-1-0-lite-i2v-250428"} + logger = logging.getLogger(__name__) +def _validate_ref_video_pixels(video: Input.Video, model_id: str, index: int) -> None: + """Validate reference video pixel count against Seedance 2.0 model limits.""" + limits = SEEDANCE2_REF_VIDEO_PIXEL_LIMITS.get(model_id) + if not limits: + return + try: + w, h = video.get_dimensions() + except Exception: + return + pixels = w * h + min_px = limits.get("min") + max_px = limits.get("max") + if min_px and pixels < min_px: + raise ValueError( + f"Reference video {index} is too small: {w}x{h} = {pixels:,}px. " f"Minimum is {min_px:,}px for this model." + ) + if max_px and pixels > max_px: + raise ValueError( + f"Reference video {index} is too large: {w}x{h} = {pixels:,}px. " + f"Maximum is {max_px:,}px for this model. Try downscaling the video." + ) + + +def _seedance2_price_extractor(model_id: str, has_video_input: bool): + """Returns a price_extractor closure for Seedance 2.0 poll_op.""" + rate = SEEDANCE2_PRICE_PER_1K_TOKENS.get((model_id, has_video_input)) + if rate is None: + return None + + def extractor(response: TaskStatusResponse) -> float | None: + if response.usage is None: + return None + return response.usage.total_tokens * 1.43 * rate / 1_000.0 + + return extractor + + def get_image_url_from_response(response: ImageTaskCreationResponse) -> str: if response.error: error_msg = f"ByteDance request failed. Code: {response.error['code']}, message: {response.error['message']}" @@ -335,8 +389,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode): mp_provided = out_num_pixels / 1_000_000.0 if ("seedream-4-5" in model or "seedream-5-0" in model) and out_num_pixels < 3686400: raise ValueError( - f"Minimum image resolution for the selected model is 3.68MP, " - f"but {mp_provided:.2f}MP provided." + f"Minimum image resolution for the selected model is 3.68MP, " f"but {mp_provided:.2f}MP provided." ) if "seedream-4-0" in model and out_num_pixels < 921600: raise ValueError( @@ -952,33 +1005,6 @@ class ByteDanceImageReferenceNode(IO.ComfyNode): ) -async def process_video_task( - cls: type[IO.ComfyNode], - payload: Text2VideoTaskCreationRequest | Image2VideoTaskCreationRequest, - estimated_duration: int | None, -) -> IO.NodeOutput: - if payload.model in DEPRECATED_MODELS: - logger.warning( - "Model '%s' is deprecated and will be deactivated on May 13, 2026. " - "Please switch to a newer model. Recommended: seedance-1-0-pro-fast-251015.", - payload.model, - ) - initial_response = await sync_op( - cls, - ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), - data=payload, - response_model=TaskCreationResponse, - ) - response = await poll_op( - cls, - ApiEndpoint(path=f"{BYTEPLUS_TASK_STATUS_ENDPOINT}/{initial_response.id}"), - status_extractor=lambda r: r.status, - estimated_duration=estimated_duration, - response_model=TaskStatusResponse, - ) - return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) - - def raise_if_text_params(prompt: str, text_params: list[str]) -> None: for i in text_params: if f"--{i} " in prompt: @@ -1040,6 +1066,530 @@ PRICE_BADGE_VIDEO = IO.PriceBadge( ) +def _seedance2_text_inputs(): + return [ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text prompt for video generation.", + ), + IO.Combo.Input( + "resolution", + options=["480p", "720p"], + tooltip="Resolution of the output video.", + ), + IO.Combo.Input( + "ratio", + options=["16:9", "4:3", "1:1", "3:4", "9:16", "21:9", "adaptive"], + tooltip="Aspect ratio of the output video.", + ), + IO.Int.Input( + "duration", + default=7, + min=4, + max=15, + step=1, + tooltip="Duration of the output video in seconds (4-15).", + display_mode=IO.NumberDisplay.slider, + ), + IO.Boolean.Input( + "generate_audio", + default=True, + tooltip="Enable audio generation for the output video.", + ), + ] + + +class ByteDance2TextToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ByteDance2TextToVideoNode", + display_name="ByteDance Seedance 2.0 Text to Video", + category="api node/video/ByteDance", + description="Generate video using Seedance 2.0 models based on a text prompt.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option("Seedance 2.0", _seedance2_text_inputs()), + IO.DynamicCombo.Option("Seedance 2.0 Fast", _seedance2_text_inputs()), + ], + tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add a watermark to the video.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $rate480 := 10044; + $rate720 := 21600; + $m := widgets.model; + $pricePer1K := $contains($m, "fast") ? 0.008008 : 0.01001; + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $rate := $res = "720p" ? $rate720 : $rate480; + $cost := $dur * $rate * $pricePer1K / 1000; + {"type": "usd", "usd": $cost, "format": {"approximate": true}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ) -> IO.NodeOutput: + validate_string(model["prompt"], strip_whitespace=True, min_length=1) + model_id = SEEDANCE_MODELS[model["model"]] + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=Seedance2TaskCreationRequest( + model=model_id, + content=[TaskTextContent(text=model["prompt"])], + generate_audio=model["generate_audio"], + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: r.status, + price_extractor=_seedance2_price_extractor(model_id, has_video_input=False), + poll_interval=9, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +class ByteDance2FirstLastFrameNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ByteDance2FirstLastFrameNode", + display_name="ByteDance Seedance 2.0 First-Last-Frame to Video", + category="api node/video/ByteDance", + description="Generate video using Seedance 2.0 from a first frame image and optional last frame image.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option("Seedance 2.0", _seedance2_text_inputs()), + IO.DynamicCombo.Option("Seedance 2.0 Fast", _seedance2_text_inputs()), + ], + tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.", + ), + IO.Image.Input( + "first_frame", + tooltip="First frame image for the video.", + ), + IO.Image.Input( + "last_frame", + tooltip="Last frame image for the video.", + optional=True, + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add a watermark to the video.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution", "model.duration"]), + expr=""" + ( + $rate480 := 10044; + $rate720 := 21600; + $m := widgets.model; + $pricePer1K := $contains($m, "fast") ? 0.008008 : 0.01001; + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $rate := $res = "720p" ? $rate720 : $rate480; + $cost := $dur * $rate * $pricePer1K / 1000; + {"type": "usd", "usd": $cost, "format": {"approximate": true}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_frame: Input.Image, + seed: int, + watermark: bool, + last_frame: Input.Image | None = None, + ) -> IO.NodeOutput: + validate_string(model["prompt"], strip_whitespace=True, min_length=1) + model_id = SEEDANCE_MODELS[model["model"]] + + content: list[TaskTextContent | TaskImageContent] = [ + TaskTextContent(text=model["prompt"]), + TaskImageContent( + image_url=TaskImageContentUrl( + url=await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame.") + ), + role="first_frame", + ), + ] + if last_frame is not None: + content.append( + TaskImageContent( + image_url=TaskImageContentUrl( + url=await upload_image_to_comfyapi(cls, last_frame, wait_label="Uploading last frame.") + ), + role="last_frame", + ), + ) + + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=Seedance2TaskCreationRequest( + model=model_id, + content=content, + generate_audio=model["generate_audio"], + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: r.status, + price_extractor=_seedance2_price_extractor(model_id, has_video_input=False), + poll_interval=9, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +def _seedance2_reference_inputs(): + return [ + *_seedance2_text_inputs(), + IO.Autogrow.Input( + "reference_images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("reference_image"), + names=[ + "image_1", + "image_2", + "image_3", + "image_4", + "image_5", + "image_6", + "image_7", + "image_8", + "image_9", + ], + min=0, + ), + ), + IO.Autogrow.Input( + "reference_videos", + template=IO.Autogrow.TemplateNames( + IO.Video.Input("reference_video"), + names=["video_1", "video_2", "video_3"], + min=0, + ), + ), + IO.Autogrow.Input( + "reference_audios", + template=IO.Autogrow.TemplateNames( + IO.Audio.Input("reference_audio"), + names=["audio_1", "audio_2", "audio_3"], + min=0, + ), + ), + ] + + +class ByteDance2ReferenceNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ByteDance2ReferenceNode", + display_name="ByteDance Seedance 2.0 Reference to Video", + category="api node/video/ByteDance", + description="Generate, edit, or extend video using Seedance 2.0 with reference images, " + "videos, and audio. Supports multimodal reference, video editing, and video extension.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option("Seedance 2.0", _seedance2_reference_inputs()), + IO.DynamicCombo.Option("Seedance 2.0 Fast", _seedance2_reference_inputs()), + ], + tooltip="Seedance 2.0 for maximum quality; Seedance 2.0 Fast for speed optimization.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Boolean.Input( + "watermark", + default=False, + tooltip="Whether to add a watermark to the video.", + advanced=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=["model", "model.resolution", "model.duration"], + input_groups=["model.reference_videos"], + ), + expr=""" + ( + $rate480 := 10044; + $rate720 := 21600; + $m := widgets.model; + $hasVideo := $lookup(inputGroups, "model.reference_videos") > 0; + $noVideoPricePer1K := $contains($m, "fast") ? 0.008008 : 0.01001; + $videoPricePer1K := $contains($m, "fast") ? 0.004719 : 0.006149; + $res := $lookup(widgets, "model.resolution"); + $dur := $lookup(widgets, "model.duration"); + $rate := $res = "720p" ? $rate720 : $rate480; + $noVideoCost := $dur * $rate * $noVideoPricePer1K / 1000; + $minVideoFactor := $ceil($dur * 5 / 3); + $minVideoCost := $minVideoFactor * $rate * $videoPricePer1K / 1000; + $maxVideoCost := (15 + $dur) * $rate * $videoPricePer1K / 1000; + $hasVideo + ? { + "type": "range_usd", + "min_usd": $minVideoCost, + "max_usd": $maxVideoCost, + "format": {"approximate": true} + } + : { + "type": "usd", + "usd": $noVideoCost, + "format": {"approximate": true} + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + seed: int, + watermark: bool, + ) -> IO.NodeOutput: + validate_string(model["prompt"], strip_whitespace=True, min_length=1) + + reference_images = model.get("reference_images", {}) + reference_videos = model.get("reference_videos", {}) + reference_audios = model.get("reference_audios", {}) + + if not reference_images and not reference_videos: + raise ValueError("At least one reference image or video is required.") + + model_id = SEEDANCE_MODELS[model["model"]] + has_video_input = len(reference_videos) > 0 + total_video_duration = 0.0 + for i, key in enumerate(reference_videos, 1): + video = reference_videos[key] + _validate_ref_video_pixels(video, model_id, i) + try: + dur = video.get_duration() + if dur < 1.8: + raise ValueError(f"Reference video {i} is too short: {dur:.1f}s. Minimum duration is 1.8 seconds.") + total_video_duration += dur + except ValueError: + raise + except Exception: + pass + if total_video_duration > 15.1: + raise ValueError(f"Total reference video duration is {total_video_duration:.1f}s. Maximum is 15.1 seconds.") + + total_audio_duration = 0.0 + for i, key in enumerate(reference_audios, 1): + audio = reference_audios[key] + dur = int(audio["waveform"].shape[-1]) / int(audio["sample_rate"]) + if dur < 1.8: + raise ValueError(f"Reference audio {i} is too short: {dur:.1f}s. Minimum duration is 1.8 seconds.") + total_audio_duration += dur + if total_audio_duration > 15.1: + raise ValueError(f"Total reference audio duration is {total_audio_duration:.1f}s. Maximum is 15.1 seconds.") + + content: list[TaskTextContent | TaskImageContent | TaskVideoContent | TaskAudioContent] = [ + TaskTextContent(text=model["prompt"]), + ] + for i, key in enumerate(reference_images, 1): + content.append( + TaskImageContent( + image_url=TaskImageContentUrl( + url=await upload_image_to_comfyapi( + cls, + image=reference_images[key], + wait_label=f"Uploading image {i}", + ), + ), + role="reference_image", + ), + ) + for i, key in enumerate(reference_videos, 1): + content.append( + TaskVideoContent( + video_url=TaskVideoContentUrl( + url=await upload_video_to_comfyapi( + cls, + reference_videos[key], + wait_label=f"Uploading video {i}", + ), + ), + ), + ) + for key in reference_audios: + content.append( + TaskAudioContent( + audio_url=TaskAudioContentUrl( + url=await upload_audio_to_comfyapi( + cls, + reference_audios[key], + container_format="mp3", + codec_name="libmp3lame", + mime_type="audio/mpeg", + ), + ), + ), + ) + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=Seedance2TaskCreationRequest( + model=model_id, + content=content, + generate_audio=model["generate_audio"], + resolution=model["resolution"], + ratio=model["ratio"], + duration=model["duration"], + seed=seed, + watermark=watermark, + ), + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_SEEDANCE2_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: r.status, + price_extractor=_seedance2_price_extractor(model_id, has_video_input=has_video_input), + poll_interval=9, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + +async def process_video_task( + cls: type[IO.ComfyNode], + payload: Text2VideoTaskCreationRequest | Image2VideoTaskCreationRequest, + estimated_duration: int | None, +) -> IO.NodeOutput: + if payload.model in DEPRECATED_MODELS: + logger.warning( + "Model '%s' is deprecated and will be deactivated on May 13, 2026. " + "Please switch to a newer model. Recommended: seedance-1-0-pro-fast-251015.", + payload.model, + ) + initial_response = await sync_op( + cls, + ApiEndpoint(path=BYTEPLUS_TASK_ENDPOINT, method="POST"), + data=payload, + response_model=TaskCreationResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"{BYTEPLUS_TASK_STATUS_ENDPOINT}/{initial_response.id}"), + status_extractor=lambda r: r.status, + estimated_duration=estimated_duration, + response_model=TaskStatusResponse, + ) + return IO.NodeOutput(await download_url_to_video_output(response.content.video_url)) + + class ByteDanceExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -1050,6 +1600,9 @@ class ByteDanceExtension(ComfyExtension): ByteDanceImageToVideoNode, ByteDanceFirstLastFrameNode, ByteDanceImageReferenceNode, + ByteDance2TextToVideoNode, + ByteDance2FirstLastFrameNode, + ByteDance2ReferenceNode, ] From 559501e4b88595b3ddf0cff98c14443f4f307c78 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Mon, 13 Apr 2026 14:19:09 +0800 Subject: [PATCH 04/14] chore: update workflow templates to v0.9.47 (#13385) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cfb4d4fb8..393c1e1ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.10 -comfyui-workflow-templates==0.9.45 +comfyui-workflow-templates==0.9.47 comfyui-embedded-docs==0.4.3 torch torchsde From acd718598eca0b944a1a7a82072a9dec40d3d4f7 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 13 Apr 2026 03:02:36 -0400 Subject: [PATCH 05/14] ComfyUI v0.19.0 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 61d7672ca..0da11d5fa 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.18.1" +__version__ = "0.19.0" diff --git a/pyproject.toml b/pyproject.toml index 1fc9402a1..e8d4a9742 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.18.1" +version = "0.19.0" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" From 402ff1cdb7a266eae58c696dfbce4a62d21070a3 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 13 Apr 2026 13:38:42 -0700 Subject: [PATCH 06/14] Fix issue with ernie image. (#13393) --- comfy/ldm/ernie/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/ldm/ernie/model.py b/comfy/ldm/ernie/model.py index 1f8f08376..3dbab8dc0 100644 --- a/comfy/ldm/ernie/model.py +++ b/comfy/ldm/ernie/model.py @@ -279,7 +279,7 @@ class ErnieImageModel(nn.Module): rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype) del image_ids, text_ids - sample = self.time_proj(timesteps.to(dtype)).to(self.time_embedding.linear_1.weight.dtype) + sample = self.time_proj(timesteps).to(dtype) c = self.time_embedding(sample) shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = [ From 722bc733196bbeef5b7ccdc95bb0e7e4156bb591 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:43:57 -0700 Subject: [PATCH 07/14] Make text generation work with ministral model. (#13395) Needs template before it works properly. --- comfy/text_encoders/llama.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index 6cdc47757..6ea8e36b1 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -82,6 +82,7 @@ class Ministral3_3BConfig: rope_scale = None final_norm: bool = True lm_head: bool = False + stop_tokens = [2] @dataclass class Qwen25_3BConfig: @@ -969,7 +970,7 @@ class Mistral3Small24B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Ministral3_3B(BaseLlama, torch.nn.Module): +class Ministral3_3B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Ministral3_3BConfig(**config_dict) From 35dfcbbb28a8b8b2833d598403f138ed480de58d Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Tue, 14 Apr 2026 08:21:01 +0300 Subject: [PATCH 08/14] [Partner Nodes] add Sonilo Audio nodes (#13391) * feat(api-nodes): add Sonilo nodes Signed-off-by: bigcat88 * fix: do not spam frontend with each chunk arrival Signed-off-by: bigcat88 * updated pricing badge Signed-off-by: bigcat88 --------- Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_sonilo.py | 287 ++++++++++++++++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 comfy_api_nodes/nodes_sonilo.py diff --git a/comfy_api_nodes/nodes_sonilo.py b/comfy_api_nodes/nodes_sonilo.py new file mode 100644 index 000000000..5518f5902 --- /dev/null +++ b/comfy_api_nodes/nodes_sonilo.py @@ -0,0 +1,287 @@ +import base64 +import json +import logging +import time +from urllib.parse import urljoin + +import aiohttp +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.util import ( + ApiEndpoint, + audio_bytes_to_audio_input, + upload_video_to_comfyapi, + validate_string, +) +from comfy_api_nodes.util._helpers import ( + default_base_url, + get_auth_header, + get_node_id, + is_processing_interrupted, +) +from comfy_api_nodes.util.common_exceptions import ProcessingInterrupted +from server import PromptServer + +logger = logging.getLogger(__name__) + + +class SoniloVideoToMusic(IO.ComfyNode): + """Generate music from video using Sonilo's AI model.""" + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="SoniloVideoToMusic", + display_name="Sonilo Video to Music", + category="api node/audio/Sonilo", + description="Generate music from video content using Sonilo's AI model. " + "Analyzes the video and creates matching music.", + inputs=[ + IO.Video.Input( + "video", + tooltip="Input video to generate music from. Maximum duration: 6 minutes.", + ), + IO.String.Input( + "prompt", + default="", + multiline=True, + tooltip="Optional text prompt to guide music generation. " + "Leave empty for best quality - the model will fully analyze the video content.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="Seed for reproducibility. Currently ignored by the Sonilo " + "service but kept for graph consistency.", + ), + ], + outputs=[IO.Audio.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr='{"type":"usd","usd":0.009,"format":{"suffix":"/second"}}', + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + prompt: str = "", + seed: int = 0, + ) -> IO.NodeOutput: + video_url = await upload_video_to_comfyapi(cls, video, max_duration=360) + form = aiohttp.FormData() + form.add_field("video_url", video_url) + if prompt.strip(): + form.add_field("prompt", prompt.strip()) + audio_bytes = await _stream_sonilo_music( + cls, + ApiEndpoint(path="/proxy/sonilo/v2m/generate", method="POST"), + form, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(audio_bytes)) + + +class SoniloTextToMusic(IO.ComfyNode): + """Generate music from a text prompt using Sonilo's AI model.""" + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="SoniloTextToMusic", + display_name="Sonilo Text to Music", + category="api node/audio/Sonilo", + description="Generate music from a text prompt using Sonilo's AI model. " + "Leave duration at 0 to let the model infer it from the prompt.", + inputs=[ + IO.String.Input( + "prompt", + default="", + multiline=True, + tooltip="Text prompt describing the music to generate.", + ), + IO.Int.Input( + "duration", + default=0, + min=0, + max=360, + tooltip="Target duration in seconds. Set to 0 to let the model " + "infer the duration from the prompt. Maximum: 6 minutes.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="Seed for reproducibility. Currently ignored by the Sonilo " + "service but kept for graph consistency.", + ), + ], + outputs=[IO.Audio.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["duration"]), + expr=""" + ( + widgets.duration > 0 + ? {"type":"usd","usd": 0.005 * widgets.duration} + : {"type":"usd","usd": 0.005, "format":{"suffix":"/second"}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + duration: int = 0, + seed: int = 0, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + form = aiohttp.FormData() + form.add_field("prompt", prompt) + if duration > 0: + form.add_field("duration", str(duration)) + audio_bytes = await _stream_sonilo_music( + cls, + ApiEndpoint(path="/proxy/sonilo/t2m/generate", method="POST"), + form, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(audio_bytes)) + + +async def _stream_sonilo_music( + cls: type[IO.ComfyNode], + endpoint: ApiEndpoint, + form: aiohttp.FormData, +) -> bytes: + """POST ``form`` to Sonilo, read the NDJSON stream, and return the first stream's audio bytes.""" + url = urljoin(default_base_url().rstrip("/") + "/", endpoint.path.lstrip("/")) + + headers: dict[str, str] = {} + headers.update(get_auth_header(cls)) + headers.update(endpoint.headers) + + node_id = get_node_id(cls) + start_ts = time.monotonic() + last_chunk_status_ts = 0.0 + audio_streams: dict[int, list[bytes]] = {} + title: str | None = None + + timeout = aiohttp.ClientTimeout(total=1200.0, sock_read=300.0) + async with aiohttp.ClientSession(timeout=timeout) as session: + PromptServer.instance.send_progress_text("Status: Queued", node_id) + async with session.post(url, data=form, headers=headers) as resp: + if resp.status >= 400: + msg = await _extract_error_message(resp) + raise Exception(f"Sonilo API error ({resp.status}): {msg}") + + while True: + if is_processing_interrupted(): + raise ProcessingInterrupted("Task cancelled") + + raw_line = await resp.content.readline() + if not raw_line: + break + + line = raw_line.decode("utf-8").strip() + if not line: + continue + + try: + evt = json.loads(line) + except json.JSONDecodeError: + logger.warning("Sonilo: skipping malformed NDJSON line") + continue + + evt_type = evt.get("type") + if evt_type == "error": + code = evt.get("code", "UNKNOWN") + message = evt.get("message", "Unknown error") + raise Exception(f"Sonilo generation error ({code}): {message}") + if evt_type == "duration": + duration_sec = evt.get("duration_sec") + if duration_sec is not None: + PromptServer.instance.send_progress_text( + f"Status: Generating\nVideo duration: {duration_sec:.1f}s", + node_id, + ) + elif evt_type in ("titles", "title"): + # v2m sends a "titles" list, t2m sends a scalar "title" + if evt_type == "titles": + titles = evt.get("titles", []) + if titles: + title = titles[0] + else: + title = evt.get("title") or title + if title: + PromptServer.instance.send_progress_text( + f"Status: Generating\nTitle: {title}", + node_id, + ) + elif evt_type == "audio_chunk": + stream_idx = evt.get("stream_index", 0) + chunk_data = base64.b64decode(evt["data"]) + + if stream_idx not in audio_streams: + audio_streams[stream_idx] = [] + audio_streams[stream_idx].append(chunk_data) + + now = time.monotonic() + if now - last_chunk_status_ts >= 1.0: + total_chunks = sum(len(chunks) for chunks in audio_streams.values()) + elapsed = int(now - start_ts) + status_lines = ["Status: Receiving audio"] + if title: + status_lines.append(f"Title: {title}") + status_lines.append(f"Chunks received: {total_chunks}") + status_lines.append(f"Time elapsed: {elapsed}s") + PromptServer.instance.send_progress_text("\n".join(status_lines), node_id) + last_chunk_status_ts = now + elif evt_type == "complete": + break + + if not audio_streams: + raise Exception("Sonilo API returned no audio data.") + + PromptServer.instance.send_progress_text("Status: Completed", node_id) + selected_stream = 0 if 0 in audio_streams else min(audio_streams) + return b"".join(audio_streams[selected_stream]) + + +async def _extract_error_message(resp: aiohttp.ClientResponse) -> str: + """Extract a human-readable error message from an HTTP error response.""" + try: + error_body = await resp.json() + detail = error_body.get("detail", {}) + if isinstance(detail, dict): + return detail.get("message", str(detail)) + return str(detail) + except Exception: + return await resp.text() + + +class SoniloExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [SoniloVideoToMusic, SoniloTextToMusic] + + +async def comfy_entrypoint() -> SoniloExtension: + return SoniloExtension() From fed4ac031a4a34ea5c76e41e80dddd9f048b22b6 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Tue, 14 Apr 2026 14:24:37 +0800 Subject: [PATCH 09/14] chore: update workflow templates to v0.9.50 (#13399) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 393c1e1ff..7e8dac182 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.10 -comfyui-workflow-templates==0.9.47 +comfyui-workflow-templates==0.9.50 comfyui-embedded-docs==0.4.3 torch torchsde From c16db7fd6978eee76fca65626c02e61eaedb5e72 Mon Sep 17 00:00:00 2001 From: Comfy Org PR Bot Date: Wed, 15 Apr 2026 03:13:35 +0900 Subject: [PATCH 10/14] Bump comfyui-frontend-package to 1.42.11 (#13398) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7e8dac182..7f065e0d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.42.10 +comfyui-frontend-package==1.42.11 comfyui-workflow-templates==0.9.50 comfyui-embedded-docs==0.4.3 torch From c5569e862794c419094ee5c9d5ad224634b9ddd6 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 14 Apr 2026 11:42:23 -0700 Subject: [PATCH 11/14] Add string output to preview text node. (#13406) --- comfy_extras/nodes_preview_any.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy_extras/nodes_preview_any.py b/comfy_extras/nodes_preview_any.py index b0a6f279d..0a1558f2b 100644 --- a/comfy_extras/nodes_preview_any.py +++ b/comfy_extras/nodes_preview_any.py @@ -11,7 +11,7 @@ class PreviewAny(): "required": {"source": (IO.ANY, {})}, } - RETURN_TYPES = () + RETURN_TYPES = (IO.STRING,) FUNCTION = "main" OUTPUT_NODE = True @@ -33,7 +33,7 @@ class PreviewAny(): except Exception: value = 'source exists, but could not be serialized.' - return {"ui": {"text": (value,)}} + return {"ui": {"text": (value,)}, "result": (value,)} NODE_CLASS_MAPPINGS = { "PreviewAny": PreviewAny, From 7ce3f64c784430e15731d344affffb48c55a0eaa Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Wed, 15 Apr 2026 08:35:27 +0800 Subject: [PATCH 12/14] Update workflow templates to v0.9.54 (#13412) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7f065e0d4..e45a20aaf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.42.11 -comfyui-workflow-templates==0.9.50 +comfyui-workflow-templates==0.9.54 comfyui-embedded-docs==0.4.3 torch torchsde From cb0bbde402cfb72559cc8b00f679d7735dff5c40 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 14 Apr 2026 19:54:47 -0700 Subject: [PATCH 13/14] Fix ernie on devices that don't support fp64. (#13414) --- comfy/ldm/ernie/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/ldm/ernie/model.py b/comfy/ldm/ernie/model.py index 3dbab8dc0..f7cdb51e6 100644 --- a/comfy/ldm/ernie/model.py +++ b/comfy/ldm/ernie/model.py @@ -15,7 +15,7 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: scale = torch.arange(0, dim, 2, dtype=torch.float64, device=device) / dim omega = 1.0 / (theta**scale) - out = torch.einsum("...n,d->...nd", pos, omega) + out = torch.einsum("...n,d->...nd", pos.to(device), omega) out = torch.stack([torch.cos(out), torch.sin(out)], dim=0) return out.to(dtype=torch.float32, device=pos.device) From 8f374716ee98d378d403ebc61250e091ecd3a25c Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 14 Apr 2026 22:56:13 -0400 Subject: [PATCH 14/14] ComfyUI v0.19.1 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 0da11d5fa..3c6dac3d9 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.19.0" +__version__ = "0.19.1" diff --git a/pyproject.toml b/pyproject.toml index e8d4a9742..006ed9985 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.19.0" +version = "0.19.1" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10"