From 349a636a2b0f15aba2930b9af905bb805d2fe30b Mon Sep 17 00:00:00 2001 From: ComfyUI Wiki Date: Tue, 10 Feb 2026 10:25:34 +0800 Subject: [PATCH 01/10] chore: update workflow templates to v0.8.37 (#12377) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4fda07fde..4e2773f5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.38.13 -comfyui-workflow-templates==0.8.31 +comfyui-workflow-templates==0.8.37 comfyui-embedded-docs==0.4.1 torch torchsde From c1b63a7e78b606bc14cd49a02e9338274db28a60 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Tue, 10 Feb 2026 04:58:27 +0200 Subject: [PATCH 02/10] fix(Moonvalley-API-Nodes): adjust "steps" parameter to not raise exception (#12370) --- comfy_api_nodes/nodes_moonvalley.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py index 08315fa2b..78a230529 100644 --- a/comfy_api_nodes/nodes_moonvalley.py +++ b/comfy_api_nodes/nodes_moonvalley.py @@ -219,8 +219,8 @@ class MoonvalleyImg2VideoNode(IO.ComfyNode): ), IO.Int.Input( "steps", - default=33, - min=1, + default=80, + min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) max=100, step=1, tooltip="Number of denoising steps", @@ -340,8 +340,8 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode): ), IO.Int.Input( "steps", - default=33, - min=1, + default=60, + min=60, # steps should be greater or equal to cooldown_steps(36) + warmup_steps(24) max=100, step=1, display_mode=IO.NumberDisplay.number, @@ -370,7 +370,7 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode): video: Input.Video | None = None, control_type: str = "Motion Transfer", motion_intensity: int | None = 100, - steps=33, + steps=60, prompt_adherence=4.5, ) -> IO.NodeOutput: validated_video = validate_video_to_video_input(video) @@ -465,8 +465,8 @@ class MoonvalleyTxt2VideoNode(IO.ComfyNode): ), IO.Int.Input( "steps", - default=33, - min=1, + default=80, + min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) max=100, step=1, tooltip="Inference steps", From 8ca842a8edb26006e730e631ec1153cd42f46d3b Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Tue, 10 Feb 2026 19:34:54 +0200 Subject: [PATCH 03/10] feat(api-nodes-Kling): add new models (V3, O3) (#12389) * feat(api-nodes-Kling): add new models (V3, O3) * remove storyboard from VideoToVideo node * added check for total duration of storyboards * fixed other small things * updated display name for nodes * added "fake" seed --- comfy_api_nodes/apis/__init__.py | 8 +- comfy_api_nodes/apis/kling.py | 46 +- comfy_api_nodes/nodes_kling.py | 764 ++++++++++++++++++++++++++++--- 3 files changed, 750 insertions(+), 68 deletions(-) diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py index ee2aa1ce6..46a583b5e 100644 --- a/comfy_api_nodes/apis/__init__.py +++ b/comfy_api_nodes/apis/__init__.py @@ -1197,12 +1197,6 @@ class KlingImageGenImageReferenceType(str, Enum): face = 'face' -class KlingImageGenModelName(str, Enum): - kling_v1 = 'kling-v1' - kling_v1_5 = 'kling-v1-5' - kling_v2 = 'kling-v2' - - class KlingImageGenerationsRequest(BaseModel): aspect_ratio: Optional[KlingImageGenAspectRatio] = '16:9' callback_url: Optional[AnyUrl] = Field( @@ -1218,7 +1212,7 @@ class KlingImageGenerationsRequest(BaseModel): 0.5, description='Reference intensity for user-uploaded images', ge=0.0, le=1.0 ) image_reference: Optional[KlingImageGenImageReferenceType] = None - model_name: Optional[KlingImageGenModelName] = 'kling-v1' + model_name: str = Field(...) n: Optional[int] = Field(1, description='Number of generated images', ge=1, le=9) negative_prompt: Optional[str] = Field( None, description='Negative text prompt', max_length=200 diff --git a/comfy_api_nodes/apis/kling.py b/comfy_api_nodes/apis/kling.py index bf54ede3e..9c0446075 100644 --- a/comfy_api_nodes/apis/kling.py +++ b/comfy_api_nodes/apis/kling.py @@ -1,12 +1,22 @@ from pydantic import BaseModel, Field +class MultiPromptEntry(BaseModel): + index: int = Field(...) + prompt: str = Field(...) + duration: str = Field(...) + + class OmniProText2VideoRequest(BaseModel): model_name: str = Field(..., description="kling-video-o1") aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'") duration: str = Field(..., description="'5' or '10'") prompt: str = Field(...) mode: str = Field("pro") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) + sound: str = Field(..., description="'on' or 'off'") class OmniParamImage(BaseModel): @@ -26,6 +36,10 @@ class OmniProFirstLastFrameRequest(BaseModel): duration: str = Field(..., description="'5' or '10'") prompt: str = Field(...) mode: str = Field("pro") + sound: str | None = Field(None, description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class OmniProReferences2VideoRequest(BaseModel): @@ -38,6 +52,10 @@ class OmniProReferences2VideoRequest(BaseModel): duration: str | None = Field(..., description="From 3 to 10.") prompt: str = Field(...) mode: str = Field("pro") + sound: str | None = Field(None, description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class TaskStatusVideoResult(BaseModel): @@ -54,6 +72,7 @@ class TaskStatusImageResult(BaseModel): class TaskStatusResults(BaseModel): videos: list[TaskStatusVideoResult] | None = Field(None) images: list[TaskStatusImageResult] | None = Field(None) + series_images: list[TaskStatusImageResult] | None = Field(None) class TaskStatusResponseData(BaseModel): @@ -77,31 +96,42 @@ class OmniImageParamImage(BaseModel): class OmniProImageRequest(BaseModel): - model_name: str = Field(..., description="kling-image-o1") - resolution: str = Field(..., description="'1k' or '2k'") + model_name: str = Field(...) + resolution: str = Field(...) aspect_ratio: str | None = Field(...) prompt: str = Field(...) mode: str = Field("pro") n: int | None = Field(1, le=9) image_list: list[OmniImageParamImage] | None = Field(..., max_length=10) + result_type: str | None = Field(None, description="Set to 'series' for series generation") + series_amount: int | None = Field(None, ge=2, le=9, description="Number of images in a series") class TextToVideoWithAudioRequest(BaseModel): - model_name: str = Field(..., description="kling-v2-6") + model_name: str = Field(...) aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'") - duration: str = Field(..., description="'5' or '10'") - prompt: str = Field(...) + duration: str = Field(...) + prompt: str | None = Field(...) + negative_prompt: str | None = Field(None) mode: str = Field("pro") sound: str = Field(..., description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class ImageToVideoWithAudioRequest(BaseModel): - model_name: str = Field(..., description="kling-v2-6") + model_name: str = Field(...) image: str = Field(...) - duration: str = Field(..., description="'5' or '10'") - prompt: str = Field(...) + image_tail: str | None = Field(None) + duration: str = Field(...) + prompt: str | None = Field(...) + negative_prompt: str | None = Field(None) mode: str = Field("pro") sound: str = Field(..., description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class MotionControlRequest(BaseModel): diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index 739fe1855..b89c85561 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -38,7 +38,6 @@ from comfy_api_nodes.apis import ( KlingImageGenerationsRequest, KlingImageGenerationsResponse, KlingImageGenImageReferenceType, - KlingImageGenModelName, KlingImageGenAspectRatio, KlingVideoEffectsRequest, KlingVideoEffectsResponse, @@ -52,6 +51,7 @@ from comfy_api_nodes.apis import ( from comfy_api_nodes.apis.kling import ( ImageToVideoWithAudioRequest, MotionControlRequest, + MultiPromptEntry, OmniImageParamImage, OmniParamImage, OmniParamVideo, @@ -71,6 +71,7 @@ from comfy_api_nodes.util import ( sync_op, tensor_to_base64_string, upload_audio_to_comfyapi, + upload_image_to_comfyapi, upload_images_to_comfyapi, upload_video_to_comfyapi, validate_image_aspect_ratio, @@ -80,6 +81,31 @@ from comfy_api_nodes.util import ( validate_video_duration, ) + +def _generate_storyboard_inputs(count: int) -> list: + inputs = [] + for i in range(1, count + 1): + inputs.extend( + [ + IO.String.Input( + f"storyboard_{i}_prompt", + multiline=True, + default="", + tooltip=f"Prompt for storyboard segment {i}. Max 512 characters.", + ), + IO.Int.Input( + f"storyboard_{i}_duration", + default=4, + min=1, + max=15, + display_mode=IO.NumberDisplay.slider, + tooltip=f"Duration for storyboard segment {i} in seconds.", + ), + ] + ) + return inputs + + KLING_API_VERSION = "v1" PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video" PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video" @@ -820,20 +846,48 @@ class OmniProTextToVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProTextToVideoNode", - display_name="Kling Omni Text to Video (Pro)", + display_name="Kling 3.0 Omni Text to Video", category="api node/video/Kling", description="Use text prompts to generate videos with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the video content. " - "This can include both positive and negative descriptions.", + "This can include both positive and negative descriptions. " + "Ignored when storyboards are enabled.", ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), - IO.Combo.Input("duration", options=[5, 10]), + IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.DynamicCombo.Input( + "storyboards", + options=[ + IO.DynamicCombo.Option("disabled", []), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations. " + "Ignored for o1 model.", + optional=True, + ), + IO.Boolean.Input("generate_audio", default=False, optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -845,11 +899,15 @@ class OmniProTextToVideoNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( $mode := (widgets.resolution = "720p") ? "std" : "pro"; - $rates := {"std": 0.084, "pro": 0.112}; + $isV3 := $contains(widgets.model_name, "v3"); + $audio := $isV3 and widgets.generate_audio; + $rates := $audio + ? {"std": 0.112, "pro": 0.14} + : {"std": 0.084, "pro": 0.112}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -864,8 +922,45 @@ class OmniProTextToVideoNode(IO.ComfyNode): aspect_ratio: str, duration: int, resolution: str = "1080p", + storyboards: dict | None = None, + generate_audio: bool = False, + seed: int = 0, ) -> IO.NodeOutput: - validate_string(prompt, min_length=1, max_length=2500) + _ = seed + if model_name == "kling-video-o1": + if duration not in (5, 10): + raise ValueError("kling-video-o1 only supports durations of 5 or 10 seconds.") + if generate_audio: + raise ValueError("kling-video-o1 does not support audio generation.") + stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" + if stories_enabled and model_name == "kling-video-o1": + raise ValueError("kling-video-o1 does not support storyboards.") + validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500) + + multi_shot = None + multi_prompt_list = None + if stories_enabled: + count = int(storyboards["storyboards"].split()[0]) + multi_shot = True + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = storyboards[f"storyboard_{i}_prompt"] + sb_duration = storyboards[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list) + if total_storyboard_duration != duration: + raise ValueError( + f"Total storyboard duration ({total_storyboard_duration}s) " + f"must equal the global duration ({duration}s)." + ) + response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -876,6 +971,10 @@ class OmniProTextToVideoNode(IO.ComfyNode): aspect_ratio=aspect_ratio, duration=str(duration), mode="pro" if resolution == "1080p" else "std", + multi_shot=multi_shot, + multi_prompt=multi_prompt_list, + shot_type="customize" if multi_shot else None, + sound="on" if generate_audio else "off", ), ) return await finish_omni_video_task(cls, response) @@ -887,24 +986,26 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProFirstLastFrameNode", - display_name="Kling Omni First-Last-Frame to Video (Pro)", + display_name="Kling 3.0 Omni First-Last-Frame to Video", category="api node/video/Kling", description="Use a start frame, an optional end frame, or reference images with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the video content. " - "This can include both positive and negative descriptions.", + "This can include both positive and negative descriptions. " + "Ignored when storyboards are enabled.", ), - IO.Int.Input("duration", default=5, min=3, max=10, display_mode=IO.NumberDisplay.slider), + IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), IO.Image.Input("first_frame"), IO.Image.Input( "end_frame", optional=True, tooltip="An optional end frame for the video. " - "This cannot be used simultaneously with 'reference_images'.", + "This cannot be used simultaneously with 'reference_images'. " + "Does not work with storyboards.", ), IO.Image.Input( "reference_images", @@ -912,6 +1013,38 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): tooltip="Up to 6 additional reference images.", ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.DynamicCombo.Input( + "storyboards", + options=[ + IO.DynamicCombo.Option("disabled", []), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations. " + "Only supported for kling-v3-omni.", + optional=True, + ), + IO.Boolean.Input( + "generate_audio", + default=False, + optional=True, + tooltip="Generate audio for the video. Only supported for kling-v3-omni.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -923,11 +1056,15 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( $mode := (widgets.resolution = "720p") ? "std" : "pro"; - $rates := {"std": 0.084, "pro": 0.112}; + $isV3 := $contains(widgets.model_name, "v3"); + $audio := $isV3 and widgets.generate_audio; + $rates := $audio + ? {"std": 0.112, "pro": 0.14} + : {"std": 0.084, "pro": 0.112}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -944,15 +1081,59 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): end_frame: Input.Image | None = None, reference_images: Input.Image | None = None, resolution: str = "1080p", + storyboards: dict | None = None, + generate_audio: bool = False, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed + if model_name == "kling-video-o1": + if duration > 10: + raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") + if generate_audio: + raise ValueError("kling-video-o1 does not support audio generation.") + stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" + if stories_enabled and model_name == "kling-video-o1": + raise ValueError("kling-video-o1 does not support storyboards.") prompt = normalize_omni_prompt_references(prompt) - validate_string(prompt, min_length=1, max_length=2500) + validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500) if end_frame is not None and reference_images is not None: raise ValueError("The 'end_frame' input cannot be used simultaneously with 'reference_images'.") - if duration not in (5, 10) and end_frame is None and reference_images is None: + if end_frame is not None and stories_enabled: + raise ValueError("The 'end_frame' input cannot be used simultaneously with storyboards.") + if ( + model_name == "kling-video-o1" + and duration not in (5, 10) + and end_frame is None + and reference_images is None + ): raise ValueError( "Duration is only supported for 5 or 10 seconds if there is no end frame or reference images." ) + + multi_shot = None + multi_prompt_list = None + if stories_enabled: + count = int(storyboards["storyboards"].split()[0]) + multi_shot = True + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = storyboards[f"storyboard_{i}_prompt"] + sb_duration = storyboards[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list) + if total_storyboard_duration != duration: + raise ValueError( + f"Total storyboard duration ({total_storyboard_duration}s) " + f"must equal the global duration ({duration}s)." + ) + validate_image_dimensions(first_frame, min_width=300, min_height=300) validate_image_aspect_ratio(first_frame, (1, 2.5), (2.5, 1)) image_list: list[OmniParamImage] = [ @@ -988,6 +1169,10 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): duration=str(duration), image_list=image_list, mode="pro" if resolution == "1080p" else "std", + sound="on" if generate_audio else "off", + multi_shot=multi_shot, + multi_prompt=multi_prompt_list, + shot_type="customize" if multi_shot else None, ), ) return await finish_omni_video_task(cls, response) @@ -999,24 +1184,57 @@ class OmniProImageToVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProImageToVideoNode", - display_name="Kling Omni Image to Video (Pro)", + display_name="Kling 3.0 Omni Image to Video", category="api node/video/Kling", description="Use up to 7 reference images to generate a video with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the video content. " - "This can include both positive and negative descriptions.", + "This can include both positive and negative descriptions. " + "Ignored when storyboards are enabled.", ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), - IO.Int.Input("duration", default=3, min=3, max=10, display_mode=IO.NumberDisplay.slider), + IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), IO.Image.Input( "reference_images", tooltip="Up to 7 reference images.", ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.DynamicCombo.Input( + "storyboards", + options=[ + IO.DynamicCombo.Option("disabled", []), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations. " + "Only supported for kling-v3-omni.", + optional=True, + ), + IO.Boolean.Input( + "generate_audio", + default=False, + optional=True, + tooltip="Generate audio for the video. Only supported for kling-v3-omni.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -1028,11 +1246,15 @@ class OmniProImageToVideoNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( $mode := (widgets.resolution = "720p") ? "std" : "pro"; - $rates := {"std": 0.084, "pro": 0.112}; + $isV3 := $contains(widgets.model_name, "v3"); + $audio := $isV3 and widgets.generate_audio; + $rates := $audio + ? {"std": 0.112, "pro": 0.14} + : {"std": 0.084, "pro": 0.112}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -1048,9 +1270,46 @@ class OmniProImageToVideoNode(IO.ComfyNode): duration: int, reference_images: Input.Image, resolution: str = "1080p", + storyboards: dict | None = None, + generate_audio: bool = False, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed + if model_name == "kling-video-o1": + if duration > 10: + raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") + if generate_audio: + raise ValueError("kling-video-o1 does not support audio generation.") + stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" + if stories_enabled and model_name == "kling-video-o1": + raise ValueError("kling-video-o1 does not support storyboards.") prompt = normalize_omni_prompt_references(prompt) - validate_string(prompt, min_length=1, max_length=2500) + validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500) + + multi_shot = None + multi_prompt_list = None + if stories_enabled: + count = int(storyboards["storyboards"].split()[0]) + multi_shot = True + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = storyboards[f"storyboard_{i}_prompt"] + sb_duration = storyboards[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list) + if total_storyboard_duration != duration: + raise ValueError( + f"Total storyboard duration ({total_storyboard_duration}s) " + f"must equal the global duration ({duration}s)." + ) + if get_number_of_images(reference_images) > 7: raise ValueError("The maximum number of reference images is 7.") for i in reference_images: @@ -1070,6 +1329,10 @@ class OmniProImageToVideoNode(IO.ComfyNode): duration=str(duration), image_list=image_list, mode="pro" if resolution == "1080p" else "std", + sound="on" if generate_audio else "off", + multi_shot=multi_shot, + multi_prompt=multi_prompt_list, + shot_type="customize" if multi_shot else None, ), ) return await finish_omni_video_task(cls, response) @@ -1081,11 +1344,11 @@ class OmniProVideoToVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProVideoToVideoNode", - display_name="Kling Omni Video to Video (Pro)", + display_name="Kling 3.0 Omni Video to Video", category="api node/video/Kling", description="Use a video and up to 4 reference images to generate a video with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, @@ -1102,6 +1365,17 @@ class OmniProVideoToVideoNode(IO.ComfyNode): optional=True, ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -1135,7 +1409,9 @@ class OmniProVideoToVideoNode(IO.ComfyNode): keep_original_sound: bool, reference_images: Input.Image | None = None, resolution: str = "1080p", + seed: int = 0, ) -> IO.NodeOutput: + _ = seed prompt = normalize_omni_prompt_references(prompt) validate_string(prompt, min_length=1, max_length=2500) validate_video_duration(reference_video, min_duration=3.0, max_duration=10.05) @@ -1179,11 +1455,11 @@ class OmniProEditVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProEditVideoNode", - display_name="Kling Omni Edit Video (Pro)", + display_name="Kling 3.0 Omni Edit Video", category="api node/video/Kling", description="Edit an existing video with the latest model from Kling.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, @@ -1198,6 +1474,17 @@ class OmniProEditVideoNode(IO.ComfyNode): optional=True, ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -1229,7 +1516,9 @@ class OmniProEditVideoNode(IO.ComfyNode): keep_original_sound: bool, reference_images: Input.Image | None = None, resolution: str = "1080p", + seed: int = 0, ) -> IO.NodeOutput: + _ = seed prompt = normalize_omni_prompt_references(prompt) validate_string(prompt, min_length=1, max_length=2500) validate_video_duration(video, min_duration=3.0, max_duration=10.05) @@ -1273,27 +1562,43 @@ class OmniProImageNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProImageNode", - display_name="Kling Omni Image (Pro)", + display_name="Kling 3.0 Omni Image", category="api node/image/Kling", description="Create or edit images with the latest model from Kling.", inputs=[ - IO.Combo.Input("model_name", options=["kling-image-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-image-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the image content. " "This can include both positive and negative descriptions.", ), - IO.Combo.Input("resolution", options=["1K", "2K"]), + IO.Combo.Input("resolution", options=["1K", "2K", "4K"]), IO.Combo.Input( "aspect_ratio", options=["16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3", "21:9"], ), + IO.Combo.Input( + "series_amount", + options=["disabled", "2", "3", "4", "5", "6", "7", "8", "9"], + tooltip="Generate a series of images. Not supported for kling-image-o1.", + ), IO.Image.Input( "reference_images", tooltip="Up to 10 additional reference images.", optional=True, ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Image.Output(), @@ -1305,7 +1610,16 @@ class OmniProImageNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.028}""", + depends_on=IO.PriceBadgeDepends(widgets=["resolution", "series_amount", "model_name"]), + expr=""" + ( + $prices := {"1k": 0.028, "2k": 0.028, "4k": 0.056}; + $base := $lookup($prices, widgets.resolution); + $isO1 := widgets.model_name = "kling-image-o1"; + $mult := ($isO1 or widgets.series_amount = "disabled") ? 1 : $number(widgets.series_amount); + {"type":"usd","usd": $base * $mult} + ) + """, ), ) @@ -1316,8 +1630,13 @@ class OmniProImageNode(IO.ComfyNode): prompt: str, resolution: str, aspect_ratio: str, + series_amount: str = "disabled", reference_images: Input.Image | None = None, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed + if model_name == "kling-image-o1" and resolution == "4K": + raise ValueError("4K resolution is not supported for kling-image-o1 model.") prompt = normalize_omni_prompt_references(prompt) validate_string(prompt, min_length=1, max_length=2500) image_list: list[OmniImageParamImage] = [] @@ -1329,6 +1648,9 @@ class OmniProImageNode(IO.ComfyNode): validate_image_aspect_ratio(i, (1, 2.5), (2.5, 1)) for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image"): image_list.append(OmniImageParamImage(image=i)) + use_series = series_amount != "disabled" + if use_series and model_name == "kling-image-o1": + raise ValueError("kling-image-o1 does not support series generation.") response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/images/omni-image", method="POST"), @@ -1339,6 +1661,8 @@ class OmniProImageNode(IO.ComfyNode): resolution=resolution.lower(), aspect_ratio=aspect_ratio, image_list=image_list if image_list else None, + result_type="series" if use_series else None, + series_amount=int(series_amount) if use_series else None, ), ) if response.code: @@ -1351,7 +1675,9 @@ class OmniProImageNode(IO.ComfyNode): response_model=TaskStatusResponse, status_extractor=lambda r: (r.data.task_status if r.data else None), ) - return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.task_result.images[0].url)) + images = final_response.data.task_result.series_images or final_response.data.task_result.images + tensors = [await download_url_to_image_tensor(img.url) for img in images] + return IO.NodeOutput(torch.cat(tensors, dim=0)) class KlingCameraControlT2VNode(IO.ComfyNode): @@ -2119,7 +2445,7 @@ class KlingImageGenerationNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingImageGenerationNode", - display_name="Kling Image Generation", + display_name="Kling 3.0 Image", category="api node/image/Kling", description="Kling Image Generation Node. Generate an image from a text prompt with an optional reference image.", inputs=[ @@ -2147,11 +2473,7 @@ class KlingImageGenerationNode(IO.ComfyNode): display_mode=IO.NumberDisplay.slider, tooltip="Subject reference similarity", ), - IO.Combo.Input( - "model_name", - options=[i.value for i in KlingImageGenModelName], - default="kling-v2", - ), + IO.Combo.Input("model_name", options=["kling-v3", "kling-v2", "kling-v1-5"]), IO.Combo.Input( "aspect_ratio", options=[i.value for i in KlingImageGenAspectRatio], @@ -2165,6 +2487,17 @@ class KlingImageGenerationNode(IO.ComfyNode): tooltip="Number of generated images", ), IO.Image.Input("image", optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Image.Output(), @@ -2183,7 +2516,7 @@ class KlingImageGenerationNode(IO.ComfyNode): $base := $contains($m,"kling-v1-5") ? (inputs.image.connected ? 0.028 : 0.014) - : ($contains($m,"kling-v1") ? 0.0035 : 0.014); + : $contains($m,"kling-v3") ? 0.028 : 0.014; {"type":"usd","usd": $base * widgets.n} ) """, @@ -2193,7 +2526,7 @@ class KlingImageGenerationNode(IO.ComfyNode): @classmethod async def execute( cls, - model_name: KlingImageGenModelName, + model_name: str, prompt: str, negative_prompt: str, image_type: KlingImageGenImageReferenceType, @@ -2202,17 +2535,11 @@ class KlingImageGenerationNode(IO.ComfyNode): n: int, aspect_ratio: KlingImageGenAspectRatio, image: torch.Tensor | None = None, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed validate_string(prompt, field_name="prompt", min_length=1, max_length=MAX_PROMPT_LENGTH_IMAGE_GEN) validate_string(negative_prompt, field_name="negative_prompt", max_length=MAX_PROMPT_LENGTH_IMAGE_GEN) - - if image is None: - image_type = None - elif model_name == KlingImageGenModelName.kling_v1: - raise ValueError(f"The model {KlingImageGenModelName.kling_v1.value} does not support reference images.") - else: - image = tensor_to_base64_string(image) - task_creation_response = await sync_op( cls, ApiEndpoint(path=PATH_IMAGE_GENERATIONS, method="POST"), @@ -2221,8 +2548,8 @@ class KlingImageGenerationNode(IO.ComfyNode): model_name=model_name, prompt=prompt, negative_prompt=negative_prompt, - image=image, - image_reference=image_type, + image=tensor_to_base64_string(image) if image is not None else None, + image_reference=image_type if image is not None else None, image_fidelity=image_fidelity, human_fidelity=human_fidelity, n=n, @@ -2252,7 +2579,7 @@ class TextToVideoWithAudio(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingTextToVideoWithAudio", - display_name="Kling Text to Video with Audio", + display_name="Kling 2.6 Text to Video with Audio", category="api node/video/Kling", inputs=[ IO.Combo.Input("model_name", options=["kling-v2-6"]), @@ -2320,7 +2647,7 @@ class ImageToVideoWithAudio(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingImageToVideoWithAudio", - display_name="Kling Image(First Frame) to Video with Audio", + display_name="Kling 2.6 Image(First Frame) to Video with Audio", category="api node/video/Kling", inputs=[ IO.Combo.Input("model_name", options=["kling-v2-6"]), @@ -2478,6 +2805,335 @@ class MotionControl(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) +class KlingVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="KlingVideoNode", + display_name="Kling 3.0 Video", + category="api node/video/Kling", + description="Generate videos with Kling V3. " + "Supports text-to-video and image-to-video with optional storyboard multi-prompt and audio generation.", + inputs=[ + IO.DynamicCombo.Input( + "multi_shot", + options=[ + IO.DynamicCombo.Option( + "disabled", + [ + IO.String.Input("prompt", multiline=True, default=""), + IO.String.Input("negative_prompt", multiline=True, default=""), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + display_mode=IO.NumberDisplay.slider, + ), + ], + ), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations.", + ), + IO.Boolean.Input("generate_audio", default=True), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "kling-v3", + [ + IO.Combo.Input("resolution", options=["1080p", "720p"]), + IO.Combo.Input( + "aspect_ratio", + options=["16:9", "9:16", "1:1"], + tooltip="Ignored in image-to-video mode.", + ), + ], + ), + ], + tooltip="Model and generation settings.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Image.Input( + "start_frame", + optional=True, + tooltip="Optional start frame image. When connected, switches to image-to-video mode.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=[ + "model.resolution", + "generate_audio", + "multi_shot", + "multi_shot.duration", + "multi_shot.storyboard_1_duration", + "multi_shot.storyboard_2_duration", + "multi_shot.storyboard_3_duration", + "multi_shot.storyboard_4_duration", + "multi_shot.storyboard_5_duration", + "multi_shot.storyboard_6_duration", + ], + ), + expr=""" + ( + $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $res := $lookup(widgets, "model.resolution"); + $audio := widgets.generate_audio ? "on" : "off"; + $rate := $lookup($lookup($rates, $res), $audio); + $ms := widgets.multi_shot; + $isSb := $ms != "disabled"; + $n := $isSb ? $number($substring($ms, 0, 1)) : 0; + $d1 := $lookup(widgets, "multi_shot.storyboard_1_duration"); + $d2 := $n >= 2 ? $lookup(widgets, "multi_shot.storyboard_2_duration") : 0; + $d3 := $n >= 3 ? $lookup(widgets, "multi_shot.storyboard_3_duration") : 0; + $d4 := $n >= 4 ? $lookup(widgets, "multi_shot.storyboard_4_duration") : 0; + $d5 := $n >= 5 ? $lookup(widgets, "multi_shot.storyboard_5_duration") : 0; + $d6 := $n >= 6 ? $lookup(widgets, "multi_shot.storyboard_6_duration") : 0; + $dur := $isSb ? $d1 + $d2 + $d3 + $d4 + $d5 + $d6 : $lookup(widgets, "multi_shot.duration"); + {"type":"usd","usd": $rate * $dur} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + multi_shot: dict, + generate_audio: bool, + model: dict, + seed: int, + start_frame: Input.Image | None = None, + ) -> IO.NodeOutput: + _ = seed + mode = "pro" if model["resolution"] == "1080p" else "std" + custom_multi_shot = False + if multi_shot["multi_shot"] == "disabled": + shot_type = None + else: + shot_type = "customize" + custom_multi_shot = True + + multi_prompt_list = None + if shot_type == "customize": + count = int(multi_shot["multi_shot"].split()[0]) + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = multi_shot[f"storyboard_{i}_prompt"] + sb_duration = multi_shot[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + duration = sum(int(e.duration) for e in multi_prompt_list) + if duration < 3 or duration > 15: + raise ValueError( + f"Total storyboard duration ({duration}s) must be between 3 and 15 seconds." + ) + else: + duration = multi_shot["duration"] + validate_string(multi_shot["prompt"], min_length=1, max_length=2500) + + if start_frame is not None: + validate_image_dimensions(start_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1)) + image_url = await upload_image_to_comfyapi(cls, start_frame, wait_label="Uploading start frame") + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"), + response_model=TaskStatusResponse, + data=ImageToVideoWithAudioRequest( + model_name=model["model"], + image=image_url, + prompt=None if custom_multi_shot else multi_shot["prompt"], + negative_prompt=None if custom_multi_shot else multi_shot["negative_prompt"], + mode=mode, + duration=str(duration), + sound="on" if generate_audio else "off", + multi_shot=True if shot_type else None, + multi_prompt=multi_prompt_list, + shot_type=shot_type, + ), + ) + poll_path = f"/proxy/kling/v1/videos/image2video/{response.data.task_id}" + else: + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/text2video", method="POST"), + response_model=TaskStatusResponse, + data=TextToVideoWithAudioRequest( + model_name=model["model"], + aspect_ratio=model["aspect_ratio"], + prompt=None if custom_multi_shot else multi_shot["prompt"], + negative_prompt=None if custom_multi_shot else multi_shot["negative_prompt"], + mode=mode, + duration=str(duration), + sound="on" if generate_audio else "off", + multi_shot=True if shot_type else None, + multi_prompt=multi_prompt_list, + shot_type=shot_type, + ), + ) + poll_path = f"/proxy/kling/v1/videos/text2video/{response.data.task_id}" + + if response.code: + raise RuntimeError( + f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=poll_path), + response_model=TaskStatusResponse, + status_extractor=lambda r: (r.data.task_status if r.data else None), + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) + + +class KlingFirstLastFrameNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="KlingFirstLastFrameNode", + display_name="Kling 3.0 First-Last-Frame to Video", + category="api node/video/Kling", + description="Generate videos with Kling V3 using first and last frames.", + inputs=[ + IO.String.Input("prompt", multiline=True, default=""), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + display_mode=IO.NumberDisplay.slider, + ), + IO.Image.Input("first_frame"), + IO.Image.Input("end_frame"), + IO.Boolean.Input("generate_audio", default=True), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "kling-v3", + [ + IO.Combo.Input("resolution", options=["1080p", "720p"]), + ], + ), + ], + tooltip="Model and generation settings.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=["model.resolution", "generate_audio", "duration"], + ), + expr=""" + ( + $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $res := $lookup(widgets, "model.resolution"); + $audio := widgets.generate_audio ? "on" : "off"; + $rate := $lookup($lookup($rates, $res), $audio); + {"type":"usd","usd": $rate * widgets.duration} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + duration: int, + first_frame: Input.Image, + end_frame: Input.Image, + generate_audio: bool, + model: dict, + seed: int, + ) -> IO.NodeOutput: + _ = seed + validate_string(prompt, min_length=1, max_length=2500) + validate_image_dimensions(first_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(first_frame, (1, 2.5), (2.5, 1)) + validate_image_dimensions(end_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(end_frame, (1, 2.5), (2.5, 1)) + image_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame") + image_tail_url = await upload_image_to_comfyapi(cls, end_frame, wait_label="Uploading end frame") + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"), + response_model=TaskStatusResponse, + data=ImageToVideoWithAudioRequest( + model_name=model["model"], + image=image_url, + image_tail=image_tail_url, + prompt=prompt, + mode="pro" if model["resolution"] == "1080p" else "std", + duration=str(duration), + sound="on" if generate_audio else "off", + ), + ) + if response.code: + raise RuntimeError( + f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: (r.data.task_status if r.data else None), + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) + + class KlingExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -2504,6 +3160,8 @@ class KlingExtension(ComfyExtension): TextToVideoWithAudio, ImageToVideoWithAudio, MotionControl, + KlingVideoNode, + KlingFirstLastFrameNode, ] From 6615db925c9f84843e29db118852e14b643a1a03 Mon Sep 17 00:00:00 2001 From: ComfyUI Wiki Date: Wed, 11 Feb 2026 02:24:56 +0800 Subject: [PATCH 04/10] chore: update workflow templates to v0.8.38 (#12394) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4e2773f5d..7de6a413c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.38.13 -comfyui-workflow-templates==0.8.37 +comfyui-workflow-templates==0.8.38 comfyui-embedded-docs==0.4.1 torch torchsde From 6648ab68bc934a185c90a2a872c87dc64d093751 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 10 Feb 2026 13:26:29 -0500 Subject: [PATCH 05/10] ComfyUI v0.13.0 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 706b37763..cf4e89816 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.12.3" +__version__ = "0.13.0" diff --git a/pyproject.toml b/pyproject.toml index f7925b92a..9dab9a50c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.12.3" +version = "0.13.0" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.10" From fe053ba5eb34c8abcc5d17a25c114340af1833aa Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Tue, 10 Feb 2026 10:37:17 -0800 Subject: [PATCH 06/10] mp: dont deep-clone objects from model_options (#12382) If there are non-trivial python objects nested in the model_options, this causes all sorts of issues. Traverse lists and dicts so clones can safely overide settings and BYO objects but stop there on the deepclone. --- comfy/model_patcher.py | 3 +-- comfy/utils.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index b9a117a7c..19c9031ea 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -19,7 +19,6 @@ from __future__ import annotations import collections -import copy import inspect import logging import math @@ -317,7 +316,7 @@ class ModelPatcher: n.object_patches = self.object_patches.copy() n.weight_wrapper_patches = self.weight_wrapper_patches.copy() - n.model_options = copy.deepcopy(self.model_options) + n.model_options = comfy.utils.deepcopy_list_dict(self.model_options) n.backup = self.backup n.object_patches_backup = self.object_patches_backup n.parent = self diff --git a/comfy/utils.py b/comfy/utils.py index 1337e2205..edd80cebe 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -1376,3 +1376,21 @@ def string_to_seed(data): else: crc >>= 1 return crc ^ 0xFFFFFFFF + +def deepcopy_list_dict(obj, memo=None): + if memo is None: + memo = {} + + obj_id = id(obj) + if obj_id in memo: + return memo[obj_id] + + if isinstance(obj, dict): + res = {deepcopy_list_dict(k, memo): deepcopy_list_dict(v, memo) for k, v in obj.items()} + elif isinstance(obj, list): + res = [deepcopy_list_dict(i, memo) for i in obj] + else: + res = obj + + memo[obj_id] = res + return res From f719f9c06266e7944683009b403e995d4c61d5f0 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Tue, 10 Feb 2026 10:37:46 -0800 Subject: [PATCH 07/10] sd: delay VAE dtype archive until after override (#12388) VAEs have host specific dtype logic that should override the dynamic _model_dtype. Defer the archiving of model dtypes until after. --- comfy/sd.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index bc9407405..f65e7cadd 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -793,8 +793,6 @@ class VAE: self.first_stage_model = AutoencoderKL(**(config['params'])) self.first_stage_model = self.first_stage_model.eval() - model_management.archive_model_dtypes(self.first_stage_model) - if device is None: device = model_management.vae_device() self.device = device @@ -803,6 +801,7 @@ class VAE: dtype = model_management.vae_dtype(self.device, self.working_dtypes) self.vae_dtype = dtype self.first_stage_model.to(self.vae_dtype) + model_management.archive_model_dtypes(self.first_stage_model) self.output_device = model_management.intermediate_device() mp = comfy.model_patcher.CoreModelPatcher From 123a7874a97c4a8b8f06d4b7c2b1a566b8f0d057 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Tue, 10 Feb 2026 10:38:28 -0800 Subject: [PATCH 08/10] ops: Fix vanilla-fp8 loaded lora quality (#12390) This was missing the stochastic rounding required for fp8 downcast to be consistent with model_patcher.patch_weight_to_device. Missed in testing as I spend too much time with quantized tensors and overlooked the simpler ones. --- comfy/ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy/ops.py b/comfy/ops.py index ea0d70702..33803b223 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -169,8 +169,8 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu if orig.dtype == dtype and len(fns) == 0: #The layer actually wants our freshly saved QT x = y - else: - y = x + elif update_weight: + y = comfy.float.stochastic_rounding(x, orig.dtype, seed = comfy.utils.string_to_seed(s.seed_key)) if update_weight: orig.copy_(y) for f in fns: From 00fff6019ecf0f4306005579e93cef0cd51a3a1c Mon Sep 17 00:00:00 2001 From: guill Date: Tue, 10 Feb 2026 14:37:14 -0800 Subject: [PATCH 09/10] feat(jobs): add 3d to PREVIEWABLE_MEDIA_TYPES for first-class 3D output support (#12381) Co-authored-by: Jedrzej Kosinski --- comfy_execution/jobs.py | 79 +++++++++++-- tests/execution/test_jobs.py | 208 ++++++++++++++++++++++++++++++++++- 2 files changed, 271 insertions(+), 16 deletions(-) diff --git a/comfy_execution/jobs.py b/comfy_execution/jobs.py index bf091a448..370014fb6 100644 --- a/comfy_execution/jobs.py +++ b/comfy_execution/jobs.py @@ -20,10 +20,60 @@ class JobStatus: # Media types that can be previewed in the frontend -PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio'}) +PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio', '3d'}) # 3D file extensions for preview fallback (no dedicated media_type exists) -THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb'}) +THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb', '.usdz'}) + + +def has_3d_extension(filename: str) -> bool: + lower = filename.lower() + return any(lower.endswith(ext) for ext in THREE_D_EXTENSIONS) + + +def normalize_output_item(item): + """Normalize a single output list item for the jobs API. + + Returns the normalized item, or None to exclude it. + String items with 3D extensions become {filename, type, subfolder} dicts. + """ + if item is None: + return None + if isinstance(item, str): + if has_3d_extension(item): + return {'filename': item, 'type': 'output', 'subfolder': '', 'mediaType': '3d'} + return None + if isinstance(item, dict): + return item + return None + + +def normalize_outputs(outputs: dict) -> dict: + """Normalize raw node outputs for the jobs API. + + Transforms string 3D filenames into file output dicts and removes + None items. All other items (non-3D strings, dicts, etc.) are + preserved as-is. + """ + normalized = {} + for node_id, node_outputs in outputs.items(): + if not isinstance(node_outputs, dict): + normalized[node_id] = node_outputs + continue + normalized_node = {} + for media_type, items in node_outputs.items(): + if media_type == 'animated' or not isinstance(items, list): + normalized_node[media_type] = items + continue + normalized_items = [] + for item in items: + if item is None: + continue + norm = normalize_output_item(item) + normalized_items.append(norm if norm is not None else item) + normalized_node[media_type] = normalized_items + normalized[node_id] = normalized_node + return normalized def _extract_job_metadata(extra_data: dict) -> tuple[Optional[int], Optional[str]]: @@ -45,9 +95,9 @@ def is_previewable(media_type: str, item: dict) -> bool: Maintains backwards compatibility with existing logic. Priority: - 1. media_type is 'images', 'video', or 'audio' + 1. media_type is 'images', 'video', 'audio', or '3d' 2. format field starts with 'video/' or 'audio/' - 3. filename has a 3D extension (.obj, .fbx, .gltf, .glb) + 3. filename has a 3D extension (.obj, .fbx, .gltf, .glb, .usdz) """ if media_type in PREVIEWABLE_MEDIA_TYPES: return True @@ -139,7 +189,7 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs: }) if include_outputs: - job['outputs'] = outputs + job['outputs'] = normalize_outputs(outputs) job['execution_status'] = status_info job['workflow'] = { 'prompt': prompt, @@ -171,18 +221,23 @@ def get_outputs_summary(outputs: dict) -> tuple[int, Optional[dict]]: continue for item in items: - count += 1 - - if not isinstance(item, dict): + normalized = normalize_output_item(item) + if normalized is None: continue - if preview_output is None and is_previewable(media_type, item): + count += 1 + + if preview_output is not None: + continue + + if isinstance(normalized, dict) and is_previewable(media_type, normalized): enriched = { - **item, + **normalized, 'nodeId': node_id, - 'mediaType': media_type } - if item.get('type') == 'output': + if 'mediaType' not in normalized: + enriched['mediaType'] = media_type + if normalized.get('type') == 'output': preview_output = enriched elif fallback_preview is None: fallback_preview = enriched diff --git a/tests/execution/test_jobs.py b/tests/execution/test_jobs.py index 4d2f9ed36..83c36fe48 100644 --- a/tests/execution/test_jobs.py +++ b/tests/execution/test_jobs.py @@ -5,8 +5,11 @@ from comfy_execution.jobs import ( is_previewable, normalize_queue_item, normalize_history_item, + normalize_output_item, + normalize_outputs, get_outputs_summary, apply_sorting, + has_3d_extension, ) @@ -35,8 +38,8 @@ class TestIsPreviewable: """Unit tests for is_previewable()""" def test_previewable_media_types(self): - """Images, video, audio media types should be previewable.""" - for media_type in ['images', 'video', 'audio']: + """Images, video, audio, 3d media types should be previewable.""" + for media_type in ['images', 'video', 'audio', '3d']: assert is_previewable(media_type, {}) is True def test_non_previewable_media_types(self): @@ -46,7 +49,7 @@ class TestIsPreviewable: def test_3d_extensions_previewable(self): """3D file extensions should be previewable regardless of media_type.""" - for ext in ['.obj', '.fbx', '.gltf', '.glb']: + for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']: item = {'filename': f'model{ext}'} assert is_previewable('files', item) is True @@ -160,7 +163,7 @@ class TestGetOutputsSummary: def test_3d_files_previewable(self): """3D file extensions should be previewable.""" - for ext in ['.obj', '.fbx', '.gltf', '.glb']: + for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']: outputs = { 'node1': { 'files': [{'filename': f'model{ext}', 'type': 'output'}] @@ -192,6 +195,64 @@ class TestGetOutputsSummary: assert preview['mediaType'] == 'images' assert preview['subfolder'] == 'outputs' + def test_string_3d_filename_creates_preview(self): + """String items with 3D extensions should synthesize a preview (Preview3D node output). + Only the .glb counts — nulls and non-file strings are excluded.""" + outputs = { + 'node1': { + 'result': ['preview3d_abc123.glb', None, None] + } + } + count, preview = get_outputs_summary(outputs) + assert count == 1 + assert preview is not None + assert preview['filename'] == 'preview3d_abc123.glb' + assert preview['mediaType'] == '3d' + assert preview['nodeId'] == 'node1' + assert preview['type'] == 'output' + + def test_string_non_3d_filename_no_preview(self): + """String items without 3D extensions should not create a preview.""" + outputs = { + 'node1': { + 'result': ['data.json', None] + } + } + count, preview = get_outputs_summary(outputs) + assert count == 0 + assert preview is None + + def test_string_3d_filename_used_as_fallback(self): + """String 3D preview should be used when no dict items are previewable.""" + outputs = { + 'node1': { + 'latents': [{'filename': 'latent.safetensors'}], + }, + 'node2': { + 'result': ['model.glb', None] + } + } + count, preview = get_outputs_summary(outputs) + assert preview is not None + assert preview['filename'] == 'model.glb' + assert preview['mediaType'] == '3d' + + +class TestHas3DExtension: + """Unit tests for has_3d_extension()""" + + def test_recognized_extensions(self): + for ext in ['.obj', '.fbx', '.gltf', '.glb', '.usdz']: + assert has_3d_extension(f'model{ext}') is True + + def test_case_insensitive(self): + assert has_3d_extension('MODEL.GLB') is True + assert has_3d_extension('Scene.GLTF') is True + + def test_non_3d_extensions(self): + for name in ['photo.png', 'video.mp4', 'data.json', 'model']: + assert has_3d_extension(name) is False + class TestApplySorting: """Unit tests for apply_sorting()""" @@ -395,3 +456,142 @@ class TestNormalizeHistoryItem: 'prompt': {'nodes': {'1': {}}}, 'extra_data': {'create_time': 1234567890, 'client_id': 'abc'}, } + + def test_include_outputs_normalizes_3d_strings(self): + """Detail view should transform string 3D filenames into file output dicts.""" + history_item = { + 'prompt': ( + 5, + 'prompt-3d', + {'nodes': {}}, + {'create_time': 1234567890}, + ['node1'], + ), + 'status': {'status_str': 'success', 'completed': True, 'messages': []}, + 'outputs': { + 'node1': { + 'result': ['preview3d_abc123.glb', None, None] + } + }, + } + job = normalize_history_item('prompt-3d', history_item, include_outputs=True) + + assert job['outputs_count'] == 1 + result_items = job['outputs']['node1']['result'] + assert len(result_items) == 1 + assert result_items[0] == { + 'filename': 'preview3d_abc123.glb', + 'type': 'output', + 'subfolder': '', + 'mediaType': '3d', + } + + def test_include_outputs_preserves_dict_items(self): + """Detail view normalization should pass dict items through unchanged.""" + history_item = { + 'prompt': ( + 5, + 'prompt-img', + {'nodes': {}}, + {'create_time': 1234567890}, + ['node1'], + ), + 'status': {'status_str': 'success', 'completed': True, 'messages': []}, + 'outputs': { + 'node1': { + 'images': [ + {'filename': 'photo.png', 'type': 'output', 'subfolder': ''}, + ] + } + }, + } + job = normalize_history_item('prompt-img', history_item, include_outputs=True) + + assert job['outputs_count'] == 1 + assert job['outputs']['node1']['images'] == [ + {'filename': 'photo.png', 'type': 'output', 'subfolder': ''}, + ] + + +class TestNormalizeOutputItem: + """Unit tests for normalize_output_item()""" + + def test_none_returns_none(self): + assert normalize_output_item(None) is None + + def test_string_3d_extension_synthesizes_dict(self): + result = normalize_output_item('model.glb') + assert result == {'filename': 'model.glb', 'type': 'output', 'subfolder': '', 'mediaType': '3d'} + + def test_string_non_3d_extension_returns_none(self): + assert normalize_output_item('data.json') is None + + def test_string_no_extension_returns_none(self): + assert normalize_output_item('camera_info_string') is None + + def test_dict_passes_through(self): + item = {'filename': 'test.png', 'type': 'output'} + assert normalize_output_item(item) is item + + def test_other_types_return_none(self): + assert normalize_output_item(42) is None + assert normalize_output_item(True) is None + + +class TestNormalizeOutputs: + """Unit tests for normalize_outputs()""" + + def test_empty_outputs(self): + assert normalize_outputs({}) == {} + + def test_dict_items_pass_through(self): + outputs = { + 'node1': { + 'images': [{'filename': 'a.png', 'type': 'output'}], + } + } + result = normalize_outputs(outputs) + assert result == outputs + + def test_3d_string_synthesized(self): + outputs = { + 'node1': { + 'result': ['model.glb', None, None], + } + } + result = normalize_outputs(outputs) + assert result == { + 'node1': { + 'result': [ + {'filename': 'model.glb', 'type': 'output', 'subfolder': '', 'mediaType': '3d'}, + ], + } + } + + def test_animated_key_preserved(self): + outputs = { + 'node1': { + 'images': [{'filename': 'a.png', 'type': 'output'}], + 'animated': [True], + } + } + result = normalize_outputs(outputs) + assert result['node1']['animated'] == [True] + + def test_non_dict_node_outputs_preserved(self): + outputs = {'node1': 'unexpected_value'} + result = normalize_outputs(outputs) + assert result == {'node1': 'unexpected_value'} + + def test_none_items_filtered_but_other_types_preserved(self): + outputs = { + 'node1': { + 'result': ['data.json', None, [1, 2, 3]], + } + } + result = normalize_outputs(outputs) + assert result == { + 'node1': { + 'result': ['data.json', [1, 2, 3]], + } + } From dbe70b6821994ce92d9cf211cc685862d0b6c0ca Mon Sep 17 00:00:00 2001 From: AustinMroz Date: Tue, 10 Feb 2026 14:42:21 -0800 Subject: [PATCH 10/10] Add a VideoSlice node (#12107) * Base TrimVideo implementation * Raise error if as_trimmed call fails * Bigger max start_time, tooltips, and formatting * Count packets unless codec has subframes * Remove incorrect nested decode * Add null check for audio streams * Support non-strict duration * Added strict_duration bool to node definition * Empty commit for approval * Fix duration * Support 5.1 audio layout on save --------- Co-authored-by: Jedrzej Kosinski --- comfy_api/latest/_input/video_types.py | 15 ++ comfy_api/latest/_input_impl/video_types.py | 201 ++++++++++++++------ comfy_extras/nodes_video.py | 51 +++++ 3 files changed, 207 insertions(+), 60 deletions(-) diff --git a/comfy_api/latest/_input/video_types.py b/comfy_api/latest/_input/video_types.py index e634a0311..451e9526e 100644 --- a/comfy_api/latest/_input/video_types.py +++ b/comfy_api/latest/_input/video_types.py @@ -34,6 +34,21 @@ class VideoInput(ABC): """ pass + @abstractmethod + def as_trimmed( + self, + start_time: float | None = None, + duration: float | None = None, + strict_duration: bool = False, + ) -> VideoInput | None: + """ + Create a new VideoInput which is trimmed to have the corresponding start_time and duration + + Returns: + A new VideoInput, or None if the result would have negative duration + """ + pass + def get_stream_source(self) -> Union[str, io.BytesIO]: """ Get a streamable source for the video. This allows processing without diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 1405d0b81..3463ed1c9 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -6,6 +6,7 @@ from typing import Optional from .._input import AudioInput, VideoInput import av import io +import itertools import json import numpy as np import math @@ -29,7 +30,6 @@ def container_to_output_format(container_format: str | None) -> str | None: formats = container_format.split(",") return formats[0] - def get_open_write_kwargs( dest: str | io.BytesIO, container_format: str, to_format: str | None ) -> dict: @@ -57,12 +57,14 @@ class VideoFromFile(VideoInput): Class representing video input from a file. """ - def __init__(self, file: str | io.BytesIO): + def __init__(self, file: str | io.BytesIO, *, start_time: float=0, duration: float=0): """ Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object containing the file contents. """ self.__file = file + self.__start_time = start_time + self.__duration = duration def get_stream_source(self) -> str | io.BytesIO: """ @@ -96,6 +98,16 @@ class VideoFromFile(VideoInput): Returns: Duration in seconds """ + raw_duration = self._get_raw_duration() + if self.__start_time < 0: + duration_from_start = min(raw_duration, -self.__start_time) + else: + duration_from_start = raw_duration - self.__start_time + if self.__duration: + return min(self.__duration, duration_from_start) + return duration_from_start + + def _get_raw_duration(self) -> float: if isinstance(self.__file, io.BytesIO): self.__file.seek(0) with av.open(self.__file, mode="r") as container: @@ -113,9 +125,13 @@ class VideoFromFile(VideoInput): if video_stream and video_stream.average_rate: frame_count = 0 container.seek(0) - for packet in container.demux(video_stream): - for _ in packet.decode(): - frame_count += 1 + frame_iterator = ( + container.decode(video_stream) + if video_stream.codec.capabilities & 0x100 + else container.demux(video_stream) + ) + for packet in frame_iterator: + frame_count += 1 if frame_count > 0: return float(frame_count / video_stream.average_rate) @@ -131,36 +147,54 @@ class VideoFromFile(VideoInput): with av.open(self.__file, mode="r") as container: video_stream = self._get_first_video_stream(container) - # 1. Prefer the frames field if available - if video_stream.frames and video_stream.frames > 0: + # 1. Prefer the frames field if available and usable + if ( + video_stream.frames + and video_stream.frames > 0 + and not self.__start_time + and not self.__duration + ): return int(video_stream.frames) # 2. Try to estimate from duration and average_rate using only metadata - if container.duration is not None and video_stream.average_rate: - duration_seconds = float(container.duration / av.time_base) - estimated_frames = int(round(duration_seconds * float(video_stream.average_rate))) - if estimated_frames > 0: - return estimated_frames - if ( getattr(video_stream, "duration", None) is not None and getattr(video_stream, "time_base", None) is not None and video_stream.average_rate ): - duration_seconds = float(video_stream.duration * video_stream.time_base) + raw_duration = float(video_stream.duration * video_stream.time_base) + if self.__start_time < 0: + duration_from_start = min(raw_duration, -self.__start_time) + else: + duration_from_start = raw_duration - self.__start_time + duration_seconds = min(self.__duration, duration_from_start) estimated_frames = int(round(duration_seconds * float(video_stream.average_rate))) if estimated_frames > 0: return estimated_frames # 3. Last resort: decode frames and count them (streaming) - frame_count = 0 - container.seek(0) - for packet in container.demux(video_stream): - for _ in packet.decode(): - frame_count += 1 - - if frame_count == 0: - raise ValueError(f"Could not determine frame count for file '{self.__file}'") + if self.__start_time < 0: + start_time = max(self._get_raw_duration() + self.__start_time, 0) + else: + start_time = self.__start_time + frame_count = 1 + start_pts = int(start_time / video_stream.time_base) + end_pts = int((start_time + self.__duration) / video_stream.time_base) + container.seek(start_pts, stream=video_stream) + frame_iterator = ( + container.decode(video_stream) + if video_stream.codec.capabilities & 0x100 + else container.demux(video_stream) + ) + for frame in frame_iterator: + if frame.pts >= start_pts: + break + else: + raise ValueError(f"Could not determine frame count for file '{self.__file}'\nNo frames exist for start_time {self.__start_time}") + for frame in frame_iterator: + if frame.pts >= end_pts: + break + frame_count += 1 return frame_count def get_frame_rate(self) -> Fraction: @@ -199,9 +233,21 @@ class VideoFromFile(VideoInput): return container.format.name def get_components_internal(self, container: InputContainer) -> VideoComponents: + video_stream = self._get_first_video_stream(container) + if self.__start_time < 0: + start_time = max(self._get_raw_duration() + self.__start_time, 0) + else: + start_time = self.__start_time # Get video frames frames = [] - for frame in container.decode(video=0): + start_pts = int(start_time / video_stream.time_base) + end_pts = int((start_time + self.__duration) / video_stream.time_base) + container.seek(start_pts, stream=video_stream) + for frame in container.decode(video_stream): + if frame.pts < start_pts: + continue + if self.__duration and frame.pts >= end_pts: + break img = frame.to_ndarray(format='rgb24') # shape: (H, W, 3) img = torch.from_numpy(img) / 255.0 # shape: (H, W, 3) frames.append(img) @@ -209,31 +255,44 @@ class VideoFromFile(VideoInput): images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0) # Get frame rate - video_stream = next(s for s in container.streams if s.type == 'video') - frame_rate = Fraction(video_stream.average_rate) if video_stream and video_stream.average_rate else Fraction(1) + frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) # Get audio if available audio = None - try: - container.seek(0) # Reset the container to the beginning - for stream in container.streams: - if stream.type != 'audio': - continue - assert isinstance(stream, av.AudioStream) - audio_frames = [] - for packet in container.demux(stream): - for frame in packet.decode(): - assert isinstance(frame, av.AudioFrame) - audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) - if len(audio_frames) > 0: - audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) - audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) - audio = AudioInput({ - "waveform": audio_tensor, - "sample_rate": int(stream.sample_rate) if stream.sample_rate else 1, - }) - except StopIteration: - pass # No audio stream + container.seek(start_pts, stream=video_stream) + # Use last stream for consistency + if len(container.streams.audio): + audio_stream = container.streams.audio[-1] + audio_frames = [] + resample = av.audio.resampler.AudioResampler(format='fltp').resample + frames = itertools.chain.from_iterable( + map(resample, container.decode(audio_stream)) + ) + + has_first_frame = False + for frame in frames: + offset_seconds = start_time - frame.pts * audio_stream.time_base + to_skip = int(offset_seconds * audio_stream.sample_rate) + if to_skip < frame.samples: + has_first_frame = True + break + if has_first_frame: + audio_frames.append(frame.to_ndarray()[..., to_skip:]) + + for frame in frames: + if frame.time > start_time + self.__duration: + break + audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + if self.__duration: + audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] + + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, + }) metadata = container.metadata return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata) @@ -250,7 +309,7 @@ class VideoFromFile(VideoInput): path: str | io.BytesIO, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, ): if isinstance(self.__file, io.BytesIO): self.__file.seek(0) # Reset the BytesIO object to the beginning @@ -262,15 +321,14 @@ class VideoFromFile(VideoInput): reuse_streams = False if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None: reuse_streams = False + if self.__start_time or self.__duration: + reuse_streams = False if not reuse_streams: components = self.get_components_internal(container) video = VideoFromComponents(components) return video.save_to( - path, - format=format, - codec=codec, - metadata=metadata + path, format=format, codec=codec, metadata=metadata ) streams = container.streams @@ -304,10 +362,21 @@ class VideoFromFile(VideoInput): output_container.mux(packet) def _get_first_video_stream(self, container: InputContainer): - video_stream = next((s for s in container.streams if s.type == "video"), None) - if video_stream is None: - raise ValueError(f"No video stream found in file '{self.__file}'") - return video_stream + if len(container.streams.video): + return container.streams.video[0] + raise ValueError(f"No video stream found in file '{self.__file}'") + + def as_trimmed( + self, start_time: float = 0, duration: float = 0, strict_duration: bool = True + ) -> VideoInput | None: + trimmed = VideoFromFile( + self.get_stream_source(), + start_time=start_time + self.__start_time, + duration=duration, + ) + if trimmed.get_duration() < duration and strict_duration: + return None + return trimmed class VideoFromComponents(VideoInput): @@ -322,7 +391,7 @@ class VideoFromComponents(VideoInput): return VideoComponents( images=self.__components.images, audio=self.__components.audio, - frame_rate=self.__components.frame_rate + frame_rate=self.__components.frame_rate, ) def save_to( @@ -330,7 +399,7 @@ class VideoFromComponents(VideoInput): path: str, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, ): if format != VideoContainer.AUTO and format != VideoContainer.MP4: raise ValueError("Only MP4 format is supported for now") @@ -357,7 +426,10 @@ class VideoFromComponents(VideoInput): audio_stream: Optional[av.AudioStream] = None if self.__components.audio: audio_sample_rate = int(self.__components.audio['sample_rate']) - audio_stream = output.add_stream('aac', rate=audio_sample_rate) + waveform = self.__components.audio['waveform'] + waveform = waveform[0, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])] + layout = {1: 'mono', 2: 'stereo', 6: '5.1'}.get(waveform.shape[0], 'stereo') + audio_stream = output.add_stream('aac', rate=audio_sample_rate, layout=layout) # Encode video for i, frame in enumerate(self.__components.images): @@ -372,12 +444,21 @@ class VideoFromComponents(VideoInput): output.mux(packet) if audio_stream and self.__components.audio: - waveform = self.__components.audio['waveform'] - waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])] - frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().cpu().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo') + frame = av.AudioFrame.from_ndarray(waveform.float().cpu().numpy(), format='fltp', layout=layout) frame.sample_rate = audio_sample_rate frame.pts = 0 output.mux(audio_stream.encode(frame)) # Flush encoder output.mux(audio_stream.encode(None)) + + def as_trimmed( + self, + start_time: float | None = None, + duration: float | None = None, + strict_duration: bool = True, + ) -> VideoInput | None: + if self.get_duration() < start_time + duration: + return None + #TODO Consider tracking duration and trimming at time of save? + return VideoFromFile(self.get_stream_source(), start_time=start_time, duration=duration) diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py index ccf7b63d3..cd765a7c1 100644 --- a/comfy_extras/nodes_video.py +++ b/comfy_extras/nodes_video.py @@ -202,6 +202,56 @@ class LoadVideo(io.ComfyNode): return True +class VideoSlice(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="Video Slice", + display_name="Video Slice", + search_aliases=[ + "trim video duration", + "skip first frames", + "frame load cap", + "start time", + ], + category="image/video", + inputs=[ + io.Video.Input("video"), + io.Float.Input( + "start_time", + default=0.0, + max=1e5, + min=-1e5, + step=0.001, + tooltip="Start time in seconds", + ), + io.Float.Input( + "duration", + default=0.0, + min=0.0, + step=0.001, + tooltip="Duration in seconds, or 0 for unlimited duration", + ), + io.Boolean.Input( + "strict_duration", + default=False, + tooltip="If True, when the specified duration is not possible, an error will be raised.", + ), + ], + outputs=[ + io.Video.Output(), + ], + ) + + @classmethod + def execute(cls, video: io.Video.Type, start_time: float, duration: float, strict_duration: bool) -> io.NodeOutput: + trimmed = video.as_trimmed(start_time, duration, strict_duration=strict_duration) + if trimmed is not None: + return io.NodeOutput(trimmed) + raise ValueError( + f"Failed to slice video:\nSource duration: {video.get_duration()}\nStart time: {start_time}\nTarget duration: {duration}" + ) + class VideoExtension(ComfyExtension): @override @@ -212,6 +262,7 @@ class VideoExtension(ComfyExtension): CreateVideo, GetVideoComponents, LoadVideo, + VideoSlice, ] async def comfy_entrypoint() -> VideoExtension: