From 4d360f9c9d33eb5b2a3e5d8fff8c2eca290a9960 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 4 Jun 2026 11:23:52 +0300 Subject: [PATCH 01/12] [Partner Nodes] fix (Seedance 2.0): prevent 1080p first/last-frame stretch jump (#14251) Signed-off-by: bigcat88 --- comfy_api_nodes/nodes_bytedance.py | 67 +++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index d8885a7e5..c30ddc446 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -7,6 +7,7 @@ from io import BytesIO import torch from typing_extensions import override +from comfy.utils import common_upscale from comfy_api.latest import IO, ComfyExtension, Input, Types from comfy_api_nodes.apis.bytedance import ( RECOMMENDED_PRESETS, @@ -131,6 +132,44 @@ def _prepare_seedance_image(image: Input.Image) -> Input.Image: return image +# Supported output aspect ratios, used to pre-size FLF frames to matching pixel pair to avoid the 1080p stretch jump. +SEEDANCE2_RATIO_WH = { + "16:9": (16, 9), + "4:3": (4, 3), + "1:1": (1, 1), + "3:4": (3, 4), + "9:16": (9, 16), + "21:9": (21, 9), +} +SEEDANCE2_RES_SHORT_SIDE = {"480p": 480, "720p": 720, "1080p": 1080} + + +def _seedance2_target_dims(resolution: str, ratio: str, image: torch.Tensor) -> tuple[int, int]: + """Exact supported output (width, height) for (resolution, ratio). + + The shorter side equals the resolution number (e.g. 1080p 16:9 -> 1920x1080). For ratio + "adaptive" (or any unexpected value) the ratio is derived from the image's own aspect, snapped + to the nearest supported ratio, so the output keeps the frame's orientation. + """ + short = SEEDANCE2_RES_SHORT_SIDE[resolution] + if ratio not in SEEDANCE2_RATIO_WH: + aspect = image.shape[-2] / image.shape[-3] # W / H; tensor is (B, H, W, C) + ratio = min(SEEDANCE2_RATIO_WH, key=lambda k: abs(SEEDANCE2_RATIO_WH[k][0] / SEEDANCE2_RATIO_WH[k][1] - aspect)) + rw, rh = SEEDANCE2_RATIO_WH[ratio] + if rw >= rh: # landscape or square: shorter side is the height + out_w, out_h = round(short * rw / rh), short + else: # portrait: shorter side is the width + out_w, out_h = short, round(short * rh / rw) + return out_w - out_w % 2, out_h - out_h % 2 + + +def _resize_to_exact(image: torch.Tensor, width: int, height: int) -> torch.Tensor: + """Center-crop to the target aspect and resize to exactly width x height (lanczos).""" + samples = image.movedim(-1, 1) # (B, H, W, C) -> (B, C, H, W) + resized = common_upscale(samples, width, height, "lanczos", "center") + return resized.movedim(1, -1) + + async def _resolve_reference_assets( cls: type[IO.ComfyNode], asset_ids: list[str], @@ -1790,10 +1829,28 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): if last_frame is not None and last_frame_asset_id: raise ValueError("Provide only one of last_frame or last_frame_asset_id, not both.") - if first_frame is not None: - first_frame = _prepare_seedance_image(first_frame) - if last_frame is not None: - last_frame = _prepare_seedance_image(last_frame) + request_ratio = model["ratio"] + if first_frame_asset_id or last_frame_asset_id: + if first_frame is not None: + first_frame = _prepare_seedance_image(first_frame) + if last_frame is not None: + last_frame = _prepare_seedance_image(last_frame) + else: + # The 1080p FLF stretch fix (pre-size frames to a supported pixel pair + submit ratio="adaptive") + # only applies to local image inputs we can resize. + request_ratio = "adaptive" + target_dims: tuple[int, int] | None = None + if first_frame is not None: + validate_image_aspect_ratio(first_frame, (2, 5), (5, 2), strict=False) # 0.4 to 2.5 + validate_image_dimensions(first_frame, min_width=300, min_height=300) + target_dims = _seedance2_target_dims(model["resolution"], model["ratio"], first_frame) + first_frame = _resize_to_exact(first_frame, *target_dims) + if last_frame is not None: + validate_image_aspect_ratio(last_frame, (2, 5), (5, 2), strict=False) # 0.4 to 2.5 + validate_image_dimensions(last_frame, min_width=300, min_height=300) + if target_dims is None: + target_dims = _seedance2_target_dims(model["resolution"], model["ratio"], last_frame) + last_frame = _resize_to_exact(last_frame, *target_dims) asset_ids_to_resolve = [a for a in (first_frame_asset_id, last_frame_asset_id) if a] image_assets: dict[str, str] = {} @@ -1844,7 +1901,7 @@ class ByteDance2FirstLastFrameNode(IO.ComfyNode): content=content, generate_audio=model["generate_audio"], resolution=model["resolution"], - ratio=model["ratio"], + ratio=request_ratio, duration=model["duration"], seed=seed, watermark=watermark, From 0a92dd9c096a490f861d6a173ea5f4302a73c613 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 4 Jun 2026 17:47:20 +0300 Subject: [PATCH 02/12] [Partner Nodes] feat: add Bria Green Background node (#14277) --- comfy_api_nodes/apis/bria.py | 25 ++++++ comfy_api_nodes/nodes_bria.py | 156 ++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) diff --git a/comfy_api_nodes/apis/bria.py b/comfy_api_nodes/apis/bria.py index e08a519a8..7a98428c3 100644 --- a/comfy_api_nodes/apis/bria.py +++ b/comfy_api_nodes/apis/bria.py @@ -97,3 +97,28 @@ class BriaRemoveVideoBackgroundResult(BaseModel): class BriaRemoveVideoBackgroundResponse(BaseModel): status: str = Field(...) result: BriaRemoveVideoBackgroundResult | None = Field(None) + + +class BriaVideoGreenScreenRequest(BaseModel): + video: str = Field(..., description="Publicly accessible URL of the input video.") + green_shade: str = Field( + default="broadcast_green", + description="Solid chroma-key shade applied behind the foreground " + "(broadcast_green, chroma_green, or blue_screen).", + ) + output_container_and_codec: str = Field(...) + preserve_audio: bool = Field(True) + seed: int = Field(...) + + +class BriaVideoReplaceBackgroundRequest(BaseModel): + video: str = Field(..., description="Publicly accessible URL of the input (foreground) video.") + background_url: str = Field( + ..., + description="Publicly accessible URL of the background image or video to composite behind " + "the foreground. Stretched to the foreground frame; match its aspect ratio for " + "undistorted results.", + ) + output_container_and_codec: str = Field(...) + preserve_audio: bool = Field(True) + seed: int = Field(...) diff --git a/comfy_api_nodes/nodes_bria.py b/comfy_api_nodes/nodes_bria.py index ce2c9e9be..e138fafa9 100644 --- a/comfy_api_nodes/nodes_bria.py +++ b/comfy_api_nodes/nodes_bria.py @@ -12,6 +12,8 @@ from comfy_api_nodes.apis.bria import ( BriaRemoveVideoBackgroundRequest, BriaRemoveVideoBackgroundResponse, BriaStatusResponse, + BriaVideoGreenScreenRequest, + BriaVideoReplaceBackgroundRequest, InputModerationSettings, ) from comfy_api_nodes.util import ( @@ -319,6 +321,158 @@ class BriaRemoveVideoBackground(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(response.result.video_url)) +class BriaVideoGreenScreen(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="BriaVideoGreenScreen", + display_name="Bria Video Green Screen", + category="partner/video/Bria", + description="Replace a video's background with a solid chroma-key screen using Bria.", + inputs=[ + IO.Video.Input("video"), + IO.Combo.Input( + "green_shade", + options=["broadcast_green", "chroma_green", "blue_screen"], + tooltip="Solid chroma-key shade applied behind the foreground: " + "broadcast_green (#00B140), chroma_green (#00FF00), or blue_screen (#0000FF).", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[IO.Video.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + green_shade: str, + seed: int, + ) -> IO.NodeOutput: + validate_video_duration(video, max_duration=60.0) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/bria/v2/video/edit/green_screen", method="POST"), + data=BriaVideoGreenScreenRequest( + video=await upload_video_to_comfyapi(cls, video), + green_shade=green_shade, + output_container_and_codec="mp4_h264", + seed=seed, + ), + response_model=BriaStatusResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"), + status_extractor=lambda r: r.status, + response_model=BriaRemoveVideoBackgroundResponse, + ) + return IO.NodeOutput(await download_url_to_video_output(response.result.video_url)) + + +class BriaVideoReplaceBackground(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="BriaVideoReplaceBackground", + display_name="Bria Video Replace Background", + category="partner/video/Bria", + description="Replace a video's background with a supplied image or video using Bria. " + "The output keeps the foreground's resolution and frame rate; a background with a " + "different aspect ratio is stretched to fit, so match it for undistorted results.", + inputs=[ + IO.Video.Input("video", tooltip="Foreground video whose background is replaced."), + IO.Image.Input( + "background_image", + optional=True, + tooltip="Background image to composite behind the foreground. " + "Provide either a background image or a background video, not both.", + ), + IO.Video.Input( + "background_video", + optional=True, + tooltip="Background video to composite behind the foreground. " + "Provide either a background image or a background video, not both.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[IO.Video.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + seed: int, + background_image: Input.Image | None = None, + background_video: Input.Video | None = None, + ) -> IO.NodeOutput: + if (background_image is None) == (background_video is None): + raise ValueError("Provide either a background image or a background video, not both.") + validate_video_duration(video, max_duration=60.0) + if background_video is not None: + validate_video_duration(background_video, max_duration=60.0) + background_url = await upload_video_to_comfyapi(cls, background_video, wait_label="Uploading background") + else: + background_url = await upload_image_to_comfyapi(cls, background_image, wait_label="Uploading background") + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/bria/v2/video/edit/replace_background", method="POST"), + data=BriaVideoReplaceBackgroundRequest( + video=await upload_video_to_comfyapi(cls, video), + background_url=background_url, + output_container_and_codec="mp4_h264", + seed=seed, + ), + response_model=BriaStatusResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"), + status_extractor=lambda r: r.status, + response_model=BriaRemoveVideoBackgroundResponse, + ) + return IO.NodeOutput(await download_url_to_video_output(response.result.video_url)) + + def _video_to_images_and_mask(video: Input.Video) -> tuple[Input.Image, Input.Mask]: """Decode a transparent webm (VP9 + alpha) into image frames and an alpha mask. @@ -416,6 +570,8 @@ class BriaExtension(ComfyExtension): BriaImageEditNode, BriaRemoveImageBackground, BriaRemoveVideoBackground, + BriaVideoGreenScreen, + # BriaVideoReplaceBackground, # server returns Status 500 when we pass background video BriaTransparentVideoBackground, ] From 1f9e7df52ac07ee6956500d02ba85ab09082e9f9 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 4 Jun 2026 18:24:22 +0300 Subject: [PATCH 03/12] [Partner Nodes] feat: add Krea 2 Medium Turbo model (#14280) --- comfy_api_nodes/nodes_krea.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/comfy_api_nodes/nodes_krea.py b/comfy_api_nodes/nodes_krea.py index 34369f05f..b9e6268f2 100644 --- a/comfy_api_nodes/nodes_krea.py +++ b/comfy_api_nodes/nodes_krea.py @@ -42,9 +42,11 @@ async def _upload_image_to_krea_assets(cls: type[IO.ComfyNode], image: Input.Ima _MODEL_MEDIUM = "Krea 2 Medium" +_MODEL_MEDIUM_TURBO = "Krea 2 Medium Turbo" _MODEL_LARGE = "Krea 2 Large" _MODEL_ENDPOINTS: dict[str, str] = { _MODEL_MEDIUM: "/proxy/krea/generate/image/krea/krea-2/medium", + _MODEL_MEDIUM_TURBO: "/proxy/krea/generate/image/krea/krea-2/medium-turbo", _MODEL_LARGE: "/proxy/krea/generate/image/krea/krea-2/large", } @@ -57,7 +59,7 @@ _UUID_RE = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F def _krea_model_inputs() -> list: - """Nested inputs shared by both Krea 2 Medium and Large under the DynamicCombo.""" + """Nested inputs shared by Krea 2 Medium, Medium Turbo and Large under the DynamicCombo.""" return [ IO.Combo.Input( "aspect_ratio", @@ -123,6 +125,7 @@ class Krea2ImageNode(IO.ComfyNode): "model", options=[ IO.DynamicCombo.Option(_MODEL_MEDIUM, _krea_model_inputs()), + IO.DynamicCombo.Option(_MODEL_MEDIUM_TURBO, _krea_model_inputs()), IO.DynamicCombo.Option(_MODEL_LARGE, _krea_model_inputs()), ], tooltip="Krea 2 Medium is best for expressive illustrations; " @@ -151,14 +154,15 @@ class Krea2ImageNode(IO.ComfyNode): ), expr=""" ( - $isLarge := widgets.model = "krea 2 large"; + $rates := { + "krea 2 medium turbo": {"text": 0.015, "style": 0.0175, "moodboard": 0.02}, + "krea 2 medium": {"text": 0.03, "style": 0.035, "moodboard": 0.04}, + "krea 2 large": {"text": 0.06, "style": 0.065, "moodboard": 0.07} + }; + $r := $lookup($rates, widgets.model); $hasMoodboard := $length($lookup(widgets, "model.moodboard_id")) > 0; $hasStyle := $lookup(inputs, "model.style_reference").connected; - $usd := $hasMoodboard - ? ($isLarge ? 0.07 : 0.04) - : ($hasStyle - ? ($isLarge ? 0.065 : 0.035) - : ($isLarge ? 0.06 : 0.03)); + $usd := $hasMoodboard ? $r.moodboard : ($hasStyle ? $r.style : $r.text); {"type":"usd","usd": $usd} ) """, From 27b5c423a6ff44a0b90f879b05d58b9a17d86528 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Thu, 4 Jun 2026 19:32:15 +0300 Subject: [PATCH 04/12] [Partner Nodes] feat: add seed input to Flux Erase node (#14283) Signed-off-by: bigcat88 --- comfy_api_nodes/apis/bfl.py | 1 + comfy_api_nodes/nodes_bfl.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/comfy_api_nodes/apis/bfl.py b/comfy_api_nodes/apis/bfl.py index 2ad651122..4c950da84 100644 --- a/comfy_api_nodes/apis/bfl.py +++ b/comfy_api_nodes/apis/bfl.py @@ -43,6 +43,7 @@ class BFLFluxEraseRequest(BaseModel): "white (255) marks areas to remove, black (0) marks areas to preserve.", ) dilate_pixels: int = Field(10) + seed: int | None = Field(None) output_format: str = Field("png") diff --git a/comfy_api_nodes/nodes_bfl.py b/comfy_api_nodes/nodes_bfl.py index 79961ff9d..259c54ef9 100644 --- a/comfy_api_nodes/nodes_bfl.py +++ b/comfy_api_nodes/nodes_bfl.py @@ -534,6 +534,15 @@ class FluxEraseNode(IO.ComfyNode): max=25, tooltip="Expands the mask boundaries to ensure clean coverage of the object's edges.", ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + control_after_generate=True, + tooltip="The random seed used for creating the noise.", + optional=True, + ), ], outputs=[IO.Image.Output()], hidden=[ @@ -553,6 +562,7 @@ class FluxEraseNode(IO.ComfyNode): image: Input.Image, mask: Input.Image, dilate_pixels: int = 10, + seed: int = 0, ) -> IO.NodeOutput: validate_image_dimensions(image, min_width=256, min_height=256) mask = resize_mask_to_image(mask, image) @@ -565,6 +575,7 @@ class FluxEraseNode(IO.ComfyNode): image=tensor_to_base64_string(image[:, :, :, :3]), # make sure image will have alpha channel removed mask=mask, dilate_pixels=dilate_pixels, + seed=seed, ), ) From 6ecca5f468ac5a24c450d601867de4d366f701fc Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Fri, 5 Jun 2026 00:40:44 +0800 Subject: [PATCH 05/12] chore: update workflow templates to v0.9.98 (#14284) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 79d38fc06..6f88daafa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ comfyui-frontend-package==1.44.19 -comfyui-workflow-templates==0.9.94 +comfyui-workflow-templates==0.9.98 comfyui-embedded-docs==0.5.2 torch torchsde From 4e1f7cb1db1c26bb9ee61cf1875776517e2abae8 Mon Sep 17 00:00:00 2001 From: Comfy Org PR Bot Date: Fri, 5 Jun 2026 03:41:33 +0900 Subject: [PATCH 06/12] Bump comfyui-frontend-package to 1.45.15 (#14265) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6f88daafa..8b64c60a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.44.19 +comfyui-frontend-package==1.45.15 comfyui-workflow-templates==0.9.98 comfyui-embedded-docs==0.5.2 torch From 514bb8ba21626da3126847321513f9863c88ce2c Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 4 Jun 2026 19:20:22 -0700 Subject: [PATCH 07/12] Fix ideogram if model dtype gets set to fp8. (#14291) --- comfy/ldm/ideogram4/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comfy/ldm/ideogram4/model.py b/comfy/ldm/ideogram4/model.py index 3b02a243a..b86c65bf0 100644 --- a/comfy/ldm/ideogram4/model.py +++ b/comfy/ldm/ideogram4/model.py @@ -174,7 +174,7 @@ class Ideogram4Transformer(nn.Module): llm = self.llm_cond_proj(llm) * text_mask h[:, :L_text] = h[:, :L_text] + llm - h = h + self.embed_image_indicator((indicator == OUTPUT_IMAGE_INDICATOR).to(torch.long)) + h = h + self.embed_image_indicator((indicator == OUTPUT_IMAGE_INDICATOR).to(torch.long), out_dtype=h.dtype) # Qwen3-VL interleaved MRoPE; position_ids (B, L, 3) -> (3, L) (same across batch). freqs_cis = precompute_freqs_cis( @@ -235,7 +235,7 @@ class Ideogram4Transformer2DModel(Ideogram4Transformer): def _run_conditional(self, x_chunk, context_chunk, attn_mask_chunk, t_chunk, gh, gw, transformer_options): B = x_chunk.shape[0] device = x_chunk.device - img_tokens = self._img_to_tokens(x_chunk).to(self.dtype) + img_tokens = self._img_to_tokens(x_chunk) L_img = img_tokens.shape[1] L_text = context_chunk.shape[1] L = L_text + L_img @@ -268,7 +268,7 @@ class Ideogram4Transformer2DModel(Ideogram4Transformer): def _run_image_only(self, x_chunk, t_chunk, gh, gw, transformer_options): B = x_chunk.shape[0] device = x_chunk.device - img_tokens = self._img_to_tokens(x_chunk).to(self.dtype) + img_tokens = self._img_to_tokens(x_chunk) L_img = img_tokens.shape[1] position_ids = self._image_position_ids(gh, gw, device).unsqueeze(0).expand(B, L_img, 3) From ab0d8a9203fbad76b0ccca723bbf9ba0c257ddfe Mon Sep 17 00:00:00 2001 From: Alexis Rolland Date: Thu, 4 Jun 2026 19:29:41 -0700 Subject: [PATCH 08/12] Consolidate audio nodes into SaveAudioAdvanced node (CORE-202) (#13871) --- comfy_api/latest/_ui.py | 2 +- comfy_extras/nodes_audio.py | 58 +++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/comfy_api/latest/_ui.py b/comfy_api/latest/_ui.py index 6592f6b1d..b48713d41 100644 --- a/comfy_api/latest/_ui.py +++ b/comfy_api/latest/_ui.py @@ -285,7 +285,7 @@ class AudioSaveHelper: results = [] for batch_number, waveform in enumerate(audio["waveform"].cpu()): filename_with_batch_num = filename.replace("%batch_num%", str(batch_number)) - file = f"{filename_with_batch_num}_{counter:05}_.{format}" + file = f"{filename_with_batch_num}_{counter:05}.{format}" output_path = os.path.join(full_output_folder, file) # Use original sample rate initially diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index ff078f74c..1dc97ecd7 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -158,7 +158,7 @@ class SaveAudio(IO.ComfyNode): return IO.Schema( node_id="SaveAudio", search_aliases=["export flac"], - display_name="Save Audio (FLAC)", + display_name="Save Audio (FLAC) (Deprecated)", category="audio", essentials_category="Audio", inputs=[ @@ -167,6 +167,7 @@ class SaveAudio(IO.ComfyNode): ], hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], is_output_node=True, + is_deprecated=True, ) @classmethod @@ -186,7 +187,7 @@ class SaveAudioMP3(IO.ComfyNode): return IO.Schema( node_id="SaveAudioMP3", search_aliases=["export mp3"], - display_name="Save Audio (MP3)", + display_name="Save Audio (MP3) (Deprecated)", category="audio", essentials_category="Audio", inputs=[ @@ -196,6 +197,7 @@ class SaveAudioMP3(IO.ComfyNode): ], hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], is_output_node=True, + is_deprecated=True, ) @classmethod @@ -217,7 +219,7 @@ class SaveAudioOpus(IO.ComfyNode): return IO.Schema( node_id="SaveAudioOpus", search_aliases=["export opus"], - display_name="Save Audio (Opus)", + display_name="Save Audio (Opus) (Deprecated)", category="audio", inputs=[ IO.Audio.Input("audio"), @@ -226,6 +228,7 @@ class SaveAudioOpus(IO.ComfyNode): ], hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], is_output_node=True, + is_deprecated=True, ) @classmethod @@ -241,6 +244,54 @@ class SaveAudioOpus(IO.ComfyNode): save_opus = execute # TODO: remove +class SaveAudioAdvanced(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="SaveAudioAdvanced", + search_aliases=["save audio", "export audio", "output audio", "write audio", "flac", "mp3", "opus"], + display_name="Save Audio (Advanced)", + description="Saves the input audio to your ComfyUI output directory.", + category="audio", + inputs=[ + IO.Audio.Input("audio", tooltip="The audio to save."), + IO.String.Input( + "filename_prefix", + default="audio/ComfyUI", + tooltip=( + "The prefix for the file to save. May include formatting tokens " + "such as %date:yyyy-MM-dd%." + ), + ), + IO.DynamicCombo.Input( + "format", + options=[ + IO.DynamicCombo.Option("flac", []), + IO.DynamicCombo.Option("mp3", [ + IO.Combo.Input("quality", options=["V0", "128k", "320k"], default="V0"), + ]), + IO.DynamicCombo.Option("opus", [ + IO.Combo.Input("quality", options=["64k", "96k", "128k", "192k", "320k"], default="128k"), + ]), + ], + tooltip="The file format in which to save the audio.", + ), + ], + hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], + is_output_node=True, + ) + + @classmethod + def execute(cls, audio, filename_prefix: str, format: dict) -> IO.NodeOutput: + file_format = format.get("format", None) + quality = format.get("quality", None) + if quality: + ui=UI.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=file_format, quality=quality) + else: + ui=UI.AudioSaveHelper.get_save_audio_ui(audio, filename_prefix=filename_prefix, cls=cls, format=file_format) + return IO.NodeOutput(ui=ui) + + class PreviewAudio(IO.ComfyNode): @classmethod def define_schema(cls): @@ -822,6 +873,7 @@ class AudioExtension(ComfyExtension): SaveAudio, SaveAudioMP3, SaveAudioOpus, + SaveAudioAdvanced, LoadAudio, PreviewAudio, ConditioningStableAudio, From 5aa71b9bc28809a16596bb9fa3d0a6300d8e3f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Fri, 5 Jun 2026 10:04:10 +0300 Subject: [PATCH 09/12] Enable cfg1 optimization for DualModelGuider with CFGGuider (#14290) * Enable cfg1 optimization for DualModelGuider * Fix CFG Override tooltip --- comfy_extras/nodes_custom_sampler.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index 2f4ff1f70..3e97084a4 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -933,9 +933,10 @@ class Guider_DualModel(comfy.samplers.CFGGuider): def predict_noise(self, x, timestep, model_options={}, seed=None): positive = self.conds.get("positive", None) - if self.uncond_inner is None: # cfg == 1 or no negative -> single model, cond only - return comfy.samplers.calc_cond_batch(self.inner_model, [positive], x, timestep, model_options)[0] cond = comfy.samplers.calc_cond_batch(self.inner_model, [positive], x, timestep, model_options)[0] + # uncond model not loaded (base cfg==1/no negative), or cfg driven to 1.0 this step -> single model, cond only + if self.uncond_inner is None or (math.isclose(self.cfg, 1.0) and not model_options.get("disable_cfg1_optimization", False)): + return cond uncond_model_options = model_options if "multigpu_clones" in model_options: # TODO: support multigpu instead of just running uncond on a single GPU @@ -1140,7 +1141,7 @@ class CFGOverride(io.ComfyNode): return io.Schema( node_id="CFGOverride", display_name="CFG Override", - description="Override cfg to a fixed value over a [start, end] percent slice of the steps. " + description="Override cfg to a fixed value over a [start, end] percent (sigma) range. " "With multiple overrides, the one nearest the sampler wins on overlap.", category="sampling/custom_sampling", inputs=[ From 410df2725336dc0e34214f6a127c761a1879cca8 Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Sat, 6 Jun 2026 01:39:35 +1000 Subject: [PATCH 10/12] Fix interoperation with external source of pinned memory pressure (#14252) * mm: split off registration helper to doer and headroom calc * pinned_memory: implement registration comfy side Move away from Aimdo buffer registrations which seem fraught with danger and do it comfy side. Just start with the basic move. * pinned_memory: do registrations as portable memory * pinned_memory: discard async errors on registration fail Like the good ol days. * pinned_memory: implement abs shortfall retry If pinned registration happens to fail despite the previous budget ensures, consider the allocation shortfall, ensure it again, and try again. This allows comfy pins to interoperate with other software that might be doing substantive pinning. --- comfy/model_management.py | 6 ++++-- comfy/pinned_memory.py | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index dfd58bf1b..8e786c0a5 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -651,8 +651,7 @@ def ensure_pin_budget(size, evict_active=False): to_free = shortfall + PIN_PRESSURE_HYSTERESIS return free_pins(to_free, evict_active=evict_active) >= shortfall -def ensure_pin_registerable(size, evict_active=True): - shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY +def free_registrations(shortfall, evict_active=True): if MAX_PINNED_MEMORY <= 0: return False if shortfall <= 0: @@ -674,6 +673,9 @@ def ensure_pin_registerable(size, evict_active=True): return True return shortfall <= REGISTERABLE_PIN_HYSTERESIS +def ensure_pin_registerable(size, evict_active=True): + return free_registrations(TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY, evict_active=evict_active) + class LoadedModel: def __init__(self, model: ModelPatcher): self._set_model(model) diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py index ffe12e0dc..cb77c517a 100644 --- a/comfy/pinned_memory.py +++ b/comfy/pinned_memory.py @@ -89,13 +89,26 @@ def pin_memory(module, subset="weights", size=None): not comfy.model_management.ensure_pin_registerable(registerable_size)): return _steal_pin(module, stack, buckets, size, priority) + extended = False try: - hostbuf.extend(size=size) + hostbuf.extend(size=size, register=False) + extended = True + pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] + pin.untyped_storage()._comfy_hostbuf = hostbuf + if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0: + comfy.model_management.discard_cuda_async_error() + comfy.model_management.free_registrations(size) + if torch.cuda.cudart().cudaHostRegister(pin.data_ptr(), size, 1) != 0: + comfy.model_management.discard_cuda_async_error() + del pin + hostbuf.truncate(offset, do_unregister=False) + return _steal_pin(module, stack, buckets, size, priority) except RuntimeError: + if extended: + hostbuf.truncate(offset, do_unregister=False) return _steal_pin(module, stack, buckets, size, priority) - module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] - module._pin.untyped_storage()._comfy_hostbuf = hostbuf + module._pin = pin stack.append((module, offset)) module._pin_registered = True module._pin_stack_index = len(stack) - 1 From ec6aa979a627ecbfb7847dad85a4a26a0a3a424a Mon Sep 17 00:00:00 2001 From: rattus <46076784+rattus128@users.noreply.github.com> Date: Sat, 6 Jun 2026 01:40:03 +1000 Subject: [PATCH 11/12] aimdo 049 (#14300) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8b64c60a9..613553d8f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,7 @@ SQLAlchemy>=2.0.0 filelock av>=16.0.0 comfy-kitchen==0.2.10 -comfy-aimdo==0.4.8 +comfy-aimdo==0.4.9 requests simpleeval>=1.0.0 blake3 From 4a00126e9cd3ea59e33ff0c75e628cb7585ab164 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:31:55 +0300 Subject: [PATCH 12/12] [Partner Nodes] feat: add new Gemini text node (#14299) --- comfy_api_nodes/apis/gemini.py | 13 +- comfy_api_nodes/nodes_gemini.py | 402 +++++++++++++++++++++++++++----- 2 files changed, 356 insertions(+), 59 deletions(-) diff --git a/comfy_api_nodes/apis/gemini.py b/comfy_api_nodes/apis/gemini.py index 22879fe18..caaba8f36 100644 --- a/comfy_api_nodes/apis/gemini.py +++ b/comfy_api_nodes/apis/gemini.py @@ -108,13 +108,19 @@ class GeminiVideoMetadata(BaseModel): startOffset: GeminiOffset | None = Field(None) +class GeminiThinkingConfig(BaseModel): + includeThoughts: bool | None = Field(None) + thinkingLevel: str = Field(...) + + class GeminiGenerationConfig(BaseModel): - maxOutputTokens: int | None = Field(None, ge=16, le=8192) + maxOutputTokens: int | None = Field(None, ge=16, le=65536) seed: int | None = Field(None) stopSequences: list[str] | None = Field(None) temperature: float | None = Field(None, ge=0.0, le=2.0) topK: int | None = Field(None, ge=1) topP: float | None = Field(None, ge=0.0, le=1.0) + thinkingConfig: GeminiThinkingConfig | None = Field(None) class GeminiImageOutputOptions(BaseModel): @@ -128,11 +134,6 @@ class GeminiImageConfig(BaseModel): imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions) -class GeminiThinkingConfig(BaseModel): - includeThoughts: bool | None = Field(None) - thinkingLevel: str = Field(...) - - class GeminiImageGenerationConfig(GeminiGenerationConfig): responseModalities: list[str] | None = Field(None) imageConfig: GeminiImageConfig | None = Field(None) diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py index e75ef3835..2699d2792 100644 --- a/comfy_api_nodes/nodes_gemini.py +++ b/comfy_api_nodes/nodes_gemini.py @@ -8,7 +8,7 @@ import os from enum import Enum from fnmatch import fnmatch from io import BytesIO -from typing import Literal +from typing import Any, Literal import torch from typing_extensions import override @@ -19,6 +19,7 @@ from comfy_api_nodes.apis.gemini import ( GeminiContent, GeminiFileData, GeminiGenerateContentRequest, + GeminiGenerationConfig, GeminiGenerateContentResponse, GeminiImageConfig, GeminiImageGenerateContentRequest, @@ -40,13 +41,18 @@ from comfy_api_nodes.util import ( get_number_of_images, sync_op, tensor_to_base64_string, + upload_audio_to_comfyapi, + upload_image_to_comfyapi, upload_images_to_comfyapi, + upload_video_to_comfyapi, validate_string, video_to_base64_string, ) GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini" GEMINI_MAX_INPUT_FILE_SIZE = 20 * 1024 * 1024 # 20 MB +GEMINI_URL_INPUT_BUDGET = 10 +GEMINI_MAX_INLINE_BYTES = 18 * 1024 * 1024 GEMINI_IMAGE_SYS_PROMPT = ( "You are an expert image-generation engine. You must ALWAYS produce an image.\n" "Interpret all user input—regardless of " @@ -285,6 +291,140 @@ def calculate_tokens_price(response: GeminiGenerateContentResponse) -> float | N return final_price / 1_000_000.0 +def create_video_parts(video_input: Input.Video) -> list[GeminiPart]: + """Convert a single video input to Gemini API compatible parts (inline MP4/H.264).""" + base_64_string = video_to_base64_string( + video_input, container_format=Types.VideoContainer.MP4, codec=Types.VideoCodec.H264 + ) + return [ + GeminiPart( + inlineData=GeminiInlineData( + mimeType=GeminiMimeType.video_mp4, + data=base_64_string, + ) + ) + ] + + +def create_audio_parts(audio_input: Input.Audio) -> list[GeminiPart]: + """Convert an audio input to Gemini API compatible parts (one inline MP3 part per batch item).""" + audio_parts: list[GeminiPart] = [] + for batch_index in range(audio_input["waveform"].shape[0]): + # Recreate an IO.AUDIO object for the given batch dimension index + audio_at_index = Input.Audio( + waveform=audio_input["waveform"][batch_index].unsqueeze(0), + sample_rate=audio_input["sample_rate"], + ) + # Convert to MP3 format for compatibility with Gemini API + audio_bytes = audio_to_base64_string( + audio_at_index, + container_format="mp3", + codec_name="libmp3lame", + ) + audio_parts.append( + GeminiPart( + inlineData=GeminiInlineData( + mimeType=GeminiMimeType.audio_mp3, + data=audio_bytes, + ) + ) + ) + return audio_parts + + +def _flatten_images(images: list[Input.Image]) -> list[torch.Tensor]: + """Expand any batched image tensors into individual (H, W, C) frames, preserving order.""" + frames: list[torch.Tensor] = [] + for img in images: + if len(img.shape) == 4: + frames.extend(img[i] for i in range(img.shape[0])) + else: + frames.append(img) + return frames + + +def _flatten_audio(audios: list[Input.Audio]) -> list[Input.Audio]: + """Expand any batched audio inputs into individual single-clip audio inputs, preserving order.""" + clips: list[Input.Audio] = [] + for audio in audios: + waveform = audio["waveform"] + for i in range(waveform.shape[0]): + clips.append(Input.Audio(waveform=waveform[i].unsqueeze(0), sample_rate=audio["sample_rate"])) + return clips + + +async def _media_url_part(cls: type[IO.ComfyNode], kind: str, payload: Any) -> GeminiPart: + """Upload a single media unit to ComfyAPI storage and return a fileData (URL) part.""" + if kind == "image": + url = await upload_image_to_comfyapi(cls, payload, mime_type="image/png", wait_label="Uploading image") + return GeminiPart(fileData=GeminiFileData(mimeType=GeminiMimeType.image_png, fileUri=url)) + if kind == "audio": + url = await upload_audio_to_comfyapi( + cls, payload, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mp3" + ) + return GeminiPart(fileData=GeminiFileData(mimeType=GeminiMimeType.audio_mp3, fileUri=url)) + url = await upload_video_to_comfyapi(cls, payload, wait_label="Uploading video") + return GeminiPart(fileData=GeminiFileData(mimeType=GeminiMimeType.video_mp4, fileUri=url)) + + +def _media_inline_part(kind: str, payload: Any) -> tuple[GeminiPart, int]: + """Encode a single media unit as an inline base64 part; returns (part, base64_length).""" + if kind == "image": + data = tensor_to_base64_string(payload, mime_type="image/webp") + mime = GeminiMimeType.image_webp + elif kind == "audio": + data = audio_to_base64_string(payload, container_format="mp3", codec_name="libmp3lame") + mime = GeminiMimeType.audio_mp3 + else: + data = video_to_base64_string( + payload, container_format=Types.VideoContainer.MP4, codec=Types.VideoCodec.H264 + ) + mime = GeminiMimeType.video_mp4 + return GeminiPart(inlineData=GeminiInlineData(mimeType=mime, data=data)), len(data) + + +async def build_gemini_media_parts( + cls: type[IO.ComfyNode], + images: list[Input.Image], + audios: list[Input.Audio], + videos: list[Input.Video], + *, + url_budget: int = GEMINI_URL_INPUT_BUDGET, + max_inline_bytes: int = GEMINI_MAX_INLINE_BYTES, +) -> list[GeminiPart]: + """Build Gemini parts for multimodal inputs (images, audio, video). + + fileData URLs are preferred for every media type: the upload is fetched directly by the + model, keeping the request body tiny regardless of media size. The URL budget is shared + across all media and assigned largest-first (video, then audio, then images), so that if it + is ever exhausted the inline-base64 overflow is limited to the smallest items. Total inline + payload is capped by `max_inline_bytes`. + """ + units: list[tuple[str, Any]] = ( + [("video", v) for v in videos] + + [("audio", a) for a in _flatten_audio(audios)] + + [("image", f) for f in _flatten_images(images)] + ) + + parts: list[GeminiPart] = [] + url_used = 0 + inline_bytes = 0 + for kind, payload in units: + if url_used < url_budget: + parts.append(await _media_url_part(cls, kind, payload)) + url_used += 1 + continue + part, nbytes = _media_inline_part(kind, payload) + inline_bytes += nbytes + if inline_bytes > max_inline_bytes: + raise ValueError( + f"Too much media to send inline (over {max_inline_bytes // (1024 * 1024)}MB after the first " + f"{url_budget} inputs are uploaded as URLs). Reduce the number or size of attached media." + ) + parts.append(part) + return parts + + class GeminiNode(IO.ComfyNode): """ Node to generate text responses from a Gemini model. @@ -407,58 +547,9 @@ class GeminiNode(IO.ComfyNode): ) """, ), + is_deprecated=True, ) - @classmethod - def create_video_parts(cls, video_input: Input.Video) -> list[GeminiPart]: - """Convert video input to Gemini API compatible parts.""" - - base_64_string = video_to_base64_string( - video_input, container_format=Types.VideoContainer.MP4, codec=Types.VideoCodec.H264 - ) - return [ - GeminiPart( - inlineData=GeminiInlineData( - mimeType=GeminiMimeType.video_mp4, - data=base_64_string, - ) - ) - ] - - @classmethod - def create_audio_parts(cls, audio_input: Input.Audio) -> list[GeminiPart]: - """ - Convert audio input to Gemini API compatible parts. - - Args: - audio_input: Audio input from ComfyUI, containing waveform tensor and sample rate. - - Returns: - List of GeminiPart objects containing the encoded audio. - """ - audio_parts: list[GeminiPart] = [] - for batch_index in range(audio_input["waveform"].shape[0]): - # Recreate an IO.AUDIO object for the given batch dimension index - audio_at_index = Input.Audio( - waveform=audio_input["waveform"][batch_index].unsqueeze(0), - sample_rate=audio_input["sample_rate"], - ) - # Convert to MP3 format for compatibility with Gemini API - audio_bytes = audio_to_base64_string( - audio_at_index, - container_format="mp3", - codec_name="libmp3lame", - ) - audio_parts.append( - GeminiPart( - inlineData=GeminiInlineData( - mimeType=GeminiMimeType.audio_mp3, - data=audio_bytes, - ) - ) - ) - return audio_parts - @classmethod async def execute( cls, @@ -482,9 +573,9 @@ class GeminiNode(IO.ComfyNode): if images is not None: parts.extend(await create_image_parts(cls, images)) if audio is not None: - parts.extend(cls.create_audio_parts(audio)) + parts.extend(create_audio_parts(audio)) if video is not None: - parts.extend(cls.create_video_parts(video)) + parts.extend(create_video_parts(video)) if files is not None: parts.extend(files) @@ -512,6 +603,210 @@ class GeminiNode(IO.ComfyNode): return IO.NodeOutput(output_text or "Empty response from Gemini model...") +GEMINI_V2_MODELS: dict[str, str] = { + "Gemini 3.1 Pro": "gemini-3.1-pro-preview", + "Gemini 3.1 Flash-Lite": "gemini-3.1-flash-lite-preview", +} + + +def _gemini_text_model_inputs(thinking_default: str) -> list[Input]: + """Per-model inputs revealed by the model DynamicCombo (shared media + sampling controls).""" + return [ + IO.Autogrow.Input( + "images", + template=IO.Autogrow.TemplateNames( + IO.Image.Input("image"), + names=[f"image_{i}" for i in range(1, 17)], + min=0, + ), + tooltip="Optional image(s) to use as context for the model. Up to 16 images.", + ), + IO.Autogrow.Input( + "audio", + template=IO.Autogrow.TemplateNames( + IO.Audio.Input("audio"), + names=["audio_1"], + min=0, + ), + tooltip="Optional audio clip to use as context for the model.", + ), + IO.Autogrow.Input( + "video", + template=IO.Autogrow.TemplateNames( + IO.Video.Input("video"), + names=["video_1"], + min=0, + ), + tooltip="Optional video clip to use as context for the model.", + ), + IO.Custom("GEMINI_INPUT_FILES").Input( + "files", + optional=True, + tooltip="Optional file(s) to use as context for the model. " + "Accepts inputs from the Gemini Input Files node.", + ), + IO.Combo.Input( + "thinking_level", + options=["LOW", "HIGH"], + default=thinking_default, + tooltip="How hard the model reasons internally before answering. " + "HIGH improves quality on difficult tasks but costs more (thinking) tokens and is slower.", + ), + IO.Float.Input( + "temperature", + default=1.0, + min=0.0, + max=2.0, + step=0.01, + tooltip="Controls randomness. Lower is more focused/deterministic, higher is more creative.", + advanced=True, + ), + IO.Float.Input( + "top_p", + default=0.95, + min=0.0, + max=1.0, + step=0.01, + tooltip="Nucleus sampling: sample from the smallest token set whose cumulative probability reaches top_p.", + advanced=True, + ), + IO.Int.Input( + "max_output_tokens", + default=32768, + min=16, + max=65536, + tooltip="Maximum tokens to generate, including the model's internal thinking. " + "With thinking_level HIGH, a low value can leave no room for the answer; raise this if " + "responses come back empty or truncated. The model stops early when finished, so a higher " + "cap costs nothing extra for short replies.", + advanced=True, + ), + ] + + +class GeminiNodeV2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="GeminiNodeV2", + display_name="Google Gemini", + category="partner/text/Gemini", + essentials_category="Text Generation", + description="Generate text responses with Google's Gemini models. Provide a text prompt and, " + "optionally, one or more images, audio clips, videos, or files as multimodal context.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Text input to the model. Include detailed instructions, questions, or context.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option("Gemini 3.1 Pro", _gemini_text_model_inputs("HIGH")), + IO.DynamicCombo.Option("Gemini 3.1 Flash-Lite", _gemini_text_model_inputs("LOW")), + ], + tooltip="The Gemini model used to generate the response.", + ), + IO.Int.Input( + "seed", + default=42, + min=0, + max=2147483647, + control_after_generate=True, + tooltip="Seed for sampling. Set to 0 for a random seed. Deterministic output isn't guaranteed.", + ), + IO.String.Input( + "system_prompt", + multiline=True, + default="", + optional=True, + advanced=True, + tooltip="Foundational instructions that dictate the model's behavior.", + ), + ], + outputs=[ + IO.String.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $m := widgets.model; + $contains($m, "lite") ? { + "type": "list_usd", + "usd": [0.00025, 0.0015], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } : { + "type": "list_usd", + "usd": [0.002, 0.012], + "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: dict, + seed: int, + system_prompt: str = "", + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + model_id = GEMINI_V2_MODELS[model["model"]] + + parts: list[GeminiPart] = [GeminiPart(text=prompt)] + images = [t for t in (model.get("images") or {}).values() if t is not None] + audios = [a for a in (model.get("audio") or {}).values() if a is not None] + videos = [v for v in (model.get("video") or {}).values() if v is not None] + if images or audios or videos: + parts.extend(await build_gemini_media_parts(cls, images, audios, videos)) + files = model.get("files") + if files is not None: + parts.extend(files) + + gemini_system_prompt = None + if system_prompt: + gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None) + + response = await sync_op( + cls, + endpoint=ApiEndpoint(path=f"{GEMINI_BASE_ENDPOINT}/{model_id}", method="POST"), + data=GeminiGenerateContentRequest( + contents=[ + GeminiContent( + role=GeminiRole.user, + parts=parts, + ) + ], + generationConfig=GeminiGenerationConfig( + temperature=model["temperature"], + topP=model["top_p"], + maxOutputTokens=model["max_output_tokens"], + seed=seed if seed > 0 else None, + thinkingConfig=GeminiThinkingConfig(thinkingLevel=model["thinking_level"]), + ), + systemInstruction=gemini_system_prompt, + ), + response_model=GeminiGenerateContentResponse, + price_extractor=calculate_tokens_price, + ) + + output_text = get_text_from_response(response) + return IO.NodeOutput(output_text or "Empty response from Gemini model...") + + class GeminiInputFiles(IO.ComfyNode): """ Loads and formats input files for use with the Gemini API. @@ -1222,6 +1517,7 @@ class GeminiExtension(ComfyExtension): async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ GeminiNode, + GeminiNodeV2, GeminiImage, GeminiImage2, GeminiNanoBanana2,