From 3d082c32065e0653490b9a4ae45dd33b6c7bffb7 Mon Sep 17 00:00:00 2001 From: Christian Byrne Date: Mon, 15 Dec 2025 20:35:37 -0800 Subject: [PATCH 01/11] bump comfyui-frontend-package to 1.34.9 (patch) (#11342) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 117260515..9b9e61683 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.34.8 +comfyui-frontend-package==1.34.9 comfyui-workflow-templates==0.7.59 comfyui-embedded-docs==0.3.1 torch From 645ee1881e739b3013eeb26dbb335280bfbf443e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 15 Dec 2025 20:38:12 -0800 Subject: [PATCH 02/11] Inpainting for z image fun control. Use the ZImageFunControlnet node. (#11346) image -> control image ex: pose inpaint_image -> image for inpainting mask -> inpaint mask --- comfy_extras/nodes_model_patch.py | 77 ++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 16 deletions(-) diff --git a/comfy_extras/nodes_model_patch.py b/comfy_extras/nodes_model_patch.py index fdd5d0d3f..2a0cfcf18 100644 --- a/comfy_extras/nodes_model_patch.py +++ b/comfy_extras/nodes_model_patch.py @@ -313,22 +313,46 @@ class ZImageControlPatch: self.inpaint_image = inpaint_image self.mask = mask self.strength = strength - self.encoded_image = self.encode_latent_cond(image) - self.encoded_image_size = (image.shape[1], image.shape[2]) + self.is_inpaint = self.model_patch.model.additional_in_dim > 0 + + skip_encoding = False + if self.image is not None and self.inpaint_image is not None: + if self.image.shape != self.inpaint_image.shape: + skip_encoding = True + + if skip_encoding: + self.encoded_image = None + else: + self.encoded_image = self.encode_latent_cond(self.image, self.inpaint_image) + if self.image is None: + self.encoded_image_size = (self.inpaint_image.shape[1], self.inpaint_image.shape[2]) + else: + self.encoded_image_size = (self.image.shape[1], self.image.shape[2]) self.temp_data = None - def encode_latent_cond(self, control_image, inpaint_image=None): - latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image)) - if self.model_patch.model.additional_in_dim > 0: - if self.mask is None: - mask_ = torch.zeros_like(latent_image)[:, :1] - else: - mask_ = comfy.utils.common_upscale(self.mask.mean(dim=1, keepdim=True), latent_image.shape[-1], latent_image.shape[-2], "bilinear", "none") + def encode_latent_cond(self, control_image=None, inpaint_image=None): + latent_image = None + if control_image is not None: + latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image)) + + if self.is_inpaint: if inpaint_image is None: inpaint_image = torch.ones_like(control_image) * 0.5 + if self.mask is not None: + mask_inpaint = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image.shape[-2], inpaint_image.shape[-3], "bilinear", "center") + inpaint_image = ((inpaint_image - 0.5) * mask_inpaint.movedim(1, -1).round()) + 0.5 + inpaint_image_latent = comfy.latent_formats.Flux().process_in(self.vae.encode(inpaint_image)) + if self.mask is None: + mask_ = torch.zeros_like(inpaint_image_latent)[:, :1] + else: + mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image_latent.shape[-1], 
inpaint_image_latent.shape[-2], "nearest", "center") + + if latent_image is None: + latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(torch.ones_like(inpaint_image) * 0.5)) + return torch.cat([latent_image, mask_, inpaint_image_latent], dim=1) else: return latent_image @@ -344,13 +368,18 @@ class ZImageControlPatch: block_type = kwargs.get("block_type", "") spacial_compression = self.vae.spacial_compression_encode() if self.encoded_image is None or self.encoded_image_size != (x.shape[-2] * spacial_compression, x.shape[-1] * spacial_compression): - image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center") + image_scaled = None + if self.image is not None: + image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1) + self.encoded_image_size = (image_scaled.shape[-3], image_scaled.shape[-2]) + inpaint_scaled = None if self.inpaint_image is not None: inpaint_scaled = comfy.utils.common_upscale(self.inpaint_image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1) + self.encoded_image_size = (inpaint_scaled.shape[-3], inpaint_scaled.shape[-2]) + loaded_models = comfy.model_management.loaded_models(only_currently_used=True) - self.encoded_image = self.encode_latent_cond(image_scaled.movedim(1, -1), inpaint_scaled) - self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1]) + self.encoded_image = self.encode_latent_cond(image_scaled, inpaint_scaled) comfy.model_management.load_models_gpu(loaded_models) cnet_blocks = self.model_patch.model.n_control_layers @@ -391,7 +420,8 @@ class ZImageControlPatch: def to(self, device_or_dtype): if isinstance(device_or_dtype, torch.device): - self.encoded_image = self.encoded_image.to(device_or_dtype) + if self.encoded_image is not None: + self.encoded_image = self.encoded_image.to(device_or_dtype) self.temp_data = None return self @@ -414,9 +444,12 @@ class QwenImageDiffsynthControlnet: CATEGORY = "advanced/loaders/qwen" - def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=None): + def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None): model_patched = model.clone() - image = image[:, :, :, :3] + if image is not None: + image = image[:, :, :, :3] + if inpaint_image is not None: + inpaint_image = inpaint_image[:, :, :, :3] if mask is not None: if mask.ndim == 3: mask = mask.unsqueeze(1) @@ -425,13 +458,24 @@ class QwenImageDiffsynthControlnet: mask = 1.0 - mask if isinstance(model_patch.model, comfy.ldm.lumina.controlnet.ZImage_Control): - patch = ZImageControlPatch(model_patch, vae, image, strength, mask=mask) + patch = ZImageControlPatch(model_patch, vae, image, strength, inpaint_image=inpaint_image, mask=mask) model_patched.set_model_noise_refiner_patch(patch) model_patched.set_model_double_block_patch(patch) else: model_patched.set_model_double_block_patch(DiffSynthCnetPatch(model_patch, vae, image, strength, mask)) return (model_patched,) +class ZImageFunControlnet(QwenImageDiffsynthControlnet): + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "model_patch": ("MODEL_PATCH",), + "vae": ("VAE",), + "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + }, + "optional": {"image": ("IMAGE",), 
"inpaint_image": ("IMAGE",), "mask": ("MASK",)}} + + CATEGORY = "advanced/loaders/zimage" class UsoStyleProjectorPatch: def __init__(self, model_patch, encoded_image): @@ -479,5 +523,6 @@ class USOStyleReference: NODE_CLASS_MAPPINGS = { "ModelPatchLoader": ModelPatchLoader, "QwenImageDiffsynthControlnet": QwenImageDiffsynthControlnet, + "ZImageFunControlnet": ZImageFunControlnet, "USOStyleReference": USOStyleReference, } From bc606d7d645f9edfcac7cca3558210d3ee391d94 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 15 Dec 2025 22:26:55 -0800 Subject: [PATCH 03/11] Add a way to set the default ref method in the qwen image code. (#11349) --- comfy/ldm/qwen_image/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py index 96590088b..8481f7711 100644 --- a/comfy/ldm/qwen_image/model.py +++ b/comfy/ldm/qwen_image/model.py @@ -322,6 +322,7 @@ class QwenImageTransformer2DModel(nn.Module): pooled_projection_dim: int = 768, guidance_embeds: bool = False, axes_dims_rope: Tuple[int, int, int] = (16, 56, 56), + default_ref_method="index", image_model=None, final_layer=True, dtype=None, @@ -334,6 +335,7 @@ class QwenImageTransformer2DModel(nn.Module): self.in_channels = in_channels self.out_channels = out_channels or in_channels self.inner_dim = num_attention_heads * attention_head_dim + self.default_ref_method = default_ref_method self.pe_embedder = EmbedND(dim=attention_head_dim, theta=10000, axes_dim=list(axes_dims_rope)) @@ -416,7 +418,7 @@ class QwenImageTransformer2DModel(nn.Module): h = 0 w = 0 index = 0 - ref_method = kwargs.get("ref_latents_method", "index") + ref_method = kwargs.get("ref_latents_method", self.default_ref_method) index_ref_method = (ref_method == "index") or (ref_method == "index_timestep_zero") timestep_zero = ref_method == "index_timestep_zero" for ref in ref_latents: From 9304e47351be8d178a093b30bcaf5d72c3a2baf5 Mon Sep 17 00:00:00 2001 From: Benjamin Lu Date: Mon, 15 Dec 2025 23:24:18 -0800 Subject: [PATCH 04/11] Update workflows for new release process (#11064) * Update release workflows for branch process * Adjust branch order in workflow triggers * Revert changes in test workflows --- .github/workflows/test-ci.yml | 1 + .github/workflows/test-execution.yml | 4 ++-- .github/workflows/test-launch.yml | 4 ++-- .github/workflows/test-unit.yml | 4 ++-- .github/workflows/update-version.yml | 1 + 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-ci.yml b/.github/workflows/test-ci.yml index 1660ec8e3..adfc5dd32 100644 --- a/.github/workflows/test-ci.yml +++ b/.github/workflows/test-ci.yml @@ -5,6 +5,7 @@ on: push: branches: - master + - release/** paths-ignore: - 'app/**' - 'input/**' diff --git a/.github/workflows/test-execution.yml b/.github/workflows/test-execution.yml index 00ef07ebf..9012633d8 100644 --- a/.github/workflows/test-execution.yml +++ b/.github/workflows/test-execution.yml @@ -2,9 +2,9 @@ name: Execution Tests on: push: - branches: [ main, master ] + branches: [ main, master, release/** ] pull_request: - branches: [ main, master ] + branches: [ main, master, release/** ] jobs: test: diff --git a/.github/workflows/test-launch.yml b/.github/workflows/test-launch.yml index 1735fd83b..fd70aff23 100644 --- a/.github/workflows/test-launch.yml +++ b/.github/workflows/test-launch.yml @@ -2,9 +2,9 @@ name: Test server launches without errors on: push: - branches: [ main, master ] + branches: [ 
main, master, release/** ] pull_request: - branches: [ main, master ] + branches: [ main, master, release/** ] jobs: test: diff --git a/.github/workflows/test-unit.yml b/.github/workflows/test-unit.yml index 00caf5b8a..d05179cd3 100644 --- a/.github/workflows/test-unit.yml +++ b/.github/workflows/test-unit.yml @@ -2,9 +2,9 @@ name: Unit Tests on: push: - branches: [ main, master ] + branches: [ main, master, release/** ] pull_request: - branches: [ main, master ] + branches: [ main, master, release/** ] jobs: test: diff --git a/.github/workflows/update-version.yml b/.github/workflows/update-version.yml index d9d488974..c2343cc39 100644 --- a/.github/workflows/update-version.yml +++ b/.github/workflows/update-version.yml @@ -6,6 +6,7 @@ on: - "pyproject.toml" branches: - master + - release/** jobs: update-version: From 65e2103b09f66e45438445fb0e99709ae7639869 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Tue, 16 Dec 2025 23:51:48 +0200 Subject: [PATCH 05/11] feat(api-nodes): add Wan2.6 model to video nodes (#11357) --- comfy_api_nodes/nodes_wan.py | 162 ++++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 67 deletions(-) diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index 2aab3c2ff..17b680e13 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -1,7 +1,5 @@ import re -from typing import Optional -import torch from pydantic import BaseModel, Field from typing_extensions import override @@ -21,26 +19,26 @@ from comfy_api_nodes.util import ( class Text2ImageInputField(BaseModel): prompt: str = Field(...) - negative_prompt: Optional[str] = Field(None) + negative_prompt: str | None = Field(None) class Image2ImageInputField(BaseModel): prompt: str = Field(...) - negative_prompt: Optional[str] = Field(None) + negative_prompt: str | None = Field(None) images: list[str] = Field(..., min_length=1, max_length=2) class Text2VideoInputField(BaseModel): prompt: str = Field(...) - negative_prompt: Optional[str] = Field(None) - audio_url: Optional[str] = Field(None) + negative_prompt: str | None = Field(None) + audio_url: str | None = Field(None) class Image2VideoInputField(BaseModel): prompt: str = Field(...) - negative_prompt: Optional[str] = Field(None) + negative_prompt: str | None = Field(None) img_url: str = Field(...) - audio_url: Optional[str] = Field(None) + audio_url: str | None = Field(None) class Txt2ImageParametersField(BaseModel): @@ -52,7 +50,7 @@ class Txt2ImageParametersField(BaseModel): class Image2ImageParametersField(BaseModel): - size: Optional[str] = Field(None) + size: str | None = Field(None) n: int = Field(1, description="Number of images to generate.") # we support only value=1 seed: int = Field(..., ge=0, le=2147483647) watermark: bool = Field(True) @@ -61,19 +59,21 @@ class Image2ImageParametersField(BaseModel): class Text2VideoParametersField(BaseModel): size: str = Field(...) seed: int = Field(..., ge=0, le=2147483647) - duration: int = Field(5, ge=5, le=10) + duration: int = Field(5, ge=5, le=15) prompt_extend: bool = Field(True) watermark: bool = Field(True) - audio: bool = Field(False, description="Should be audio generated automatically") + audio: bool = Field(False, description="Whether to generate audio automatically.") + shot_type: str = Field("single") class Image2VideoParametersField(BaseModel): resolution: str = Field(...) 
seed: int = Field(..., ge=0, le=2147483647) - duration: int = Field(5, ge=5, le=10) + duration: int = Field(5, ge=5, le=15) prompt_extend: bool = Field(True) watermark: bool = Field(True) - audio: bool = Field(False, description="Should be audio generated automatically") + audio: bool = Field(False, description="Whether to generate audio automatically.") + shot_type: str = Field("single") class Text2ImageTaskCreationRequest(BaseModel): @@ -106,39 +106,39 @@ class TaskCreationOutputField(BaseModel): class TaskCreationResponse(BaseModel): - output: Optional[TaskCreationOutputField] = Field(None) + output: TaskCreationOutputField | None = Field(None) request_id: str = Field(...) - code: Optional[str] = Field(None, description="The error code of the failed request.") - message: Optional[str] = Field(None, description="Details of the failed request.") + code: str | None = Field(None, description="Error code for the failed request.") + message: str | None = Field(None, description="Details about the failed request.") class TaskResult(BaseModel): - url: Optional[str] = Field(None) - code: Optional[str] = Field(None) - message: Optional[str] = Field(None) + url: str | None = Field(None) + code: str | None = Field(None) + message: str | None = Field(None) class ImageTaskStatusOutputField(TaskCreationOutputField): task_id: str = Field(...) task_status: str = Field(...) - results: Optional[list[TaskResult]] = Field(None) + results: list[TaskResult] | None = Field(None) class VideoTaskStatusOutputField(TaskCreationOutputField): task_id: str = Field(...) task_status: str = Field(...) - video_url: Optional[str] = Field(None) - code: Optional[str] = Field(None) - message: Optional[str] = Field(None) + video_url: str | None = Field(None) + code: str | None = Field(None) + message: str | None = Field(None) class ImageTaskStatusResponse(BaseModel): - output: Optional[ImageTaskStatusOutputField] = Field(None) + output: ImageTaskStatusOutputField | None = Field(None) request_id: str = Field(...) class VideoTaskStatusResponse(BaseModel): - output: Optional[VideoTaskStatusOutputField] = Field(None) + output: VideoTaskStatusOutputField | None = Field(None) request_id: str = Field(...) @@ -152,7 +152,7 @@ class WanTextToImageApi(IO.ComfyNode): node_id="WanTextToImageApi", display_name="Wan Text to Image", category="api node/image/Wan", - description="Generates image based on text prompt.", + description="Generates an image based on a text prompt.", inputs=[ IO.Combo.Input( "model", @@ -164,13 +164,13 @@ class WanTextToImageApi(IO.ComfyNode): "prompt", multiline=True, default="", - tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.", + tooltip="Prompt describing the elements and visual features. 
Supports English and Chinese.", ), IO.String.Input( "negative_prompt", multiline=True, default="", - tooltip="Negative text prompt to guide what to avoid.", + tooltip="Negative prompt describing what to avoid.", optional=True, ), IO.Int.Input( @@ -209,7 +209,7 @@ class WanTextToImageApi(IO.ComfyNode): IO.Boolean.Input( "watermark", default=True, - tooltip='Whether to add an "AI generated" watermark to the result.', + tooltip="Whether to add an AI-generated watermark to the result.", optional=True, ), ], @@ -252,7 +252,7 @@ class WanTextToImageApi(IO.ComfyNode): ), ) if not initial_response.output: - raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}") + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") response = await poll_op( cls, ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), @@ -272,7 +272,7 @@ class WanImageToImageApi(IO.ComfyNode): display_name="Wan Image to Image", category="api node/image/Wan", description="Generates an image from one or two input images and a text prompt. " - "The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).", + "The output image is currently fixed at 1.6 MP, and its aspect ratio matches the input image(s).", inputs=[ IO.Combo.Input( "model", @@ -282,19 +282,19 @@ class WanImageToImageApi(IO.ComfyNode): ), IO.Image.Input( "image", - tooltip="Single-image editing or multi-image fusion, maximum 2 images.", + tooltip="Single-image editing or multi-image fusion. Maximum 2 images.", ), IO.String.Input( "prompt", multiline=True, default="", - tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.", + tooltip="Prompt describing the elements and visual features. 
Supports English and Chinese.", ), IO.String.Input( "negative_prompt", multiline=True, default="", - tooltip="Negative text prompt to guide what to avoid.", + tooltip="Negative prompt describing what to avoid.", optional=True, ), # redo this later as an optional combo of recommended resolutions @@ -328,7 +328,7 @@ class WanImageToImageApi(IO.ComfyNode): IO.Boolean.Input( "watermark", default=True, - tooltip='Whether to add an "AI generated" watermark to the result.', + tooltip="Whether to add an AI-generated watermark to the result.", optional=True, ), ], @@ -347,7 +347,7 @@ class WanImageToImageApi(IO.ComfyNode): async def execute( cls, model: str, - image: torch.Tensor, + image: Input.Image, prompt: str, negative_prompt: str = "", # width: int = 1024, @@ -357,7 +357,7 @@ class WanImageToImageApi(IO.ComfyNode): ): n_images = get_number_of_images(image) if n_images not in (1, 2): - raise ValueError(f"Expected 1 or 2 input images, got {n_images}.") + raise ValueError(f"Expected 1 or 2 input images, but got {n_images}.") images = [] for i in image: images.append("data:image/png;base64," + tensor_to_base64_string(i, total_pixels=4096 * 4096)) @@ -376,7 +376,7 @@ class WanImageToImageApi(IO.ComfyNode): ), ) if not initial_response.output: - raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}") + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") response = await poll_op( cls, ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), @@ -395,25 +395,25 @@ class WanTextToVideoApi(IO.ComfyNode): node_id="WanTextToVideoApi", display_name="Wan Text to Video", category="api node/video/Wan", - description="Generates video based on text prompt.", + description="Generates a video based on a text prompt.", inputs=[ IO.Combo.Input( "model", - options=["wan2.5-t2v-preview"], - default="wan2.5-t2v-preview", + options=["wan2.5-t2v-preview", "wan2.6-t2v"], + default="wan2.6-t2v", tooltip="Model to use.", ), IO.String.Input( "prompt", multiline=True, default="", - tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.", + tooltip="Prompt describing the elements and visual features. 
Supports English and Chinese.", ), IO.String.Input( "negative_prompt", multiline=True, default="", - tooltip="Negative text prompt to guide what to avoid.", + tooltip="Negative prompt describing what to avoid.", optional=True, ), IO.Combo.Input( @@ -433,23 +433,23 @@ class WanTextToVideoApi(IO.ComfyNode): "1080p: 4:3 (1632x1248)", "1080p: 3:4 (1248x1632)", ], - default="480p: 1:1 (624x624)", + default="720p: 1:1 (960x960)", optional=True, ), IO.Int.Input( "duration", default=5, min=5, - max=10, + max=15, step=5, display_mode=IO.NumberDisplay.number, - tooltip="Available durations: 5 and 10 seconds", + tooltip="A 15-second duration is available only for the Wan 2.6 model.", optional=True, ), IO.Audio.Input( "audio", optional=True, - tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.", + tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.", ), IO.Int.Input( "seed", @@ -466,7 +466,7 @@ class WanTextToVideoApi(IO.ComfyNode): "generate_audio", default=False, optional=True, - tooltip="If there is no audio input, generate audio automatically.", + tooltip="If no audio input is provided, generate audio automatically.", ), IO.Boolean.Input( "prompt_extend", @@ -477,7 +477,15 @@ class WanTextToVideoApi(IO.ComfyNode): IO.Boolean.Input( "watermark", default=True, - tooltip='Whether to add an "AI generated" watermark to the result.', + tooltip="Whether to add an AI-generated watermark to the result.", + optional=True, + ), + IO.Combo.Input( + "shot_type", + options=["single", "multi"], + tooltip="Specifies the shot type for the generated video, that is, whether the video is a " + "single continuous shot or multiple shots with cuts. " + "This parameter takes effect only when prompt_extend is True.", optional=True, ), ], @@ -498,14 +506,19 @@ class WanTextToVideoApi(IO.ComfyNode): model: str, prompt: str, negative_prompt: str = "", - size: str = "480p: 1:1 (624x624)", + size: str = "720p: 1:1 (960x960)", duration: int = 5, - audio: Optional[Input.Audio] = None, + audio: Input.Audio | None = None, seed: int = 0, generate_audio: bool = False, prompt_extend: bool = True, watermark: bool = True, + shot_type: str = "single", ): + if "480p" in size and model == "wan2.6-t2v": + raise ValueError("The Wan 2.6 model does not support 480p.") + if duration == 15 and model == "wan2.5-t2v-preview": + raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.") width, height = RES_IN_PARENS.search(size).groups() audio_url = None if audio is not None: @@ -526,11 +539,12 @@ class WanTextToVideoApi(IO.ComfyNode): audio=generate_audio, prompt_extend=prompt_extend, watermark=watermark, + shot_type=shot_type, ), ), ) if not initial_response.output: - raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}") + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") response = await poll_op( cls, ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), @@ -549,12 +563,12 @@ class WanImageToVideoApi(IO.ComfyNode): node_id="WanImageToVideoApi", display_name="Wan Image to Video", category="api node/video/Wan", - description="Generates video based on the first frame and text prompt.", + description="Generates a video from the first frame and a text prompt.", inputs=[ IO.Combo.Input( "model", - options=["wan2.5-i2v-preview"], - default="wan2.5-i2v-preview", + options=["wan2.5-i2v-preview", "wan2.6-i2v"], + 
default="wan2.6-i2v", tooltip="Model to use.", ), IO.Image.Input( @@ -564,13 +578,13 @@ class WanImageToVideoApi(IO.ComfyNode): "prompt", multiline=True, default="", - tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.", + tooltip="Prompt describing the elements and visual features. Supports English and Chinese.", ), IO.String.Input( "negative_prompt", multiline=True, default="", - tooltip="Negative text prompt to guide what to avoid.", + tooltip="Negative prompt describing what to avoid.", optional=True, ), IO.Combo.Input( @@ -580,23 +594,23 @@ class WanImageToVideoApi(IO.ComfyNode): "720P", "1080P", ], - default="480P", + default="720P", optional=True, ), IO.Int.Input( "duration", default=5, min=5, - max=10, + max=15, step=5, display_mode=IO.NumberDisplay.number, - tooltip="Available durations: 5 and 10 seconds", + tooltip="Duration 15 available only for WAN2.6 model.", optional=True, ), IO.Audio.Input( "audio", optional=True, - tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.", + tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.", ), IO.Int.Input( "seed", @@ -613,7 +627,7 @@ class WanImageToVideoApi(IO.ComfyNode): "generate_audio", default=False, optional=True, - tooltip="If there is no audio input, generate audio automatically.", + tooltip="If no audio input is provided, generate audio automatically.", ), IO.Boolean.Input( "prompt_extend", @@ -624,7 +638,15 @@ class WanImageToVideoApi(IO.ComfyNode): IO.Boolean.Input( "watermark", default=True, - tooltip='Whether to add an "AI generated" watermark to the result.', + tooltip="Whether to add an AI-generated watermark to the result.", + optional=True, + ), + IO.Combo.Input( + "shot_type", + options=["single", "multi"], + tooltip="Specifies the shot type for the generated video, that is, whether the video is a " + "single continuous shot or multiple shots with cuts. 
" + "This parameter takes effect only when prompt_extend is True.", optional=True, ), ], @@ -643,19 +665,24 @@ class WanImageToVideoApi(IO.ComfyNode): async def execute( cls, model: str, - image: torch.Tensor, + image: Input.Image, prompt: str, negative_prompt: str = "", - resolution: str = "480P", + resolution: str = "720P", duration: int = 5, - audio: Optional[Input.Audio] = None, + audio: Input.Audio | None = None, seed: int = 0, generate_audio: bool = False, prompt_extend: bool = True, watermark: bool = True, + shot_type: str = "single", ): if get_number_of_images(image) != 1: raise ValueError("Exactly one input image is required.") + if "480P" in resolution and model == "wan2.6-i2v": + raise ValueError("The Wan 2.6 model does not support 480P.") + if duration == 15 and model == "wan2.5-i2v-preview": + raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.") image_url = "data:image/png;base64," + tensor_to_base64_string(image, total_pixels=2000 * 2000) audio_url = None if audio is not None: @@ -677,11 +704,12 @@ class WanImageToVideoApi(IO.ComfyNode): audio=generate_audio, prompt_extend=prompt_extend, watermark=watermark, + shot_type=shot_type, ), ), ) if not initial_response.output: - raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}") + raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}") response = await poll_op( cls, ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"), From ffdd53b327f7ebd48cf81a1c8b06d846cf354a66 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 16 Dec 2025 14:03:17 -0800 Subject: [PATCH 06/11] Check state dict key to auto enable the index_timestep_zero ref method. 
(#11362) --- comfy/ldm/qwen_image/model.py | 3 +++ comfy/model_detection.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py index 8481f7711..902af30ed 100644 --- a/comfy/ldm/qwen_image/model.py +++ b/comfy/ldm/qwen_image/model.py @@ -363,6 +363,9 @@ class QwenImageTransformer2DModel(nn.Module): for _ in range(num_layers) ]) + if self.default_ref_method == "index_timestep_zero": + self.register_buffer("__index_timestep_zero__", torch.tensor([])) + if final_layer: self.norm_out = LastLayer(self.inner_dim, self.inner_dim, dtype=dtype, device=device, operations=operations) self.proj_out = operations.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True, dtype=dtype, device=device) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index dd6a703f6..7148c77fd 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -259,7 +259,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["nerf_tile_size"] = 512 dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear" dit_config["nerf_embedder_dtype"] = torch.float32 - if "__x0__" in state_dict_keys: # x0 pred + if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred dit_config["use_x0"] = True else: dit_config["use_x0"] = False @@ -618,6 +618,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["image_model"] = "qwen_image" dit_config["in_channels"] = state_dict['{}img_in.weight'.format(key_prefix)].shape[1] dit_config["num_layers"] = count_blocks(state_dict_keys, '{}transformer_blocks.'.format(key_prefix) + '{}.') + if "{}__index_timestep_zero__".format(key_prefix) in state_dict_keys: # 2511 + dit_config["default_ref_method"] = "index_timestep_zero" return dit_config if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5 From 827bb1512b17e349238e69b2d4f463390a5b0d14 Mon Sep 17 00:00:00 2001 From: chaObserv <154517000+chaObserv@users.noreply.github.com> Date: Wed, 17 Dec 2025 12:35:43 +0800 Subject: [PATCH 07/11] Add exp_heun_2_x0 sampler series (#11360) --- comfy/k_diffusion/sampling.py | 11 +++++++++++ comfy/samplers.py | 2 +- comfy_extras/nodes_custom_sampler.py | 11 ++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 753c66afa..c004b3b47 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1618,6 +1618,17 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=Non x = x + sde_noise * sigmas[i + 1] * s_noise return x +@torch.no_grad() +def sample_exp_heun_2_x0(model, x, sigmas, extra_args=None, callback=None, disable=None, solver_type="phi_2"): + """Deterministic exponential Heun second order method in data prediction (x0) and logSNR time.""" + return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None, r=1.0, solver_type=solver_type) + + +@torch.no_grad() +def sample_exp_heun_2_x0_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type="phi_2"): + """Stochastic exponential Heun second order method in data prediction (x0) and logSNR time.""" + return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, 
eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=1.0, solver_type=solver_type) + @torch.no_grad() def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3): diff --git a/comfy/samplers.py b/comfy/samplers.py index fa4640842..8340d376c 100755 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -720,7 +720,7 @@ class Sampler: sigma = float(sigmas[0]) return math.isclose(max_sigma, sigma, rel_tol=1e-05) or sigma > max_sigma -KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2","dpm_2", "dpm_2_ancestral", +KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2", "exp_heun_2_x0", "exp_heun_2_x0_sde", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu", "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_2m_sde_heun", "dpmpp_2m_sde_heun_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm", "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp", diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index 71ea4e9ec..7ee4caac1 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -671,7 +671,16 @@ class SamplerSEEDS2(io.ComfyNode): io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier"), io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)"), ], - outputs=[io.Sampler.Output()] + outputs=[io.Sampler.Output()], + description=( + "This sampler node can represent multiple samplers:\n\n" + "seeds_2\n" + "- default setting\n\n" + "exp_heun_2_x0\n" + "- solver_type=phi_2, r=1.0, eta=0.0\n\n" + "exp_heun_2_x0_sde\n" + "- solver_type=phi_2, r=1.0, eta=1.0, s_noise=1.0" + ) ) @classmethod From 3a5f239cb622d7d8b1706d0b63c469dfef2eaf73 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 17 Dec 2025 03:46:11 -0500 Subject: [PATCH 08/11] ComfyUI v0.5.0 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 2f083edaf..5edf270e7 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. 
-__version__ = "0.4.0" +__version__ = "0.5.0" diff --git a/pyproject.toml b/pyproject.toml index e4d3d616a..c402f278c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.4.0" +version = "0.5.0" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From 887143854bb2ae1e0f975e4461f376844a1628c8 Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Wed, 17 Dec 2025 19:43:41 +0200 Subject: [PATCH 09/11] feat(api-nodes): add GPT-Image-1.5 (#11368) --- comfy_api_nodes/apis/openai_api.py | 52 +++++++ comfy_api_nodes/nodes_openai.py | 209 +++++++++++++++------------- comfy_api_nodes/util/conversions.py | 2 +- 3 files changed, 168 insertions(+), 95 deletions(-) create mode 100644 comfy_api_nodes/apis/openai_api.py diff --git a/comfy_api_nodes/apis/openai_api.py b/comfy_api_nodes/apis/openai_api.py new file mode 100644 index 000000000..ae5bb2673 --- /dev/null +++ b/comfy_api_nodes/apis/openai_api.py @@ -0,0 +1,52 @@ +from pydantic import BaseModel, Field + + +class Datum2(BaseModel): + b64_json: str | None = Field(None, description="Base64 encoded image data") + revised_prompt: str | None = Field(None, description="Revised prompt") + url: str | None = Field(None, description="URL of the image") + + +class InputTokensDetails(BaseModel): + image_tokens: int | None = None + text_tokens: int | None = None + + +class Usage(BaseModel): + input_tokens: int | None = None + input_tokens_details: InputTokensDetails | None = None + output_tokens: int | None = None + total_tokens: int | None = None + + +class OpenAIImageGenerationResponse(BaseModel): + data: list[Datum2] | None = None + usage: Usage | None = None + + +class OpenAIImageEditRequest(BaseModel): + background: str | None = Field(None, description="Background transparency") + model: str = Field(...) + moderation: str | None = Field(None) + n: int | None = Field(None, description="The number of images to generate") + output_compression: int | None = Field(None, description="Compression level for JPEG or WebP (0-100)") + output_format: str | None = Field(None) + prompt: str = Field(...) + quality: str | None = Field(None, description="Size of the image (e.g., 1024x1024, 1536x1024, auto)") + size: str | None = Field(None, description="Size of the output image") + + +class OpenAIImageGenerationRequest(BaseModel): + background: str | None = Field(None, description="Background transparency") + model: str | None = Field(None) + moderation: str | None = Field(None) + n: int | None = Field( + None, + description="The number of images to generate.", + ) + output_compression: int | None = Field(None, description="Compression level for JPEG or WebP (0-100)") + output_format: str | None = Field(None) + prompt: str = Field(...) 
+ quality: str | None = Field(None, description="The quality of the generated image") + size: str | None = Field(None, description="Size of the image (e.g., 1024x1024, 1536x1024, auto)") + style: str | None = Field(None, description="Style of the image (only for dall-e-3)") diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index c8da5464b..a6205a34f 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -1,46 +1,45 @@ -from io import BytesIO +import base64 import os from enum import Enum -from inspect import cleandoc +from io import BytesIO + import numpy as np import torch from PIL import Image -import folder_paths -import base64 -from comfy_api.latest import IO, ComfyExtension from typing_extensions import override - +import folder_paths +from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis import ( - OpenAIImageGenerationRequest, - OpenAIImageEditRequest, - OpenAIImageGenerationResponse, - OpenAICreateResponse, - OpenAIResponse, CreateModelResponseProperties, - Item, - OutputContent, - InputImageContent, Detail, - InputTextContent, - InputMessage, - InputMessageContentList, InputContent, InputFileContent, + InputImageContent, + InputMessage, + InputMessageContentList, + InputTextContent, + Item, + OpenAICreateResponse, + OpenAIResponse, + OutputContent, +) +from comfy_api_nodes.apis.openai_api import ( + OpenAIImageEditRequest, + OpenAIImageGenerationRequest, + OpenAIImageGenerationResponse, ) - from comfy_api_nodes.util import ( - downscale_image_tensor, - download_url_to_bytesio, - validate_string, - tensor_to_base64_string, ApiEndpoint, - sync_op, + download_url_to_bytesio, + downscale_image_tensor, poll_op, + sync_op, + tensor_to_base64_string, text_filepath_to_data_uri, + validate_string, ) - RESPONSES_ENDPOINT = "/proxy/openai/v1/responses" STARTING_POINT_ID_PATTERN = r"" @@ -98,9 +97,6 @@ async def validate_and_cast_response(response, timeout: int = None) -> torch.Ten class OpenAIDalle2(IO.ComfyNode): - """ - Generates images synchronously via OpenAI's DALL·E 2 endpoint. - """ @classmethod def define_schema(cls): @@ -108,7 +104,7 @@ class OpenAIDalle2(IO.ComfyNode): node_id="OpenAIDalle2", display_name="OpenAI DALL·E 2", category="api node/image/OpenAI", - description=cleandoc(cls.__doc__ or ""), + description="Generates images synchronously via OpenAI's DALL·E 2 endpoint.", inputs=[ IO.String.Input( "prompt", @@ -234,9 +230,6 @@ class OpenAIDalle2(IO.ComfyNode): class OpenAIDalle3(IO.ComfyNode): - """ - Generates images synchronously via OpenAI's DALL·E 3 endpoint. 
- """ @classmethod def define_schema(cls): @@ -244,7 +237,7 @@ class OpenAIDalle3(IO.ComfyNode): node_id="OpenAIDalle3", display_name="OpenAI DALL·E 3", category="api node/image/OpenAI", - description=cleandoc(cls.__doc__ or ""), + description="Generates images synchronously via OpenAI's DALL·E 3 endpoint.", inputs=[ IO.String.Input( "prompt", @@ -326,10 +319,16 @@ class OpenAIDalle3(IO.ComfyNode): return IO.NodeOutput(await validate_and_cast_response(response)) +def calculate_tokens_price_image_1(response: OpenAIImageGenerationResponse) -> float | None: + # https://platform.openai.com/docs/pricing + return ((response.usage.input_tokens * 10.0) + (response.usage.output_tokens * 40.0)) / 1_000_000.0 + + +def calculate_tokens_price_image_1_5(response: OpenAIImageGenerationResponse) -> float | None: + return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 32.0)) / 1_000_000.0 + + class OpenAIGPTImage1(IO.ComfyNode): - """ - Generates images synchronously via OpenAI's GPT Image 1 endpoint. - """ @classmethod def define_schema(cls): @@ -337,13 +336,13 @@ class OpenAIGPTImage1(IO.ComfyNode): node_id="OpenAIGPTImage1", display_name="OpenAI GPT Image 1", category="api node/image/OpenAI", - description=cleandoc(cls.__doc__ or ""), + description="Generates images synchronously via OpenAI's GPT Image 1 endpoint.", inputs=[ IO.String.Input( "prompt", default="", multiline=True, - tooltip="Text prompt for GPT Image 1", + tooltip="Text prompt for GPT Image", ), IO.Int.Input( "seed", @@ -365,8 +364,8 @@ class OpenAIGPTImage1(IO.ComfyNode): ), IO.Combo.Input( "background", - default="opaque", - options=["opaque", "transparent"], + default="auto", + options=["auto", "opaque", "transparent"], tooltip="Return image with or without background", optional=True, ), @@ -397,6 +396,11 @@ class OpenAIGPTImage1(IO.ComfyNode): tooltip="Optional mask for inpainting (white areas will be replaced)", optional=True, ), + IO.Combo.Input( + "model", + options=["gpt-image-1", "gpt-image-1.5"], + optional=True, + ), ], outputs=[ IO.Image.Output(), @@ -412,32 +416,34 @@ class OpenAIGPTImage1(IO.ComfyNode): @classmethod async def execute( cls, - prompt, - seed=0, - quality="low", - background="opaque", - image=None, - mask=None, - n=1, - size="1024x1024", + prompt: str, + seed: int = 0, + quality: str = "low", + background: str = "opaque", + image: Input.Image | None = None, + mask: Input.Image | None = None, + n: int = 1, + size: str = "1024x1024", + model: str = "gpt-image-1", ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=False) - model = "gpt-image-1" - path = "/proxy/openai/images/generations" - content_type = "application/json" - request_class = OpenAIImageGenerationRequest - files = [] + + if mask is not None and image is None: + raise ValueError("Cannot use a mask without an input image") + + if model == "gpt-image-1": + price_extractor = calculate_tokens_price_image_1 + elif model == "gpt-image-1.5": + price_extractor = calculate_tokens_price_image_1_5 + else: + raise ValueError(f"Unknown model: {model}") if image is not None: - path = "/proxy/openai/images/edits" - request_class = OpenAIImageEditRequest - content_type = "multipart/form-data" - + files = [] batch_size = image.shape[0] - for i in range(batch_size): - single_image = image[i : i + 1] - scaled_image = downscale_image_tensor(single_image).squeeze() + single_image = image[i: i + 1] + scaled_image = downscale_image_tensor(single_image, total_pixels=2048*2048).squeeze() image_np = (scaled_image.numpy() * 
255).astype(np.uint8) img = Image.fromarray(image_np) @@ -450,44 +456,59 @@ class OpenAIGPTImage1(IO.ComfyNode): else: files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png"))) - if mask is not None: - if image is None: - raise Exception("Cannot use a mask without an input image") - if image.shape[0] != 1: - raise Exception("Cannot use a mask with multiple image") - if mask.shape[1:] != image.shape[1:-1]: - raise Exception("Mask and Image must be the same size") - batch, height, width = mask.shape - rgba_mask = torch.zeros(height, width, 4, device="cpu") - rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu() + if mask is not None: + if image.shape[0] != 1: + raise Exception("Cannot use a mask with multiple image") + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + _, height, width = mask.shape + rgba_mask = torch.zeros(height, width, 4, device="cpu") + rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu() - scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze() + scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0), total_pixels=2048*2048).squeeze() - mask_np = (scaled_mask.numpy() * 255).astype(np.uint8) - mask_img = Image.fromarray(mask_np) - mask_img_byte_arr = BytesIO() - mask_img.save(mask_img_byte_arr, format="PNG") - mask_img_byte_arr.seek(0) - files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png"))) - - # Build the operation - response = await sync_op( - cls, - ApiEndpoint(path=path, method="POST"), - response_model=OpenAIImageGenerationResponse, - data=request_class( - model=model, - prompt=prompt, - quality=quality, - background=background, - n=n, - seed=seed, - size=size, - ), - files=files if files else None, - content_type=content_type, - ) + mask_np = (scaled_mask.numpy() * 255).astype(np.uint8) + mask_img = Image.fromarray(mask_np) + mask_img_byte_arr = BytesIO() + mask_img.save(mask_img_byte_arr, format="PNG") + mask_img_byte_arr.seek(0) + files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png"))) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/openai/images/edits", method="POST"), + response_model=OpenAIImageGenerationResponse, + data=OpenAIImageEditRequest( + model=model, + prompt=prompt, + quality=quality, + background=background, + n=n, + seed=seed, + size=size, + moderation="low", + ), + content_type="multipart/form-data", + files=files, + price_extractor=price_extractor, + ) + else: + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/openai/images/generations", method="POST"), + response_model=OpenAIImageGenerationResponse, + data=OpenAIImageGenerationRequest( + model=model, + prompt=prompt, + quality=quality, + background=background, + n=n, + seed=seed, + size=size, + moderation="low", + ), + price_extractor=price_extractor, + ) return IO.NodeOutput(await validate_and_cast_response(response)) diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py index c57457580..d64239c86 100644 --- a/comfy_api_nodes/util/conversions.py +++ b/comfy_api_nodes/util/conversions.py @@ -129,7 +129,7 @@ def pil_to_bytesio(img: Image.Image, mime_type: str = "image/png") -> BytesIO: return img_byte_arr -def downscale_image_tensor(image, total_pixels=1536 * 1024) -> torch.Tensor: +def downscale_image_tensor(image: torch.Tensor, total_pixels: int = 1536 * 1024) -> torch.Tensor: """Downscale input image tensor to roughly the specified total pixels.""" samples = image.movedim(-1, 1) total = int(total_pixels) From 
c08f97f34407a1bc6cc8d1447d6c12893399acba Mon Sep 17 00:00:00 2001 From: Alexander Piskun <13381981+bigcat88@users.noreply.github.com> Date: Wed, 17 Dec 2025 20:24:25 +0200 Subject: [PATCH 10/11] fix regression in V3 nodes processing (#11375) --- comfy_api/latest/_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index 2b634d172..4b14e5ded 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -1556,12 +1556,12 @@ class _ComfyNodeBaseInternal(_ComfyNodeInternal): @final @classmethod - def PREPARE_CLASS_CLONE(cls, v3_data: V3Data) -> type[ComfyNode]: + def PREPARE_CLASS_CLONE(cls, v3_data: V3Data | None) -> type[ComfyNode]: """Creates clone of real node class to prevent monkey-patching.""" c_type: type[ComfyNode] = cls if is_class(cls) else type(cls) type_clone: type[ComfyNode] = shallow_clone_class(c_type) # set hidden - type_clone.hidden = HiddenHolder.from_dict(v3_data["hidden_inputs"]) + type_clone.hidden = HiddenHolder.from_dict(v3_data["hidden_inputs"] if v3_data else None) return type_clone @final From 5d9ad0c6bf177095aea5026cd872b1faf873669b Mon Sep 17 00:00:00 2001 From: chaObserv <154517000+chaObserv@users.noreply.github.com> Date: Thu, 18 Dec 2025 02:57:40 +0800 Subject: [PATCH 11/11] Fix the last step with non-zero sigma in sa_solver (#11380) --- comfy/k_diffusion/sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index c004b3b47..1ba9edad7 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1776,7 +1776,7 @@ def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=F # Predictor if sigmas[i + 1] == 0: # Denoising step - x = denoised + x_pred = denoised else: tau_t = tau_func(sigmas[i + 1]) curr_lambdas = lambdas[i - predictor_order_used + 1:i + 1] @@ -1797,7 +1797,7 @@ def sample_sa_solver(model, x, sigmas, extra_args=None, callback=None, disable=F if tau_t > 0 and s_noise > 0: noise = noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * tau_t ** 2 * h).expm1().neg().sqrt() * s_noise x_pred = x_pred + noise - return x + return x_pred @torch.no_grad()
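
As a minimal illustration of the sampler equivalence spelled out in the SamplerSEEDS2 node description above (exp_heun_2_x0 corresponds to seeds_2 with solver_type=phi_2, r=1.0, eta=0.0), the short Python sketch below compares the two code paths directly. It assumes a ComfyUI checkout where comfy.k_diffusion.sampling is importable; the zero-predicting toy_denoiser, the tensor shape, and the sigma-schedule values are illustrative stand-ins, not values taken from the patches.

    import torch
    from comfy.k_diffusion import sampling

    def toy_denoiser(x, sigma, **kwargs):
        # Stand-in for a real denoiser: always predicts an all-zero image (x0 = 0).
        return torch.zeros_like(x)

    x = torch.randn(1, 4, 8, 8)
    sigmas = sampling.get_sigmas_karras(n=10, sigma_min=0.03, sigma_max=14.6)

    # Deterministic wrapper introduced in PATCH 07/11.
    a = sampling.sample_exp_heun_2_x0(toy_denoiser, x.clone(), sigmas)
    # The same run expressed through sample_seeds_2 with the settings listed in the node description.
    b = sampling.sample_seeds_2(toy_denoiser, x.clone(), sigmas,
                                eta=0.0, s_noise=0.0, r=1.0, solver_type="phi_2")

    print(torch.allclose(a, b))  # expected: True

The same relationship holds for exp_heun_2_x0_sde, which only re-enables the eta and s_noise stochastic terms while keeping r=1.0 and the phi_2 solver.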