mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-17 20:47:31 +08:00
Merge branch 'Comfy-Org:master' into master
This commit is contained in:
commit
4fcd10b486
@ -27,6 +27,7 @@ from comfy_api_nodes.util import (
|
|||||||
ApiEndpoint,
|
ApiEndpoint,
|
||||||
download_url_to_bytesio,
|
download_url_to_bytesio,
|
||||||
downscale_image_tensor,
|
downscale_image_tensor,
|
||||||
|
get_number_of_images,
|
||||||
poll_op,
|
poll_op,
|
||||||
sync_op,
|
sync_op,
|
||||||
tensor_to_base64_string,
|
tensor_to_base64_string,
|
||||||
@ -372,6 +373,7 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
|||||||
display_name="OpenAI GPT Image 2",
|
display_name="OpenAI GPT Image 2",
|
||||||
category="api node/image/OpenAI",
|
category="api node/image/OpenAI",
|
||||||
description="Generates images synchronously via OpenAI's GPT Image endpoint.",
|
description="Generates images synchronously via OpenAI's GPT Image endpoint.",
|
||||||
|
is_deprecated=True,
|
||||||
inputs=[
|
inputs=[
|
||||||
IO.String.Input(
|
IO.String.Input(
|
||||||
"prompt",
|
"prompt",
|
||||||
@ -640,6 +642,316 @@ class OpenAIGPTImage1(IO.ComfyNode):
|
|||||||
return IO.NodeOutput(await validate_and_cast_response(response))
|
return IO.NodeOutput(await validate_and_cast_response(response))
|
||||||
|
|
||||||
|
|
||||||
def _gpt_image_shared_inputs():
    """Inputs shared by all GPT Image models (quality + reference images + mask).

    Returns a fresh list of IO input declarations; callers splat this into a
    per-model widget list, so each call must build new objects (no shared state).
    """
    return [
        # Quality tier drives both API cost and latency (see node price badge).
        IO.Combo.Input(
            "quality",
            default="low",
            options=["low", "medium", "high"],
            tooltip="Image quality, affects cost and generation time.",
        ),
        # Autogrow socket set: image_1 .. image_16, all optional (min=0).
        IO.Autogrow.Input(
            "images",
            template=IO.Autogrow.TemplateNames(
                IO.Image.Input("image"),
                names=[f"image_{i}" for i in range(1, 17)],
                min=0,
            ),
            tooltip="Optional reference image(s) for image editing. Up to 16 images.",
        ),
        # Inpainting mask; execute() rejects it unless exactly one reference
        # image is supplied.
        IO.Mask.Input(
            "mask",
            optional=True,
            tooltip="Optional mask for inpainting (white areas will be replaced). "
            "Requires exactly one reference image.",
        ),
    ]
|
|
||||||
|
|
||||||
def _gpt_image_legacy_model_inputs():
    """Per-model widget set for legacy gpt-image-1 / gpt-image-1.5 (4 base sizes, transparent bg allowed).

    Unlike the gpt-image-2 option (which adds 2K/4K presets, a 'Custom' size
    path, and drops 'transparent'), the legacy models expose only the four
    original sizes and allow a transparent background.
    """
    return [
        IO.Combo.Input(
            "size",
            default="auto",
            options=["auto", "1024x1024", "1024x1536", "1536x1024"],
            tooltip="Image size.",
        ),
        IO.Combo.Input(
            "background",
            default="auto",
            options=["auto", "opaque", "transparent"],
            tooltip="Return image with or without background.",
        ),
        # Quality / reference images / mask are common to every model option.
        *_gpt_image_shared_inputs(),
    ]
|
|
||||||
|
|
||||||
class OpenAIGPTImageNodeV2(IO.ComfyNode):
    """API node generating images via OpenAI's GPT Image endpoints.

    A single DynamicCombo 'model' widget selects gpt-image-2 (extra sizes,
    custom resolution, no transparent background) or the legacy
    gpt-image-1 / gpt-image-1.5 widget sets. With reference images the node
    calls the multipart /images/edits proxy; otherwise /images/generations.
    """

    @classmethod
    def define_schema(cls):
        return IO.Schema(
            node_id="OpenAIGPTImageNodeV2",
            display_name="OpenAI GPT Image 2",
            category="api node/image/OpenAI",
            description="Generates images via OpenAI's GPT Image endpoint.",
            inputs=[
                IO.String.Input(
                    "prompt",
                    default="",
                    multiline=True,
                    tooltip="Text prompt for GPT Image",
                ),
                # Per-model widget sets; selecting an option swaps in its inputs.
                IO.DynamicCombo.Input(
                    "model",
                    options=[
                        IO.DynamicCombo.Option(
                            "gpt-image-2",
                            [
                                IO.Combo.Input(
                                    "size",
                                    default="auto",
                                    options=[
                                        "auto",
                                        "1024x1024",
                                        "1024x1536",
                                        "1536x1024",
                                        "2048x2048",
                                        "2048x1152",
                                        "1152x2048",
                                        "3840x2160",
                                        "2160x3840",
                                        "Custom",
                                    ],
                                    tooltip="Image size. Select 'Custom' to use the custom width and height.",
                                ),
                                # custom_width/custom_height are only honored
                                # when size == "Custom" (validated in execute()).
                                IO.Int.Input(
                                    "custom_width",
                                    default=1024,
                                    min=1024,
                                    max=3840,
                                    step=16,
                                    tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16.",
                                ),
                                IO.Int.Input(
                                    "custom_height",
                                    default=1024,
                                    min=1024,
                                    max=3840,
                                    step=16,
                                    tooltip="Used only when `size` is 'Custom'. Must be a multiple of 16.",
                                ),
                                # gpt-image-2 deliberately omits "transparent"
                                # (legacy models offer it via
                                # _gpt_image_legacy_model_inputs()).
                                IO.Combo.Input(
                                    "background",
                                    default="auto",
                                    options=["auto", "opaque"],
                                    tooltip="Return image with or without background.",
                                ),
                                *_gpt_image_shared_inputs(),
                            ],
                        ),
                        IO.DynamicCombo.Option("gpt-image-1.5", _gpt_image_legacy_model_inputs()),
                        IO.DynamicCombo.Option("gpt-image-1", _gpt_image_legacy_model_inputs()),
                    ],
                ),
                IO.Int.Input(
                    "n",
                    default=1,
                    min=1,
                    max=8,
                    step=1,
                    tooltip="How many images to generate",
                    display_mode=IO.NumberDisplay.number,
                ),
                # Seed is accepted for cache-busting / reproducibility UI but is
                # not forwarded to the API (see tooltip; unused in execute()).
                IO.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=2147483647,
                    step=1,
                    display_mode=IO.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="not implemented yet in backend",
                ),
            ],
            outputs=[IO.Image.Output()],
            hidden=[
                IO.Hidden.auth_token_comfy_org,
                IO.Hidden.api_key_comfy_org,
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
            # JSONata expression evaluated client-side: looks up a per-model,
            # per-quality [min, max] USD range and scales it by n (n==0/null
            # treated as 1).
            price_badge=IO.PriceBadge(
                depends_on=IO.PriceBadgeDepends(widgets=["model", "model.quality", "n"]),
                expr="""
                (
                    $ranges := {
                        "gpt-image-1": {
                            "low": [0.011, 0.02],
                            "medium": [0.042, 0.07],
                            "high": [0.167, 0.25]
                        },
                        "gpt-image-1.5": {
                            "low": [0.009, 0.02],
                            "medium": [0.034, 0.062],
                            "high": [0.133, 0.22]
                        },
                        "gpt-image-2": {
                            "low": [0.0048, 0.019],
                            "medium": [0.041, 0.168],
                            "high": [0.165, 0.67]
                        }
                    };
                    $range := $lookup($lookup($ranges, widgets.model), $lookup(widgets, "model.quality"));
                    $nRaw := widgets.n;
                    $n := ($nRaw != null and $nRaw != 0) ? $nRaw : 1;
                    ($n = 1)
                        ? {"type":"range_usd","min_usd": $range[0], "max_usd": $range[1], "format": {"approximate": true}}
                        : {
                            "type":"range_usd",
                            "min_usd": $range[0] * $n,
                            "max_usd": $range[1] * $n,
                            "format": { "suffix": "/Run", "approximate": true }
                        }
                )
                """,
            ),
        )

    @classmethod
    async def execute(
        cls,
        prompt: str,
        model: dict,
        n: int,
        seed: int,  # accepted but not sent to the API (see schema tooltip)
    ) -> IO.NodeOutput:
        """Validate inputs, then call the edits endpoint (if reference images
        are present) or the generations endpoint, returning the decoded image.

        Raises:
            ValueError: on empty prompt, invalid Custom resolution, a mask
                without a reference image, or an unknown model id.
            Exception: on mask/image count or size mismatch.
        """
        validate_string(prompt, strip_whitespace=False)

        # `model` is the DynamicCombo payload: selected option id under
        # "model", plus that option's widget values keyed by input name.
        model_id = model["model"]
        size = model["size"]
        background = model["background"]
        quality = model["quality"]
        # Only the gpt-image-2 option defines these widgets; default otherwise.
        custom_width = model.get("custom_width", 1024)
        custom_height = model.get("custom_height", 1024)

        images_dict = model.get("images") or {}
        # Drop unconnected autogrow sockets (None values).
        image_tensors: list[Input.Image] = [t for t in images_dict.values() if t is not None]
        n_images = sum(get_number_of_images(t) for t in image_tensors)
        mask = model.get("mask")

        if mask is not None and n_images == 0:
            raise ValueError("Cannot use a mask without an input image")

        if size == "Custom":
            # Constraints below mirror the widget limits (multiple of 16,
            # max edge 3840) plus API-side pixel-count and aspect bounds.
            if custom_width % 16 != 0 or custom_height % 16 != 0:
                raise ValueError(
                    f"Custom width and height must be multiples of 16, got {custom_width}x{custom_height}"
                )
            if max(custom_width, custom_height) > 3840:
                raise ValueError(
                    f"Custom resolution max edge must be <= 3840, got {custom_width}x{custom_height}"
                )
            ratio = max(custom_width, custom_height) / min(custom_width, custom_height)
            if ratio > 3:
                raise ValueError(
                    f"Custom resolution aspect ratio must not exceed 3:1, got {custom_width}x{custom_height}"
                )
            total_pixels = custom_width * custom_height
            # 655_360 = 1024*640 minimum; 8_294_400 = 3840*2160 maximum.
            if not 655_360 <= total_pixels <= 8_294_400:
                raise ValueError(
                    f"Custom resolution total pixels must be between 655,360 and 8,294,400, got {total_pixels}"
                )
            # Rewrite "Custom" into the concrete "WxH" string the API expects.
            size = f"{custom_width}x{custom_height}"

        # Pick the per-model token-price extractor used for billing display.
        if model_id == "gpt-image-1":
            price_extractor = calculate_tokens_price_image_1
        elif model_id == "gpt-image-1.5":
            price_extractor = calculate_tokens_price_image_1_5
        elif model_id == "gpt-image-2":
            price_extractor = calculate_tokens_price_image_2_0
        else:
            raise ValueError(f"Unknown model: {model_id}")

        if image_tensors:
            # Edit path: flatten all inputs into single-image (1,H,W,C) tensors.
            # assumes each tensor is (B,H,W,C) when 4-D, (H,W,C) otherwise —
            # standard ComfyUI image layout; TODO confirm against Input.Image.
            flat: list[torch.Tensor] = []
            for tensor in image_tensors:
                if len(tensor.shape) == 4:
                    flat.extend(tensor[i : i + 1] for i in range(tensor.shape[0]))
                else:
                    flat.append(tensor.unsqueeze(0))

            files = []
            for i, single_image in enumerate(flat):
                # Cap uploads at ~4 MP before PNG-encoding for multipart upload.
                scaled_image = downscale_image_tensor(single_image, total_pixels=2048 * 2048).squeeze()
                image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                img = Image.fromarray(image_np)
                img_byte_arr = BytesIO()
                img.save(img_byte_arr, format="PNG")
                img_byte_arr.seek(0)

                # Single image uses field "image"; multiple use "image[]".
                if len(flat) == 1:
                    files.append(("image", (f"image_{i}.png", img_byte_arr, "image/png")))
                else:
                    files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png")))

            if mask is not None:
                if len(flat) != 1:
                    raise Exception("Cannot use a mask with multiple image")
                ref_image = flat[0]
                # Compare mask (H,W) against image (H,W) — ref is (1,H,W,C).
                if mask.shape[1:] != ref_image.shape[1:-1]:
                    raise Exception("Mask and Image must be the same size")
                _, height, width = mask.shape
                # OpenAI expects an RGBA mask whose alpha marks KEPT pixels,
                # so alpha = 1 - mask (white mask areas become transparent).
                rgba_mask = torch.zeros(height, width, 4, device="cpu")
                rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
                scaled_mask = downscale_image_tensor(
                    rgba_mask.unsqueeze(0), total_pixels=2048 * 2048
                ).squeeze()
                mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
                mask_img = Image.fromarray(mask_np)
                mask_img_byte_arr = BytesIO()
                mask_img.save(mask_img_byte_arr, format="PNG")
                mask_img_byte_arr.seek(0)
                files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png")))

            response = await sync_op(
                cls,
                ApiEndpoint(path="/proxy/openai/images/edits", method="POST"),
                response_model=OpenAIImageGenerationResponse,
                data=OpenAIImageEditRequest(
                    model=model_id,
                    prompt=prompt,
                    quality=quality,
                    background=background,
                    n=n,
                    size=size,
                    moderation="low",
                ),
                content_type="multipart/form-data",
                files=files,
                price_extractor=price_extractor,
            )
        else:
            # Text-to-image path: plain JSON request, no file uploads.
            response = await sync_op(
                cls,
                ApiEndpoint(path="/proxy/openai/images/generations", method="POST"),
                response_model=OpenAIImageGenerationResponse,
                data=OpenAIImageGenerationRequest(
                    model=model_id,
                    prompt=prompt,
                    quality=quality,
                    background=background,
                    n=n,
                    size=size,
                    moderation="low",
                ),
                price_extractor=price_extractor,
            )
        return IO.NodeOutput(await validate_and_cast_response(response))
|
|
||||||
|
|
||||||
class OpenAIChatNode(IO.ComfyNode):
|
class OpenAIChatNode(IO.ComfyNode):
|
||||||
"""
|
"""
|
||||||
Node to generate text responses from an OpenAI model.
|
Node to generate text responses from an OpenAI model.
|
||||||
@ -999,6 +1311,7 @@ class OpenAIExtension(ComfyExtension):
|
|||||||
OpenAIDalle2,
|
OpenAIDalle2,
|
||||||
OpenAIDalle3,
|
OpenAIDalle3,
|
||||||
OpenAIGPTImage1,
|
OpenAIGPTImage1,
|
||||||
|
OpenAIGPTImageNodeV2,
|
||||||
OpenAIChatNode,
|
OpenAIChatNode,
|
||||||
OpenAIInputFiles,
|
OpenAIInputFiles,
|
||||||
OpenAIChatConfig,
|
OpenAIChatConfig,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user