Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-12-23 04:50:49 +08:00)
feat(api-nodes): add GPT-Image-1.5 (#11368)
This commit is contained in:
parent 3a5f239cb6
commit 887143854b

comfy_api_nodes/apis/openai_api.py (new file, 52 lines)
@@ -0,0 +1,52 @@
+from pydantic import BaseModel, Field
+
+
+class Datum2(BaseModel):
+    b64_json: str | None = Field(None, description="Base64 encoded image data")
+    revised_prompt: str | None = Field(None, description="Revised prompt")
+    url: str | None = Field(None, description="URL of the image")
+
+
+class InputTokensDetails(BaseModel):
+    image_tokens: int | None = None
+    text_tokens: int | None = None
+
+
+class Usage(BaseModel):
+    input_tokens: int | None = None
+    input_tokens_details: InputTokensDetails | None = None
+    output_tokens: int | None = None
+    total_tokens: int | None = None
+
+
+class OpenAIImageGenerationResponse(BaseModel):
+    data: list[Datum2] | None = None
+    usage: Usage | None = None
+
+
+class OpenAIImageEditRequest(BaseModel):
+    background: str | None = Field(None, description="Background transparency")
+    model: str = Field(...)
+    moderation: str | None = Field(None)
+    n: int | None = Field(None, description="The number of images to generate")
+    output_compression: int | None = Field(None, description="Compression level for JPEG or WebP (0-100)")
+    output_format: str | None = Field(None)
+    prompt: str = Field(...)
+    quality: str | None = Field(None, description="The quality of the generated image")
+    size: str | None = Field(None, description="Size of the output image")
+
+
+class OpenAIImageGenerationRequest(BaseModel):
+    background: str | None = Field(None, description="Background transparency")
+    model: str | None = Field(None)
+    moderation: str | None = Field(None)
+    n: int | None = Field(
+        None,
+        description="The number of images to generate.",
+    )
+    output_compression: int | None = Field(None, description="Compression level for JPEG or WebP (0-100)")
+    output_format: str | None = Field(None)
+    prompt: str = Field(...)
+    quality: str | None = Field(None, description="The quality of the generated image")
+    size: str | None = Field(None, description="Size of the image (e.g., 1024x1024, 1536x1024, auto)")
+    style: str | None = Field(None, description="Style of the image (only for dall-e-3)")
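These pydantic models mirror the JSON bodies of the OpenAI image endpoints used further down. A minimal sketch of how a response payload would deserialize (the payload values are invented for illustration; only the field names come from the models above):

    from comfy_api_nodes.apis.openai_api import OpenAIImageGenerationResponse

    # hypothetical payload; real ones come back from the proxy endpoints below
    payload = {
        "data": [{"b64_json": "aGVsbG8=", "revised_prompt": "a red fox"}],
        "usage": {"input_tokens": 500, "output_tokens": 4160, "total_tokens": 4660},
    }
    response = OpenAIImageGenerationResponse(**payload)
    assert response.usage.output_tokens == 4160  # feeds the price extractors below

Because every field in the response models is optional, partial responses (for example, URL-only data entries) validate without errors.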
@@ -1,46 +1,45 @@
-from io import BytesIO
+import base64
 import os
 from enum import Enum
-from inspect import cleandoc
+from io import BytesIO

 import numpy as np
 import torch
 from PIL import Image
-import folder_paths
-import base64
-from comfy_api.latest import IO, ComfyExtension
 from typing_extensions import override

+import folder_paths
+from comfy_api.latest import IO, ComfyExtension, Input
 from comfy_api_nodes.apis import (
-    OpenAIImageGenerationRequest,
-    OpenAIImageEditRequest,
-    OpenAIImageGenerationResponse,
-    OpenAICreateResponse,
-    OpenAIResponse,
     CreateModelResponseProperties,
-    Item,
-    OutputContent,
-    InputImageContent,
     Detail,
-    InputTextContent,
-    InputMessage,
-    InputMessageContentList,
     InputContent,
     InputFileContent,
+    InputImageContent,
+    InputMessage,
+    InputMessageContentList,
+    InputTextContent,
+    Item,
+    OpenAICreateResponse,
+    OpenAIResponse,
+    OutputContent,
+)
+from comfy_api_nodes.apis.openai_api import (
+    OpenAIImageEditRequest,
+    OpenAIImageGenerationRequest,
+    OpenAIImageGenerationResponse,
 )
 from comfy_api_nodes.util import (
-    downscale_image_tensor,
-    download_url_to_bytesio,
-    validate_string,
-    tensor_to_base64_string,
     ApiEndpoint,
-    sync_op,
+    download_url_to_bytesio,
+    downscale_image_tensor,
     poll_op,
+    sync_op,
+    tensor_to_base64_string,
     text_filepath_to_data_uri,
+    validate_string,
 )


 RESPONSES_ENDPOINT = "/proxy/openai/v1/responses"
 STARTING_POINT_ID_PATTERN = r"<starting_point_id:(.*)>"
@@ -98,9 +97,6 @@ async def validate_and_cast_response(response, timeout: int = None) -> torch.Tensor:


 class OpenAIDalle2(IO.ComfyNode):
-    """
-    Generates images synchronously via OpenAI's DALL·E 2 endpoint.
-    """

     @classmethod
     def define_schema(cls):
@@ -108,7 +104,7 @@ class OpenAIDalle2(IO.ComfyNode):
             node_id="OpenAIDalle2",
             display_name="OpenAI DALL·E 2",
             category="api node/image/OpenAI",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously via OpenAI's DALL·E 2 endpoint.",
             inputs=[
                 IO.String.Input(
                     "prompt",
@@ -234,9 +230,6 @@ class OpenAIDalle2(IO.ComfyNode):


 class OpenAIDalle3(IO.ComfyNode):
-    """
-    Generates images synchronously via OpenAI's DALL·E 3 endpoint.
-    """

     @classmethod
     def define_schema(cls):
@@ -244,7 +237,7 @@ class OpenAIDalle3(IO.ComfyNode):
             node_id="OpenAIDalle3",
             display_name="OpenAI DALL·E 3",
             category="api node/image/OpenAI",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously via OpenAI's DALL·E 3 endpoint.",
             inputs=[
                 IO.String.Input(
                     "prompt",
@@ -326,10 +319,16 @@ class OpenAIDalle3(IO.ComfyNode):
         return IO.NodeOutput(await validate_and_cast_response(response))


+def calculate_tokens_price_image_1(response: OpenAIImageGenerationResponse) -> float | None:
+    # https://platform.openai.com/docs/pricing
+    return ((response.usage.input_tokens * 10.0) + (response.usage.output_tokens * 40.0)) / 1_000_000.0
+
+
+def calculate_tokens_price_image_1_5(response: OpenAIImageGenerationResponse) -> float | None:
+    return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 32.0)) / 1_000_000.0
+
+
 class OpenAIGPTImage1(IO.ComfyNode):
-    """
-    Generates images synchronously via OpenAI's GPT Image 1 endpoint.
-    """

     @classmethod
     def define_schema(cls):
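Plugging round numbers into these rates shows why the node needs a per-model price extractor (the token counts are invented for illustration; the dollars-per-million-token rates are the ones hard-coded above):

    # hypothetical usage: 500 input tokens, 4160 output tokens
    # gpt-image-1:   (500 * 10.0 + 4160 * 40.0) / 1_000_000 = $0.1714 per call
    # gpt-image-1.5: (500 * 8.0  + 4160 * 32.0) / 1_000_000 = $0.1371 per call

Note that both functions assume response.usage is populated; since usage is optional on the response model, a response without it would raise an AttributeError rather than return None.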
@@ -337,13 +336,13 @@ class OpenAIGPTImage1(IO.ComfyNode):
             node_id="OpenAIGPTImage1",
             display_name="OpenAI GPT Image 1",
             category="api node/image/OpenAI",
-            description=cleandoc(cls.__doc__ or ""),
+            description="Generates images synchronously via OpenAI's GPT Image 1 endpoint.",
             inputs=[
                 IO.String.Input(
                     "prompt",
                     default="",
                     multiline=True,
-                    tooltip="Text prompt for GPT Image 1",
+                    tooltip="Text prompt for GPT Image",
                 ),
                 IO.Int.Input(
                     "seed",
@@ -365,8 +364,8 @@
                 ),
                 IO.Combo.Input(
                     "background",
-                    default="opaque",
-                    options=["opaque", "transparent"],
+                    default="auto",
+                    options=["auto", "opaque", "transparent"],
                     tooltip="Return image with or without background",
                     optional=True,
                 ),
@@ -397,6 +396,11 @@
                     tooltip="Optional mask for inpainting (white areas will be replaced)",
                     optional=True,
                 ),
+                IO.Combo.Input(
+                    "model",
+                    options=["gpt-image-1", "gpt-image-1.5"],
+                    optional=True,
+                ),
             ],
             outputs=[
                 IO.Image.Output(),
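A design note on this new input: because the model combo is optional and execute (below) defaults model to "gpt-image-1", workflows saved before this change keep their old behavior; only graphs that explicitly pick gpt-image-1.5 get the new model and its cheaper per-token pricing.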
@@ -412,32 +416,34 @@
     @classmethod
     async def execute(
         cls,
-        prompt,
-        seed=0,
-        quality="low",
-        background="opaque",
-        image=None,
-        mask=None,
-        n=1,
-        size="1024x1024",
+        prompt: str,
+        seed: int = 0,
+        quality: str = "low",
+        background: str = "opaque",
+        image: Input.Image | None = None,
+        mask: Input.Image | None = None,
+        n: int = 1,
+        size: str = "1024x1024",
+        model: str = "gpt-image-1",
     ) -> IO.NodeOutput:
         validate_string(prompt, strip_whitespace=False)
-        model = "gpt-image-1"
-        path = "/proxy/openai/images/generations"
-        content_type = "application/json"
-        request_class = OpenAIImageGenerationRequest
-        files = []
+        if mask is not None and image is None:
+            raise ValueError("Cannot use a mask without an input image")
+
+        if model == "gpt-image-1":
+            price_extractor = calculate_tokens_price_image_1
+        elif model == "gpt-image-1.5":
+            price_extractor = calculate_tokens_price_image_1_5
+        else:
+            raise ValueError(f"Unknown model: {model}")

         if image is not None:
-            path = "/proxy/openai/images/edits"
-            request_class = OpenAIImageEditRequest
-            content_type = "multipart/form-data"
+            files = []

             batch_size = image.shape[0]

             for i in range(batch_size):
-                single_image = image[i : i + 1]
-                scaled_image = downscale_image_tensor(single_image).squeeze()
+                single_image = image[i: i + 1]
+                scaled_image = downscale_image_tensor(single_image, total_pixels=2048*2048).squeeze()

                 image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                 img = Image.fromarray(image_np)
@@ -450,44 +456,59 @@
                 else:
                     files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png")))

-        if mask is not None:
-            if image is None:
-                raise Exception("Cannot use a mask without an input image")
-            if image.shape[0] != 1:
-                raise Exception("Cannot use a mask with multiple image")
-            if mask.shape[1:] != image.shape[1:-1]:
-                raise Exception("Mask and Image must be the same size")
-            batch, height, width = mask.shape
-            rgba_mask = torch.zeros(height, width, 4, device="cpu")
-            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
-
-            scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0)).squeeze()
-
-            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
-            mask_img = Image.fromarray(mask_np)
-            mask_img_byte_arr = BytesIO()
-            mask_img.save(mask_img_byte_arr, format="PNG")
-            mask_img_byte_arr.seek(0)
-            files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png")))
-
-        # Build the operation
-        response = await sync_op(
-            cls,
-            ApiEndpoint(path=path, method="POST"),
-            response_model=OpenAIImageGenerationResponse,
-            data=request_class(
-                model=model,
-                prompt=prompt,
-                quality=quality,
-                background=background,
-                n=n,
-                seed=seed,
-                size=size,
-            ),
-            files=files if files else None,
-            content_type=content_type,
-        )
+            if mask is not None:
+                if image.shape[0] != 1:
+                    raise Exception("Cannot use a mask with multiple image")
+                if mask.shape[1:] != image.shape[1:-1]:
+                    raise Exception("Mask and Image must be the same size")
+                _, height, width = mask.shape
+                rgba_mask = torch.zeros(height, width, 4, device="cpu")
+                rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()
+
+                scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0), total_pixels=2048*2048).squeeze()
+
+                mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
+                mask_img = Image.fromarray(mask_np)
+                mask_img_byte_arr = BytesIO()
+                mask_img.save(mask_img_byte_arr, format="PNG")
+                mask_img_byte_arr.seek(0)
+                files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png")))
+
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/openai/images/edits", method="POST"),
+                response_model=OpenAIImageGenerationResponse,
+                data=OpenAIImageEditRequest(
+                    model=model,
+                    prompt=prompt,
+                    quality=quality,
+                    background=background,
+                    n=n,
+                    seed=seed,
+                    size=size,
+                    moderation="low",
+                ),
+                content_type="multipart/form-data",
+                files=files,
+                price_extractor=price_extractor,
+            )
+        else:
+            response = await sync_op(
+                cls,
+                ApiEndpoint(path="/proxy/openai/images/generations", method="POST"),
+                response_model=OpenAIImageGenerationResponse,
+                data=OpenAIImageGenerationRequest(
+                    model=model,
+                    prompt=prompt,
+                    quality=quality,
+                    background=background,
+                    n=n,
+                    seed=seed,
+                    size=size,
+                    moderation="low",
+                ),
+                price_extractor=price_extractor,
+            )
         return IO.NodeOutput(await validate_and_cast_response(response))
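One detail worth noting in the mask handling above: ComfyUI masks are 1.0 where pixels should be replaced (the tooltip says white areas will be replaced), while the images/edits endpoint treats transparent pixels as the editable region, so the code writes 1 - mask into the PNG alpha channel. A self-contained sketch of that inversion (tensor values are illustrative):

    import torch

    mask = torch.tensor([[1.0, 0.0], [0.5, 0.0]])  # 1.0 = repaint this pixel
    height, width = mask.shape
    rgba = torch.zeros(height, width, 4)
    rgba[:, :, 3] = 1 - mask           # alpha 0 where the mask is fully set
    assert rgba[0, 0, 3] == 0.0        # editable pixel ends up transparent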
@@ -129,7 +129,7 @@ def pil_to_bytesio(img: Image.Image, mime_type: str = "image/png") -> BytesIO:
     return img_byte_arr


-def downscale_image_tensor(image, total_pixels=1536 * 1024) -> torch.Tensor:
+def downscale_image_tensor(image: torch.Tensor, total_pixels: int = 1536 * 1024) -> torch.Tensor:
     """Downscale input image tensor to roughly the specified total pixels."""
     samples = image.movedim(-1, 1)
     total = int(total_pixels)
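With the parameter now typed and explicit, callers such as the GPT Image node above can opt into a larger 2048x2048 pixel budget than the 1536x1024 default. A usage sketch (the input shape is invented for illustration):

    import torch

    image = torch.rand(1, 4096, 4096, 3)  # ComfyUI image layout: B, H, W, C
    out = downscale_image_tensor(image, total_pixels=2048 * 2048)
    # per the docstring, out lands at roughly 2048x2048 (~4.2M pixels total)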