diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index 90a29c2f2..f9c70f675 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -357,13 +357,18 @@ def calculate_tokens_price_image_1_5(response: OpenAIImageGenerationResponse) -> return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 32.0)) / 1_000_000.0 +def calculate_tokens_price_image_2(response: OpenAIImageGenerationResponse) -> float | None: + # https://platform.openai.com/docs/pricing - gpt-image-2: input $8/1M, output $30/1M + return ((response.usage.input_tokens * 8.0) + (response.usage.output_tokens * 30.0)) / 1_000_000.0 + + class OpenAIGPTImage1(IO.ComfyNode): @classmethod def define_schema(cls): return IO.Schema( node_id="OpenAIGPTImage1", - display_name="OpenAI GPT Image 2", + display_name="OpenAI GPT Image 1 & 1.5", category="api node/image/OpenAI", description="Generates images synchronously via OpenAI's GPT Image endpoint.", inputs=[ @@ -427,8 +432,8 @@ class OpenAIGPTImage1(IO.ComfyNode): ), IO.Combo.Input( "model", - options=["gpt-image-1", "gpt-image-1.5", 'gpt-image-2'], - default="gpt-image-2", + options=["gpt-image-1", "gpt-image-1.5"], + default="gpt-image-1.5", optional=True, ), ], @@ -442,14 +447,22 @@ class OpenAIGPTImage1(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["quality", "n"]), + depends_on=IO.PriceBadgeDepends(widgets=["quality", "n", "model"]), expr=""" ( - $ranges := { - "low": [0.011, 0.02], - "medium": [0.046, 0.07], - "high": [0.167, 0.3] - }; + $m := widgets.model; + $ranges := + $contains($m, "gpt-image-1.5") + ? { + "low": [0.009, 0.016], + "medium": [0.037, 0.056], + "high": [0.134, 0.240] + } + : { + "low": [0.011, 0.020], + "medium": [0.046, 0.070], + "high": [0.167, 0.300] + }; $range := $lookup($ranges, widgets.quality); $n := widgets.n; ($n = 1) @@ -487,8 +500,6 @@ class OpenAIGPTImage1(IO.ComfyNode): price_extractor = calculate_tokens_price_image_1 elif model == "gpt-image-1.5": price_extractor = calculate_tokens_price_image_1_5 - elif model == "gpt-image-2": - price_extractor = calculate_tokens_price_image_1_5 else: raise ValueError(f"Unknown model: {model}") @@ -566,6 +577,261 @@ class OpenAIGPTImage1(IO.ComfyNode): return IO.NodeOutput(await validate_and_cast_response(response)) +_GPT_IMAGE_2_SIZES = [ + "auto", + "1024x1024", + "1536x1024", + "1024x1536", + "2048x2048", + "2048x1152", + "3840x2160", + "2160x3840", +] + + +def _resolve_gpt_image_2_size(size: str, custom_width: int, custom_height: int) -> str: + if custom_width <= 0 or custom_height <= 0: + return size + w, h = custom_width, custom_height + if max(w, h) > 3840: + raise ValueError(f"Maximum edge length must be ≤ 3840px, got {max(w, h)}") + if w % 16 != 0 or h % 16 != 0: + raise ValueError(f"Both edges must be multiples of 16px, got {w}x{h}") + if max(w, h) / min(w, h) > 3: + raise ValueError(f"Long-to-short edge ratio must not exceed 3:1, got {max(w, h) / min(w, h):.2f}:1") + total = w * h + if total < 655_360 or total > 8_294_400: + raise ValueError(f"Total pixels must be between 655,360 and 8,294,400, got {total:,}") + return f"{w}x{h}" + + +class OpenAIGPTImage2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="OpenAIGPTImage2", + display_name="OpenAI GPT Image 2", + category="api node/image/OpenAI", + description="Generates images synchronously via OpenAI's GPT-Image-2 endpoint.", + inputs=[ + IO.String.Input( + "prompt", + default="", + 
multiline=True, + tooltip="Text prompt for GPT Image 2", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2**31 - 1, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="not implemented yet in backend", + optional=True, + ), + IO.Combo.Input( + "quality", + default="auto", + options=["auto", "low", "medium", "high"], + tooltip="Image quality. 'auto' lets the model decide based on the prompt. Square images are typically fastest.", + optional=True, + ), + IO.Combo.Input( + "background", + default="auto", + options=["auto", "opaque"], + tooltip="Background style. GPT-Image-2 does not support transparent backgrounds.", + optional=True, + ), + IO.Combo.Input( + "size", + default="auto", + options=_GPT_IMAGE_2_SIZES, + tooltip="Output image dimensions. Ignored when custom_width and custom_height are both non-zero.", + optional=True, + ), + IO.Int.Input( + "custom_width", + default=0, + min=0, + max=3840, + step=16, + display_mode=IO.NumberDisplay.number, + tooltip="Custom output width in pixels. Set to 0 (default) to use the size preset. When both width and height are non-zero, they override the size preset. Slider enforces multiples of 16 and max edge 3840px. Additional constraints checked at generation: ratio ≤ 3:1, total pixels 655,360–8,294,400.", + optional=True, + ), + IO.Int.Input( + "custom_height", + default=0, + min=0, + max=3840, + step=16, + display_mode=IO.NumberDisplay.number, + tooltip="Custom output height in pixels. Set to 0 (default) to use the size preset. When both width and height are non-zero, they override the size preset. Slider enforces multiples of 16 and max edge 3840px. Additional constraints checked at generation: ratio ≤ 3:1, total pixels 655,360–8,294,400.", + optional=True, + ), + IO.Int.Input( + "num_images", + default=1, + min=1, + max=8, + step=1, + tooltip="Number of images to generate per run.", + display_mode=IO.NumberDisplay.number, + optional=True, + ), + IO.Image.Input( + "image", + tooltip="Optional reference image for image editing.", + optional=True, + ), + IO.Mask.Input( + "mask", + tooltip="Optional mask for inpainting (white areas will be replaced).", + optional=True, + ), + IO.Combo.Input( + "model", + options=["gpt-image-2"], + default="gpt-image-2", + tooltip="Model used for image generation.", + optional=True, + ), + ], + outputs=[ + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["quality", "num_images"]), + expr=""" + ( + $ranges := { + "low": [0.005, 0.010], + "medium": [0.041, 0.060], + "high": [0.165, 0.250] + }; + $q := widgets.quality; + $n := widgets.num_images; + $n := ($n != null and $n != 0) ? $n : 1; + $range := $lookup($ranges, $q); + $lo := $range ? $range[0] : 0.005; + $hi := $range ? $range[1] : 0.250; + ($n = 1) + ? {"type":"range_usd","min_usd": $lo, "max_usd": $hi, "format": {"approximate": ($range ? false : true)}} + : { + "type":"range_usd", + "min_usd": $lo, + "max_usd": $hi, + "format": {"approximate": ($range ? 
false : true), "suffix": " x " & $string($n) & "/Run"} + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + seed: int = 0, + quality: str = "auto", + background: str = "auto", + image: Input.Image | None = None, + mask: Input.Image | None = None, + num_images: int = 1, + size: str = "auto", + custom_width: int = 0, + custom_height: int = 0, + model: str = "gpt-image-2", + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=False) + + if mask is not None and image is None: + raise ValueError("Cannot use a mask without an input image") + + resolved_size = _resolve_gpt_image_2_size(size, custom_width, custom_height) + + if image is not None: + files = [] + batch_size = image.shape[0] + for i in range(batch_size): + single_image = image[i : i + 1] + scaled_image = downscale_image_tensor(single_image, total_pixels=2048 * 2048).squeeze() + + image_np = (scaled_image.numpy() * 255).astype(np.uint8) + img = Image.fromarray(image_np) + img_byte_arr = BytesIO() + img.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + + if batch_size == 1: + files.append(("image", (f"image_{i}.png", img_byte_arr, "image/png"))) + else: + files.append(("image[]", (f"image_{i}.png", img_byte_arr, "image/png"))) + + if mask is not None: + if image.shape[0] != 1: + raise Exception("Cannot use a mask with multiple image") + if mask.shape[1:] != image.shape[1:-1]: + raise Exception("Mask and Image must be the same size") + _, height, width = mask.shape + rgba_mask = torch.zeros(height, width, 4, device="cpu") + rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu() + + scaled_mask = downscale_image_tensor(rgba_mask.unsqueeze(0), total_pixels=2048 * 2048).squeeze() + + mask_np = (scaled_mask.numpy() * 255).astype(np.uint8) + mask_img = Image.fromarray(mask_np) + mask_img_byte_arr = BytesIO() + mask_img.save(mask_img_byte_arr, format="PNG") + mask_img_byte_arr.seek(0) + files.append(("mask", ("mask.png", mask_img_byte_arr, "image/png"))) + + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/openai/images/edits", method="POST"), + response_model=OpenAIImageGenerationResponse, + data=OpenAIImageEditRequest( + model=model, + prompt=prompt, + quality=quality, + background=background, + n=num_images, + size=resolved_size, + moderation="low", + ), + content_type="multipart/form-data", + files=files, + price_extractor=calculate_tokens_price_image_2, + ) + else: + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/openai/images/generations", method="POST"), + response_model=OpenAIImageGenerationResponse, + data=OpenAIImageGenerationRequest( + model=model, + prompt=prompt, + quality=quality, + background=background, + n=num_images, + size=resolved_size, + moderation="low", + ), + price_extractor=calculate_tokens_price_image_2, + ) + return IO.NodeOutput(await validate_and_cast_response(response)) + + class OpenAIChatNode(IO.ComfyNode): """ Node to generate text responses from an OpenAI model. 
@@ -915,6 +1181,7 @@ class OpenAIExtension(ComfyExtension): OpenAIDalle2, OpenAIDalle3, OpenAIGPTImage1, + OpenAIGPTImage2, OpenAIChatNode, OpenAIInputFiles, OpenAIChatConfig, diff --git a/tests-unit/comfy_api_test/openai_nodes_test.py b/tests-unit/comfy_api_test/openai_nodes_test.py new file mode 100644 index 000000000..3cfceeb89 --- /dev/null +++ b/tests-unit/comfy_api_test/openai_nodes_test.py @@ -0,0 +1,246 @@ +import pytest + +from comfy_api_nodes.nodes_openai import ( + OpenAIGPTImage1, + OpenAIGPTImage2, + _GPT_IMAGE_2_SIZES, + _resolve_gpt_image_2_size, + calculate_tokens_price_image_1, + calculate_tokens_price_image_1_5, + calculate_tokens_price_image_2, +) +from comfy_api_nodes.apis.openai import OpenAIImageGenerationResponse, Usage + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_response(input_tokens: int, output_tokens: int) -> OpenAIImageGenerationResponse: + return OpenAIImageGenerationResponse( + data=[], + usage=Usage(input_tokens=input_tokens, output_tokens=output_tokens), + ) + + +# --------------------------------------------------------------------------- +# Price extractor tests +# --------------------------------------------------------------------------- + +def test_price_image_1_formula(): + response = _make_response(input_tokens=1_000_000, output_tokens=1_000_000) + assert calculate_tokens_price_image_1(response) == pytest.approx(50.0) + + +def test_price_image_1_5_formula(): + response = _make_response(input_tokens=1_000_000, output_tokens=1_000_000) + assert calculate_tokens_price_image_1_5(response) == pytest.approx(40.0) + + +def test_price_image_2_formula(): + response = _make_response(input_tokens=1_000_000, output_tokens=1_000_000) + assert calculate_tokens_price_image_2(response) == pytest.approx(38.0) + + +def test_price_image_2_cheaper_than_1(): + response = _make_response(input_tokens=500, output_tokens=196) + assert calculate_tokens_price_image_2(response) < calculate_tokens_price_image_1(response) + + +def test_price_image_2_cheaper_output_than_1_5(): + # gpt-image-2 output rate ($30/1M) is lower than gpt-image-1.5 ($32/1M) + response = _make_response(input_tokens=0, output_tokens=1_000_000) + assert calculate_tokens_price_image_2(response) < calculate_tokens_price_image_1_5(response) + + +# --------------------------------------------------------------------------- +# _resolve_gpt_image_2_size tests +# --------------------------------------------------------------------------- + +def test_resolve_preset_passthrough_when_custom_zero(): + # 0/0 means "use size preset" + assert _resolve_gpt_image_2_size("1024x1024", 0, 0) == "1024x1024" + assert _resolve_gpt_image_2_size("auto", 0, 0) == "auto" + assert _resolve_gpt_image_2_size("3840x2160", 0, 0) == "3840x2160" + + +def test_resolve_preset_passthrough_when_only_one_dim_set(): + # only one dimension set → still use preset + assert _resolve_gpt_image_2_size("auto", 1024, 0) == "auto" + assert _resolve_gpt_image_2_size("auto", 0, 1024) == "auto" + + +def test_resolve_custom_overrides_preset(): + assert _resolve_gpt_image_2_size("auto", 1024, 1024) == "1024x1024" + assert _resolve_gpt_image_2_size("1024x1024", 2048, 1152) == "2048x1152" + assert _resolve_gpt_image_2_size("auto", 3840, 2160) == "3840x2160" + + +def test_resolve_custom_rejects_edge_too_large(): + with pytest.raises(ValueError, match="3840"): + _resolve_gpt_image_2_size("auto", 4096, 1024) + + +def 
test_resolve_custom_rejects_non_multiple_of_16():
+    with pytest.raises(ValueError, match="multiples of 16"):
+        _resolve_gpt_image_2_size("auto", 1025, 1024)
+
+
+def test_resolve_custom_rejects_bad_ratio():
+    with pytest.raises(ValueError, match="ratio"):
+        _resolve_gpt_image_2_size("auto", 3840, 1024)  # 3.75:1 > 3:1
+
+
+def test_resolve_custom_rejects_too_few_pixels():
+    with pytest.raises(ValueError, match="Total pixels"):
+        _resolve_gpt_image_2_size("auto", 16, 16)
+
+
+def test_resolve_custom_rejects_too_many_pixels():
+    # 3840x2176 exceeds 8,294,400
+    with pytest.raises(ValueError, match="Total pixels"):
+        _resolve_gpt_image_2_size("auto", 3840, 2176)
+
+
+# ---------------------------------------------------------------------------
+# OpenAIGPTImage1 schema tests
+# ---------------------------------------------------------------------------
+
+class TestOpenAIGPTImage1Schema:
+    def setup_method(self):
+        self.schema = OpenAIGPTImage1.define_schema()
+
+    def test_node_id(self):
+        assert self.schema.node_id == "OpenAIGPTImage1"
+
+    def test_display_name(self):
+        assert self.schema.display_name == "OpenAI GPT Image 1 & 1.5"
+
+    def test_model_options_exclude_gpt_image_2(self):
+        model_input = next(i for i in self.schema.inputs if i.name == "model")
+        assert "gpt-image-2" not in model_input.options
+
+    def test_model_options_include_legacy_models(self):
+        model_input = next(i for i in self.schema.inputs if i.name == "model")
+        assert "gpt-image-1" in model_input.options
+        assert "gpt-image-1.5" in model_input.options
+
+    def test_has_background_with_transparent(self):
+        bg_input = next(i for i in self.schema.inputs if i.name == "background")
+        assert "transparent" in bg_input.options
+
+
+# ---------------------------------------------------------------------------
+# OpenAIGPTImage2 schema tests
+# ---------------------------------------------------------------------------
+
+class TestOpenAIGPTImage2Schema:
+    def setup_method(self):
+        self.schema = OpenAIGPTImage2.define_schema()
+
+    def test_node_id(self):
+        assert self.schema.node_id == "OpenAIGPTImage2"
+
+    def test_display_name(self):
+        assert self.schema.display_name == "OpenAI GPT Image 2"
+
+    def test_category(self):
+        assert "OpenAI" in self.schema.category
+
+    def test_no_transparent_background(self):
+        bg_input = next(i for i in self.schema.inputs if i.name == "background")
+        assert "transparent" not in bg_input.options
+
+    def test_background_options(self):
+        bg_input = next(i for i in self.schema.inputs if i.name == "background")
+        assert set(bg_input.options) == {"auto", "opaque"}
+
+    def test_quality_options(self):
+        quality_input = next(i for i in self.schema.inputs if i.name == "quality")
+        assert set(quality_input.options) == {"auto", "low", "medium", "high"}
+
+    def test_quality_default_is_auto(self):
+        quality_input = next(i for i in self.schema.inputs if i.name == "quality")
+        assert quality_input.default == "auto"
+
+    def test_all_popular_sizes_present(self):
+        size_input = next(i for i in self.schema.inputs if i.name == "size")
+        for size in ["1024x1024", "1536x1024", "1024x1536", "2048x2048", "2048x1152", "3840x2160", "2160x3840"]:
+            assert size in size_input.options, f"Missing size: {size}"
+
+    def test_no_custom_size_option(self):
+        size_input = next(i for i in self.schema.inputs if i.name == "size")
+        assert "custom" not in size_input.options
+
+    def test_size_default_is_auto(self):
+        size_input = next(i for i in self.schema.inputs if i.name == "size")
+        assert size_input.default == "auto"
+
+    def 
test_custom_width_and_height_inputs_exist(self): + input_names = [i.name for i in self.schema.inputs] + assert "custom_width" in input_names + assert "custom_height" in input_names + + def test_custom_width_height_default_zero(self): + width_input = next(i for i in self.schema.inputs if i.name == "custom_width") + height_input = next(i for i in self.schema.inputs if i.name == "custom_height") + assert width_input.default == 0 + assert height_input.default == 0 + + def test_custom_width_height_step_is_16(self): + width_input = next(i for i in self.schema.inputs if i.name == "custom_width") + height_input = next(i for i in self.schema.inputs if i.name == "custom_height") + assert width_input.step == 16 + assert height_input.step == 16 + + def test_custom_width_height_max_is_3840(self): + width_input = next(i for i in self.schema.inputs if i.name == "custom_width") + height_input = next(i for i in self.schema.inputs if i.name == "custom_height") + assert width_input.max == 3840 + assert height_input.max == 3840 + + def test_uses_num_images_not_n(self): + input_names = [i.name for i in self.schema.inputs] + assert "num_images" in input_names + assert "n" not in input_names + + def test_model_input_shows_gpt_image_2(self): + model_input = next(i for i in self.schema.inputs if i.name == "model") + assert model_input.options == ["gpt-image-2"] + assert model_input.default == "gpt-image-2" + + def test_has_image_and_mask_inputs(self): + input_names = [i.name for i in self.schema.inputs] + assert "image" in input_names + assert "mask" in input_names + + def test_is_api_node(self): + assert self.schema.is_api_node is True + + def test_sizes_match_constant(self): + size_input = next(i for i in self.schema.inputs if i.name == "size") + assert size_input.options == _GPT_IMAGE_2_SIZES + + +# --------------------------------------------------------------------------- +# OpenAIGPTImage2 execute validation tests +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_execute_raises_on_empty_prompt(): + with pytest.raises(Exception): + await OpenAIGPTImage2.execute(prompt=" ") + + +@pytest.mark.asyncio +async def test_execute_raises_mask_without_image(): + import torch + mask = torch.ones(1, 64, 64) + with pytest.raises(ValueError, match="mask without an input image"): + await OpenAIGPTImage2.execute(prompt="test", mask=mask) + + +@pytest.mark.asyncio +async def test_execute_raises_invalid_custom_size(): + with pytest.raises(ValueError): + await OpenAIGPTImage2.execute(prompt="test", custom_width=4096, custom_height=1024)
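Finally, a worked example of the gpt-image-2 pricing math, using the $8/1M-input and $30/1M-output rates cited in the `calculate_tokens_price_image_2` comment; the token counts below are made up:

from comfy_api_nodes.nodes_openai import calculate_tokens_price_image_2
from comfy_api_nodes.apis.openai import OpenAIImageGenerationResponse, Usage

# Hypothetical usage: 500 prompt tokens in, 6,000 image tokens out.
# (500 * $8 + 6,000 * $30) / 1,000,000 = (4,000 + 180,000) / 1e6 = $0.184
response = OpenAIImageGenerationResponse(
    data=[],
    usage=Usage(input_tokens=500, output_tokens=6_000),
)
assert abs(calculate_tokens_price_image_2(response) - 0.184) < 1e-9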