diff --git a/comfy_extras/nodes_images.py b/comfy_extras/nodes_images.py index 1ac740d1d..42298d6bd 100644 --- a/comfy_extras/nodes_images.py +++ b/comfy_extras/nodes_images.py @@ -3,15 +3,23 @@ from __future__ import annotations import nodes import folder_paths +import av import json + import os import re import math +import numpy as np +import struct import torch + +import zlib import comfy.utils +from fractions import Fraction from server import PromptServer from comfy_api.latest import ComfyExtension, IO, UI +from comfy.cli_args import args from typing_extensions import override SVG = IO.SVG.Type # TODO: temporary solution for backward compatibility, will be removed later. @@ -830,6 +838,405 @@ class ImageMergeTileList(IO.ComfyNode): return IO.NodeOutput(merged_image) +# --------------------------------------------------------------------------- +# Format specifications +# --------------------------------------------------------------------------- + +# Maps (file_format, bit_depth, has_alpha) -> (numpy dtype scale, av pixel format, +# stream pix_fmt). Keeps the encode path declarative instead of branchy. +_FORMAT_SPECS = { + ("png", "8-bit", False): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgb24", "stream_fmt": "rgb24"}, + ("png", "8-bit", True): {"scale": 255.0, "dtype": np.uint8, "frame_fmt": "rgba", "stream_fmt": "rgba"}, + ("png", "16-bit", False): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgb48le", "stream_fmt": "rgb48be"}, + ("png", "16-bit", True): {"scale": 65535.0, "dtype": np.uint16, "frame_fmt": "rgba64le", "stream_fmt": "rgba64be"}, + ("exr", "32-bit float", False): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrpf32le", "stream_fmt": "gbrpf32le"}, + ("exr", "32-bit float", True): {"scale": 1.0, "dtype": np.float32, "frame_fmt": "gbrapf32le", "stream_fmt": "gbrapf32le"}, +} + + +# --------------------------------------------------------------------------- +# Color transforms +# --------------------------------------------------------------------------- + +def srgb_to_linear(t: torch.Tensor) -> torch.Tensor: + """Inverse sRGB EOTF (IEC 61966-2-1). Operates on RGB channels only; + alpha (if present as the 4th channel) is passed through unchanged.""" + if t.shape[-1] == 4: + rgb, alpha = t[..., :3], t[..., 3:] + return torch.cat([srgb_to_linear(rgb), alpha], dim=-1) + + # Piecewise: linear toe below 0.04045, gamma curve above. + low = t / 12.92 + high = ((t.clamp(min=0.0) + 0.055) / 1.055) ** 2.4 + return torch.where(t <= 0.04045, low, high) + + +# HLG OETF constants from BT.2100 Table 5. +_HLG_A = 0.17883277 +_HLG_B = 0.28466892 +_HLG_C = 0.55991072928 # = 0.5 - a*ln(4*a) + + +def hlg_to_linear(t: torch.Tensor) -> torch.Tensor: + """Inverse HLG OETF (BT.2100). Maps a non-linear HLG signal in [0, 1] to + *scene*-linear light in [0, 1]. Per BT.2100 Note 5a, this is the correct + transform when converting HLG to a linear scene-light representation + (rather than display-light, which would also involve the HLG OOTF). + + Operates on RGB channels only; alpha is passed through unchanged.""" + if t.shape[-1] == 4: + rgb, alpha = t[..., :3], t[..., 3:] + return torch.cat([hlg_to_linear(rgb), alpha], dim=-1) + + # Piecewise: sqrt branch below 0.5, log branch above. + # Clamp inside the log branch so negative / out-of-range values don't blow up; + # values above 1.0 are allowed and extrapolate naturally. + low = (t ** 2) / 3.0 + high = (torch.exp((t.clamp(min=_HLG_C) - _HLG_C) / _HLG_A) + _HLG_B) / 12.0 + return torch.where(t <= 0.5, low, high) + + +# --------------------------------------------------------------------------- +# Metadata injection +# --------------------------------------------------------------------------- + +_PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n" + + +def _png_chunk(chunk_type: bytes, data: bytes) -> bytes: + """Build a single PNG chunk: length | type | data | CRC32(type+data).""" + crc = zlib.crc32(chunk_type + data) & 0xFFFFFFFF + return struct.pack(">I", len(data)) + chunk_type + data + struct.pack(">I", crc) + + +def _png_text_chunk(keyword: str, text: str) -> bytes: + """tEXt chunk: latin-1 keyword + NUL + latin-1 text.""" + payload = keyword.encode("latin-1") + b"\x00" + text.encode("latin-1", errors="replace") + return _png_chunk(b"tEXt", payload) + + +def inject_png_metadata(png_bytes: bytes, prompt: dict | None, extra_pnginfo: dict | None) -> bytes: + """Insert ComfyUI prompt/workflow as tEXt chunks right after IHDR.""" + if not png_bytes.startswith(_PNG_SIGNATURE): + return png_bytes + + chunks: list[bytes] = [] + if prompt is not None: + chunks.append(_png_text_chunk("prompt", json.dumps(prompt))) + if extra_pnginfo: + for key, value in extra_pnginfo.items(): + chunks.append(_png_text_chunk(key, json.dumps(value))) + if not chunks: + return png_bytes + + # IHDR is always the first chunk; insert ours immediately after it. + ihdr_length = struct.unpack(">I", png_bytes[8:12])[0] + ihdr_end = 8 + 8 + ihdr_length + 4 # signature + (len+type) + data + crc + return png_bytes[:ihdr_end] + b"".join(chunks) + png_bytes[ihdr_end:] + + +# Standard chromaticities (CIE 1931 xy) for the colorspaces this node writes. +# Each tuple is (Rx, Ry, Gx, Gy, Bx, By, Wx, Wy). All share D65 white point. +_CHROMATICITIES = { + # ITU-R BT.709 / sRGB primaries + "Rec.709": (0.6400, 0.3300, 0.3000, 0.6000, 0.1500, 0.0600, 0.3127, 0.3290), + # ITU-R BT.2020 (UHDTV / wide-gamut HDR) primaries + "Rec.2020": (0.7080, 0.2920, 0.1700, 0.7970, 0.1310, 0.0460, 0.3127, 0.3290), +} + + +def _pack_chromaticities(primaries: tuple) -> bytes: + """Serialize 8 chromaticity floats into the EXR `chromaticities` payload.""" + return struct.pack("<8f", *primaries) + + +def _exr_attribute(name: str, attr_type: str, value: bytes) -> bytes: + """Serialize one EXR header attribute: name\\0 type\\0 size:int32 value.""" + return ( + name.encode("utf-8") + b"\x00" + + attr_type.encode("utf-8") + b"\x00" + + struct.pack(" bytes: + """Insert ComfyUI metadata and color-space info into an EXR header. + + Color: EXR pixels are linear by convention. The standard way to describe + their RGB→XYZ relationship is the `chromaticities` attribute. We pick the + primaries that match what the user told us their input was: + + colorspace="sRGB" → Rec. 709 / sRGB primaries (D65) + colorspace="HDR" → Rec. 2020 / BT.2100 primaries (D65) + + Pixels are always converted to linear scene light upstream (sRGB EOTF + inverse for sRGB; HLG OETF inverse for HDR), so the file content is + scene-linear in the indicated gamut. OpenEXR has no standard transfer- + function attribute (the OpenEXR TSC has discussed adding one but it + doesn't exist), so we don't invent one — `chromaticities` plus the EXR + linear-by-convention rule fully specifies the color. + + Prompt/workflow: written as plain `string` attributes using the same keys + (`prompt`, `workflow`, ...) that Comfy uses for PNG tEXt chunks, so the + same readers can pull them out symmetrically. + + Implementation note: the chunk-offset table that follows the header stores + *absolute* byte offsets into the file. Inserting N bytes into the header + means every offset must be incremented by N or the file becomes unreadable. + """ + if len(exr_bytes) < 8 or exr_bytes[:4] != b"\x76\x2f\x31\x01": + return exr_bytes + + new_blob = b"" + if prompt is not None: + new_blob += _exr_attribute("prompt", "string", json.dumps(prompt).encode("utf-8")) + if extra_pnginfo: + for key, value in extra_pnginfo.items(): + new_blob += _exr_attribute(key, "string", json.dumps(value).encode("utf-8")) + if colorspace is not None: + # Map each colorspace option to the RGB primaries the linear pixels + # are now in. "sRGB" and "linear" both produce Rec. 709 linear; "HDR" + # (HLG-encoded Rec. 2020 input) produces Rec. 2020 linear. + primaries_name = { + "sRGB": "Rec.709", + "linear": "Rec.709", + "HDR": "Rec.2020", + }.get(colorspace, "Rec.709") + new_blob += _exr_attribute( + "chromaticities", + "chromaticities", + _pack_chromaticities(_CHROMATICITIES[primaries_name]), + ) + if not new_blob: + return exr_bytes + + # Walk header attributes to find the terminating null byte, and pick up + # dataWindow + compression so we know how many chunks the offset table has. + pos = 8 # past magic (4) + version (4) + data_window = None + compression = 0 + while pos < len(exr_bytes) and exr_bytes[pos] != 0: + name_end = exr_bytes.index(b"\x00", pos) + attr_name = exr_bytes[pos:name_end].decode("latin-1", errors="replace") + type_end = exr_bytes.index(b"\x00", name_end + 1) + attr_type = exr_bytes[name_end + 1:type_end].decode("latin-1", errors="replace") + size = struct.unpack(" bytes: + """Encode a single HxWxC tensor to PNG or EXR bytes in memory. + + For EXR the input is interpreted according to `colorspace` and converted + to scene-linear (EXR's convention) before writing: + + "sRGB" → input is sRGB-encoded Rec. 709; apply inverse sRGB EOTF. + "HDR" → input is HLG-encoded Rec. 2020 (BT.2100); apply inverse HLG + OETF to get scene-linear, per BT.2100 Note 5a. + "linear" → input is already scene-linear (Rec. 709 primaries); write + through unchanged. Use this for renderer/compositor output. + + For PNG, colorspace selection does not modify pixels — PNG is delivered + sRGB-encoded and there is no PNG path for wide-gamut HDR in this node. + """ + height, width, num_channels = img_tensor.shape + has_alpha = num_channels == 4 + + spec = _FORMAT_SPECS[(file_format, bit_depth, has_alpha)] + + if spec["dtype"] == np.float32: + # EXR path: preserve full range, no clamp. + if colorspace == "sRGB": + img_tensor = srgb_to_linear(img_tensor) + elif colorspace == "HDR": + img_tensor = hlg_to_linear(img_tensor) + img_np = img_tensor.cpu().numpy().astype(np.float32) + else: + # PNG path: quantize to integer range. + scaled = (img_tensor * spec["scale"]).clamp(0, spec["scale"]) + img_np = scaled.to(torch.int32).cpu().numpy().astype(spec["dtype"]) + + # Encode directly via CodecContext. PyAV's `image2` muxer does NOT write to + # BytesIO (it expects a real file path), so we bypass the container entirely. + # For single-frame PNG/EXR the raw codec output IS the file. + codec = av.CodecContext.create(file_format, "w") + codec.width = width + codec.height = height + codec.pix_fmt = spec["stream_fmt"] + codec.time_base = Fraction(1, 1) + + frame = av.VideoFrame.from_ndarray(img_np, format=spec["frame_fmt"]) + if spec["frame_fmt"] != spec["stream_fmt"]: + frame = frame.reformat(format=spec["stream_fmt"]) + frame.pts = 0 + frame.time_base = codec.time_base + + packets = list(codec.encode(frame)) + list(codec.encode(None)) # flush with None + return b"".join(bytes(p) for p in packets) + + +# --------------------------------------------------------------------------- +# Node +# --------------------------------------------------------------------------- + +class SaveImageAdvanced(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="SaveImageAdvanced", + search_aliases=["save", "save image", "export image", "output image", "write image"], + display_name="Save Image (Advanced)", + description="Saves the input images to your ComfyUI output directory.", + category="image", + essentials_category="Basics", + inputs=[ + IO.Image.Input("images", tooltip="The images to save."), + IO.String.Input( + "filename_prefix", + default="ComfyUI", + tooltip=( + "The prefix for the file to save. May include formatting tokens " + "such as %date:yyyy-MM-dd% or %Empty Latent Image.width%." + ), + ), + IO.DynamicCombo.Input( + "image_format", + options=[ + IO.DynamicCombo.Option("png", [ + IO.Combo.Input("bit_depth", options=["8-bit", "16-bit"], + default="8-bit", advanced=True), + IO.Combo.Input("colorspace", options=["sRGB"], + default="sRGB", advanced=True), + ]), + IO.DynamicCombo.Option("exr", [ + IO.Combo.Input("bit_depth", options=["32-bit float"], + default="32-bit float", advanced=True), + IO.Combo.Input( + "colorspace", + options=["sRGB", "HDR", "linear"], + default="sRGB", + advanced=True, + tooltip=( + "Colorspace of the input tensor. The EXR is " + "always written as scene-linear in the matching " + "gamut.\n" + " 'sRGB' — input is sRGB-encoded Rec.709; " + "the inverse sRGB EOTF is applied.\n" + " 'HDR' — input is HLG-encoded Rec.2020 " + "(BT.2100); the inverse HLG OETF is applied " + "to get scene-linear light.\n" + " 'linear' — input is already scene-linear " + "(Rec.709 primaries); written through unchanged. " + "Use this for renderer/compositor output." + ), + ), + ]), + ], + tooltip="The file format in which to save the image.", + ), + ], + hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], + is_output_node=True, + ) + + @classmethod + def execute(cls, images, filename_prefix: str, image_format: dict) -> IO.NodeOutput: + file_format = image_format["image_format"] + bit_depth = image_format["bit_depth"] + colorspace = image_format.get("colorspace", "sRGB") + + output_dir = folder_paths.get_output_directory() + full_output_folder, filename, counter, subfolder, filename_prefix = ( + folder_paths.get_save_image_path( + filename_prefix, output_dir, images[0].shape[1], images[0].shape[0] + ) + ) + + prompt = cls.hidden.prompt + extra_pnginfo = cls.hidden.extra_pnginfo + write_metadata = not args.disable_metadata + + results = [] + for batch_number, image in enumerate(images): + encoded = _encode_image(image, file_format, bit_depth, colorspace) + + if write_metadata: + if file_format == "png": + encoded = inject_png_metadata(encoded, prompt, extra_pnginfo) + elif file_format == "exr": + encoded = inject_exr_metadata(encoded, prompt, extra_pnginfo, colorspace) + + name = filename.replace("%batch_num%", str(batch_number)) + file = f"{name}_{counter:05}.{file_format}" + with open(os.path.join(full_output_folder, file), "wb") as f: + f.write(encoded) + + results.append({"filename": file, "subfolder": subfolder, "type": "output"}) + counter += 1 + + return IO.NodeOutput(ui={"images": results}) + + class ImagesExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -842,6 +1249,7 @@ class ImagesExtension(ComfyExtension): ImageAddNoise, SaveAnimatedWEBP, SaveAnimatedPNG, + SaveImageAdvanced, SaveSVGNode, ImageStitch, ResizeAndPadImage,