@comfytype(io_type="COMFY_DICT")
class ComfyDict(ComfyTypeIO):
    """IO type tagged ``COMFY_DICT``; values are plain Python ``dict`` objects."""
    Type = dict

@comfytype(io_type="COMFY_LIST")
class ComfyList(ComfyTypeIO):
    """IO type tagged ``COMFY_LIST``; values are plain Python ``list`` objects."""
    Type = list


@comfytype(io_type="COLORS")
class Colors(ComfyTypeIO):
    """IO type tagged ``COLORS``: a list whose items use the single-color ``Color.Type``."""
    Type = list[Color.Type]

    class Input(WidgetInput):
        """Widget input for a list of colors; socketless unless the caller says otherwise."""
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None,
                     socketless: bool=True, default: list[str]=None, advanced: bool=None):
            # Arguments are forwarded positionally. The meaning of the literal ``None`` slots is
            # defined by WidgetInput.__init__, which is not visible here -- NOTE(review): confirm
            # the slot order against the base class before changing this call.
            super().__init__(id, display_name, optional, tooltip, None, default, socketless, None, None, None, None, advanced)
            # A missing default becomes a fresh empty list owned by this instance.
            if default is None:
                self.default = []


@comfytype(io_type="BOUNDING_BOXES")
class BoundingBoxes(ComfyTypeIO):
    """IO type tagged ``BOUNDING_BOXES``: a list of bounding boxes that each carry metadata."""
    class BoundingBoxWithMetadata(BoundingBox.BoundingBoxDict):
        # Extends the single-box dict shape with a free-form metadata mapping.
        metadata: dict
    Type = list[BoundingBoxWithMetadata]

    class Input(WidgetInput):
        """Widget input for a list of bounding boxes; socketless unless the caller says otherwise."""
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None,
                     socketless: bool=True, default: list[dict]=None, advanced: bool=None):
            # Same positional forwarding as Colors.Input -- NOTE(review): confirm the slot order
            # against WidgetInput.__init__ before changing this call.
            super().__init__(id, display_name, optional, tooltip, None, default, socketless, None, None, None, None, advanced)
            # A missing default becomes a fresh empty list owned by this instance.
            if default is None:
                self.default = []
# Characters accepted inside a hex color. int(x, 16) alone is too permissive: it also
# accepts signs, surrounding whitespace and non-ASCII digits.
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
# Color returned whenever a value cannot be parsed.
_FALLBACK_RGB = (255, 255, 255)


def hex_to_rgb(value: str) -> tuple[int, int, int]:
    """Convert a hex color string to an ``(r, g, b)`` tuple of 0-255 ints.

    Accepts ``RRGGBB`` or the ``RGB`` shorthand, with or without leading ``#``
    and surrounding whitespace. Anything else (wrong length, non-hex
    characters, non-string input) yields white, ``(255, 255, 255)``.
    """
    if not isinstance(value, str):
        return _FALLBACK_RGB
    digits = value.strip().lstrip("#")
    if len(digits) == 3:
        # Shorthand: each digit is doubled, e.g. "abc" -> "aabbcc".
        digits = "".join(ch * 2 for ch in digits)
    # Validate explicitly so pairs such as "-1", "+f" or " f" cannot slip through int().
    if len(digits) != 6 or not _HEX_DIGITS.issuperset(digits):
        return _FALLBACK_RGB
    return (int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16))


def readable_color(rgb: tuple[int, int, int]) -> tuple[int, int, int]:
    """Return *rgb* unchanged if bright enough, else blended toward white.

    Brightness is the weighted sum ``0.299 r + 0.587 g + 0.114 b``. Colors
    below 130 are mixed with white by exactly the amount that lifts that sum
    to 130.
    """
    r, g, b = rgb
    lum = 0.299 * r + 0.587 * g + 0.114 * b
    if lum >= 130:
        return (r, g, b)
    # lum < 130 here, so the denominator is always > 125.
    t = (130 - lum) / (255 - lum)
    return (round(r + (255 - r) * t), round(g + (255 - g) * t), round(b + (255 - b) * t))


def normalize_palette(colors) -> list[str]:
    """Return the non-empty string entries of *colors*, upper-cased.

    *colors* may be a list/iterable of entries, a dict (its values are used),
    a single color string (treated as a one-entry palette) or ``None``
    (treated as empty). Non-string and empty entries are dropped.
    """
    if colors is None:
        return []
    if isinstance(colors, str):
        # Iterating a bare string would yield single characters, not colors.
        colors = [colors]
    elif isinstance(colors, dict):
        colors = colors.values()
    return [c.upper() for c in colors if isinstance(c, str) and c]


def pixels_to_fractions(box: dict, width: int, height: int) -> dict:
    """Convert a pixel box (``x``/``y``/``width``/``height``) to canvas fractions.

    Returns a dict with keys ``x``, ``y``, ``w``, ``h``. Missing keys count as
    0; a zero or ``None`` canvas dimension is replaced by 1 to avoid dividing
    by zero.
    """
    w = width or 1
    h = height or 1
    return {
        "x": box.get("x", 0) / w,
        "y": box.get("y", 0) / h,
        "w": box.get("width", 0) / w,
        "h": box.get("height", 0) / h,
    }


def fractions_to_pixels(box: dict, width: int, height: int) -> dict:
    """Convert a fractional box (``x``/``y``/``w``/``h``) to rounded pixel values.

    Negative sizes are flipped so the returned ``width``/``height`` are
    non-negative and ``x``/``y`` name the top-left corner.
    """
    x, y = box.get("x", 0.0), box.get("y", 0.0)
    w, h = box.get("w", 0.0), box.get("h", 0.0)
    if w < 0:
        x, w = x + w, -w
    if h < 0:
        y, h = y + h, -h
    return {
        "x": round(x * width),
        "y": round(y * height),
        "width": round(w * width),
        "height": round(h * height),
    }


def fractions_to_bbox_frame(boxes: list, width: int, height: int) -> list:
    """Convert fractional boxes to pixel boxes wrapped in a single-frame list.

    Non-dict entries are skipped. Returns ``[[box, ...]]`` when at least one
    box remains, otherwise ``[]``.
    """
    pixels = [
        fractions_to_pixels(box, width, height)
        for box in boxes
        if isinstance(box, dict)
    ]
    return [pixels] if pixels else []
int) -> list: + pixels = [ + fractions_to_pixels(box, width, height) + for box in boxes + if isinstance(box, dict) + ] + return [pixels] if pixels else [] + + +def _font(size: int): + try: + return ImageFont.load_default(size) + except Exception: + return ImageFont.load_default() + + +def _wrap(draw, text: str, font, max_w: float) -> list[str]: + lines = [] + for para in text.split("\n"): + line = "" + for word in para.split(): + test = word if not line else line + " " + word + if line and draw.textlength(test, font=font) > max_w: + lines.append(line) + line = word + else: + line = test + lines.append(line) + return lines + + +def _bg_from_image(image) -> Image.Image | None: + if image is None: + return None + try: + arr = (image[0].detach().cpu().numpy() * 255).clip(0, 255).astype(np.uint8) + return Image.fromarray(arr) + except Exception: + return None + + +def render_preview(regions, width, height, bg=None): + if bg is not None: + iw, ih = bg.size + long_edge = max(iw, ih) or 1 + scale = min(1.0, _PREVIEW_LONG_EDGE / long_edge) + rw, rh = max(1, round(iw * scale)), max(1, round(ih * scale)) + base = bg.convert("RGB").resize((rw, rh), Image.LANCZOS) + base = ImageEnhance.Brightness(base).enhance(_PREVIEW_DIM) + img = base.convert("RGBA") + else: + long_edge = max(width, height) or 1 + scale = min(1.0, _PREVIEW_LONG_EDGE / long_edge) + rw, rh = max(1, round(width * scale)), max(1, round(height * scale)) + grey = round(_PREVIEW_DIM * 128) + img = Image.new("RGBA", (rw, rh), (grey, grey, grey, 255)) + + overlay = Image.new("RGBA", (rw, rh), (0, 0, 0, 0)) + draw = ImageDraw.Draw(overlay) + fs = max(10, round(rh / 64)) + font = _font(fs) + tag_font = _font(max(9, fs - 2)) + line_h = fs + 2 + + for i, region in enumerate(regions): + if not isinstance(region, dict): + continue + palette = [c for c in (region.get("palette") or []) if c] + r, g, b = hex_to_rgb(palette[0]) if palette else (140, 140, 140) + x1 = max(0, min(rw, round(region.get("x", 0) * rw))) + y1 = max(0, 
def _preview_canvas(width, height, bg=None):
    """Build the dimmed RGBA base layer that regions are drawn over.

    With *bg*, the image is shrunk (never enlarged) so its long edge fits
    ``_PREVIEW_LONG_EDGE`` and darkened by ``_PREVIEW_DIM``. Without it, a
    flat grey canvas with the *width* x *height* aspect is used instead.
    """
    src_w, src_h = (width, height) if bg is None else bg.size
    scale = min(1.0, _PREVIEW_LONG_EDGE / (max(src_w, src_h) or 1))
    size = (max(1, round(src_w * scale)), max(1, round(src_h * scale)))
    if bg is None:
        grey = round(_PREVIEW_DIM * 128)
        return Image.new("RGBA", size, (grey, grey, grey, 255))
    base = bg.convert("RGB").resize(size, Image.LANCZOS)
    base = ImageEnhance.Brightness(base).enhance(_PREVIEW_DIM)
    return base.convert("RGBA")


def render_preview(regions, width, height, bg=None):
    """Render fractional regions as outlined, numbered boxes over a dimmed canvas.

    Args:
        regions: iterable of region dicts with fractional ``x``/``y``/``w``/``h``
            plus optional ``type``, ``text``, ``desc`` and ``palette``;
            non-dict entries are skipped.
        width, height: canvas size used for the aspect ratio when *bg* is None.
        bg: optional PIL image drawn (dimmed) behind the regions.

    Returns:
        A float32 tensor of shape ``(1, H, W, 3)`` with values in 0-1.
    """
    img = _preview_canvas(width, height, bg)
    rw, rh = img.size

    overlay = Image.new("RGBA", (rw, rh), (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)
    fs = max(10, round(rh / 64))
    font = _font(fs)
    tag_font = _font(max(9, fs - 2))
    line_h = fs + 2

    for i, region in enumerate(regions):
        if not isinstance(region, dict):
            continue
        # Same palette rules as build_elements: dict palettes contribute their
        # values and non-string / empty entries are dropped before parsing.
        palette = normalize_palette(region.get("palette") or [])
        r, g, b = hex_to_rgb(palette[0]) if palette else (140, 140, 140)

        # Fractions -> pixel corners, clamped to the canvas and ordered.
        x1 = max(0, min(rw, round(region.get("x", 0) * rw)))
        y1 = max(0, min(rh, round(region.get("y", 0) * rh)))
        x2 = max(0, min(rw, round((region.get("x", 0) + region.get("w", 0)) * rw)))
        y2 = max(0, min(rh, round((region.get("y", 0) + region.get("h", 0)) * rh)))
        if x2 < x1:
            x1, x2 = x2, x1
        if y2 < y1:
            y1, y2 = y2, y1

        draw.rectangle([x1, y1, x2, y2], outline=(r, g, b, 255), width=2)

        # Up to five palette swatches share the top edge of the box.
        swatches = palette[:5]
        if swatches and (x2 - x1) > 2:
            sh = max(5, fs // 2)
            seg = (x2 - x1) / len(swatches)
            for p, hexc in enumerate(swatches):
                sx = x1 + round(p * seg)
                draw.rectangle([sx, y1, x1 + round((p + 1) * seg), y1 + sh], fill=hex_to_rgb(hexc))

        # Two-digit index tag in the top-left corner; text colour picked for contrast.
        tag = str(i + 1).zfill(2)
        tw = draw.textlength(tag, font=tag_font)
        draw.rectangle([x1, y1, x1 + tw + 6, y1 + fs + 2], fill=(r, g, b, 255))
        tag_fill = (0, 0, 0, 255) if (0.299 * r + 0.587 * g + 0.114 * b) > 140 else (255, 255, 255, 255)
        draw.text((x1 + 3, y1 + 1), tag, fill=tag_fill, font=tag_font)

        is_text = region.get("type") == "text"
        body = region.get("desc", "") or ""
        if is_text and region.get("text"):
            body = '"%s"%s' % (region["text"], " — " + body if body else "")
        if body and (x2 - x1) > 8:
            text_fill = readable_color((r, g, b)) + (255,)
            ty = y1 + fs + 5
            for line in _wrap(draw, body, font, x2 - x1 - 8):
                # Stop once the next line would start below the box.
                if ty > y2:
                    break
                draw.text((x1 + 4, ty), line, fill=text_fill, font=font)
                ty += line_h

    composed = Image.alpha_composite(img, overlay).convert("RGB")
    arr = np.asarray(composed, dtype=np.float32) / 255.0
    return torch.from_numpy(arr).unsqueeze(0)
return regions + + +def _norm_bbox(region: dict) -> list[int]: + def grid(value: float) -> int: + return max(0, min(1000, round(value * 1000))) + + x, y = region.get("x", 0.0), region.get("y", 0.0) + w, h = region.get("w", 0.0), region.get("h", 0.0) + ymin, xmin, ymax, xmax = grid(y), grid(x), grid(y + h), grid(x + w) + if ymin > ymax: + ymin, ymax = ymax, ymin + if xmin > xmax: + xmin, xmax = xmax, xmin + return [ymin, xmin, ymax, xmax] + + +def build_elements(regions: list) -> list: + elements = [] + for region in regions: + if not isinstance(region, dict): + continue + etype = "text" if region.get("type") == "text" else "obj" + element = {"type": etype} + element["bbox"] = _norm_bbox(region) + if etype == "text": + element["text"] = region.get("text", "") + element["desc"] = region.get("desc", "") + palette = normalize_palette(region.get("palette", [])) + if palette: + element["color_palette"] = palette[:5] + elements.append(element) + return elements + + +class CreateBoundingBoxes(io.ComfyNode): + @classmethod + def define_schema(cls): + editor_state = io.BoundingBoxes.Input( + "editor_state", + tooltip="Draw regions and set each region's type/text/desc/palette.", + ) + return io.Schema( + node_id="CreateBoundingBoxes", + display_name="Create Bounding Boxes", + category="utilities", + description="Draw regions over a reference image. 
# Node: turns the bounding-box editor state into a preview image, pixel boxes and caption elements.
class CreateBoundingBoxes(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        """Declare the node's inputs (background, width, height, editor_state) and three outputs."""
        editor_state = io.BoundingBoxes.Input(
            "editor_state",
            tooltip="Draw regions and set each region's type/text/desc/palette.",
        )
        background = io.Image.Input(
            "background",
            optional=True,
            tooltip="Optional reference image shown behind the canvas and preview.",
        )
        width = io.Int.Input(
            "width", default=1024, min=64, max=16384, step=16,
            tooltip="Canvas aspect width and the pixel grid for the bbox output.",
        )
        height = io.Int.Input(
            "height", default=1024, min=64, max=16384, step=16,
            tooltip="Canvas aspect height and the pixel grid for the bbox output.",
        )
        outputs = [
            io.Image.Output(display_name="preview"),
            io.BoundingBox.Output(display_name="bboxes"),
            io.ComfyList.Output(display_name="elements"),
        ]
        return io.Schema(
            node_id="CreateBoundingBoxes",
            display_name="Create Bounding Boxes",
            category="utilities",
            description="Draw regions over a reference image. Outputs Ideogram caption elements, pixel-space bounding boxes, and a rendered preview.",
            inputs=[background, width, height, editor_state],
            outputs=outputs,
            is_experimental=True,
        )

    @classmethod
    def execute(cls, width, height, editor_state=None, background=None) -> io.NodeOutput:
        """Produce the preview, the pixel-space box frame and the caption elements.

        The canvas size is also reported to the UI under the ``dims`` key.
        """
        regions = boxes_to_regions(editor_state, width, height)
        backdrop = _bg_from_image(background)
        preview = render_preview(regions, width, height, backdrop)
        bbox_frame = fractions_to_bbox_frame(regions, width, height)
        elements = build_elements(regions)
        return io.NodeOutput(preview, bbox_frame, elements, ui={"dims": [width, height]})


# Extension wrapper that exposes the bounding-box node.
class BoundingBoxesExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        provided = [CreateBoundingBoxes]
        return provided


async def comfy_entrypoint() -> BoundingBoxesExtension:
    """Create the extension instance for this module."""
    extension = BoundingBoxesExtension()
    return extension
# Node: assembles the caption dict from elements, background text and style fields.
class BuildJsonPromptIdeogram(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        """Declare the caption inputs and the single ``prompt`` dict output."""
        color_palette = io.Colors.Input(
            "color_palette",
            tooltip="Style color palette.",
        )
        # The three style descriptors share one tooltip.
        descriptor_tip = "Style descriptor. Sent even when blank once a style is chosen."
        return io.Schema(
            node_id="BuildJsonPromptIdeogram",
            display_name="Build JSON Prompt (Ideogram)",
            category="image/ideogram",
            description="Assemble the Ideogram 4 caption from Create Bounding Boxes elements plus the background and style fields.",
            inputs=[
                io.ComfyList.Input("element", tooltip="Caption elements from Create Bounding Boxes."),
                io.String.Input(
                    "high_level_description", multiline=True, default="",
                    tooltip="Optional one-line overview of the whole image (blank = omitted).",
                ),
                io.String.Input(
                    "background", multiline=True, default="",
                    tooltip="Scene background description.",
                ),
                io.DynamicCombo.Input("style", options=[
                    io.DynamicCombo.Option("none", []),
                    io.DynamicCombo.Option("photo", [io.String.Input("photo", default="")]),
                    io.DynamicCombo.Option("art_style", [io.String.Input("art_style", default="")]),
                ]),
                io.String.Input("aesthetics", default="", tooltip=descriptor_tip),
                io.String.Input("lighting", default="", tooltip=descriptor_tip),
                io.String.Input("medium", default="", tooltip=descriptor_tip),
                color_palette,
            ],
            outputs=[io.ComfyDict.Output(display_name="prompt")],
            is_experimental=True,
        )

    @classmethod
    def execute(cls, element, style, high_level_description="", background="",
                aesthetics="", lighting="", medium="", color_palette=None) -> io.NodeOutput:
        """Build the caption dict.

        ``high_level_description`` is included only when non-blank.
        ``style_description`` is included only when the chosen style is not
        ``"none"``; it carries ``color_palette`` only when the palette is
        non-empty. ``compositional_deconstruction`` is always present.
        """
        elements = element if isinstance(element, list) else []
        if isinstance(style, dict):
            kind = style.get("style", "none")
            photo = style.get("photo", "")
            art_style = style.get("art_style", "")
        else:
            kind, photo, art_style = "none", "", ""
        palette = normalize_palette(color_palette or [])

        caption: dict = {}
        if high_level_description.strip():
            caption["high_level_description"] = high_level_description

        if kind != "none":
            # Key order differs per style and is preserved as-is: it determines
            # the field order when the dict is later serialized.
            if kind == "photo":
                style_desc: dict = {
                    "aesthetics": aesthetics,
                    "lighting": lighting,
                    "photo": photo,
                    "medium": medium,
                }
            else:
                style_desc = {
                    "aesthetics": aesthetics,
                    "lighting": lighting,
                    "medium": medium,
                    "art_style": art_style,
                }
            if palette:
                style_desc["color_palette"] = palette
            caption["style_description"] = style_desc

        caption["compositional_deconstruction"] = {
            "background": background,
            "elements": elements,
        }
        return io.NodeOutput(caption)


# Extension wrapper that exposes the JSON prompt builder node.
class JsonPromptExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        provided = [BuildJsonPromptIdeogram]
        return provided


async def comfy_entrypoint() -> JsonPromptExtension:
    """Create the extension instance for this module."""
    extension = JsonPromptExtension()
    return extension
Sent even when blank once a style is chosen."), + color_palette, + ], + outputs=[io.ComfyDict.Output(display_name="prompt")], + is_experimental=True, + ) + + @classmethod + def execute(cls, element, style, high_level_description="", background="", + aesthetics="", lighting="", medium="", color_palette=None) -> io.NodeOutput: + elements = element if isinstance(element, list) else [] + kind = style.get("style", "none") if isinstance(style, dict) else "none" + photo = style.get("photo", "") if isinstance(style, dict) else "" + art_style = style.get("art_style", "") if isinstance(style, dict) else "" + palette = normalize_palette(color_palette or []) + + caption: dict = {} + if high_level_description.strip(): + caption["high_level_description"] = high_level_description + if kind != "none": + style_desc: dict = {"aesthetics": aesthetics, "lighting": lighting} + if kind == "photo": + style_desc["photo"] = photo + style_desc["medium"] = medium + else: + style_desc["medium"] = medium + style_desc["art_style"] = art_style + if palette: + style_desc["color_palette"] = palette + caption["style_description"] = style_desc + caption["compositional_deconstruction"] = { + "background": background, + "elements": elements, + } + return io.NodeOutput(caption) + + +class JsonPromptExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [BuildJsonPromptIdeogram] + + +async def comfy_entrypoint() -> JsonPromptExtension: + return JsonPromptExtension() diff --git a/comfy_extras/nodes_string.py b/comfy_extras/nodes_string.py index 97485c8c5..dbf599263 100644 --- a/comfy_extras/nodes_string.py +++ b/comfy_extras/nodes_string.py @@ -440,6 +440,57 @@ class JsonExtractString(io.ComfyNode): except (json.JSONDecodeError, TypeError): return io.NodeOutput("") + +def _dump_json(value, indent): + return json.dumps(value, ensure_ascii=False, indent=indent or None) + + +class DictToJsonString(io.ComfyNode): + @classmethod + def define_schema(cls): + 
# Node: serializes a COMFY_DICT value to a JSON string.
class DictToJsonString(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        """Declare a dict ``value`` input, an ``indent`` setting and one string output."""
        aliases = ["json", "dict to json", "stringify", "serialize", "dict to string"]
        inputs = [
            io.ComfyDict.Input("value"),
            io.Int.Input(
                "indent", default=2, min=0, max=8,
                tooltip="Spaces per indent level. 0 produces compact single-line JSON.",
            ),
        ]
        outputs = [io.String.Output()]
        return io.Schema(
            node_id="DictToJsonString",
            display_name="Dict to JSON String",
            category="text",
            search_aliases=aliases,
            inputs=inputs,
            outputs=outputs,
        )

    @classmethod
    def execute(cls, value, indent=2):
        """Return *value* serialized as JSON with the requested indent."""
        serialized = _dump_json(value, indent)
        return io.NodeOutput(serialized)


# Node: serializes a COMFY_LIST value to a JSON string.
class ListToJsonString(io.ComfyNode):
    @classmethod
    def define_schema(cls):
        """Declare a list ``value`` input, an ``indent`` setting and one string output."""
        aliases = ["json", "list to json", "stringify", "serialize", "list to string", "array to json"]
        inputs = [
            io.ComfyList.Input("value"),
            io.Int.Input(
                "indent", default=2, min=0, max=8,
                tooltip="Spaces per indent level. 0 produces compact single-line JSON.",
            ),
        ]
        outputs = [io.String.Output()]
        return io.Schema(
            node_id="ListToJsonString",
            display_name="List to JSON String",
            category="text",
            search_aliases=aliases,
            inputs=inputs,
            outputs=outputs,
        )

    @classmethod
    def execute(cls, value, indent=2):
        """Return *value* serialized as JSON with the requested indent."""
        serialized = _dump_json(value, indent)
        return io.NodeOutput(serialized)