Merge 6447250bd6 into 2806163f6e

Default control_after_generate to fixed in PrimitiveInt node (#13690)
Refactor LoadImageMask to use LoadImage code. (#13687)
2026-05-25 16:37:23 +08:00 · 2026-05-04 09:15:43 +07:00 · 2026-05-04 07:21:34 +08:00 · 2026-05-03 16:18:27 -04:00 · 2026-05-03 14:07:21 -04:00 · 2026-05-03 16:30:00 +03:00
10 changed files with 367 additions and 54 deletions
--- a/README.md
+++ b/README.md
@ -31,7 +31,8 @@
 [github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest
 [github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases

-<img width="1590" height="795" alt="ComfyUI Screenshot" src="https://github.com/user-attachments/assets/4aab0bef-b413-4595-9766-a2c134676d27" />
+<img width="1590" height="795" alt="ComfyUI Screenshot" src="https://github.com/user-attachments/assets/36e065e0-bfae-4456-8c7f-8369d5ea48a2" />
+<br>
 </div>

 ComfyUI is the AI creation engine for visual professionals who demand control over every model, every parameter, and every output. Its powerful and modular node graph interface empowers creatives to generate images, videos, 3D models, audio, and more...
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -91,6 +91,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE"

 parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
 parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
+parser.add_argument("--enable-triton-backend", action="store_true", help="ComfyUI will enable the use of Triton backend in comfy-kitchen. Is disabled at launch by default.")

 class LatentPreviewMethod(enum.Enum):
    NoPreviews = "none"
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@ -1,6 +1,8 @@
 import torch
 import logging

+from comfy.cli_args import args
+
 try:
    import comfy_kitchen as ck
    from comfy_kitchen.tensor import (
@ -21,7 +23,15 @@ try:
            ck.registry.disable("cuda")
            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

-    ck.registry.disable("triton")
+    if args.enable_triton_backend:
+        try:
+            import triton
+            logging.info("Found triton %s. Enabling comfy-kitchen triton backend.", triton.__version__)
+        except ImportError as e:
+            logging.error(f"Failed to import triton, Error: {e}, the comfy-kitchen triton backend will not be available.")
+            ck.registry.disable("triton")
+    else:
+        ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
 except ImportError as e:
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -1320,6 +1320,13 @@ def detect_te_model(sd):
            return TEModel.QWEN25_3B
        if weight.shape[0] == 512:
            return TEModel.QWEN25_7B
+    # Qwen-VL checkpoints can be saved under model.language_model.* (e.g. HY-OmniWeave text encoder).
+    if 'model.language_model.layers.0.self_attn.k_proj.bias' in sd:
+        weight = sd['model.language_model.layers.0.self_attn.k_proj.bias']
+        if weight.shape[0] == 256:
+            return TEModel.QWEN25_3B
+        if weight.shape[0] == 512:
+            return TEModel.QWEN25_7B
    if "model.language_model.layers.0.linear_attn.A_log" in sd and "model.language_model.layers.0.input_layernorm.weight" in sd:
        weight = sd['model.language_model.layers.0.input_layernorm.weight']
        if weight.shape[0] == 1024:
@ -1365,7 +1372,11 @@ def t5xxl_detect(clip_data):
    return {}

 def llama_detect(clip_data):
-    weight_names = ["model.layers.0.self_attn.k_proj.weight", "model.layers.0.linear_attn.in_proj_a.weight"]
+    weight_names = [
+        "model.layers.0.self_attn.k_proj.weight",
+        "model.layers.0.linear_attn.in_proj_a.weight",
+        "model.language_model.layers.0.self_attn.k_proj.weight",
+    ]

    for sd in clip_data:
        for weight_name in weight_names:
@ -1476,7 +1487,23 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.omnigen2.te(**llama_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.omnigen2.Omnigen2Tokenizer
        elif te_model == TEModel.QWEN25_7B:
-            if clip_type == CLIPType.HUNYUAN_IMAGE:
+            # Some Qwen2.5-VL checkpoints (including HY-OmniWeave's text encoder)
+            # are saved with "model.language_model.*" and "model.visual.*" prefixes.
+            # Normalize keys to the layout expected by Comfy text encoder wrappers.
+            for i, sd in enumerate(clip_data):
+                if "model.language_model.layers.0.self_attn.k_proj.weight" in sd:
+                    clip_data[i] = comfy.utils.state_dict_prefix_replace(
+                        sd,
+                        {
+                            "model.language_model.": "model.",
+                            "model.visual.": "visual.",
+                            "final_layer_norm.": "model.norm.",
+                        },
+                    )
+            if clip_type == CLIPType.HUNYUAN_VIDEO_15:
+                clip_target.clip = comfy.text_encoders.hunyuan_image.te(byt5=False, **llama_detect(clip_data))
+                clip_target.tokenizer = comfy.text_encoders.hunyuan_video.HunyuanVideo15Tokenizer
+            elif clip_type == CLIPType.HUNYUAN_IMAGE:
                clip_target.clip = comfy.text_encoders.hunyuan_image.te(byt5=False, **llama_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImageTokenizer
            elif clip_type == CLIPType.LONGCAT_IMAGE:
@ -1814,6 +1841,39 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable
        if custom_operations is None:
            sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)

+    # HY-OmniWeave checkpoints store double-block attention as split q/k/v tensors
+    # while Comfy's HunyuanVideo implementation expects merged qkv tensors.
+    if "double_blocks.0.img_attn_q.weight" in sd and "double_blocks.0.img_attn.qkv.weight" not in sd:
+        converted_qkv = 0
+        block_indices = set()
+        for k in list(sd.keys()):
+            if not k.startswith("double_blocks."):
+                continue
+            parts = k.split(".")
+            if len(parts) < 3:
+                continue
+            if parts[2] == "img_attn_q":
+                try:
+                    block_indices.add(int(parts[1]))
+                except ValueError:
+                    pass
+
+        for idx in sorted(block_indices):
+            for attn_prefix in ("img_attn", "txt_attn"):
+                for end in ("weight", "bias"):
+                    q_key = f"double_blocks.{idx}.{attn_prefix}_q.{end}"
+                    k_key = f"double_blocks.{idx}.{attn_prefix}_k.{end}"
+                    v_key = f"double_blocks.{idx}.{attn_prefix}_v.{end}"
+                    qkv_key = f"double_blocks.{idx}.{attn_prefix}.qkv.{end}"
+                    if qkv_key in sd:
+                        continue
+                    if q_key in sd and k_key in sd and v_key in sd:
+                        sd[qkv_key] = torch.cat((sd.pop(q_key), sd.pop(k_key), sd.pop(v_key)), dim=0)
+                        converted_qkv += 1
+
+        if converted_qkv > 0:
+            logging.info(f"Converted {converted_qkv} split HunyuanVideo attention tensors to qkv format.")
+
    parameters = comfy.utils.calculate_parameters(sd)
    weight_dtype = comfy.utils.weight_dtype(sd)

--- a/comfy_extras/nodes_compositing.py
+++ b/comfy_extras/nodes_compositing.py
@ -202,14 +202,11 @@ class JoinImageWithAlpha(io.ComfyNode):

    @classmethod
    def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
-        batch_size = min(len(image), len(alpha))
-        out_images = []
-
+        batch_size = max(len(image), len(alpha))
        alpha = 1.0 - resize_mask(alpha, image.shape[1:])
-        for i in range(batch_size):
-           out_images.append(torch.cat((image[i][:,:,:3], alpha[i].unsqueeze(2)), dim=2))
-
-        return io.NodeOutput(torch.stack(out_images))
+        alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size)
+        image = comfy.utils.repeat_to_batch_size(image, batch_size)
+        return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1))


 class CompositingExtension(ComfyExtension):
--- a/comfy_extras/nodes_hunyuan.py
+++ b/comfy_extras/nodes_hunyuan.py
@ -2,6 +2,8 @@ import nodes
 import node_helpers
 import torch
 import comfy.model_management
+import comfy.utils
+import comfy.clip_vision
 from typing_extensions import override
 from comfy_api.latest import ComfyExtension, io
 from comfy.ldm.hunyuan_video.upsampler import HunyuanVideo15SRModel
@ -301,6 +303,246 @@ class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode):
    encode = execute  # TODO: remove


+class TextEncodeHunyuanVideo15Omni(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="TextEncodeHunyuanVideo15Omni",
+            display_name="Text Encode HunyuanVideo 15 Omni",
+            category="advanced/conditioning",
+            inputs=[
+                io.Clip.Input("clip"),
+                io.String.Input("prompt", multiline=True, dynamic_prompts=True),
+                io.Combo.Input("task", options=["t2v", "i2v", "interpolation", "reference2v", "editing", "tiv2v"], default="t2v"),
+                io.Boolean.Input("use_visual_inputs", default=True, advanced=True),
+                io.Int.Input("max_visual_inputs", default=8, min=1, max=64, advanced=True),
+                io.ClipVisionOutput.Input("clip_vision_output", optional=True),
+            ],
+            outputs=[
+                io.Conditioning.Output(),
+            ],
+        )
+
+    @staticmethod
+    def _task_system_prompt(task: str) -> str:
+        prompts = {
+            "t2v": "Describe a high-quality target video from the user's request with concrete scene details, motion, camera behavior, and style.",
+            "i2v": "Describe a target video that should stay consistent with the provided reference image while following the user's request.",
+            "interpolation": "Describe a target video that smoothly transitions between the provided keyframe images while following the user's request.",
+            "reference2v": "Describe a target video that composes the provided reference subjects into a coherent scene following the user's request.",
+            "editing": "Describe an edited output video that follows the user's instruction while preserving relevant source video content.",
+            "tiv2v": "Describe an edited output video using both the provided source video and reference image guidance according to the user's instruction.",
+        }
+        return prompts.get(task, prompts["t2v"])
+
+    @classmethod
+    def _build_template(cls, task: str, image_count: int) -> str:
+        system_prompt = cls._task_system_prompt(task)
+        visual_tokens = "<|vision_start|><|image_pad|><|vision_end|>\n" * image_count
+        return (
+            "<|im_start|>system\n"
+            f"{system_prompt}"
+            "<|im_end|>\n"
+            "<|im_start|>user\n"
+            f"{visual_tokens}" + "{}<|im_end|>\n"
+            "<|im_start|>assistant\n"
+        )
+
+    @staticmethod
+    def _extract_image_embeds(clip_vision_output, max_visual_inputs: int):
+        if clip_vision_output is None:
+            return []
+        mm_projected = getattr(clip_vision_output, "mm_projected", None)
+        if mm_projected is None:
+            return []
+        if mm_projected.ndim == 2:
+            return [mm_projected]
+        count = min(mm_projected.shape[0], max_visual_inputs)
+        return [mm_projected[i] for i in range(count)]
+
+    @classmethod
+    def execute(cls, clip, prompt, task, use_visual_inputs, max_visual_inputs, clip_vision_output=None) -> io.NodeOutput:
+        image_embeds = cls._extract_image_embeds(clip_vision_output, max_visual_inputs) if use_visual_inputs else []
+        template = cls._build_template(task, len(image_embeds))
+
+        # HunyuanVideo 1.5 tokenizers use `images=...`; HunyuanVideo 1.0 uses `image_embeds=...`.
+        try:
+            tokens = clip.tokenize(prompt, llama_template=template, images=image_embeds)
+        except TypeError:
+            embeds = None
+            if len(image_embeds) > 0:
+                embeds = torch.stack(image_embeds, dim=0)
+            tokens = clip.tokenize(prompt, llama_template=template, image_embeds=embeds, image_interleave=1)
+        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))
+
+    encode = execute  # TODO: remove
+
+
+class HunyuanClipVisionOutputConcat(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="HunyuanClipVisionOutputConcat",
+            display_name="Hunyuan CLIP Vision Output Concat",
+            category="conditioning/video_models",
+            inputs=[
+                io.ClipVisionOutput.Input("clip_vision_output_1"),
+                io.ClipVisionOutput.Input("clip_vision_output_2", optional=True),
+                io.ClipVisionOutput.Input("clip_vision_output_3", optional=True),
+                io.ClipVisionOutput.Input("clip_vision_output_4", optional=True),
+            ],
+            outputs=[
+                io.ClipVisionOutput.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(cls, clip_vision_output_1, clip_vision_output_2=None, clip_vision_output_3=None, clip_vision_output_4=None) -> io.NodeOutput:
+        outputs = [o for o in (clip_vision_output_1, clip_vision_output_2, clip_vision_output_3, clip_vision_output_4) if o is not None]
+        merged = comfy.clip_vision.Output()
+        tensor_attrs = ["last_hidden_state", "image_embeds", "penultimate_hidden_states", "all_hidden_states", "mm_projected"]
+        for attr in tensor_attrs:
+            values = [getattr(o, attr) for o in outputs if hasattr(o, attr)]
+            if len(values) > 0 and torch.is_tensor(values[0]):
+                setattr(merged, attr, torch.cat(values, dim=0))
+
+        image_sizes = []
+        for o in outputs:
+            if hasattr(o, "image_sizes"):
+                image_sizes.extend(getattr(o, "image_sizes"))
+        if len(image_sizes) > 0:
+            merged.image_sizes = image_sizes
+        return io.NodeOutput(merged)
+
+
+class HunyuanVideo15OmniConditioning(io.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return io.Schema(
+            node_id="HunyuanVideo15OmniConditioning",
+            display_name="HunyuanVideo 15 Omni Conditioning",
+            category="conditioning/video_models",
+            inputs=[
+                io.Conditioning.Input("positive"),
+                io.Conditioning.Input("negative"),
+                io.Vae.Input("vae"),
+                io.Combo.Input("task", options=["t2v", "i2v", "interpolation", "reference2v", "editing", "tiv2v"], default="t2v"),
+                io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
+                io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
+                io.Int.Input("length", default=81, min=1, max=nodes.MAX_RESOLUTION, step=4),
+                io.Int.Input("batch_size", default=1, min=1, max=4096),
+                io.Image.Input("reference_images", optional=True, tooltip="For i2v/interpolation/reference2v/tiv2v."),
+                io.Image.Input("condition_video", optional=True, tooltip="For editing/tiv2v."),
+                io.ClipVisionOutput.Input("clip_vision_output", optional=True),
+            ],
+            outputs=[
+                io.Conditioning.Output(display_name="positive"),
+                io.Conditioning.Output(display_name="negative"),
+                io.Latent.Output(display_name="latent"),
+            ],
+        )
+
+    @staticmethod
+    def _latent_length(length: int) -> int:
+        return ((length - 1) // 4) + 1
+
+    @staticmethod
+    def _upscale_frames(frames: torch.Tensor, width: int, height: int):
+        return comfy.utils.common_upscale(frames.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+
+    @classmethod
+    def _encode_single_image(cls, vae, image: torch.Tensor, width: int, height: int):
+        upscaled = cls._upscale_frames(image[:1], width, height)
+        return vae.encode(upscaled[:, :, :, :3])
+
+    @classmethod
+    def _encode_video(cls, vae, video: torch.Tensor, width: int, height: int, length: int):
+        upscaled = cls._upscale_frames(video[:length], width, height)
+        return vae.encode(upscaled[:, :, :, :3])
+
+    @staticmethod
+    def _assign_frame(target: torch.Tensor, source: torch.Tensor, frame_idx: int):
+        if frame_idx < 0 or frame_idx >= target.shape[2]:
+            return
+        target[:, :, frame_idx:frame_idx + 1] = source[:, :, :1]
+
+    @classmethod
+    def execute(cls, positive, negative, vae, task, width, height, length, batch_size, reference_images=None, condition_video=None, clip_vision_output=None) -> io.NodeOutput:
+        latent_length = cls._latent_length(length)
+        latent = torch.zeros([batch_size, 32, latent_length, height // 16, width // 16], device=comfy.model_management.intermediate_device())
+
+        if task == "t2v":
+            if clip_vision_output is not None:
+                positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
+                negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
+            return io.NodeOutput(positive, negative, {"samples": latent})
+
+        cond_latent = torch.zeros_like(latent[:1])
+        omni_mask = torch.zeros((latent_length,), device=cond_latent.device, dtype=cond_latent.dtype)
+
+        if task == "i2v":
+            if reference_images is None or reference_images.shape[0] < 1:
+                raise ValueError("Task i2v requires at least one reference image.")
+            encoded = cls._encode_single_image(vae, reference_images, width, height)
+            cls._assign_frame(cond_latent, encoded, 0)
+            omni_mask[0] = 1.0
+
+        elif task == "interpolation":
+            if reference_images is None or reference_images.shape[0] < 2:
+                raise ValueError("Task interpolation requires at least two reference images.")
+            encoded_first = cls._encode_single_image(vae, reference_images[:1], width, height)
+            encoded_last = cls._encode_single_image(vae, reference_images[-1:], width, height)
+            cls._assign_frame(cond_latent, encoded_first, 0)
+            cls._assign_frame(cond_latent, encoded_last, latent_length - 1)
+            omni_mask[0] = 1.0
+            omni_mask[-1] = 1.0
+
+        elif task == "reference2v":
+            if reference_images is None or reference_images.shape[0] < 1:
+                raise ValueError("Task reference2v requires at least one reference image.")
+            num_refs = min(reference_images.shape[0], max(1, latent_length - 1))
+            for idx in range(num_refs):
+                encoded = cls._encode_single_image(vae, reference_images[idx:idx + 1], width, height)
+                frame_idx = min(idx + 1, latent_length - 1)
+                cls._assign_frame(cond_latent, encoded, frame_idx)
+                omni_mask[frame_idx] = 1.0
+
+        elif task == "editing":
+            if condition_video is None or condition_video.shape[0] < 1:
+                raise ValueError("Task editing requires condition_video.")
+            encoded = cls._encode_video(vae, condition_video, width, height, length)
+            valid_frames = min(latent_length, encoded.shape[2])
+            cond_latent[:, :, :valid_frames] = encoded[:, :, :valid_frames]
+            omni_mask[:valid_frames] = 1.0
+
+        elif task == "tiv2v":
+            if condition_video is None or condition_video.shape[0] < 1:
+                raise ValueError("Task tiv2v requires condition_video.")
+            if reference_images is None or reference_images.shape[0] < 1:
+                raise ValueError("Task tiv2v requires at least one reference image.")
+            encoded_video = cls._encode_video(vae, condition_video, width, height, length)
+            valid_frames = min(latent_length, encoded_video.shape[2])
+            cond_latent[:, :, :valid_frames] = encoded_video[:, :, :valid_frames]
+            omni_mask[:valid_frames] = 1.0
+
+            encoded_ref = cls._encode_single_image(vae, reference_images[:1], width, height)
+            ref_idx = 1 if latent_length > 1 else 0
+            cond_latent[:, :, ref_idx:ref_idx + 1] += encoded_ref[:, :, :1]
+            omni_mask[ref_idx] += 1.0
+
+        cond_latent = comfy.utils.resize_to_batch_size(cond_latent, batch_size)
+        # BaseModel/HunyuanVideo15 inverts concat_mask (mask = 1 - concat_mask), so pass the pre-inverted mask.
+        concat_mask = (1.0 - omni_mask).view(1, 1, latent_length, 1, 1).expand(cond_latent.shape[0], 1, latent_length, cond_latent.shape[-2], cond_latent.shape[-1]).to(cond_latent.dtype)
+
+        positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": cond_latent, "concat_mask": concat_mask})
+        negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": cond_latent, "concat_mask": concat_mask})
+        if clip_vision_output is not None:
+            positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
+            negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
+
+        return io.NodeOutput(positive, negative, {"samples": latent})
+
+
 class HunyuanImageToVideo(io.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -411,9 +653,12 @@ class HunyuanExtension(ComfyExtension):
        return [
            CLIPTextEncodeHunyuanDiT,
            TextEncodeHunyuanVideo_ImageToVideo,
+            TextEncodeHunyuanVideo15Omni,
+            HunyuanClipVisionOutputConcat,
            EmptyHunyuanLatentVideo,
            EmptyHunyuanVideo15Latent,
            HunyuanVideo15ImageToVideo,
+            HunyuanVideo15OmniConditioning,
            HunyuanVideo15SuperResolution,
            HunyuanVideo15LatentUpscaleWithModel,
            LatentUpscaleModelLoader,
--- a/comfy_extras/nodes_primitive.py
+++ b/comfy_extras/nodes_primitive.py
@ -49,7 +49,7 @@ class Int(io.ComfyNode):
            display_name="Int",
            category="utils/primitive",
            inputs=[
-                io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=True),
+                io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=io.ControlAfterGenerate.fixed),
            ],
            outputs=[io.Int.Output()],
        )
--- a/node_helpers.py
+++ b/node_helpers.py
@ -86,6 +86,6 @@ def image_alpha_fix(destination, source):
    if destination.shape[-1] < source.shape[-1]:
        source = source[...,:destination.shape[-1]]
    elif destination.shape[-1] > source.shape[-1]:
-        destination = torch.nn.functional.pad(destination, (0, 1))
-        destination[..., -1] = 1.0
+        source = torch.nn.functional.pad(source, (0, 1))
+        source[..., -1] = 1.0
    return destination, source
--- a/nodes.py
+++ b/nodes.py
@ -958,7 +958,7 @@ class CLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
-                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image"], ),
+                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "hunyuan_video_15", "flux2", "ovis", "longcat_image"], ),
                              },
                "optional": {
                              "device": (["default", "cpu"], {"advanced": True}),
@ -968,7 +968,7 @@ class CLIPLoader:

    CATEGORY = "advanced/loaders"

-    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B"
+    DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\nhidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B\nhunyuan_video_15: qwen2.5-vl (single-file fallback without byT5)"

    def load_clip(self, clip_name, type="stable_diffusion", device="default"):
        clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
@ -1754,57 +1754,49 @@ class LoadImage:

        return True

-class LoadImageMask:
+
+class LoadImageMask(LoadImage):
    ESSENTIALS_CATEGORY = "Image Tools"
    SEARCH_ALIASES = ["import mask", "alpha mask", "channel mask"]

    _color_channels = ["alpha", "red", "green", "blue"]
+
    @classmethod
    def INPUT_TYPES(s):
-        input_dir = folder_paths.get_input_directory()
-        files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
-        return {"required":
-                    {"image": (sorted(files), {"image_upload": True}),
-                     "channel": (s._color_channels, ), }
-                }
+        types = super().INPUT_TYPES()
+        return {
+            "required": {
+                **types["required"],
+                "channel": (s._color_channels, )
+            }
+        }

    CATEGORY = "mask"
-
    RETURN_TYPES = ("MASK",)
-    FUNCTION = "load_image"
-    def load_image(self, image, channel):
-        image_path = folder_paths.get_annotated_filepath(image)
-        i = node_helpers.pillow(Image.open, image_path)
-        i = node_helpers.pillow(ImageOps.exif_transpose, i)
-        if i.getbands() != ("R", "G", "B", "A"):
-            if i.mode == 'I':
-                i = i.point(lambda i: i * (1 / 255))
-            i = i.convert("RGBA")
-        mask = None
+    FUNCTION = "load_image_mask"
+
+    def load_image_mask(self, image, channel):
+        image_tensor, mask_tensor = super().load_image(image)
        c = channel[0].upper()
-        if c in i.getbands():
-            mask = np.array(i.getchannel(c)).astype(np.float32) / 255.0
-            mask = torch.from_numpy(mask)
-            if c == 'A':
-                mask = 1. - mask
+
+        if c == 'A':
+            return (mask_tensor,)
+
+        channel_idx = {'R': 0, 'G': 1, 'B': 2}.get(c, 0)
+
+        if channel_idx < image_tensor.shape[-1]:
+            return (image_tensor[..., channel_idx].clone(),)
        else:
-            mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
-        return (mask.unsqueeze(0),)
+            empty_mask = torch.zeros(
+                image_tensor.shape[:-1],
+                dtype=image_tensor.dtype,
+                device=image_tensor.device
+            )
+            return (empty_mask,)

    @classmethod
    def IS_CHANGED(s, image, channel):
-        image_path = folder_paths.get_annotated_filepath(image)
-        m = hashlib.sha256()
-        with open(image_path, 'rb') as f:
-            m.update(f.read())
-        return m.digest().hex()
-
-    @classmethod
-    def VALIDATE_INPUTS(s, image):
-        if not folder_paths.exists_annotated_filepath(image):
-            return "Invalid image file: {}".format(image)
-
-        return True
+        return super().IS_CHANGED(image)


 class LoadImageOutput(LoadImage):
--- a/server.py
+++ b/server.py
@ -1,3 +1,4 @@
+import errno
 import os
 import sys
 import asyncio
@ -1245,7 +1246,13 @@ class PromptServer():
            address = addr[0]
            port = addr[1]
            site = web.TCPSite(runner, address, port, ssl_context=ssl_ctx)
-            await site.start()
+            try:
+                await site.start()
+            except OSError as e:
+                if e.errno == errno.EADDRINUSE:
+                    logging.error(f"Port {port} is already in use on address {address}. Please close the other application or use a different port with --port.")
+                    raise SystemExit(1)
+                raise

            if not hasattr(self, 'address'):
                self.address = address #TODO: remove this
Author	SHA1	Message	Date
ifilipis	5cf7f1c846	Merge `6447250bd6` into `2806163f6e`	2026-05-04 09:15:43 +07:00
Jedrzej Kosinski	2806163f6e	Default control_after_generate to fixed in PrimitiveInt node (#13690)	2026-05-04 07:21:34 +08:00
comfyanonymous	cea8d0925f	Refactor LoadImageMask to use LoadImage code. (#13687) [CI: some checks pending; all listed checks were waiting to run]	2026-05-03 16:18:27 -04:00
Silver	b138133ffa	Enable triton comfy kitchen via cli-arg (#12730)	2026-05-03 14:07:21 -04:00
Jukka Seppänen	025e6792ee	Batch broadcasting in JoinImageWithAlpha node (#13686) [CI: some checks failed; "Generate Pydantic Stubs from api.comfy.org / generate-models" was cancelled, the remaining checks were waiting to run] * Batch broadcasting in JoinImageWithAlpha node	2026-05-03 16:30:00 +03:00
Luke Mino-Altherr	867b8d2408	fix: gracefully handle port-in-use error on server startup (#13001). Catch EADDRINUSE OSError when binding the TCP site and exit with a clear error message instead of an unhandled traceback.	2026-05-03 20:44:20 +08:00
Alexis Rolland	d0f0b15cf5	Update ComfyUI screenshot in README (#13683). Update ComfyUI screenshot to showcase a more modern workflow.	2026-05-03 18:48:58 +08:00
Alexis Rolland	b5bb83c964	Fix issue blend images with alpha (#13615). Make ImageBlend and ImageCompositeMasked nodes handle images with different channel counts.	2026-05-03 18:17:08 +08:00
Codex	6447250bd6	Add HY-OmniWeave support for HunyuanVideo 1.5	2026-04-04 22:03:24 +00:00