Compare commits

...

9 Commits

Author SHA1 Message Date
ifilipis
5cf7f1c846
Merge 6447250bd6 into 2806163f6e 2026-05-04 09:15:43 +07:00
Jedrzej Kosinski
2806163f6e
Default control_after_generate to fixed in PrimitiveInt node (#13690) 2026-05-04 07:21:34 +08:00
comfyanonymous
cea8d0925f
Refactor LoadImageMask to use LoadImage code. (#13687)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
2026-05-03 16:18:27 -04:00
Silver
b138133ffa
Enable triton comfy kitchen via cli-arg (#12730) 2026-05-03 14:07:21 -04:00
Jukka Seppänen
025e6792ee
Batch broadcasting in JoinImageWithAlpha node (#13686)
Some checks failed
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run
Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run
Execution Tests / test (macos-latest) (push) Waiting to run
Execution Tests / test (ubuntu-latest) (push) Waiting to run
Execution Tests / test (windows-latest) (push) Waiting to run
Test server launches without errors / test (push) Waiting to run
Unit Tests / test (macos-latest) (push) Waiting to run
Unit Tests / test (ubuntu-latest) (push) Waiting to run
Unit Tests / test (windows-2022) (push) Waiting to run
Generate Pydantic Stubs from api.comfy.org / generate-models (push) Has been cancelled
* Batch broadcasting in JoinImageWithAlpha node
2026-05-03 16:30:00 +03:00
Luke Mino-Altherr
867b8d2408
fix: gracefully handle port-in-use error on server startup (#13001)
Catch EADDRINUSE OSError when binding the TCP site and exit with a clear error message instead of an unhandled traceback.
2026-05-03 20:44:20 +08:00
Alexis Rolland
d0f0b15cf5
Update ComfyUI screenshot in README (#13683)
Update ComfyUI screenshot to showcase a more modern workflow
2026-05-03 18:48:58 +08:00
Alexis Rolland
b5bb83c964
Fix issue blend images with alpha (#13615)
Make ImageBlend and ImageCompositeMasked nodes handle images with different channel counts
2026-05-03 18:17:08 +08:00
Codex
6447250bd6 Add HY-OmniWeave support for HunyuanVideo 1.5 2026-04-04 22:03:24 +00:00
10 changed files with 367 additions and 54 deletions

View File

@ -31,7 +31,8 @@
[github-downloads-latest-shield]: https://img.shields.io/github/downloads/comfyanonymous/ComfyUI/latest/total?style=flat&label=downloads%40latest
[github-downloads-link]: https://github.com/comfyanonymous/ComfyUI/releases
<img width="1590" height="795" alt="ComfyUI Screenshot" src="https://github.com/user-attachments/assets/4aab0bef-b413-4595-9766-a2c134676d27" />
<img width="1590" height="795" alt="ComfyUI Screenshot" src="https://github.com/user-attachments/assets/36e065e0-bfae-4456-8c7f-8369d5ea48a2" />
<br>
</div>
ComfyUI is the AI creation engine for visual professionals who demand control over every model, every parameter, and every output. Its powerful and modular node graph interface empowers creatives to generate images, videos, 3D models, audio, and more...

View File

@ -91,6 +91,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE"
parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
parser.add_argument("--enable-triton-backend", action="store_true", help="ComfyUI will enable the use of Triton backend in comfy-kitchen. Is disabled at launch by default.")
class LatentPreviewMethod(enum.Enum):
NoPreviews = "none"

View File

@ -1,6 +1,8 @@
import torch
import logging
from comfy.cli_args import args
try:
import comfy_kitchen as ck
from comfy_kitchen.tensor import (
@ -21,7 +23,15 @@ try:
ck.registry.disable("cuda")
logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")
ck.registry.disable("triton")
if args.enable_triton_backend:
try:
import triton
logging.info("Found triton %s. Enabling comfy-kitchen triton backend.", triton.__version__)
except ImportError as e:
logging.error(f"Failed to import triton, Error: {e}, the comfy-kitchen triton backend will not be available.")
ck.registry.disable("triton")
else:
ck.registry.disable("triton")
for k, v in ck.list_backends().items():
logging.info(f"Found comfy_kitchen backend {k}: {v}")
except ImportError as e:

View File

@ -1320,6 +1320,13 @@ def detect_te_model(sd):
return TEModel.QWEN25_3B
if weight.shape[0] == 512:
return TEModel.QWEN25_7B
# Qwen-VL checkpoints can be saved under model.language_model.* (e.g. HY-OmniWeave text encoder).
if 'model.language_model.layers.0.self_attn.k_proj.bias' in sd:
weight = sd['model.language_model.layers.0.self_attn.k_proj.bias']
if weight.shape[0] == 256:
return TEModel.QWEN25_3B
if weight.shape[0] == 512:
return TEModel.QWEN25_7B
if "model.language_model.layers.0.linear_attn.A_log" in sd and "model.language_model.layers.0.input_layernorm.weight" in sd:
weight = sd['model.language_model.layers.0.input_layernorm.weight']
if weight.shape[0] == 1024:
@ -1365,7 +1372,11 @@ def t5xxl_detect(clip_data):
return {}
def llama_detect(clip_data):
weight_names = ["model.layers.0.self_attn.k_proj.weight", "model.layers.0.linear_attn.in_proj_a.weight"]
weight_names = [
"model.layers.0.self_attn.k_proj.weight",
"model.layers.0.linear_attn.in_proj_a.weight",
"model.language_model.layers.0.self_attn.k_proj.weight",
]
for sd in clip_data:
for weight_name in weight_names:
@ -1476,7 +1487,23 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
clip_target.clip = comfy.text_encoders.omnigen2.te(**llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.omnigen2.Omnigen2Tokenizer
elif te_model == TEModel.QWEN25_7B:
if clip_type == CLIPType.HUNYUAN_IMAGE:
# Some Qwen2.5-VL checkpoints (including HY-OmniWeave's text encoder)
# are saved with "model.language_model.*" and "model.visual.*" prefixes.
# Normalize keys to the layout expected by Comfy text encoder wrappers.
for i, sd in enumerate(clip_data):
if "model.language_model.layers.0.self_attn.k_proj.weight" in sd:
clip_data[i] = comfy.utils.state_dict_prefix_replace(
sd,
{
"model.language_model.": "model.",
"model.visual.": "visual.",
"final_layer_norm.": "model.norm.",
},
)
if clip_type == CLIPType.HUNYUAN_VIDEO_15:
clip_target.clip = comfy.text_encoders.hunyuan_image.te(byt5=False, **llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.hunyuan_video.HunyuanVideo15Tokenizer
elif clip_type == CLIPType.HUNYUAN_IMAGE:
clip_target.clip = comfy.text_encoders.hunyuan_image.te(byt5=False, **llama_detect(clip_data))
clip_target.tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImageTokenizer
elif clip_type == CLIPType.LONGCAT_IMAGE:
@ -1814,6 +1841,39 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable
if custom_operations is None:
sd, metadata = comfy.utils.convert_old_quants(sd, "", metadata=metadata)
# HY-OmniWeave checkpoints store double-block attention as split q/k/v tensors
# while Comfy's HunyuanVideo implementation expects merged qkv tensors.
if "double_blocks.0.img_attn_q.weight" in sd and "double_blocks.0.img_attn.qkv.weight" not in sd:
converted_qkv = 0
block_indices = set()
for k in list(sd.keys()):
if not k.startswith("double_blocks."):
continue
parts = k.split(".")
if len(parts) < 3:
continue
if parts[2] == "img_attn_q":
try:
block_indices.add(int(parts[1]))
except ValueError:
pass
for idx in sorted(block_indices):
for attn_prefix in ("img_attn", "txt_attn"):
for end in ("weight", "bias"):
q_key = f"double_blocks.{idx}.{attn_prefix}_q.{end}"
k_key = f"double_blocks.{idx}.{attn_prefix}_k.{end}"
v_key = f"double_blocks.{idx}.{attn_prefix}_v.{end}"
qkv_key = f"double_blocks.{idx}.{attn_prefix}.qkv.{end}"
if qkv_key in sd:
continue
if q_key in sd and k_key in sd and v_key in sd:
sd[qkv_key] = torch.cat((sd.pop(q_key), sd.pop(k_key), sd.pop(v_key)), dim=0)
converted_qkv += 1
if converted_qkv > 0:
logging.info(f"Converted {converted_qkv} split HunyuanVideo attention tensors to qkv format.")
parameters = comfy.utils.calculate_parameters(sd)
weight_dtype = comfy.utils.weight_dtype(sd)

View File

@ -202,14 +202,11 @@ class JoinImageWithAlpha(io.ComfyNode):
@classmethod
def execute(cls, image: torch.Tensor, alpha: torch.Tensor) -> io.NodeOutput:
batch_size = min(len(image), len(alpha))
out_images = []
batch_size = max(len(image), len(alpha))
alpha = 1.0 - resize_mask(alpha, image.shape[1:])
for i in range(batch_size):
out_images.append(torch.cat((image[i][:,:,:3], alpha[i].unsqueeze(2)), dim=2))
return io.NodeOutput(torch.stack(out_images))
alpha = comfy.utils.repeat_to_batch_size(alpha, batch_size)
image = comfy.utils.repeat_to_batch_size(image, batch_size)
return io.NodeOutput(torch.cat((image[..., :3], alpha.unsqueeze(-1)), dim=-1))
class CompositingExtension(ComfyExtension):

View File

@ -2,6 +2,8 @@ import nodes
import node_helpers
import torch
import comfy.model_management
import comfy.utils
import comfy.clip_vision
from typing_extensions import override
from comfy_api.latest import ComfyExtension, io
from comfy.ldm.hunyuan_video.upsampler import HunyuanVideo15SRModel
@ -301,6 +303,246 @@ class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode):
encode = execute # TODO: remove
class TextEncodeHunyuanVideo15Omni(io.ComfyNode):
    """Encode a prompt through a task-specific chat template, optionally with visual embeddings."""

    @classmethod
    def define_schema(cls):
        """Declare the node: clip, prompt, task selector, visual-input controls, one conditioning output."""
        task_options = ["t2v", "i2v", "interpolation", "reference2v", "editing", "tiv2v"]
        node_inputs = [
            io.Clip.Input("clip"),
            io.String.Input("prompt", multiline=True, dynamic_prompts=True),
            io.Combo.Input("task", options=task_options, default="t2v"),
            io.Boolean.Input("use_visual_inputs", default=True, advanced=True),
            io.Int.Input("max_visual_inputs", default=8, min=1, max=64, advanced=True),
            io.ClipVisionOutput.Input("clip_vision_output", optional=True),
        ]
        node_outputs = [
            io.Conditioning.Output(),
        ]
        return io.Schema(
            node_id="TextEncodeHunyuanVideo15Omni",
            display_name="Text Encode HunyuanVideo 15 Omni",
            category="advanced/conditioning",
            inputs=node_inputs,
            outputs=node_outputs,
        )

    @staticmethod
    def _task_system_prompt(task: str) -> str:
        """Return the system prompt for `task`; unknown tasks fall back to the "t2v" prompt."""
        by_task = {
            "t2v": "Describe a high-quality target video from the user's request with concrete scene details, motion, camera behavior, and style.",
            "i2v": "Describe a target video that should stay consistent with the provided reference image while following the user's request.",
            "interpolation": "Describe a target video that smoothly transitions between the provided keyframe images while following the user's request.",
            "reference2v": "Describe a target video that composes the provided reference subjects into a coherent scene following the user's request.",
            "editing": "Describe an edited output video that follows the user's instruction while preserving relevant source video content.",
            "tiv2v": "Describe an edited output video using both the provided source video and reference image guidance according to the user's instruction.",
        }
        if task not in by_task:
            task = "t2v"
        return by_task[task]

    @classmethod
    def _build_template(cls, task: str, image_count: int) -> str:
        """Assemble the chat template string.

        The result holds the system prompt, one vision placeholder line per
        image, and a literal "{}" slot left in place for the user prompt.
        """
        pieces = [
            "<|im_start|>system\n",
            cls._task_system_prompt(task),
            "<|im_end|>\n",
            "<|im_start|>user\n",
            "<|vision_start|><|image_pad|><|vision_end|>\n" * image_count,
            "{}<|im_end|>\n",
            "<|im_start|>assistant\n",
        ]
        return "".join(pieces)

    @staticmethod
    def _extract_image_embeds(clip_vision_output, max_visual_inputs: int):
        """Return a list of embeddings taken from `clip_vision_output.mm_projected`.

        Yields an empty list when the output or its `mm_projected` attribute is
        missing. A 2-D tensor is returned whole as a single entry; otherwise the
        first `max_visual_inputs` entries along dim 0 are returned.
        """
        projected = None
        if clip_vision_output is not None:
            projected = getattr(clip_vision_output, "mm_projected", None)
        if projected is None:
            return []
        if projected.ndim == 2:
            return [projected]
        limit = min(projected.shape[0], max_visual_inputs)
        return [projected[idx] for idx in range(limit)]

    @classmethod
    def execute(cls, clip, prompt, task, use_visual_inputs, max_visual_inputs, clip_vision_output=None) -> io.NodeOutput:
        """Tokenize `prompt` with the task template and return the scheduled conditioning."""
        if use_visual_inputs:
            visual_embeds = cls._extract_image_embeds(clip_vision_output, max_visual_inputs)
        else:
            visual_embeds = []
        llama_template = cls._build_template(task, len(visual_embeds))

        # HunyuanVideo 1.5 tokenizers use `images=...`; HunyuanVideo 1.0 uses `image_embeds=...`.
        try:
            tokens = clip.tokenize(prompt, llama_template=llama_template, images=visual_embeds)
        except TypeError:
            stacked = torch.stack(visual_embeds, dim=0) if len(visual_embeds) > 0 else None
            tokens = clip.tokenize(prompt, llama_template=llama_template, image_embeds=stacked, image_interleave=1)
        return io.NodeOutput(clip.encode_from_tokens_scheduled(tokens))

    encode = execute  # TODO: remove
class HunyuanClipVisionOutputConcat(io.ComfyNode):
    """Merge up to four CLIP vision outputs into one by concatenating their tensors along dim 0."""

    @classmethod
    def define_schema(cls):
        """Declare the node: one required and three optional CLIP vision outputs in, one out."""
        return io.Schema(
            node_id="HunyuanClipVisionOutputConcat",
            display_name="Hunyuan CLIP Vision Output Concat",
            category="conditioning/video_models",
            inputs=[
                io.ClipVisionOutput.Input("clip_vision_output_1"),
                io.ClipVisionOutput.Input("clip_vision_output_2", optional=True),
                io.ClipVisionOutput.Input("clip_vision_output_3", optional=True),
                io.ClipVisionOutput.Input("clip_vision_output_4", optional=True),
            ],
            outputs=[
                io.ClipVisionOutput.Output(),
            ],
        )

    @classmethod
    def execute(cls, clip_vision_output_1, clip_vision_output_2=None, clip_vision_output_3=None, clip_vision_output_4=None) -> io.NodeOutput:
        """Return a new vision output holding the concatenation of the given ones.

        For each known tensor attribute, every input whose value is a tensor
        contributes to a `torch.cat` along dim 0. Values that are missing,
        `None`, or not tensors are skipped individually, so one input lacking
        an attribute no longer aborts the merge or hides the attribute from
        the other inputs. `image_sizes` entries are appended in input order.
        """
        outputs = [o for o in (clip_vision_output_1, clip_vision_output_2, clip_vision_output_3, clip_vision_output_4) if o is not None]
        merged = comfy.clip_vision.Output()
        tensor_attrs = ("last_hidden_state", "image_embeds", "penultimate_hidden_states", "all_hidden_states", "mm_projected")

        for attr in tensor_attrs:
            # Check every value, not just the first: a None in a later output
            # would otherwise reach torch.cat and raise.
            tensors = [v for v in (getattr(o, attr, None) for o in outputs) if torch.is_tensor(v)]
            if tensors:
                setattr(merged, attr, torch.cat(tensors, dim=0))

        image_sizes = []
        for o in outputs:
            sizes = getattr(o, "image_sizes", None)
            # An attribute that exists but is None cannot be extended from.
            if sizes is not None:
                image_sizes.extend(sizes)
        if image_sizes:
            merged.image_sizes = image_sizes

        return io.NodeOutput(merged)
class HunyuanVideo15OmniConditioning(io.ComfyNode):
    """Build conditioning latents and masks for the selected task and return an empty sampling latent."""

    @classmethod
    def define_schema(cls):
        """Declare the node: conditioning pair, VAE, task, output geometry and optional visual inputs."""
        return io.Schema(
            node_id="HunyuanVideo15OmniConditioning",
            display_name="HunyuanVideo 15 Omni Conditioning",
            category="conditioning/video_models",
            inputs=[
                io.Conditioning.Input("positive"),
                io.Conditioning.Input("negative"),
                io.Vae.Input("vae"),
                io.Combo.Input("task", options=["t2v", "i2v", "interpolation", "reference2v", "editing", "tiv2v"], default="t2v"),
                io.Int.Input("width", default=848, min=16, max=nodes.MAX_RESOLUTION, step=16),
                io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16),
                io.Int.Input("length", default=81, min=1, max=nodes.MAX_RESOLUTION, step=4),
                io.Int.Input("batch_size", default=1, min=1, max=4096),
                io.Image.Input("reference_images", optional=True, tooltip="For i2v/interpolation/reference2v/tiv2v."),
                io.Image.Input("condition_video", optional=True, tooltip="For editing/tiv2v."),
                io.ClipVisionOutput.Input("clip_vision_output", optional=True),
            ],
            outputs=[
                io.Conditioning.Output(display_name="positive"),
                io.Conditioning.Output(display_name="negative"),
                io.Latent.Output(display_name="latent"),
            ],
        )

    @staticmethod
    def _latent_length(length: int) -> int:
        """Map a frame count to the latent frame count: ((length - 1) // 4) + 1."""
        return ((length - 1) // 4) + 1

    @staticmethod
    def _upscale_frames(frames: torch.Tensor, width: int, height: int):
        """Resize channels-last frames to (width, height) with bilinear scaling and center crop."""
        # common_upscale is called on a channels-at-dim-1 view, then moved back.
        return comfy.utils.common_upscale(frames.movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)

    @classmethod
    def _encode_single_image(cls, vae, image: torch.Tensor, width: int, height: int):
        """VAE-encode only the first image of `image`, using its first three channels."""
        upscaled = cls._upscale_frames(image[:1], width, height)
        return vae.encode(upscaled[:, :, :, :3])

    @classmethod
    def _encode_video(cls, vae, video: torch.Tensor, width: int, height: int, length: int):
        """VAE-encode the first `length` frames of `video`, using their first three channels."""
        upscaled = cls._upscale_frames(video[:length], width, height)
        return vae.encode(upscaled[:, :, :, :3])

    @staticmethod
    def _assign_frame(target: torch.Tensor, source: torch.Tensor, frame_idx: int):
        """Copy the first frame of `source` into `target` at `frame_idx` along dim 2; out-of-range indices are ignored."""
        if frame_idx < 0 or frame_idx >= target.shape[2]:
            return
        target[:, :, frame_idx:frame_idx + 1] = source[:, :, :1]

    @classmethod
    def execute(cls, positive, negative, vae, task, width, height, length, batch_size, reference_images=None, condition_video=None, clip_vision_output=None) -> io.NodeOutput:
        """Attach task-specific concat latents/masks to both conditionings.

        Returns the updated positive and negative conditioning plus a zeroed
        latent of shape [batch_size, 32, latent_length, height // 16, width // 16].

        Raises:
            ValueError: when the selected task is missing a required
                `reference_images` or `condition_video` input.
        """
        latent_length = cls._latent_length(length)
        latent = torch.zeros([batch_size, 32, latent_length, height // 16, width // 16], device=comfy.model_management.intermediate_device())

        # t2v needs no concat latent; only the optional vision output is attached.
        if task == "t2v":
            if clip_vision_output is not None:
                positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
                negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
            return io.NodeOutput(positive, negative, {"samples": latent})

        cond_latent = torch.zeros_like(latent[:1])
        # Per-latent-frame flag: 1.0 where a conditioning frame was written, else 0.0.
        omni_mask = torch.zeros((latent_length,), device=cond_latent.device, dtype=cond_latent.dtype)

        if task == "i2v":
            if reference_images is None or reference_images.shape[0] < 1:
                raise ValueError("Task i2v requires at least one reference image.")
            encoded = cls._encode_single_image(vae, reference_images, width, height)
            cls._assign_frame(cond_latent, encoded, 0)
            omni_mask[0] = 1.0
        elif task == "interpolation":
            if reference_images is None or reference_images.shape[0] < 2:
                raise ValueError("Task interpolation requires at least two reference images.")
            encoded_first = cls._encode_single_image(vae, reference_images[:1], width, height)
            encoded_last = cls._encode_single_image(vae, reference_images[-1:], width, height)
            cls._assign_frame(cond_latent, encoded_first, 0)
            cls._assign_frame(cond_latent, encoded_last, latent_length - 1)
            omni_mask[0] = 1.0
            omni_mask[-1] = 1.0
        elif task == "reference2v":
            if reference_images is None or reference_images.shape[0] < 1:
                raise ValueError("Task reference2v requires at least one reference image.")
            # References go to latent frames 1.. and are capped so they fit in the clip.
            num_refs = min(reference_images.shape[0], max(1, latent_length - 1))
            for idx in range(num_refs):
                encoded = cls._encode_single_image(vae, reference_images[idx:idx + 1], width, height)
                frame_idx = min(idx + 1, latent_length - 1)
                cls._assign_frame(cond_latent, encoded, frame_idx)
                omni_mask[frame_idx] = 1.0
        elif task == "editing":
            if condition_video is None or condition_video.shape[0] < 1:
                raise ValueError("Task editing requires condition_video.")
            encoded = cls._encode_video(vae, condition_video, width, height, length)
            valid_frames = min(latent_length, encoded.shape[2])
            cond_latent[:, :, :valid_frames] = encoded[:, :, :valid_frames]
            omni_mask[:valid_frames] = 1.0
        elif task == "tiv2v":
            if condition_video is None or condition_video.shape[0] < 1:
                raise ValueError("Task tiv2v requires condition_video.")
            if reference_images is None or reference_images.shape[0] < 1:
                raise ValueError("Task tiv2v requires at least one reference image.")
            encoded_video = cls._encode_video(vae, condition_video, width, height, length)
            valid_frames = min(latent_length, encoded_video.shape[2])
            cond_latent[:, :, :valid_frames] = encoded_video[:, :, :valid_frames]
            omni_mask[:valid_frames] = 1.0

            encoded_ref = cls._encode_single_image(vae, reference_images[:1], width, height)
            ref_idx = 1 if latent_length > 1 else 0
            # NOTE(review): the reference latent is summed onto whatever the video
            # wrote at this frame - confirm that adding (not overwriting) is intended.
            cond_latent[:, :, ref_idx:ref_idx + 1] += encoded_ref[:, :, :1]
            # Assign rather than accumulate: the video pass may already have set
            # this entry to 1.0, and a value of 2.0 would turn concat_mask into -1.0.
            omni_mask[ref_idx] = 1.0

        cond_latent = comfy.utils.resize_to_batch_size(cond_latent, batch_size)
        # BaseModel/HunyuanVideo15 inverts concat_mask (mask = 1 - concat_mask), so pass the pre-inverted mask.
        concat_mask = (1.0 - omni_mask).view(1, 1, latent_length, 1, 1).expand(cond_latent.shape[0], 1, latent_length, cond_latent.shape[-2], cond_latent.shape[-1]).to(cond_latent.dtype)

        positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": cond_latent, "concat_mask": concat_mask})
        negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": cond_latent, "concat_mask": concat_mask})

        if clip_vision_output is not None:
            positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
            negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})

        return io.NodeOutput(positive, negative, {"samples": latent})
class HunyuanImageToVideo(io.ComfyNode):
@classmethod
def define_schema(cls):
@ -411,9 +653,12 @@ class HunyuanExtension(ComfyExtension):
return [
CLIPTextEncodeHunyuanDiT,
TextEncodeHunyuanVideo_ImageToVideo,
TextEncodeHunyuanVideo15Omni,
HunyuanClipVisionOutputConcat,
EmptyHunyuanLatentVideo,
EmptyHunyuanVideo15Latent,
HunyuanVideo15ImageToVideo,
HunyuanVideo15OmniConditioning,
HunyuanVideo15SuperResolution,
HunyuanVideo15LatentUpscaleWithModel,
LatentUpscaleModelLoader,

View File

@ -49,7 +49,7 @@ class Int(io.ComfyNode):
display_name="Int",
category="utils/primitive",
inputs=[
io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=True),
io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=io.ControlAfterGenerate.fixed),
],
outputs=[io.Int.Output()],
)

View File

@ -86,6 +86,6 @@ def image_alpha_fix(destination, source):
if destination.shape[-1] < source.shape[-1]:
source = source[...,:destination.shape[-1]]
elif destination.shape[-1] > source.shape[-1]:
destination = torch.nn.functional.pad(destination, (0, 1))
destination[..., -1] = 1.0
source = torch.nn.functional.pad(source, (0, 1))
source[..., -1] = 1.0
return destination, source

View File

@ -958,7 +958,7 @@ class CLIPLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "flux2", "ovis", "longcat_image"], ),
"type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma", "ace", "omnigen2", "qwen_image", "hunyuan_image", "hunyuan_video_15", "flux2", "ovis", "longcat_image"], ),
},
"optional": {
"device": (["default", "cpu"], {"advanced": True}),
@ -968,7 +968,7 @@ class CLIPLoader:
CATEGORY = "advanced/loaders"
DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\n hidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B"
DESCRIPTION = "[Recipes]\n\nstable_diffusion: clip-l\nstable_cascade: clip-g\nsd3: t5 xxl/ clip-g / clip-l\nstable_audio: t5 base\nmochi: t5 xxl\ncosmos: old t5 xxl\nlumina2: gemma 2 2B\nwan: umt5 xxl\nhidream: llama-3.1 (Recommend) or t5\nomnigen2: qwen vl 2.5 3B\nhunyuan_video_15: qwen2.5-vl (single-file fallback without byT5)"
def load_clip(self, clip_name, type="stable_diffusion", device="default"):
clip_type = getattr(comfy.sd.CLIPType, type.upper(), comfy.sd.CLIPType.STABLE_DIFFUSION)
@ -1754,57 +1754,49 @@ class LoadImage:
return True
class LoadImageMask:
class LoadImageMask(LoadImage):
ESSENTIALS_CATEGORY = "Image Tools"
SEARCH_ALIASES = ["import mask", "alpha mask", "channel mask"]
_color_channels = ["alpha", "red", "green", "blue"]
@classmethod
def INPUT_TYPES(s):
input_dir = folder_paths.get_input_directory()
files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
return {"required":
{"image": (sorted(files), {"image_upload": True}),
"channel": (s._color_channels, ), }
}
types = super().INPUT_TYPES()
return {
"required": {
**types["required"],
"channel": (s._color_channels, )
}
}
CATEGORY = "mask"
RETURN_TYPES = ("MASK",)
FUNCTION = "load_image"
def load_image(self, image, channel):
image_path = folder_paths.get_annotated_filepath(image)
i = node_helpers.pillow(Image.open, image_path)
i = node_helpers.pillow(ImageOps.exif_transpose, i)
if i.getbands() != ("R", "G", "B", "A"):
if i.mode == 'I':
i = i.point(lambda i: i * (1 / 255))
i = i.convert("RGBA")
mask = None
FUNCTION = "load_image_mask"
def load_image_mask(self, image, channel):
image_tensor, mask_tensor = super().load_image(image)
c = channel[0].upper()
if c in i.getbands():
mask = np.array(i.getchannel(c)).astype(np.float32) / 255.0
mask = torch.from_numpy(mask)
if c == 'A':
mask = 1. - mask
if c == 'A':
return (mask_tensor,)
channel_idx = {'R': 0, 'G': 1, 'B': 2}.get(c, 0)
if channel_idx < image_tensor.shape[-1]:
return (image_tensor[..., channel_idx].clone(),)
else:
mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
return (mask.unsqueeze(0),)
empty_mask = torch.zeros(
image_tensor.shape[:-1],
dtype=image_tensor.dtype,
device=image_tensor.device
)
return (empty_mask,)
@classmethod
def IS_CHANGED(s, image, channel):
image_path = folder_paths.get_annotated_filepath(image)
m = hashlib.sha256()
with open(image_path, 'rb') as f:
m.update(f.read())
return m.digest().hex()
@classmethod
def VALIDATE_INPUTS(s, image):
if not folder_paths.exists_annotated_filepath(image):
return "Invalid image file: {}".format(image)
return True
return super().IS_CHANGED(image)
class LoadImageOutput(LoadImage):

View File

@ -1,3 +1,4 @@
import errno
import os
import sys
import asyncio
@ -1245,7 +1246,13 @@ class PromptServer():
address = addr[0]
port = addr[1]
site = web.TCPSite(runner, address, port, ssl_context=ssl_ctx)
await site.start()
try:
await site.start()
except OSError as e:
if e.errno == errno.EADDRINUSE:
logging.error(f"Port {port} is already in use on address {address}. Please close the other application or use a different port with --port.")
raise SystemExit(1)
raise
if not hasattr(self, 'address'):
self.address = address #TODO: remove this