Merge 987dce1db9 into f3ea976cba

Fix a1111 typo in extra_model_paths.yaml (#2720)
fix: Update ColorTransfer node ref_image to be mandatory (#13691)
2026-05-26 08:57:26 +08:00 · 2026-05-04 14:53:02 +02:00 · 2026-05-04 16:01:46 +08:00 · 2026-05-04 12:33:11 +08:00 · 2026-05-04 07:21:34 +08:00 · 2026-05-03 16:18:27 -04:00
8 changed files with 65 additions and 53 deletions
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -91,6 +91,7 @@ parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE"

 parser.add_argument("--oneapi-device-selector", type=str, default=None, metavar="SELECTOR_STRING", help="Sets the oneAPI device(s) this instance will use.")
 parser.add_argument("--supports-fp8-compute", action="store_true", help="ComfyUI will act like if the device supports fp8 compute.")
+parser.add_argument("--enable-triton-backend", action="store_true", help="ComfyUI will enable the use of Triton backend in comfy-kitchen. Is disabled at launch by default.")

 class LatentPreviewMethod(enum.Enum):
    NoPreviews = "none"
--- a/comfy/quant_ops.py
+++ b/comfy/quant_ops.py
@@ -1,6 +1,8 @@
 import torch
 import logging

+from comfy.cli_args import args
+
 try:
    import comfy_kitchen as ck
    from comfy_kitchen.tensor import (
@@ -21,7 +23,15 @@ try:
            ck.registry.disable("cuda")
            logging.warning("WARNING: You need pytorch with cu130 or higher to use optimized CUDA operations.")

-    ck.registry.disable("triton")
+    if args.enable_triton_backend:
+        try:
+            import triton
+            logging.info("Found triton %s. Enabling comfy-kitchen triton backend.", triton.__version__)
+        except ImportError as e:
+            logging.error(f"Failed to import triton, Error: {e}, the comfy-kitchen triton backend will not be available.")
+            ck.registry.disable("triton")
+    else:
+        ck.registry.disable("triton")
    for k, v in ck.list_backends().items():
        logging.info(f"Found comfy_kitchen backend {k}: {v}")
 except ImportError as e:
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -435,9 +435,9 @@ def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_di

 def apply_rope(xq, xk, freqs_cis):
    org_dtype = xq.dtype
-    cos = freqs_cis[0]
-    sin = freqs_cis[1]
-    nsin = freqs_cis[2]
+    cos = freqs_cis[0].to(xq.device)
+    sin = freqs_cis[1].to(xq.device)
+    nsin = freqs_cis[2].to(xq.device)

    q_embed = (xq * cos)
    q_split = q_embed.shape[-1] // 2
--- a/comfy/text_encoders/qwen35.py
+++ b/comfy/text_encoders/qwen35.py
@@ -213,7 +213,10 @@ class GatedDeltaNet(nn.Module):
        mixed_qkv = mixed_qkv.transpose(1, 2)  # [B, seq_len, conv_dim]
        query, key, value = mixed_qkv.split([self.key_dim, self.key_dim, self.value_dim], dim=-1)
        beta = b.sigmoid()
-        g = -self.A_log.float().exp() * F.softplus(a.float() + self.dt_bias.float())
+
+        A_log = comfy.model_management.cast_to_device(self.A_log, x.device, torch.float32)
+        dt_bias = comfy.model_management.cast_to_device(self.dt_bias, x.device, torch.float32)
+        g = -A_log.exp() * F.softplus(a.float() + dt_bias)

        # Delta rule
        if use_recurrent:
@@ -474,9 +477,15 @@ class Qwen35VisionRotaryEmbedding(nn.Module):
        inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.float) / dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)

-    def forward(self, seqlen):
-        seq = torch.arange(seqlen, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
-        freqs = torch.outer(seq, self.inv_freq)
+    def forward(self, seqlen, device=None, dtype=None):
+        if device is None:
+            device = self.inv_freq.device
+        if dtype is None:
+            dtype = self.inv_freq.dtype
+
+        inv_freq = comfy.model_management.cast_to_device(self.inv_freq, device, dtype)
+        seq = torch.arange(seqlen, device=device, dtype=dtype)
+        freqs = torch.outer(seq, inv_freq)
        return freqs


@@ -565,12 +574,11 @@ class Qwen35VisionModel(nn.Module):
        ])
        self.merger = Qwen35VisionPatchMerger(self.hidden_size, self.spatial_merge_size, config["out_hidden_size"], device=device, dtype=dtype, ops=ops)

-    def rot_pos_emb(self, grid_thw):
+    def rot_pos_emb(self, grid_thw, device):
        merge_size = self.spatial_merge_size
        grid_thw_list = grid_thw.tolist()
        max_hw = max(max(h, w) for _, h, w in grid_thw_list)
-        freq_table = self.rotary_pos_emb(max_hw)
-        device = freq_table.device
+        freq_table = self.rotary_pos_emb(max_hw, device=device, dtype=torch.float32)
        total_tokens = sum(int(t * h * w) for t, h, w in grid_thw_list)
        pos_ids = torch.empty((total_tokens, 2), dtype=torch.long, device=device)
        offset = 0
@@ -651,7 +659,7 @@ class Qwen35VisionModel(nn.Module):
        x = self.patch_embed(x)
        pos_embeds = self.fast_pos_embed_interpolate(grid_thw).to(x.device)
        x = x + pos_embeds
-        rotary_pos_emb = self.rot_pos_emb(grid_thw)
+        rotary_pos_emb = self.rot_pos_emb(grid_thw, device=x.device)
        seq_len = x.shape[0]
        x = x.reshape(seq_len, -1)
        rotary_pos_emb = rotary_pos_emb.reshape(seq_len, -1)
@@ -659,7 +667,7 @@ class Qwen35VisionModel(nn.Module):
        cos = emb.cos().unsqueeze(-2)
        sin = emb.sin().unsqueeze(-2)
        sin_half = sin.shape[-1] // 2
-        position_embeddings = (cos, sin[..., :sin_half], -sin[..., sin_half:])
+        position_embeddings = (cos.to(x.device), sin[..., :sin_half].to(x.device), -sin[..., sin_half:].to(x.device))
        cu_seqlens = torch.repeat_interleave(
            grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
        ).cumsum(dim=0, dtype=torch.int32)
--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@@ -666,12 +666,13 @@ class ColorTransfer(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="ColorTransfer",
+            display_name="Color Transfer",
            category="image/postprocessing",
            description="Match the colors of one image to another using various algorithms.",
            search_aliases=["color match", "color grading", "color correction", "match colors", "color transform", "mkl", "reinhard", "histogram"],
            inputs=[
                io.Image.Input("image_target", tooltip="Image(s) to apply the color transform to."),
-                io.Image.Input("image_ref", optional=True, tooltip="Reference image(s) to match colors to. If not provided, processing is skipped"),
+                io.Image.Input("image_ref", tooltip="Reference image(s) to match colors to."),
                io.Combo.Input("method", options=['reinhard_lab', 'mkl_lab', 'histogram'],),
                io.DynamicCombo.Input("source_stats",
                    tooltip="per_frame: each frame matched to image_ref individually. uniform: pool stats across all source frames as baseline, match to image_ref. target_frame: use one chosen frame as the baseline for the transform to image_ref, applied uniformly to all frames (preserves relative differences)",
--- a/comfy_extras/nodes_primitive.py
+++ b/comfy_extras/nodes_primitive.py
@@ -49,7 +49,7 @@ class Int(io.ComfyNode):
            display_name="Int",
            category="utils/primitive",
            inputs=[
-                io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=True),
+                io.Int.Input("value", min=-sys.maxsize, max=sys.maxsize, control_after_generate=io.ControlAfterGenerate.fixed),
            ],
            outputs=[io.Int.Output()],
        )
--- a/extra_model_paths.yaml.example
+++ b/extra_model_paths.yaml.example
@@ -28,7 +28,7 @@
 #config for a1111 ui
 #all you have to do is uncomment this (remove the #) and change the base_path to where yours is installed

-#a111:
+#a1111:
 #     base_path: path/to/stable-diffusion-webui/
 #     checkpoints: models/Stable-diffusion
 #     configs: models/Stable-diffusion
--- a/nodes.py
+++ b/nodes.py
@@ -1754,57 +1754,49 @@ class LoadImage:

        return True

-class LoadImageMask:
+
+class LoadImageMask(LoadImage):
    ESSENTIALS_CATEGORY = "Image Tools"
    SEARCH_ALIASES = ["import mask", "alpha mask", "channel mask"]

    _color_channels = ["alpha", "red", "green", "blue"]
+
    @classmethod
    def INPUT_TYPES(s):
-        input_dir = folder_paths.get_input_directory()
-        files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f))]
-        return {"required":
-                    {"image": (sorted(files), {"image_upload": True}),
-                     "channel": (s._color_channels, ), }
-                }
+        types = super().INPUT_TYPES()
+        return {
+            "required": {
+                **types["required"],
+                "channel": (s._color_channels, )
+            }
+        }

    CATEGORY = "mask"
-
    RETURN_TYPES = ("MASK",)
-    FUNCTION = "load_image"
-    def load_image(self, image, channel):
-        image_path = folder_paths.get_annotated_filepath(image)
-        i = node_helpers.pillow(Image.open, image_path)
-        i = node_helpers.pillow(ImageOps.exif_transpose, i)
-        if i.getbands() != ("R", "G", "B", "A"):
-            if i.mode == 'I':
-                i = i.point(lambda i: i * (1 / 255))
-            i = i.convert("RGBA")
-        mask = None
+    FUNCTION = "load_image_mask"
+
+    def load_image_mask(self, image, channel):
+        image_tensor, mask_tensor = super().load_image(image)
        c = channel[0].upper()
-        if c in i.getbands():
-            mask = np.array(i.getchannel(c)).astype(np.float32) / 255.0
-            mask = torch.from_numpy(mask)
-            if c == 'A':
-                mask = 1. - mask
+
+        if c == 'A':
+            return (mask_tensor,)
+
+        channel_idx = {'R': 0, 'G': 1, 'B': 2}.get(c, 0)
+
+        if channel_idx < image_tensor.shape[-1]:
+            return (image_tensor[..., channel_idx].clone(),)
        else:
-            mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
-        return (mask.unsqueeze(0),)
+            empty_mask = torch.zeros(
+                image_tensor.shape[:-1],
+                dtype=image_tensor.dtype,
+                device=image_tensor.device
+            )
+            return (empty_mask,)

    @classmethod
    def IS_CHANGED(s, image, channel):
-        image_path = folder_paths.get_annotated_filepath(image)
-        m = hashlib.sha256()
-        with open(image_path, 'rb') as f:
-            m.update(f.read())
-        return m.digest().hex()
-
-    @classmethod
-    def VALIDATE_INPUTS(s, image):
-        if not folder_paths.exists_annotated_filepath(image):
-            return "Invalid image file: {}".format(image)
-
-        return True
+        return super().IS_CHANGED(image)


 class LoadImageOutput(LoadImage):
Author	SHA1	Message	Date
Silver	d4e0247805	Merge `987dce1db9` into `f3ea976cba`	2026-05-04 14:53:02 +02:00
Soof Golan	f3ea976cba	Fix a1111 typo in extra_model_paths.yaml (#2720 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-04 16:01:46 +08:00
Alexis Rolland	5538f62b0b	fix: Update ColorTransfer node ref_image to be mandatory (#13691 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-04 12:33:11 +08:00
Jedrzej Kosinski	2806163f6e	Default control_after_generate to fixed in PrimitiveInt node (#13690 )	2026-05-04 07:21:34 +08:00
comfyanonymous	cea8d0925f	Refactor LoadImageMask to use LoadImage code. (#13687 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-03 16:18:27 -04:00
Silver	b138133ffa	Enable triton comfy kitchen via cli-arg (#12730 )	2026-05-03 14:07:21 -04:00
Silver	987dce1db9	Merge branch 'Comfy-Org:master' into fix/qwen35-partial-load	2026-04-05 12:27:19 +02:00
silveroxides	9e8e8e4a96	Fix device consistency for manual parameters and rotary embeddings in Qwen3.5	2026-04-05 12:21:31 +02:00
silveroxides	d6756e5c97	Fix device mismatch. 1. In apply_rope, move the RoPE frequency tensors (cos, sin, nsin) to the device of the input tensor xq. 2. In Qwen35VisionModel.forward, move position_embeddings to x.device.	2026-04-05 11:57:21 +02:00