Merge branch 'trellis2' of https://github.com/yousef-rafat/ComfyUI into pr/12183

remove triton, custom datatype, split mesh post-processing
Merge branch 'master' into trellis2
2026-06-19 22:39:24 +08:00 · 2026-05-20 17:15:56 +03:00 · 2026-05-20 17:15:33 +03:00 · 2026-05-20 10:56:30 +03:00 · 2026-05-19 20:28:06 -07:00 · 2026-05-20 10:25:49 +08:00
43 changed files with 879 additions and 554 deletions
--- a/comfy/bg_removal_model.py
+++ b/comfy/bg_removal_model.py
@ -44,7 +44,14 @@ class BackgroundRemovalModel():
        comfy.model_management.load_model_gpu(self.patcher)
        H, W = image.shape[1], image.shape[2]
        pixel_values = comfy.clip_model.clip_preprocess(image.to(self.load_device), size=self.image_size, mean=self.image_mean, std=self.image_std, crop=False)
-        out = self.model(pixel_values=pixel_values)
+
+        if pixel_values.shape[0] > 1:
+            out = torch.cat([
+                self.model(pixel_values=pixel_values[i:i+1])
+                for i in range(pixel_values.shape[0])
+            ], dim=0)
+        else:
+            out = self.model(pixel_values=pixel_values)
        out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False)

        mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@ -150,6 +150,7 @@ class SD3(LatentFormat):
 class StableAudio1(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
+    temporal_downscale_ratio = 2048

 class Flux(SD3):
    latent_channels = 16
@ -768,6 +769,7 @@ class ACEAudio(LatentFormat):
 class ACEAudio15(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
+    temporal_downscale_ratio = 1764

 class ChromaRadiance(LatentFormat):
    latent_channels = 3
--- a/comfy/ldm/trellis2/flexgemm.py
+++ b/comfy/ldm/trellis2/flexgemm.py
@ -1,136 +1,43 @@
 # will contain every cuda -> pytorch operation

-import math
+from typing import Optional, Tuple
 import torch
-from typing import Callable
-import logging

-NO_TRITON = False
-try:
-    allow_tf32 = torch.cuda.is_tf32_supported()
-except Exception:
-    allow_tf32 = False
-try:
-    import triton
-    import triton.language as tl
-    heuristics = {
-        'valid_kernel': lambda args: args['valid_kernel'](args['B1']),
-        'valid_kernel_seg': lambda args: args['valid_kernel_seg'](args['B1']),
-    }
+UINT32_SENTINEL = 0xFFFFFFFF

-    #@triton_autotune(
-    #    configs=config.autotune_config,
-    #    key=['LOGN', 'Ci', 'Co', 'V', 'allow_tf32'],
-    #)
-    @triton.heuristics(heuristics)
-    @triton.jit
-    def sparse_submanifold_conv_fwd_masked_implicit_gemm_kernel(
-        input,
-        weight,
-        bias,
-        neighbor,
-        sorted_idx,
-        output,
-        # Tensor dimensions
-        N, LOGN, Ci, Co, V: tl.constexpr,
-        # Meta-parameters
-        B1: tl.constexpr,   # Block size for N dimension
-        B2: tl.constexpr,   # Block size for Co dimension
-        BK: tl.constexpr,   # Block size for K dimension (V * Ci)
-        allow_tf32: tl.constexpr,  # Allow TF32 precision for matmuls
-        # Huristic parameters
-        valid_kernel,
-        valid_kernel_seg,
-    ):
-
-        block_id = tl.program_id(axis=0)
-        block_dim_co = tl.cdiv(Co, B2)
-        block_id_co = block_id % block_dim_co
-        block_id_n = block_id // block_dim_co
-
-        # Create pointers for submatrices of A and B.
-        num_k = tl.cdiv(Ci, BK)  # Number of blocks in K dimension
-        valid_kernel_start = tl.load(valid_kernel_seg + block_id_n)
-        valid_kernel_seglen = tl.load(valid_kernel_seg + block_id_n + 1) - valid_kernel_start
-        offset_n = block_id_n * B1 + tl.arange(0, B1)
-        n_mask = offset_n < N
-        offset_sorted_n = tl.load(sorted_idx + offset_n, mask=n_mask, other=0)  # (B1,)
-        offset_co = (block_id_co * B2 + tl.arange(0, B2)) % Co                  # (B2,)
-        offset_k = tl.arange(0, BK)                                             # (BK,)
-
-        # Create a block of the output matrix C.
-        accumulator = tl.zeros((B1, B2), dtype=tl.float32)
-
-        # Iterate along V*Ci dimension.
-        for k in range(num_k * valid_kernel_seglen):
-            v = k // num_k
-            bk = k % num_k
-            v = tl.load(valid_kernel + valid_kernel_start + v)
-            # Calculate pointers to input matrix.
-            neighbor_offset_n = tl.load(neighbor + offset_sorted_n * V + v)                             # (B1,)
-            input_ptr = input + bk * BK + (neighbor_offset_n[:, None].to(tl.int64) * Ci + offset_k[None, :])         # (B1, BK)
-            # Calculate pointers to weight matrix.
-            weight_ptr = weight + v * Ci + bk * BK + (offset_co[None, :] * V * Ci + offset_k[:, None])  # (BK, B2)
-            # Load the next block of input and weight.
-            neigh_mask = neighbor_offset_n != 0xffffffff
-            k_mask = offset_k < Ci - bk * BK
-            input_block = tl.load(input_ptr, mask=neigh_mask[:, None] & k_mask[None, :], other=0.0)
-            weight_block = tl.load(weight_ptr, mask=k_mask[:, None], other=0.0)
-            # Accumulate along the K dimension.
-            accumulator = tl.dot(input_block, weight_block, accumulator,
-                                input_precision='tf32' if allow_tf32 else 'ieee')                      # (B1, B2)
-        c = accumulator.to(input.type.element_ty)
-
-        # add bias
-        if bias is not None:
-            bias_block = tl.load(bias + offset_co)
-            c += bias_block[None, :]
-
-        # Write back the block of the output matrix with masks.
-        out_offset_n = offset_sorted_n
-        out_offset_co = block_id_co * B2 + tl.arange(0, B2)
-        out_ptr = output + (out_offset_n[:, None] * Co + out_offset_co[None, :])
-        out_mask = n_mask[:, None] & (out_offset_co[None, :] < Co)
-        tl.store(out_ptr, c, mask=out_mask)
-    def sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk(
-        input: torch.Tensor,
-        weight: torch.Tensor,
-        bias: torch.Tensor,
-        neighbor: torch.Tensor,
-        sorted_idx: torch.Tensor,
-        valid_kernel: Callable[[int], torch.Tensor],
-        valid_kernel_seg: Callable[[int], torch.Tensor],
-    ) -> torch.Tensor:
-        N, Ci, Co, V = neighbor.shape[0], input.shape[1], weight.shape[0], weight.shape[1]
-        LOGN = int(math.log2(N))
-        output = torch.empty((N, Co), device=input.device, dtype=input.dtype)
-        grid = lambda META: (triton.cdiv(Co, META['B2']) * triton.cdiv(N, META['B1']),)
-        sparse_submanifold_conv_fwd_masked_implicit_gemm_kernel[grid](
-            input, weight, bias, neighbor, sorted_idx, output,
-            N, LOGN, Ci, Co, V,
-            B1=128,
-            B2=64,
-            BK=32,
-            valid_kernel=valid_kernel,
-            valid_kernel_seg=valid_kernel_seg,
-            allow_tf32=allow_tf32,
-        )
-        return output
-except Exception:
-    NO_TRITON = True

 def compute_kernel_offsets(Kw, Kh, Kd, Dw, Dh, Dd, device):
-    # offsets in same order as CUDA kernel
+    """Kernel spatial offsets in the same order as the CUDA/Triton kernels."""
    offsets = []
    for vx in range(Kw):
        for vy in range(Kh):
            for vz in range(Kd):
-                offsets.append((
-                    vx * Dw,
-                    vy * Dh,
-                    vz * Dd
-                ))
-    return torch.tensor(offsets, device=device)
+                offsets.append((vx * Dw, vy * Dh, vz * Dd))
+    return torch.tensor(offsets, device=device, dtype=torch.int32)
+
+
+class TorchHashMap:
+    """Sorted-array hashmap backed by torch.searchsorted."""
+
+    def __init__(self, keys: torch.Tensor, values: torch.Tensor, default_value: int):
+        device = keys.device
+        self.sorted_keys, order = torch.sort(keys.to(torch.long))
+        self.sorted_vals = values.to(torch.long)[order]
+        self.default_value = torch.tensor(default_value, dtype=torch.long, device=device)
+        self._n = self.sorted_keys.numel()
+
+    def lookup_flat(self, flat_keys: torch.Tensor) -> torch.Tensor:
+        flat = flat_keys.to(torch.long)
+        if self._n == 0:
+            return torch.full((flat.shape[0],), -1, device=flat.device, dtype=torch.int32)
+        idx = torch.searchsorted(self.sorted_keys, flat)
+        idx_safe = torch.clamp(idx, max=self._n - 1)
+        found = (idx < self._n) & (self.sorted_keys[idx_safe] == flat)
+        out = torch.full((flat.shape[0],), -1, device=flat.device, dtype=torch.int32)
+        if found.any():
+            out[found] = self.sorted_vals[idx_safe[found]].to(torch.int32)
+        return out
+

 def build_submanifold_neighbor_map(
    hashmap,
@ -143,10 +50,10 @@ def build_submanifold_neighbor_map(
    M = coords.shape[0]
    V = Kw * Kh * Kd
    half_V = V // 2 + 1
+    INVALID = -1

-    INVALID = hashmap.default_value
-
-    neighbor = torch.full((M, V), INVALID, device=device, dtype=torch.long)
+    # int32 neighbour map: 4 bytes/elem vs 8 bytes for int64
+    neighbor = torch.full((M, V), INVALID, device=device, dtype=torch.int32)

    b = coords[:, 0].long()
    x = coords[:, 1].long()
@ -161,7 +68,8 @@ def build_submanifold_neighbor_map(

    for v in range(half_V):
        if v == half_V - 1:
-            neighbor[:, v] = torch.arange(M, device=device)
+            # Center voxel always maps to itself
+            neighbor[:, v] = torch.arange(M, device=device, dtype=torch.int32)
            continue

        dx, dy, dz = offsets[v]
@ -170,7 +78,6 @@ def build_submanifold_neighbor_map(
        ky = oy + dy
        kz = oz + dz

-        # Check spatial bounds
        valid = (
            (kx >= 0) & (kx < W) &
            (ky >= 0) & (ky < H) &
@ -187,192 +94,59 @@ def build_submanifold_neighbor_map(
        if flat.numel() > 0:
            found = hashmap.lookup_flat(flat)
            idx_in_M = torch.where(valid)[0]
-            neighbor[idx_in_M, v] = found
+            neighbor[idx_in_M, v] = found.to(torch.int32)

-            valid_found_mask = (found != INVALID)
+            # BUG FIX: old code used  found != hashmap.default_value  which
+            # compared int32 -1 against int64 4294967295 → always True.
+            # We now explicitly check for valid indices.
+            valid_found_mask = found >= 0
            if valid_found_mask.any():
                src_points = idx_in_M[valid_found_mask]
-                dst_points = found[valid_found_mask]
-                neighbor[dst_points, V - 1 - v] = src_points
+                dst_points = found[valid_found_mask].long()
+                neighbor[dst_points, V - 1 - v] = src_points.to(torch.int32)

    return neighbor

-class TorchHashMap:
-    def __init__(self, keys: torch.Tensor, values: torch.Tensor, default_value: int):
-        device = keys.device
-        # use long for searchsorted
-        self.sorted_keys, order = torch.sort(keys.to(torch.long))
-        self.sorted_vals = values.to(torch.long)[order]
-        self.default_value = torch.tensor(default_value, dtype=torch.long, device=device)
-        self._n = self.sorted_keys.numel()

-    def lookup_flat(self, flat_keys: torch.Tensor) -> torch.Tensor:
-        flat = flat_keys.to(torch.long)
-        if self._n == 0:
-            return torch.full((flat.shape[0],), self.default_value, device=flat.device, dtype=self.sorted_vals.dtype)
-        idx = torch.searchsorted(self.sorted_keys, flat)
-        idx_safe = torch.clamp(idx, max=self._n - 1)
-        found = (idx < self._n) & (self.sorted_keys[idx_safe] == flat)
-        out = torch.full((flat.shape[0],), self.default_value, device=flat.device, dtype=self.sorted_vals.dtype)
-        if found.any():
-            out[found] = self.sorted_vals[idx_safe[found]]
-        return out
+def sparse_submanifold_conv3d(
+    feats: torch.Tensor,
+    coords: torch.Tensor,
+    shape: tuple,
+    weight: torch.Tensor,
+    bias: Optional[torch.Tensor],
+    neighbor_cache: Optional[torch.Tensor],
+    dilation: tuple,
+    max_chunk_mem_gb: float = 6.0,
+    accumulate_f32: bool = True,
+) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:

-
-UINT32_SENTINEL = 0xFFFFFFFF
-
-def neighbor_map_post_process_for_masked_implicit_gemm_1(neighbor_map):
-    device = neighbor_map.device
-    N, V = neighbor_map.shape
-
-    sentinel = UINT32_SENTINEL
-
-    neigh_map_T = neighbor_map.t().reshape(-1)
-    neigh_mask_T = (neigh_map_T != sentinel).to(torch.int32)
-
-    mask = (neighbor_map != sentinel).to(torch.long)
-    gray_code = torch.zeros(N, dtype=torch.long, device=device)
-
-    for v in range(V):
-        gray_code |= (mask[:, v] << v)
-
-    binary_code = gray_code.clone()
-    for v in range(1, V):
-        binary_code ^= (gray_code >> v)
-
-    sorted_idx = torch.argsort(binary_code)
-
-    prefix_sum_neighbor_mask = torch.cumsum(neigh_mask_T, dim=0)
-
-    total_valid_signal = int(prefix_sum_neighbor_mask[-1].item()) if prefix_sum_neighbor_mask.numel() > 0 else 0
-
-    if total_valid_signal > 0:
-        pos = torch.nonzero(neigh_mask_T, as_tuple=True)[0]
-        to = (prefix_sum_neighbor_mask[pos] - 1).long()
-
-        valid_signal_i = torch.empty((total_valid_signal,), dtype=torch.long, device=device)
-        valid_signal_o = torch.empty((total_valid_signal,), dtype=torch.long, device=device)
-
-        valid_signal_i[to] = (pos % N).to(torch.long)
-        valid_signal_o[to] = neigh_map_T[pos].to(torch.long)
-    else:
-        valid_signal_i = torch.empty((0,), dtype=torch.long, device=device)
-        valid_signal_o = torch.empty((0,), dtype=torch.long, device=device)
-
-    seg = torch.empty((V + 1,), dtype=torch.long, device=device)
-    seg[0] = 0
-    if V > 0:
-        idxs = (torch.arange(1, V + 1, device=device, dtype=torch.long) * N) - 1
-        seg[1:] = prefix_sum_neighbor_mask[idxs]
-
-    return gray_code, sorted_idx, valid_signal_i, valid_signal_o, seg
-
-def _popcount_int32_tensor(x: torch.Tensor) -> torch.Tensor:
-
-    x = x.to(torch.int64)
-
-    m1 = torch.tensor(0x5555555555555555, dtype=torch.int64, device=x.device)
-    m2 = torch.tensor(0x3333333333333333, dtype=torch.int64, device=x.device)
-    m4 = torch.tensor(0x0F0F0F0F0F0F0F0F, dtype=torch.int64, device=x.device)
-    h01 = torch.tensor(0x0101010101010101, dtype=torch.int64, device=x.device)
-
-    x = x - ((x >> 1) & m1)
-    x = (x & m2) + ((x >> 2) & m2)
-    x = (x + (x >> 4)) & m4
-    x = (x * h01) >> 56
-    return x.to(torch.int32)
-
-
-def neighbor_map_post_process_for_masked_implicit_gemm_2(
-    gray_code: torch.Tensor,
-    sorted_idx: torch.Tensor,
-    block_size: int
-):
-    device = gray_code.device
-    N = gray_code.numel()
-    num_blocks = (N + block_size - 1) // block_size
-
-    pad = num_blocks * block_size - N
-    if pad > 0:
-        pad_vals = torch.zeros((pad,), dtype=torch.int32, device=device)
-        gray_padded = torch.cat([gray_code[sorted_idx], pad_vals], dim=0)
-    else:
-        gray_padded = gray_code[sorted_idx]
-
-    gray_blocks = gray_padded.view(num_blocks, block_size)
-
-    reduced_code = gray_blocks
-    while reduced_code.shape[1] > 1:
-        half = reduced_code.shape[1] // 2
-        remainder = reduced_code.shape[1] % 2
-
-        left = reduced_code[:, :half * 2:2]
-        right = reduced_code[:, 1:half * 2:2]
-        merged = left | right
-
-        if remainder:
-            reduced_code = torch.cat([merged, reduced_code[:, -1:]], dim=1)
-        else:
-            reduced_code = merged
-
-    reduced_code = reduced_code.squeeze(1)
-
-    seglen_counts = _popcount_int32_tensor(reduced_code).to(torch.int32)
-
-    seg = torch.empty((num_blocks + 1,), dtype=torch.int32, device=device)
-    seg[0] = 0
-    if num_blocks > 0:
-        seg[1:] = torch.cumsum(seglen_counts, dim=0)
-
-    total = int(seg[-1].item())
-
-    if total == 0:
-        return torch.empty((0,), dtype=torch.int32, device=device), seg
-
-    V = int(reduced_code.max().item()).bit_length() if reduced_code.max() > 0 else 0
-
-    if V == 0:
-        return torch.empty((0,), dtype=torch.int32, device=device), seg
-
-    bit_pos = torch.arange(0, V, dtype=torch.int32, device=device)
-    shifted = reduced_code.unsqueeze(1) >> bit_pos.unsqueeze(0)
-    bits = (shifted & 1).to(torch.bool)
-
-    positions = bit_pos.unsqueeze(0).expand(num_blocks, V)
-    valid_kernel_idx = positions[bits].to(torch.int32).contiguous()
-
-    return valid_kernel_idx, seg
-
-
-def sparse_submanifold_conv3d(feats, coords, shape, weight, bias, neighbor_cache, dilation):
-    if NO_TRITON: # TODO
-        raise RuntimeError("sparse_submanifold_conv3d requires Triton, which is not available.")
    if feats.shape[0] == 0:
-        logging.warning("Found feats to be empty!")
        Co = weight.shape[0]
        return torch.empty((0, Co), device=feats.device, dtype=feats.dtype), None
+
    if len(shape) == 5:
-        N, C, W, H, D = shape
+        _, _, W, H, D = shape
    else:
        W, H, D = shape

    Co, Kw, Kh, Kd, Ci = weight.shape
-
-    b_stride = W * H * D
-    x_stride = H * D
-    y_stride = D
-    z_stride = 1
-
-    flat_keys = (coords[:, 0].long() * b_stride +
-                 coords[:, 1].long() * x_stride +
-                 coords[:, 2].long() * y_stride +
-                 coords[:, 3].long() * z_stride)
-
-    vals = torch.arange(coords.shape[0], dtype=torch.int32, device=coords.device)
-
-    hashmap = TorchHashMap(flat_keys, vals, 0xFFFFFFFF)
+    V = Kw * Kh * Kd
+    device = feats.device
+    sentinel = -1

    if neighbor_cache is None:
+        b_stride = W * H * D
+        x_stride = H * D
+        y_stride = D
+        z_stride = 1
+
+        flat_keys = (coords[:, 0].long() * b_stride +
+                     coords[:, 1].long() * x_stride +
+                     coords[:, 2].long() * y_stride +
+                     coords[:, 3].long() * z_stride)
+        vals = torch.arange(coords.shape[0], dtype=torch.int32, device=device)
+        hashmap = TorchHashMap(flat_keys, vals, UINT32_SENTINEL)
+
        neighbor = build_submanifold_neighbor_map(
            hashmap, coords, W, H, D, Kw, Kh, Kd,
            dilation[0], dilation[1], dilation[2]
@ -380,30 +154,67 @@ def sparse_submanifold_conv3d(feats, coords, shape, weight, bias, neighbor_cache
    else:
        neighbor = neighbor_cache

-    block_size = 128
+    N_pts = feats.shape[0]

-    gray_code, sorted_idx, valid_signal_i, valid_signal_o, valid_signal_seg = \
-        neighbor_map_post_process_for_masked_implicit_gemm_1(neighbor)
+    if accumulate_f32:
+        weight_T = weight.view(Co, V * Ci).to(torch.float32).T.contiguous()
+        output = torch.zeros(N_pts, Co, device=device, dtype=torch.float32)
+    else:
+        weight_T = weight.view(Co, V * Ci).to(feats.dtype).T.contiguous()
+        output = torch.zeros(N_pts, Co, device=device, dtype=feats.dtype)

-    valid_kernel, valid_kernel_seg = \
-        neighbor_map_post_process_for_masked_implicit_gemm_2(gray_code, sorted_idx, block_size)
+    # ------------------------------------------------------------------
+    # Chunk size from memory budget
+    # ------------------------------------------------------------------
+    bytes_per_elem = 4 if accumulate_f32 else feats.element_size()
+    mem_per_row = V * Ci * bytes_per_elem
+    max_chunk_mem = max_chunk_mem_gb * (1024 ** 3)
+    chunk_size = max(1, int(max_chunk_mem / mem_per_row))
+    chunk_size = min(chunk_size, N_pts)

-    valid_kernel_fn = lambda b_size: valid_kernel
-    valid_kernel_seg_fn = lambda b_size: valid_kernel_seg
+    # ------------------------------------------------------------------
+    # Chunked forward pass
+    #   Each iteration:
+    #     1. gather   (chunk, V, Ci)     – memory bound
+    #     2. mask     zero invalids       – in-place, no extra alloc
+    #     3. reshape  (chunk, V*Ci)
+    #     4. GEMM     (chunk, V*Ci) @ (V*Ci, Co) → (chunk, Co)  – cuBLAS
+    #        written directly into output slice via out= argument
+    # ------------------------------------------------------------------
+    for start in range(0, N_pts, chunk_size):
+        end = min(start + chunk_size, N_pts)
+        actual_chunk = end - start

-    weight_flat = weight.contiguous().view(Co, -1, Ci)
+        # (chunk, V) int32
+        chunk_neighbor = neighbor[start:end]
+        chunk_valid = chunk_neighbor != sentinel

-    out = sparse_submanifold_conv_fwd_masked_implicit_gemm_splitk(
-        feats,
-        weight_flat,
-        bias,
-        neighbor,
-        sorted_idx,
-        valid_kernel_fn,
-        valid_kernel_seg_fn
-    )
+        # Clamp sentinel -1 → 0 for safe indexing.  No clone of the full map.
+        chunk_idx = chunk_neighbor.clamp(min=0).long()

-    return out, neighbor
+        # Gather: (chunk, V, Ci).  Memory-bound, single index_select.
+        gathered = feats[chunk_idx]
+
+        # Zero invalid neighbours in-place.  gathered is a fresh tensor from
+        # advanced indexing, so in-place mutation is safe.
+        gathered.mul_(chunk_valid.unsqueeze(-1))
+
+        # Reshape to (chunk, V*Ci)
+        gathered_flat = gathered.view(actual_chunk, V * Ci)
+        if accumulate_f32:
+            gathered_flat = gathered_flat.to(torch.float32)
+
+        # Single GEMM call per chunk, written directly into output.
+        # This avoids allocating a temporary (chunk, Co) tensor.
+        torch.matmul(gathered_flat, weight_T, out=output[start:end])
+
+    if accumulate_f32:
+        output = output.to(feats.dtype)
+
+    if bias is not None:
+        output = output + bias.unsqueeze(0).to(output.dtype)
+
+    return output, neighbor

 class Mesh:
    def __init__(self,
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@ -1701,6 +1701,13 @@ class HiDreamO1(BaseModel):
        if text_input_ids is None or noise is None:
            return out

+        # handle area conds
+        area = kwargs.get("area", None)
+        if area is not None:
+            crop_h = min(noise.shape[-2] - area[2], area[0])
+            crop_w = min(noise.shape[-1] - area[3], area[1])
+            noise = torch.empty((noise.shape[0], 3, crop_h, crop_w), dtype=noise.dtype, device=noise.device)
+
        conds = build_extra_conds(
            text_input_ids, noise,
            ref_images=kwargs.get("reference_latents", None),
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@ -1493,27 +1493,30 @@ class ModelPatcher:
        self.unpatch_hooks()
        self.clear_cached_hook_weights()

-    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
-        original_state_dict = self.model.diffusion_model.state_dict()
-        unet_state_dict = {}
+    def model_state_dict_for_saving(self, model=None, prefix=""):
+        if model is None:
+            model = self.model
+
+        original_state_dict = model.state_dict()
+        output_state_dict = {}
        keys = list(original_state_dict)
        while len(keys) > 0:
            k = keys.pop(0)
            v = original_state_dict[k]
            op_keys = k.rsplit('.', 1)
            if (len(op_keys) < 2) or op_keys[1] not in ["weight", "bias"]:
-                unet_state_dict[k] = v
+                output_state_dict[k] = v
                continue
            try:
-                op = comfy.utils.get_attr(self.model.diffusion_model, op_keys[0])
+                op = comfy.utils.get_attr(model, op_keys[0])
            except:
-                unet_state_dict[k] = v
+                output_state_dict[k] = v
                continue
            if not op or not hasattr(op, "comfy_cast_weights") or \
                (hasattr(op, "comfy_patched_weights") and op.comfy_patched_weights == True):
-                unet_state_dict[k] = v
+                output_state_dict[k] = v
                continue
-            key = "diffusion_model." + k
+            key = prefix + k
            weight = comfy.utils.get_attr(self.model, key)
            if isinstance(weight, QuantizedTensor) and k in original_state_dict:
                qt_state_dict = weight.state_dict(k)
@ -1521,10 +1524,14 @@ class ModelPatcher:
                for group_key in (x for x in qt_state_dict if x in original_state_dict):
                    if group_key in keys:
                        keys.remove(group_key)
-                    unet_state_dict.pop(group_key, "")
-                    unet_state_dict[group_key] = LazyCastingParamPiece(caster, "diffusion_model." + group_key, original_state_dict[group_key])
+                    output_state_dict.pop(group_key, "")
+                    output_state_dict[group_key] = LazyCastingParamPiece(caster, prefix + group_key, original_state_dict[group_key])
                continue
-            unet_state_dict[k] = LazyCastingParam(self, key, weight)
+            output_state_dict[k] = LazyCastingParam(self, key, weight)
+        return output_state_dict
+
+    def state_dict_for_saving(self, clip_state_dict=None, vae_state_dict=None, clip_vision_state_dict=None):
+        unet_state_dict = self.model_state_dict_for_saving(self.model.diffusion_model, "diffusion_model.")
        return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict)

    def __del__(self):
--- a/comfy/ops.py
+++ b/comfy/ops.py
@ -260,7 +260,7 @@ def resolve_cast_module_with_vbar(s, dtype, device, bias_dtype, compute_dtype, w


 def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None, want_requant=False):
-    # NOTE: offloadable=False is a a legacy and if you are a custom node author reading this please pass
+    # NOTE: offloadable=False is a legacy mode and if you are a custom node author reading this please pass
    # offloadable=True and call uncast_bias_weight() after your last usage of the weight/bias. This
    # will add async-offload support to your cast and improve performance.
    if input is not None:
@ -1376,6 +1376,7 @@ def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_
        if not fp8_compute:
            disabled.add("float8_e4m3fn")
            disabled.add("float8_e5m2")
+        logging.info("Native ops: {} {}".format(", ".join(QUANT_ALGOS.keys() - disabled), ", emulated ops: {}".format(", ".join(disabled)) if len(disabled) > 0 else ""))
        return mixed_precision_ops(model_config.quant_config, compute_dtype, disabled=disabled)

    if (
--- a/comfy/sample.py
+++ b/comfy/sample.py
@ -37,11 +37,12 @@ def prepare_noise(latent_image, seed, noise_inds=None):

    return noises

-def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None):
+def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None, downscale_ratio_temporal=None):
    if latent_image.is_nested:
        return latent_image
    latent_format = model.get_model_object("latent_format") #Resize the empty latent image so it has the right number of channels
-    if torch.count_nonzero(latent_image) == 0:
+    is_empty = torch.count_nonzero(latent_image) == 0
+    if is_empty:
        if latent_format.latent_channels != latent_image.shape[1]:
            latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_format.latent_channels, dim=1)
        if downscale_ratio_spacial is not None:
@ -51,6 +52,13 @@ def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None)

    if latent_format.latent_dimensions == 3 and latent_image.ndim == 4:
        latent_image = latent_image.unsqueeze(2)
+
+    if is_empty and downscale_ratio_temporal is not None:
+        if downscale_ratio_temporal != latent_format.temporal_downscale_ratio:
+            ratio = downscale_ratio_temporal / latent_format.temporal_downscale_ratio
+            new_t = max(1, round(latent_image.shape[2] * ratio))
+            latent_image = comfy.utils.repeat_to_batch_size(latent_image, new_t, dim=2)
+
    return latent_image

 def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -424,6 +424,13 @@ class CLIP:
            sd_clip[k] = sd_tokenizer[k]
        return sd_clip

+    def state_dict_for_saving(self):
+        sd_clip = self.patcher.model_state_dict_for_saving()
+        sd_tokenizer = self.tokenizer.state_dict()
+        for k in sd_tokenizer:
+            sd_clip[k] = sd_tokenizer[k]
+        return sd_clip
+
    def load_model(self, tokens={}):
        memory_used = 0
        if hasattr(self.cond_stage_model, "memory_estimation_function"):
@ -1921,7 +1928,7 @@ def save_checkpoint(output_path, model, clip=None, vae=None, clip_vision=None, m
    load_models = [model]
    if clip is not None:
        load_models.append(clip.load_model())
-        clip_sd = clip.get_sd()
+        clip_sd = clip.state_dict_for_saving()
    vae_sd = None
    if vae is not None:
        vae_sd = vae.get_sd()
--- a/comfy/text_encoders/qwen35.py
+++ b/comfy/text_encoders/qwen35.py
@ -760,7 +760,7 @@ class Qwen35ImageTokenizer(sd1_clip.SD1Tokenizer):
    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, thinking=False, **kwargs):
        image = kwargs.get("image", None)
        if image is not None and len(images) == 0:
-            images = [image]
+            images = [image[i:i + 1] for i in range(image.shape[0])]

        skip_template = False
        if text.startswith('<|im_start|>'):
@ -771,13 +771,16 @@ class Qwen35ImageTokenizer(sd1_clip.SD1Tokenizer):
        if skip_template:
            llama_text = text
        else:
-            if llama_template is None:
-                if len(images) > 0:
-                    llama_text = self.llama_template_images.format(text)
-                else:
-                    llama_text = self.llama_template.format(text)
+            if llama_template is not None:
+                template = llama_template
+            elif len(images) == 0:
+                template = self.llama_template
            else:
-                llama_text = llama_template.format(text)
+                template = self.llama_template_images
+                if len(images) > 1:
+                    vision_block = "<|vision_start|><|image_pad|><|vision_end|>"
+                    template = template.replace(vision_block, vision_block * len(images), 1)
+            llama_text = template.format(text)
            if not thinking:
                llama_text += "<think>\n</think>\n"

--- a/comfy_api_nodes/apis/bytedance_llm.py
+++ b/comfy_api_nodes/apis/bytedance_llm.py
@ -0,0 +1,101 @@
+"""Pydantic models for BytePlus ModelArk Responses API.
+
+See: https://docs.byteplus.com/en/docs/ModelArk/1585128 (request)
+     https://docs.byteplus.com/en/docs/ModelArk/1783703 (response)
+"""
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class BytePlusInputText(BaseModel):  # plain-text content part of an input message
+    type: Literal["input_text"] = "input_text"  # fixed tag for this content kind
+    text: str  # required
+
+
+class BytePlusInputImage(BaseModel):  # image content part of an input message
+    type: Literal["input_image"] = "input_image"  # fixed tag for this content kind
+    image_url: str = Field(..., description="Image URL or `data:image/...;base64,...` payload")
+    detail: str = Field("auto", description="One of high, low, auto")  # plain str: not validated against that list
+
+
+class BytePlusInputVideo(BaseModel):  # video content part of an input message
+    type: Literal["input_video"] = "input_video"  # fixed tag for this content kind
+    video_url: str = Field(..., description="Video URL or `data:video/...;base64,...` payload")
+    fps: float | None = Field(None, ge=0.2, le=5.0)  # optional; must lie in [0.2, 5.0] when set
+
+
+BytePlusMessageContent = BytePlusInputText | BytePlusInputImage | BytePlusInputVideo  # any part allowed in BytePlusInputMessage.content
+
+
+class BytePlusInputMessage(BaseModel):  # one entry of BytePlusResponseCreateRequest.input
+    type: Literal["message"] = "message"  # fixed tag for this item kind
+    role: Literal["user", "system", "assistant", "developer"] = Field(..., description="One of user, system, assistant, developer")
+    content: list[BytePlusMessageContent] = Field(...)  # ordered content parts; required
+
+
+class BytePlusResponseCreateRequest(BaseModel):  # request body for creating a response
+    model: str  # required
+    input: list[BytePlusInputMessage]  # required
+    instructions: str | None = None
+    max_output_tokens: int | None = Field(None, ge=1)
+    temperature: float | None = Field(None, ge=0.0, le=2.0)
+    store: bool | None = False
+    stream: bool | None = False
+
+
+class BytePlusOutputText(BaseModel):  # text content part of an output message
+    type: Literal["output_text"] = "output_text"  # fixed tag for this content kind
+    text: str  # required
+
+
+class BytePlusOutputRefusal(BaseModel):  # refusal content part of an output message
+    type: Literal["refusal"] = "refusal"  # fixed tag for this content kind
+    refusal: str  # required
+
+
+class BytePlusOutputContent(BaseModel):  # permissive content part: any `type`, optional payloads
+    type: str  # required
+    text: str | None = None
+    refusal: str | None = None
+
+
+class BytePlusOutputMessage(BaseModel):  # one entry of BytePlusResponseObject.output
+    type: str  # required; every other field is optional
+    id: str | None = None
+    role: str | None = None
+    status: str | None = None
+    content: list[BytePlusOutputContent] | None = None
+
+
+class BytePlusInputTokensDetails(BaseModel):  # nested under usage.input_tokens_details
+    cached_tokens: int | None = None
+
+
+class BytePlusOutputTokensDetails(BaseModel):  # nested under usage.output_tokens_details
+    reasoning_tokens: int | None = None
+
+
+class BytePlusResponseUsage(BaseModel):  # token accounting of a response; every field optional
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    total_tokens: int | None = None
+    input_tokens_details: BytePlusInputTokensDetails | None = None
+    output_tokens_details: BytePlusOutputTokensDetails | None = None
+
+
+class BytePlusResponseError(BaseModel):  # error payload carried by BytePlusResponseObject.error
+    code: str  # required
+    message: str  # required
+
+
+class BytePlusResponseObject(BaseModel):  # top-level response body; every field optional
+    id: str | None = None
+    object: str | None = None
+    created_at: int | None = None
+    model: str | None = None
+    status: str | None = None
+    error: BytePlusResponseError | None = None
+    output: list[BytePlusOutputMessage] | None = None
+    usage: BytePlusResponseUsage | None = None
--- a/comfy_api_nodes/nodes_anthropic.py
+++ b/comfy_api_nodes/nodes_anthropic.py
@ -49,7 +49,7 @@ def _claude_model_inputs():
            min=0.0,
            max=1.0,
            step=0.01,
-            tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random.",
+            tooltip="Controls randomness. 0.0 is deterministic, 1.0 is most random. Ignored for Opus 4.7.",
            advanced=True,
        ),
    ]
@ -208,7 +208,7 @@ class ClaudeNode(IO.ComfyNode):
        validate_string(prompt, strip_whitespace=True, min_length=1)
        model_label = model["model"]
        max_tokens = model["max_tokens"]
-        temperature = model["temperature"]
+        temperature = None if model_label == "Opus 4.7" else model["temperature"]

        image_tensors: list[Input.Image] = [t for t in (images or {}).values() if t is not None]
        if sum(get_number_of_images(t) for t in image_tensors) > CLAUDE_MAX_IMAGES:
--- a/comfy_api_nodes/nodes_bytedance_llm.py
+++ b/comfy_api_nodes/nodes_bytedance_llm.py
@ -0,0 +1,271 @@
+"""API Nodes for ByteDance Seed LLM via the BytePlus ModelArk Responses API.
+
+See: https://docs.byteplus.com/en/docs/ModelArk/1585128
+"""
+
+from typing_extensions import override
+
+from comfy_api.latest import IO, ComfyExtension, Input
+from comfy_api_nodes.apis.bytedance_llm import (
+    BytePlusInputImage,
+    BytePlusInputMessage,
+    BytePlusInputText,
+    BytePlusInputVideo,
+    BytePlusMessageContent,
+    BytePlusResponseCreateRequest,
+    BytePlusResponseObject,
+)
+from comfy_api_nodes.util import (
+    ApiEndpoint,
+    get_number_of_images,
+    sync_op,
+    upload_images_to_comfyapi,
+    upload_video_to_comfyapi,
+    validate_string,
+)
+
+BYTEPLUS_RESPONSES_ENDPOINT = "/proxy/byteplus/api/v3/responses"  # path POSTed to by ByteDanceSeedNode.execute
+SEED_MAX_IMAGES = 20  # cap on total images per request
+SEED_MAX_VIDEOS = 4  # cap on total videos per request
+
+SEED_MODELS: dict[str, str] = {  # UI label -> model id sent in the request
+    "Seed 2.0 Pro": "seed-2-0-pro-260328",
+    "Seed 2.0 Lite": "seed-2-0-lite-260228",
+    "Seed 2.0 Mini": "seed-2-0-mini-260215",
+}
+
+# USD per 1M tokens: (input, cache_hit_input, output)
+_SEED_PRICES_PER_MILLION: dict[str, tuple[float, float, float]] = {  # keyed by model id, not by UI label
+    "seed-2-0-pro-260328": (0.50, 0.10, 3.00),
+    "seed-2-0-lite-260228": (0.25, 0.05, 2.00),
+    "seed-2-0-mini-260215": (0.10, 0.02, 0.40),
+}
+
+
+def _seed_model_inputs(max_images: int = SEED_MAX_IMAGES, max_videos: int = SEED_MAX_VIDEOS):
+    # Sub-inputs attached to a model option: Autogrow image slots, Autogrow video slots, temperature.
+    image_slots = IO.Autogrow.Input(
+        "images",
+        template=IO.Autogrow.TemplateNames(
+            IO.Image.Input("image"),
+            names=[f"image_{slot}" for slot in range(1, max_images + 1)],
+            min=0,
+        ),
+        tooltip=f"Optional image(s) to use as context for the model. Up to {max_images} images.",
+    )
+    video_slots = IO.Autogrow.Input(
+        "videos",
+        template=IO.Autogrow.TemplateNames(
+            IO.Video.Input("video"),
+            names=[f"video_{slot}" for slot in range(1, max_videos + 1)],
+            min=0,
+        ),
+        tooltip=f"Optional video(s) to use as context for the model. Up to {max_videos} videos.",
+    )
+    temperature = IO.Float.Input(
+        "temperature",
+        default=1.0,
+        min=0.0,
+        max=2.0,
+        step=0.01,
+        tooltip="Controls randomness. 0.0 is deterministic, higher values are more random.",
+        advanced=True,
+    )
+    return [image_slots, video_slots, temperature]
+
+
+def _calculate_price(model_id: str, response: BytePlusResponseObject) -> float | None:
+    """Estimate the USD cost of a response from its usage; None without usage or known rates."""
+    usage = response.usage
+    rates = _SEED_PRICES_PER_MILLION.get(model_id) if usage else None
+    if rates is None:
+        return None
+    input_rate, cache_hit_rate, output_rate = rates
+    details = usage.input_tokens_details
+    # Cached input tokens are priced at the cache-hit rate and the remaining input
+    # tokens at the full input rate; the remainder is clamped so it never goes negative.
+    cached = (details.cached_tokens or 0) if details else 0
+    fresh_input = max(0, (usage.input_tokens or 0) - cached)
+    total = (
+        fresh_input * input_rate + cached * cache_hit_rate + (usage.output_tokens or 0) * output_rate
+    )
+    return total / 1_000_000.0
+
+
+def _get_text_from_response(response: BytePlusResponseObject) -> str:
+    """Return the text of all output_text blocks, newline-joined ("" when there is no output).
+
+    Only items of type "message" are read; a non-empty refusal block raises ValueError.
+    """
+    texts: list[str] = []
+    messages = [entry for entry in (response.output or []) if entry.type == "message"]
+    for part in (blk for entry in messages for blk in (entry.content or [])):
+        if part.type == "output_text":
+            if part.text:
+                texts.append(part.text)
+        elif part.type == "refusal" and part.refusal:
+            raise ValueError(f"Model refused to respond: {part.refusal}")
+    return "\n".join(texts)
+
+
+async def _build_image_content_blocks(
+    cls: type[IO.ComfyNode],
+    image_tensors: list[Input.Image],
+) -> list[BytePlusInputImage]:
+    """Upload the image tensors and return one input_image block per returned URL."""
+    uploaded = await upload_images_to_comfyapi(
+        cls, image_tensors, max_images=SEED_MAX_IMAGES, wait_label="Uploading reference images"
+    )
+    # Block order follows the order of the URLs handed back by the uploader.
+    blocks = [BytePlusInputImage(image_url=link) for link in uploaded]
+    return blocks
+
+
+async def _build_video_content_blocks(
+    cls: type[IO.ComfyNode],
+    videos: list[Input.Video],
+) -> list[BytePlusInputVideo]:
+    """Upload the videos one at a time, in order, and return one input_video block per video."""
+    count = len(videos)
+    result: list[BytePlusInputVideo] = []
+    for position, clip in enumerate(videos, start=1):
+        # The "(i/N)" progress suffix is shown only when more than one video is uploaded.
+        suffix = f" ({position}/{count})" if count > 1 else ""
+        uploaded_url = await upload_video_to_comfyapi(cls, clip, wait_label=f"Uploading reference video{suffix}")
+        result.append(BytePlusInputVideo(video_url=uploaded_url))
+    return result
+
+
+class ByteDanceSeedNode(IO.ComfyNode):
+    """Generate text responses from a ByteDance Seed 2.0 model."""
+
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="ByteDanceSeedNode",
+            display_name="ByteDance Seed",
+            category="api node/text/ByteDance",
+            essentials_category="Text Generation",
+            description="Generate text responses with ByteDance's Seed 2.0 models. "
+            "Provide a text prompt and optionally one or more images or videos for multimodal context.",
+            inputs=[
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    tooltip="Text input to the model.",
+                ),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[IO.DynamicCombo.Option(label, _seed_model_inputs()) for label in SEED_MODELS],  # one option per model label, each with its own sub-input list
+                    tooltip="The Seed model used to generate the response.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+                IO.String.Input(
+                    "system_prompt",
+                    multiline=True,
+                    default="",
+                    optional=True,
+                    advanced=True,
+                    tooltip="Foundational instructions that dictate the model's behavior.",
+                ),
+            ],
+            outputs=[IO.String.Output()],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(  # NOTE(review): mini/lite figures below differ from _SEED_PRICES_PER_MILLION / 1000 — confirm which is intended
+                depends_on=IO.PriceBadgeDepends(widgets=["model"]),
+                expr="""
+                (
+                  $m := widgets.model;
+                  $contains($m, "mini") ? {
+                    "type": "list_usd",
+                    "usd": [0.00025, 0.0009],
+                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
+                  }
+                  : $contains($m, "lite") ? {
+                    "type": "list_usd",
+                    "usd": [0.0003, 0.002],
+                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
+                  }
+                  : $contains($m, "pro") ? {
+                    "type": "list_usd",
+                    "usd": [0.0005, 0.003],
+                    "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" }
+                  }
+                  : {"type":"text", "text":"Token-based"}
+                )
+                """,
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        prompt: str,
+        model: dict,
+        seed: int,  # accepted but never read in this method, so it is not forwarded to the API
+        system_prompt: str = "",
+    ) -> IO.NodeOutput:
+        validate_string(prompt, strip_whitespace=True, min_length=1)
+        model_label = model["model"]  # the combo's dict holds the chosen label plus that option's sub-inputs
+        temperature = model["temperature"]
+        model_id = SEED_MODELS[model_label]
+
+        image_tensors: list[Input.Image] = [t for t in (model.get("images") or {}).values() if t is not None]  # drop None slots
+        if sum(get_number_of_images(t) for t in image_tensors) > SEED_MAX_IMAGES:  # limit applies to the summed per-tensor image count
+            raise ValueError(f"Up to {SEED_MAX_IMAGES} images are supported per request.")
+
+        video_inputs: list[Input.Video] = [v for v in (model.get("videos") or {}).values() if v is not None]  # drop None slots
+        if len(video_inputs) > SEED_MAX_VIDEOS:
+            raise ValueError(f"Up to {SEED_MAX_VIDEOS} videos are supported per request.")
+
+        content: list[BytePlusMessageContent] = []  # built in order: images, then videos, then the prompt text
+        if image_tensors:
+            content.extend(await _build_image_content_blocks(cls, image_tensors))
+        if video_inputs:
+            content.extend(await _build_video_content_blocks(cls, video_inputs))
+        content.append(BytePlusInputText(text=prompt))
+
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path=BYTEPLUS_RESPONSES_ENDPOINT, method="POST"),
+            response_model=BytePlusResponseObject,
+            data=BytePlusResponseCreateRequest(
+                model=model_id,
+                input=[BytePlusInputMessage(role="user", content=content)],  # everything goes in a single user message
+                instructions=system_prompt or None,  # an empty system prompt becomes None
+                temperature=temperature,
+                store=False,
+                stream=False,
+            ),
+            price_extractor=lambda r: _calculate_price(model_id, r),
+        )
+        if response.error:
+            raise ValueError(f"Seed API error ({response.error.code}): {response.error.message}")
+        result = _get_text_from_response(response)
+        if not result:  # no output_text content was extracted
+            raise ValueError("Empty response from Seed model.")
+        return IO.NodeOutput(result)
+
+
+class ByteDanceLLMExtension(ComfyExtension):  # extension exposing the ByteDance Seed node
+    @override
+    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
+        return [ByteDanceSeedNode]  # the only node class this extension lists
+
+
+async def comfy_entrypoint() -> ByteDanceLLMExtension:  # NOTE(review): presumably the hook the extension loader awaits — confirm
+    return ByteDanceLLMExtension()  # a fresh extension instance on every call
--- a/comfy_extras/nodes_ace.py
+++ b/comfy_extras/nodes_ace.py
@ -104,7 +104,7 @@ class EmptyAceStep15LatentAudio(IO.ComfyNode):
    def execute(cls, seconds, batch_size) -> IO.NodeOutput:
        length = round((seconds * 48000 / 1920))
        latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
-        return IO.NodeOutput({"samples": latent, "type": "audio"})
+        return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 1764})

 class ReferenceAudio(IO.ComfyNode):
    @classmethod
--- a/comfy_extras/nodes_advanced_samplers.py
+++ b/comfy_extras/nodes_advanced_samplers.py
@ -45,7 +45,7 @@ class SamplerLCMUpscale(io.ComfyNode):
    def define_schema(cls) -> io.Schema:
        return io.Schema(
            node_id="SamplerLCMUpscale",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Float.Input("scale_ratio", default=1.0, min=0.1, max=20.0, step=0.01, advanced=True),
                io.Int.Input("scale_steps", default=-1, min=-1, max=1000, step=1, advanced=True),
@ -123,7 +123,7 @@ class SamplerEulerCFGpp(io.ComfyNode):
        return io.Schema(
            node_id="SamplerEulerCFGpp",
            display_name="SamplerEulerCFG++",
-            category="experimental",  # "sampling/custom_sampling/samplers"
+            category="experimental",  # "sampling/samplers"
            inputs=[
                io.Combo.Input("version", options=["regular", "alternative"], advanced=True),
            ],
--- a/comfy_extras/nodes_align_your_steps.py
+++ b/comfy_extras/nodes_align_your_steps.py
@ -29,7 +29,7 @@ class AlignYourStepsScheduler(io.ComfyNode):
        return io.Schema(
            node_id="AlignYourStepsScheduler",
            search_aliases=["AYS scheduler"],
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Combo.Input("model_type", options=["SD1", "SDXL", "SVD"]),
                io.Int.Input("steps", default=10, min=1, max=10000),
--- a/comfy_extras/nodes_ar_video.py
+++ b/comfy_extras/nodes_ar_video.py
@ -53,7 +53,7 @@ class SamplerARVideo(io.ComfyNode):
        return io.Schema(
            node_id="SamplerARVideo",
            display_name="Sampler AR Video",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Int.Input(
                    "num_frame_per_block",
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@ -33,7 +33,7 @@ class EmptyLatentAudio(IO.ComfyNode):
    def execute(cls, seconds, batch_size) -> IO.NodeOutput:
        length = round((seconds * 44100 / 2048) / 2) * 2
        latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device())
-        return IO.NodeOutput({"samples":latent, "type": "audio"})
+        return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 2048})

    generate = execute  # TODO: remove

--- a/comfy_extras/nodes_bg_removal.py
+++ b/comfy_extras/nodes_bg_removal.py
@ -34,6 +34,7 @@ class RemoveBackground(IO.ComfyNode):
            node_id="RemoveBackground",
            display_name="Remove Background",
            category="image/background removal",
+            description="Generates a foreground mask to remove the background from an image using a background removal model.",
            inputs=[
                IO.Image.Input("image", tooltip="Input image to remove the background from"),
                IO.BackgroundRemoval.Input("bg_removal_model", tooltip="Background removal model used to generate the mask")
--- a/comfy_extras/nodes_canny.py
+++ b/comfy_extras/nodes_canny.py
@ -11,9 +11,9 @@ class Canny(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="Canny",
-            display_name="Canny",
+            display_name="Detect Edges (Canny)",
            search_aliases=["edge detection", "outline", "contour detection", "line art"],
-            category="image/preprocessors",
+            category="image/filters",
            essentials_category="Image Tools",
            inputs=[
                io.Image.Input("image"),
--- a/comfy_extras/nodes_compositing.py
+++ b/comfy_extras/nodes_compositing.py
@ -111,7 +111,7 @@ class PorterDuffImageComposite(io.ComfyNode):
            node_id="PorterDuffImageComposite",
            search_aliases=["alpha composite", "blend modes", "layer blend", "transparency blend"],
            display_name="Porter-Duff Image Composite",
-            category="mask/compositing",
+            category="image/compositing",
            inputs=[
                io.Image.Input("source"),
                io.Mask.Input("source_alpha"),
@ -168,7 +168,7 @@ class SplitImageWithAlpha(io.ComfyNode):
            node_id="SplitImageWithAlpha",
            search_aliases=["extract alpha", "separate transparency", "remove alpha"],
            display_name="Split Image with Alpha",
-            category="mask/compositing",
+            category="image/compositing",
            inputs=[
                io.Image.Input("image"),
            ],
@ -192,7 +192,7 @@ class JoinImageWithAlpha(io.ComfyNode):
            node_id="JoinImageWithAlpha",
            search_aliases=["add transparency", "apply alpha", "composite alpha", "RGBA"],
            display_name="Join Image with Alpha",
-            category="mask/compositing",
+            category="image/compositing",
            inputs=[
                io.Image.Input("image"),
                io.Mask.Input("alpha"),
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@ -17,7 +17,7 @@ class BasicScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="BasicScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Model.Input("model"),
                io.Combo.Input("scheduler", options=comfy.samplers.SCHEDULER_NAMES),
@ -47,7 +47,7 @@ class KarrasScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="KarrasScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=10000),
                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True),
@ -69,7 +69,7 @@ class ExponentialScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="ExponentialScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=10000),
                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True),
@ -90,7 +90,7 @@ class PolyexponentialScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="PolyexponentialScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=10000),
                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True),
@ -112,7 +112,7 @@ class LaplaceScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="LaplaceScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=10000),
                io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True),
@ -136,7 +136,7 @@ class SDTurboScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SDTurboScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Model.Input("model"),
                io.Int.Input("steps", default=1, min=1, max=10),
@ -160,7 +160,7 @@ class BetaSamplingScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="BetaSamplingScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Model.Input("model"),
                io.Int.Input("steps", default=20, min=1, max=10000),
@ -182,7 +182,7 @@ class VPScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="VPScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=10000),
                io.Float.Input("beta_d", default=19.9, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), #TODO: fix default values
@ -204,7 +204,7 @@ class SplitSigmas(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SplitSigmas",
-            category="sampling/custom_sampling/sigmas",
+            category="sampling/sigmas",
            inputs=[
                io.Sigmas.Input("sigmas"),
                io.Int.Input("step", default=0, min=0, max=10000),
@ -228,7 +228,7 @@ class SplitSigmasDenoise(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SplitSigmasDenoise",
-            category="sampling/custom_sampling/sigmas",
+            category="sampling/sigmas",
            inputs=[
                io.Sigmas.Input("sigmas"),
                io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01),
@ -254,7 +254,7 @@ class FlipSigmas(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="FlipSigmas",
-            category="sampling/custom_sampling/sigmas",
+            category="sampling/sigmas",
            inputs=[io.Sigmas.Input("sigmas")],
            outputs=[io.Sigmas.Output()]
        )
@ -276,7 +276,7 @@ class SetFirstSigma(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SetFirstSigma",
-            category="sampling/custom_sampling/sigmas",
+            category="sampling/sigmas",
            inputs=[
                io.Sigmas.Input("sigmas"),
                io.Float.Input("sigma", default=136.0, min=0.0, max=20000.0, step=0.001, round=False),
@ -298,7 +298,7 @@ class ExtendIntermediateSigmas(io.ComfyNode):
        return io.Schema(
            node_id="ExtendIntermediateSigmas",
            search_aliases=["interpolate sigmas"],
-            category="sampling/custom_sampling/sigmas",
+            category="sampling/sigmas",
            inputs=[
                io.Sigmas.Input("sigmas"),
                io.Int.Input("steps", default=2, min=1, max=100),
@ -351,7 +351,7 @@ class SamplingPercentToSigma(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplingPercentToSigma",
-            category="sampling/custom_sampling/sigmas",
+            category="sampling/sigmas",
            inputs=[
                io.Model.Input("model"),
                io.Float.Input("sampling_percent", default=0.0, min=0.0, max=1.0, step=0.0001),
@ -379,7 +379,7 @@ class KSamplerSelect(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="KSamplerSelect",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[io.Combo.Input("sampler_name", options=comfy.samplers.SAMPLER_NAMES)],
            outputs=[io.Sampler.Output()]
        )
@ -396,7 +396,7 @@ class SamplerDPMPP_3M_SDE(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerDPMPP_3M_SDE",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
@ -421,7 +421,7 @@ class SamplerDPMPP_2M_SDE(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerDPMPP_2M_SDE",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Combo.Input("solver_type", options=['midpoint', 'heun']),
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
@ -448,7 +448,7 @@ class SamplerDPMPP_SDE(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerDPMPP_SDE",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
@ -474,7 +474,7 @@ class SamplerDPMPP_2S_Ancestral(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerDPMPP_2S_Ancestral",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False),
@ -494,7 +494,7 @@ class SamplerEulerAncestral(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerEulerAncestral",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
                io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
@ -515,7 +515,7 @@ class SamplerEulerAncestralCFGPP(io.ComfyNode):
        return io.Schema(
            node_id="SamplerEulerAncestralCFGPP",
            display_name="SamplerEulerAncestralCFG++",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Float.Input("eta", default=1.0, min=0.0, max=1.0, step=0.01, round=False),
                io.Float.Input("s_noise", default=1.0, min=0.0, max=10.0, step=0.01, round=False),
@ -537,7 +537,7 @@ class SamplerLMS(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerLMS",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[io.Int.Input("order", default=4, min=1, max=100, advanced=True)],
            outputs=[io.Sampler.Output()]
        )
@ -554,7 +554,7 @@ class SamplerDPMAdaptative(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerDPMAdaptative",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Int.Input("order", default=3, min=2, max=3, advanced=True),
                io.Float.Input("rtol", default=0.05, min=0.0, max=100.0, step=0.01, round=False, advanced=True),
@ -585,7 +585,7 @@ class SamplerER_SDE(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SamplerER_SDE",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Combo.Input("solver_type", options=["ER-SDE", "Reverse-time SDE", "ODE"]),
                io.Int.Input("max_stage", default=3, min=1, max=3, advanced=True),
@ -623,7 +623,7 @@ class SamplerSASolver(io.ComfyNode):
        return io.Schema(
            node_id="SamplerSASolver",
            search_aliases=["sde"],
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Model.Input("model"),
                io.Float.Input("eta", default=1.0, min=0.0, max=10.0, step=0.01, round=False, advanced=True),
@ -668,7 +668,7 @@ class SamplerSEEDS2(io.ComfyNode):
        return io.Schema(
            node_id="SamplerSEEDS2",
            search_aliases=["sde", "exp heun"],
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[
                io.Combo.Input("solver_type", options=["phi_1", "phi_2"]),
                io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength", advanced=True),
@ -750,7 +750,7 @@ class SamplerCustom(io.ComfyNode):
        latent = latent_image
        latent_image = latent["samples"]
        latent = latent.copy()
-        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None))
+        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None), latent.get("downscale_ratio_temporal", None))
        latent["samples"] = latent_image

        if not add_noise:
@ -770,6 +770,7 @@ class SamplerCustom(io.ComfyNode):

        out = latent.copy()
        out.pop("downscale_ratio_spacial", None)
+        out.pop("downscale_ratio_temporal", None)
        out["samples"] = samples
        if "x0" in x0_output:
            x0_out = model.model.process_latent_out(x0_output["x0"].cpu())
@ -793,7 +794,8 @@ class BasicGuider(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="BasicGuider",
-            category="sampling/custom_sampling/guiders",
+            display_name="Basic Guider",
+            category="sampling/guiders",
            inputs=[
                io.Model.Input("model"),
                io.Conditioning.Input("conditioning"),
@ -814,7 +816,8 @@ class CFGGuider(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="CFGGuider",
-            category="sampling/custom_sampling/guiders",
+            display_name="CFG Guider",
+            category="sampling/guiders",
            inputs=[
                io.Model.Input("model"),
                io.Conditioning.Input("positive"),
@ -868,7 +871,8 @@ class DualCFGGuider(io.ComfyNode):
        return io.Schema(
            node_id="DualCFGGuider",
            search_aliases=["dual prompt guidance"],
-            category="sampling/custom_sampling/guiders",
+            display_name="Dual CFG Guider",
+            category="sampling/guiders",
            inputs=[
                io.Model.Input("model"),
                io.Conditioning.Input("cond1"),
@ -896,7 +900,7 @@ class DisableNoise(io.ComfyNode):
        return io.Schema(
            node_id="DisableNoise",
            search_aliases=["zero noise"],
-            category="sampling/custom_sampling/noise",
+            category="sampling/noise",
            inputs=[],
            outputs=[io.Noise.Output()]
        )
@ -913,7 +917,7 @@ class RandomNoise(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="RandomNoise",
-            category="sampling/custom_sampling/noise",
+            category="sampling/noise",
            inputs=[io.Int.Input("noise_seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True)],
            outputs=[io.Noise.Output()]
        )
@ -949,7 +953,7 @@ class SamplerCustomAdvanced(io.ComfyNode):
        latent = latent_image
        latent_image = latent["samples"]
        latent = latent.copy()
-        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image, latent.get("downscale_ratio_spacial", None))
+        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image, latent.get("downscale_ratio_spacial", None), latent.get("downscale_ratio_temporal", None))
        latent["samples"] = latent_image

        noise_mask = None
@ -965,6 +969,7 @@ class SamplerCustomAdvanced(io.ComfyNode):

        out = latent.copy()
        out.pop("downscale_ratio_spacial", None)
+        out.pop("downscale_ratio_temporal", None)
        out["samples"] = samples
        if "x0" in x0_output:
            x0_out = guider.model_patcher.model.process_latent_out(x0_output["x0"].cpu())
--- a/comfy_extras/nodes_flux.py
+++ b/comfy_extras/nodes_flux.py
@ -215,7 +215,7 @@ class Flux2Scheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="Flux2Scheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=4096),
                io.Int.Input("width", default=1024, min=16, max=nodes.MAX_RESOLUTION, step=1),
@ -263,7 +263,7 @@ class FluxKVCache(io.ComfyNode):
            node_id="FluxKVCache",
            display_name="Flux KV Cache",
            description="Enables KV Cache optimization for reference images on Flux family models.",
-            category="",
+            category="experimental",
            is_experimental=True,
            inputs=[
                io.Model.Input("model", tooltip="The model to use KV Cache on."),
--- a/comfy_extras/nodes_gits.py
+++ b/comfy_extras/nodes_gits.py
@ -340,7 +340,7 @@ class GITSScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="GITSScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Float.Input("coeff", default=1.20, min=0.80, max=1.50, step=0.05, advanced=True),
                io.Int.Input("steps", default=10, min=2, max=1000),
--- a/comfy_extras/nodes_images.py
+++ b/comfy_extras/nodes_images.py
@ -162,7 +162,7 @@ class ImageAddNoise(IO.ComfyNode):
            node_id="ImageAddNoise",
            search_aliases=["film grain"],
            display_name="Add Noise to Image",
-            category="image/postprocessing",
+            category="image/filters",
            inputs=[
                IO.Image.Input("image"),
                IO.Int.Input(
@ -194,7 +194,8 @@ class SaveAnimatedWEBP(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="SaveAnimatedWEBP",
-            category="image/animation",
+            display_name="Save Animated WEBP",
+            category="image",
            inputs=[
                IO.Image.Input("images"),
                IO.String.Input("filename_prefix", default="ComfyUI"),
@ -231,7 +232,8 @@ class SaveAnimatedPNG(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="SaveAnimatedPNG",
-            category="image/animation",
+            display_name="Save Animated PNG",
+            category="image",
            inputs=[
                IO.Image.Input("images"),
                IO.String.Input("filename_prefix", default="ComfyUI"),
@ -493,7 +495,7 @@ class SaveSVGNode(IO.ComfyNode):
            search_aliases=["export vector", "save vector graphics"],
            display_name="Save SVG",
            description="Save SVG files on disk.",
-            category="image/save",
+            category="image",
            inputs=[
                IO.SVG.Input("svg"),
                IO.String.Input(
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@ -77,7 +77,7 @@ class EmptyLTXVLatentVideo(io.ComfyNode):
    @classmethod
    def execute(cls, width, height, length, batch_size=1) -> io.NodeOutput:
        latent = torch.zeros([batch_size, 128, ((length - 1) // 8) + 1, height // 32, width // 32], device=comfy.model_management.intermediate_device())
-        return io.NodeOutput({"samples": latent})
+        return io.NodeOutput({"samples": latent, "downscale_ratio_spacial": 32})

    generate = execute  # TODO: remove

@ -175,7 +175,7 @@ class LTXVImgToVideoInplace(io.ComfyNode):
    generate = execute  # TODO: remove


-def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0):
+def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0, attention_mask=None):
    """Append a guide_attention_entry to both positive and negative conditioning.

    Each entry tracks one guide reference for per-reference attention control.
@ -184,9 +184,10 @@ def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_s
    new_entry = {
        "pre_filter_count": pre_filter_count,
        "strength": strength,
-        "pixel_mask": None,
+        "pixel_mask": attention_mask.unsqueeze(0).unsqueeze(0) if attention_mask is not None else None,  # reshape to (1, 1, F, H, W)
        "latent_shape": latent_shape,
    }
+
    results = []
    for cond in (positive, negative):
        # Read existing entries from this specific conditioning
@ -196,8 +197,7 @@ def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_s
            if found is not None:
                existing = found
                break
-        # Shallow copy and append (no deepcopy needed — entries contain
-        # only scalars and None for pixel_mask at this call site).
+        # Build a new list (shallow copy) and append; no deepcopy is needed because pixel_mask is never mutated.
        entries = [*existing, new_entry]
        results.append(node_helpers.conditioning_set_values(
            cond, {"guide_attention_entries": entries}
@ -263,6 +263,12 @@ class LTXVAddGuide(io.ComfyNode):
                            "down to the nearest multiple of 8. Negative values are counted from the end of the video.",
                ),
                io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01),
+                io.Mask.Input(
+                    "attention_mask",
+                    optional=True,
+                    tooltip="Optional pixel-space spatial mask. Controls per-region "
+                            "conditioning influence via self-attention, multiplied by strength.",
+                ),
                ICLoRAParameters.Input(
                    "iclora_parameters",
                    optional=True,
@ -410,7 +416,7 @@ class LTXVAddGuide(io.ComfyNode):
        return latent_image, noise_mask

    @classmethod
-    def execute(cls, positive, negative, vae, latent, image, frame_idx, strength, iclora_parameters=None) -> io.NodeOutput:
+    def execute(cls, positive, negative, vae, latent, image, frame_idx, strength, attention_mask=None, iclora_parameters=None) -> io.NodeOutput:
        scale_factors = vae.downscale_index_formula
        latent_image = latent["samples"]
        noise_mask = get_noise_mask(latent)
@ -469,6 +475,7 @@ class LTXVAddGuide(io.ComfyNode):
        pre_filter_count = t.shape[2] * t.shape[3] * t.shape[4]
        positive, negative = _append_guide_attention_entry(
            positive, negative, pre_filter_count, guide_latent_shape, strength=strength,
+            attention_mask=attention_mask,
        )

        return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask})
@ -594,7 +601,7 @@ class LTXVScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="LTXVScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Int.Input("steps", default=20, min=1, max=10000),
                io.Float.Input("max_shift", default=2.05, min=0.0, max=100.0, step=0.01),
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@ -83,7 +83,7 @@ class ImageCompositeMasked(IO.ComfyNode):
            node_id="ImageCompositeMasked",
            search_aliases=["overlay", "layer", "paste image", "images composition"],
            display_name="Image Composite Masked",
-            category="image",
+            category="image/compositing",
            inputs=[
                IO.Image.Input("destination"),
                IO.Image.Input("source"),
@ -112,7 +112,7 @@ class MaskToImage(IO.ComfyNode):
            node_id="MaskToImage",
            search_aliases=["convert mask"],
            display_name="Convert Mask to Image",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("mask"),
            ],
@ -134,7 +134,7 @@ class ImageToMask(IO.ComfyNode):
            node_id="ImageToMask",
            search_aliases=["extract channel", "channel to mask"],
            display_name="Convert Image to Mask",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Image.Input("image"),
                IO.Combo.Input("channel", options=["red", "green", "blue", "alpha"]),
@ -157,7 +157,8 @@ class ImageColorToMask(IO.ComfyNode):
        return IO.Schema(
            node_id="ImageColorToMask",
            search_aliases=["color keying", "chroma key"],
-            category="mask",
+            display_name="Convert Image Color to Mask",
+            category="image/mask",
            inputs=[
                IO.Image.Input("image"),
                IO.Int.Input("color", default=0, min=0, max=0xFFFFFF, step=1, display_mode=IO.NumberDisplay.number),
@ -180,7 +181,8 @@ class SolidMask(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="SolidMask",
-            category="mask",
+            display_name="Create Solid Mask",
+            category="image/mask",
            inputs=[
                IO.Float.Input("value", default=1.0, min=0.0, max=1.0, step=0.01),
                IO.Int.Input("width", default=512, min=1, max=nodes.MAX_RESOLUTION, step=1),
@ -204,7 +206,7 @@ class InvertMask(IO.ComfyNode):
            node_id="InvertMask",
            search_aliases=["reverse mask", "flip mask"],
            display_name="Invert Mask",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("mask"),
            ],
@ -226,7 +228,7 @@ class CropMask(IO.ComfyNode):
            node_id="CropMask",
            search_aliases=["cut mask", "extract mask region", "mask slice"],
            display_name="Crop Mask",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("mask"),
                IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
@ -253,7 +255,7 @@ class MaskComposite(IO.ComfyNode):
            node_id="MaskComposite",
            search_aliases=["combine masks", "blend masks", "layer masks", "masks composition"],
            display_name="Combine Masks",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("destination"),
                IO.Mask.Input("source"),
@ -304,7 +306,7 @@ class FeatherMask(IO.ComfyNode):
            node_id="FeatherMask",
            search_aliases=["soft edge mask", "blur mask edges", "gradient mask edge"],
            display_name="Feather Mask",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("mask"),
                IO.Int.Input("left", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1),
@ -330,7 +332,7 @@ class FeatherMask(IO.ComfyNode):

        for x in range(right):
            feather_rate = (x + 1) / right
-            output[:, :, -x] *= feather_rate
+            output[:, :, -(x + 1)] *= feather_rate

        for y in range(top):
            feather_rate = (y + 1) / top
@ -338,7 +340,7 @@ class FeatherMask(IO.ComfyNode):

        for y in range(bottom):
            feather_rate = (y + 1) / bottom
-            output[:, -y, :] *= feather_rate
+            output[:, -(y + 1), :] *= feather_rate

        return IO.NodeOutput(output)

@ -352,7 +354,7 @@ class GrowMask(IO.ComfyNode):
            node_id="GrowMask",
            search_aliases=["expand mask", "shrink mask"],
            display_name="Grow Mask",
-            category="mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("mask"),
                IO.Int.Input("expand", default=0, min=-nodes.MAX_RESOLUTION, max=nodes.MAX_RESOLUTION, step=1),
@ -388,7 +390,8 @@ class ThresholdMask(IO.ComfyNode):
        return IO.Schema(
            node_id="ThresholdMask",
            search_aliases=["binary mask"],
-            category="mask",
+            display_name="Threshold Mask",
+            category="image/mask",
            inputs=[
                IO.Mask.Input("mask"),
                IO.Float.Input("value", default=0.5, min=0.0, max=1.0, step=0.01),
@ -414,7 +417,7 @@ class MaskPreview(IO.ComfyNode):
            node_id="MaskPreview",
            search_aliases=["show mask", "view mask", "inspect mask", "debug mask"],
            display_name="Preview Mask",
-            category="mask",
+            category="image/mask",
            description="Saves the input images to your ComfyUI output directory.",
            inputs=[
                IO.Mask.Input("mask"),
--- a/comfy_extras/nodes_mesh_postprocess.py
+++ b/comfy_extras/nodes_mesh_postprocess.py
@ -802,97 +802,127 @@ def compute_vertex_normals(verts, faces):

    return torch.nn.functional.normalize(vertex_normals, p=2, dim=-1, eps=1e-6)

-class PostProcessMesh(IO.ComfyNode):
+def _process_mesh_batch(mesh, per_item_fn):
+    """Apply per_item_fn(v, f, c) to each mesh of a list, batched tensor, or single mesh; restack results when shapes match."""
+    mesh = copy.deepcopy(mesh)
+
+    def process_single(v, f, c, bar):
+        v, f, c = per_item_fn(v, f, c)
+        bar.update(1)
+        return v, f, c
+
+    is_list = isinstance(mesh.vertices, list)
+    is_batched_tensor = not is_list and mesh.vertices.ndim == 3
+
+    if is_list or is_batched_tensor:
+        out_v, out_f, out_c = [], [], []
+        bsz = len(mesh.vertices) if is_list else mesh.vertices.shape[0]
+        bar = comfy.utils.ProgressBar(bsz)
+
+        for i in range(bsz):
+            v_i = mesh.vertices[i]
+            f_i = mesh.faces[i]
+            c_i = None
+            if hasattr(mesh, 'vertex_colors') and mesh.vertex_colors is not None:
+                c_i = mesh.vertex_colors[i] if (isinstance(mesh.vertex_colors, list) or mesh.vertex_colors.ndim == 3) else mesh.vertex_colors
+
+            v_i, f_i, c_i = process_single(v_i, f_i, c_i, bar)
+
+            out_v.append(v_i)
+            out_f.append(f_i)
+            if c_i is not None:
+                out_c.append(c_i)
+
+        if all(v.shape == out_v[0].shape for v in out_v) and all(f.shape == out_f[0].shape for f in out_f):
+            mesh.vertices = torch.stack(out_v)
+            mesh.faces = torch.stack(out_f)
+            if out_c:
+                mesh.vertex_colors = torch.stack(out_c)
+        else:
+            mesh.vertices = out_v
+            mesh.faces = out_f
+            if out_c:
+                mesh.vertex_colors = out_c
+    else:
+        c = mesh.vertex_colors if hasattr(mesh, 'vertex_colors') and mesh.vertex_colors is not None else None
+        bar = comfy.utils.ProgressBar(1)
+        v, f, c = process_single(mesh.vertices, mesh.faces, c, bar)
+        mesh.vertices = v
+        mesh.faces = f
+        if c is not None:
+            mesh.vertex_colors = c
+
+    return IO.NodeOutput(mesh)
+
+
+class DecimateMesh(IO.ComfyNode):
    @classmethod
    def define_schema(cls):
        return IO.Schema(
-            node_id="PostProcessMesh",
-            display_name="Post Process Mesh",
+            node_id="DecimateMesh",
+            display_name="Decimate Mesh",
            category="latent/3d",
-            description=(
-            "Applies a sequence of mesh post-processing operations including optional hole filling"
-            " and mesh simplification to a target face count."
-            ),
+            description="Simplifies a mesh to a target face count using QEM.",
            inputs=[
                IO.Mesh.Input("mesh"),
-                IO.Int.Input("target_face_count", default=1_000_000, min=0, max=50_000_000,
-                             tooltip="Target maximum number of faces after mesh simplification. Set to 0 to disable simplification."),
-                IO.Float.Input("fill_holes_perimeter", default=0.03, min=0.0, step=0.0001,
-                               tooltip=(
-                                "Maximum hole perimeter threshold for filling holes in the mesh. "
-                                "Smaller values only fill tiny holes, larger values fill larger gaps. "
-                                "Set to 0 to disable hole filling."))
+                IO.Int.Input("target_face_count", default=200_000, min=0, max=50_000_000,
+                             tooltip="Target maximum number of faces. Set to 0 to disable."),
            ],
-            outputs=[
-                IO.Mesh.Output("mesh"),
-            ]
+            outputs=[IO.Mesh.Output("mesh")],
        )

    @classmethod
-    def execute(cls, mesh, target_face_count, fill_holes_perimeter):
-        mesh = copy.deepcopy(mesh)
-
-        def process_single(v, f, c, bar):
-            if fill_holes_perimeter > 0:
-                v, f = fill_holes_fn(v, f, max_perimeter=fill_holes_perimeter)
-            bar.update(1)
-
-            n = compute_vertex_normals(v, f)
+    def execute(cls, mesh, target_face_count):
+        def _fn(v, f, c):
            if target_face_count > 0 and f.shape[0] > target_face_count:
+                n = compute_vertex_normals(v, f)
                v, f, c, _ = simplify_fn_fast(v, f, colors=c, normals=n, target=target_face_count)
-            bar.update(1)
-
-            v, f, c = make_double_sided(v, f, c)
-            bar.update(1)
            return v, f, c
+        return _process_mesh_batch(mesh, _fn)

-        is_list = isinstance(mesh.vertices, list)
-        is_batched_tensor = not is_list and mesh.vertices.ndim == 3

-        if is_list or is_batched_tensor:
-            out_v, out_f, out_c = [], [],[]
-            bsz = len(mesh.vertices) if is_list else mesh.vertices.shape[0]
-            bar = comfy.utils.ProgressBar(3 * bsz)
+class FillHoles(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="FillHoles",
+            display_name="Fill Holes",
+            category="latent/3d",
+            description="Fills holes in a mesh up to a maximum perimeter threshold.",
+            inputs=[
+                IO.Mesh.Input("mesh"),
+                IO.Float.Input("max_perimeter", default=0.03, min=0.0, step=0.0001,
+                               tooltip="Maximum hole perimeter to fill. Set to 0 to disable."),
+            ],
+            outputs=[IO.Mesh.Output("mesh")],
+        )

-            for i in range(bsz):
-                v_i = mesh.vertices[i]
-                f_i = mesh.faces[i]
+    @classmethod
+    def execute(cls, mesh, max_perimeter):
+        def _fn(v, f, c):
+            if max_perimeter > 0:
+                v, f = fill_holes_fn(v, f, max_perimeter=max_perimeter)
+            return v, f, c
+        return _process_mesh_batch(mesh, _fn)

-                # Safely grab colors if they exist
-                c_i = None
-                if hasattr(mesh, 'vertex_colors') and mesh.vertex_colors is not None:
-                    c_i = mesh.vertex_colors[i] if (isinstance(mesh.vertex_colors, list) or mesh.vertex_colors.ndim == 3) else mesh.vertex_colors

-                v_i, f_i, c_i = process_single(v_i, f_i, c_i, bar)
+class MakeDoubleSided(IO.ComfyNode):
+    @classmethod
+    def define_schema(cls):
+        return IO.Schema(
+            node_id="MakeDoubleSided",
+            display_name="Make Double Sided",
+            category="latent/3d",
+            description="Duplicates faces with flipped normals so the mesh renders from both sides.",
+            inputs=[IO.Mesh.Input("mesh")],
+            outputs=[IO.Mesh.Output("mesh")],
+        )

-                out_v.append(v_i)
-                out_f.append(f_i)
-                if c_i is not None:
-                    out_c.append(c_i)
-
-            # If the output meshes happen to have the exact same shape, stack them nicely.
-            # Otherwise, just leave them as a List! (ComfyUI native standard)
-            if all(v.shape == out_v[0].shape for v in out_v) and all(f.shape == out_f[0].shape for f in out_f):
-                mesh.vertices = torch.stack(out_v)
-                mesh.faces = torch.stack(out_f)
-                if out_c:
-                    mesh.vertex_colors = torch.stack(out_c)
-            else:
-                mesh.vertices = out_v
-                mesh.faces = out_f
-                if out_c:
-                    mesh.vertex_colors = out_c
-
-        else:
-            # Single Unbatched Mesh[V, 3]
-            c = mesh.vertex_colors if hasattr(mesh, 'vertex_colors') and mesh.vertex_colors is not None else None
-            v, f, c = process_single(mesh.vertices, mesh.faces, c)
-            mesh.vertices = v
-            mesh.faces = f
-            if c is not None:
-                mesh.vertex_colors = c
-
-        return IO.NodeOutput(mesh)
+    @classmethod
+    def execute(cls, mesh):
+        def _fn(v, f, c):
+            return make_double_sided(v, f, c)
+        return _process_mesh_batch(mesh, _fn)



@ -900,7 +930,9 @@ class PostProcessMeshExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        return [
-            PostProcessMesh,
+            MakeDoubleSided,
+            FillHoles,
+            DecimateMesh,
            PaintMesh
        ]

--- a/comfy_extras/nodes_model_merging.py
+++ b/comfy_extras/nodes_model_merging.py
@ -276,8 +276,8 @@ class CLIPSave:
                for x in extra_pnginfo:
                    metadata[x] = json.dumps(extra_pnginfo[x])

-        comfy.model_management.load_models_gpu([clip.load_model()], force_patch_weights=True)
-        clip_sd = clip.get_sd()
+        clip.load_model()
+        clip_sd = clip.state_dict_for_saving()

        for prefix in ["clip_l.", "clip_g.", "clip_h.", "t5xxl.", "pile_t5xl.", "mt5xl.", "umt5xxl.", "t5base.", "gemma2_2b.", "llama.", "hydit_clip.", ""]:
            k = list(filter(lambda a: a.startswith(prefix), clip_sd.keys()))
--- a/comfy_extras/nodes_morphology.py
+++ b/comfy_extras/nodes_morphology.py
@ -13,8 +13,8 @@ class Morphology(io.ComfyNode):
        return io.Schema(
            node_id="Morphology",
            search_aliases=["erode", "dilate"],
-            display_name="ImageMorphology",
-            category="image/postprocessing",
+            display_name="Apply Morphology",
+            category="image/filters",
            inputs=[
                io.Image.Input("image"),
                io.Combo.Input(
--- a/comfy_extras/nodes_nop.py
+++ b/comfy_extras/nodes_nop.py
@ -13,7 +13,7 @@ class wanBlockSwap(io.ComfyNode):
        return io.Schema(
            node_id="wanBlockSwap",
            category="",
-            description="NOP",
+            description="Intercept wanBlockSwap custom node that causes major instability and make it no-op.",
            inputs=[
                io.Model.Input("model"),
            ],
--- a/comfy_extras/nodes_number_convert.py
+++ b/comfy_extras/nodes_number_convert.py
@ -20,7 +20,7 @@ class NumberConvertNode(io.ComfyNode):
    def define_schema(cls) -> io.Schema:
        return io.Schema(
            node_id="ComfyNumberConvert",
-            display_name="Number Convert",
+            display_name="Convert Number",
            category="utils",
            search_aliases=[
                "int to float", "float to int", "number convert",
--- a/comfy_extras/nodes_optimalsteps.py
+++ b/comfy_extras/nodes_optimalsteps.py
@ -31,7 +31,7 @@ class OptimalStepsScheduler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="OptimalStepsScheduler",
-            category="sampling/custom_sampling/schedulers",
+            category="sampling/schedulers",
            inputs=[
                io.Combo.Input("model_type", options=["FLUX", "Wan", "Chroma"]),
                io.Int.Input("steps", default=20, min=3, max=1000),
--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@ -22,7 +22,7 @@ class Blend(io.ComfyNode):
            node_id="ImageBlend",
            search_aliases=["mix images"],
            display_name="Blend Images",
-            category="image/postprocessing",
+            category="image/filters",
            essentials_category="Image Tools",
            inputs=[
                io.Image.Input("image1"),
@ -80,8 +80,8 @@ class Blur(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="ImageBlur",
-            display_name="Image Blur",
-            category="image/postprocessing",
+            display_name="Blur Image",
+            category="image/filters",
            inputs=[
                io.Image.Input("image"),
                io.Int.Input("blur_radius", default=1, min=1, max=31, step=1),
@ -117,7 +117,7 @@ class Quantize(io.ComfyNode):
        return io.Schema(
            node_id="ImageQuantize",
            display_name="Quantize Image",
-            category="image/postprocessing",
+            category="image/filters",
            inputs=[
                io.Image.Input("image"),
                io.Int.Input("colors", default=256, min=1, max=256, step=1),
@ -183,7 +183,7 @@ class Sharpen(io.ComfyNode):
        return io.Schema(
            node_id="ImageSharpen",
            display_name="Sharpen Image",
-            category="image/postprocessing",
+            category="image/filters",
            inputs=[
                io.Image.Input("image"),
                io.Int.Input("sharpen_radius", default=1, min=1, max=31, step=1, advanced=True),
@ -568,7 +568,7 @@ def batch_latents(latents: list[dict[str, torch.Tensor]]) -> dict[str, torch.Ten
 class BatchImagesNode(io.ComfyNode):
    @classmethod
    def define_schema(cls):
-        autogrow_template = io.Autogrow.TemplatePrefix(io.Image.Input("image"), prefix="image", min=2, max=50)
+        autogrow_template = io.Autogrow.TemplatePrefix(io.Image.Input("image"), prefix="image", min=1, max=50)
        return io.Schema(
            node_id="BatchImagesNode",
            display_name="Batch Images",
@ -590,12 +590,12 @@ class BatchImagesNode(io.ComfyNode):
 class BatchMasksNode(io.ComfyNode):
    @classmethod
    def define_schema(cls):
-        autogrow_template = io.Autogrow.TemplatePrefix(io.Mask.Input("mask"), prefix="mask", min=2, max=50)
+        autogrow_template = io.Autogrow.TemplatePrefix(io.Mask.Input("mask"), prefix="mask", min=1, max=50)
        return io.Schema(
            node_id="BatchMasksNode",
            search_aliases=["combine masks", "stack masks", "merge masks"],
            display_name="Batch Masks",
-            category="mask",
+            category="image/mask",
            inputs=[
                io.Autogrow.Input("masks", template=autogrow_template)
            ],
@ -611,7 +611,7 @@ class BatchMasksNode(io.ComfyNode):
 class BatchLatentsNode(io.ComfyNode):
    @classmethod
    def define_schema(cls):
-        autogrow_template = io.Autogrow.TemplatePrefix(io.Latent.Input("latent"), prefix="latent", min=2, max=50)
+        autogrow_template = io.Autogrow.TemplatePrefix(io.Latent.Input("latent"), prefix="latent", min=1, max=50)
        return io.Schema(
            node_id="BatchLatentsNode",
            search_aliases=["combine latents", "stack latents", "merge latents"],
@ -670,8 +670,8 @@ class ColorTransfer(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="ColorTransfer",
-            display_name="Color Transfer",
-            category="image/postprocessing",
+            display_name="Transfer Color",
+            category="image/filters",
            description="Match the colors of one image to another using various algorithms.",
            search_aliases=["color match", "color grading", "color correction", "match colors", "color transform", "mkl", "reinhard", "histogram"],
            inputs=[
--- a/comfy_extras/nodes_rtdetr.py
+++ b/comfy_extras/nodes_rtdetr.py
@ -15,7 +15,7 @@ class RTDETR_detect(io.ComfyNode):
        return io.Schema(
            node_id="RTDETR_detect",
            display_name="RT-DETR Detect",
-            category="detection",
+            category="image/detection",
            search_aliases=["bbox", "bounding box", "object detection", "coco"],
            inputs=[
                io.Model.Input("model", display_name="model"),
@ -71,7 +71,7 @@ class DrawBBoxes(io.ComfyNode):
        return io.Schema(
            node_id="DrawBBoxes",
            display_name="Draw BBoxes",
-            category="detection",
+            category="image/detection",
            search_aliases=["bbox", "bounding box", "object detection", "rt_detr", "visualize detections", "coco"],
            inputs=[
                io.Image.Input("image", optional=True),
--- a/comfy_extras/nodes_sam3.py
+++ b/comfy_extras/nodes_sam3.py
@ -93,7 +93,7 @@ class SAM3_Detect(io.ComfyNode):
        return io.Schema(
            node_id="SAM3_Detect",
            display_name="SAM3 Detect",
-            category="detection",
+            category="image/detection",
            search_aliases=["sam3", "segment anything", "open vocabulary", "text detection", "segment"],
            inputs=[
                io.Model.Input("model", display_name="model"),
@ -265,7 +265,7 @@ class SAM3_VideoTrack(io.ComfyNode):
        return io.Schema(
            node_id="SAM3_VideoTrack",
            display_name="SAM3 Video Track",
-            category="detection",
+            category="image/detection",
            search_aliases=["sam3", "video", "track", "propagate"],
            inputs=[
                io.Image.Input("images", display_name="images", tooltip="Video frames as batched images"),
@ -320,7 +320,7 @@ class SAM3_TrackPreview(io.ComfyNode):
        return io.Schema(
            node_id="SAM3_TrackPreview",
            display_name="SAM3 Track Preview",
-            category="detection",
+            category="image/detection",
            inputs=[
                SAM3TrackData.Input("track_data", display_name="track_data"),
                io.Image.Input("images", display_name="images", optional=True),
@ -478,7 +478,7 @@ class SAM3_TrackToMask(io.ComfyNode):
        return io.Schema(
            node_id="SAM3_TrackToMask",
            display_name="SAM3 Track to Mask",
-            category="detection",
+            category="image/detection",
            inputs=[
                SAM3TrackData.Input("track_data", display_name="track_data"),
                io.String.Input("object_indices", display_name="object_indices", default="",
--- a/comfy_extras/nodes_sdpose.py
+++ b/comfy_extras/nodes_sdpose.py
@ -353,7 +353,8 @@ class SDPoseDrawKeypoints(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SDPoseDrawKeypoints",
-            category="image/preprocessors",
+            display_name="SDPose Draw Keypoints",
+            category="image/detection",
            search_aliases=["openpose", "pose detection", "preprocessor", "keypoints", "pose"],
            inputs=[
                io.Custom("POSE_KEYPOINT").Input("keypoints"),
@ -421,7 +422,8 @@ class SDPoseKeypointExtractor(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SDPoseKeypointExtractor",
-            category="image/preprocessors",
+            display_name="SDPose Keypoint Extractor",
+            category="image/detection",
            search_aliases=["openpose", "pose detection", "preprocessor", "keypoints", "sdpose"],
            description="Extract pose keypoints from images using the SDPose model: https://huggingface.co/Comfy-Org/SDPose/tree/main/checkpoints",
            inputs=[
@ -595,7 +597,8 @@ class SDPoseFaceBBoxes(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="SDPoseFaceBBoxes",
-            category="image/preprocessors",
+            display_name="SDPose Face Bounding Boxes",
+            category="image/detection",
            search_aliases=["face bbox", "face bounding box", "pose", "keypoints"],
            inputs=[
                io.Custom("POSE_KEYPOINT").Input("keypoints"),
@ -652,7 +655,8 @@ class CropByBBoxes(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="CropByBBoxes",
-            category="image/preprocessors",
+            display_name="Crop By Bounding Boxes",
+            category="image/transform",
            search_aliases=["crop", "face crop", "bbox crop", "pose", "bounding box"],
            description="Crop and resize regions from the input image batch based on provided bounding boxes.",
            inputs=[
--- a/comfy_extras/nodes_string.py
+++ b/comfy_extras/nodes_string.py
@ -1,10 +1,41 @@
 import re
 import json
+import string
 from typing_extensions import override

 from comfy_api.latest import ComfyExtension, io


+class StringFormat(io.ComfyNode):  # Node that renders a str.format template from named input values.
+    @classmethod
+    def define_schema(cls) -> io.Schema:  # Declares the node's inputs (named values + template) and its single string output.
+        autogrow = io.Autogrow.TemplateNames(
+            input=io.AnyType.Input("value"),
+            names=list(string.ascii_lowercase),  # slot names are the 26 letters "a".."z"
+            min=0,  # presumably the minimum number of connected slots; confirm against io.Autogrow
+        )
+        return io.Schema(
+            node_id="StringFormat",
+            display_name="Format Text",
+            category="text",
+            search_aliases=["string", "format"],
+            description="Same as Python's string format method. Supports all of Python's format options and features.",
+            inputs=[
+                io.Autogrow.Input("values", template=autogrow),
+                io.String.Input("f_string", default="{a}", multiline=True),  # default template references slot "a"
+            ],
+            outputs=[
+                io.String.Output(),
+            ],
+        )
+
+    @classmethod
+    def execute(  # Formats f_string with the collected values and wraps the result in a NodeOutput.
+        cls, values: io.Autogrow.Type, f_string: str
+    ) -> io.NodeOutput:
+        return io.NodeOutput(f_string.format(**values))  # f_string is a str.format template (not an f-string literal); values is unpacked as keyword args, so a field with no matching key raises KeyError
+
+
 class StringConcatenate(io.ComfyNode):
    @classmethod
    def define_schema(cls):
@ -413,6 +444,7 @@ class StringExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[io.ComfyNode]]:
        return [
+            StringFormat,
            StringConcatenate,
            StringSubstring,
            StringLength,
--- a/comfy_extras/nodes_trellis2.py
+++ b/comfy_extras/nodes_trellis2.py
@ -8,7 +8,6 @@ import numpy as np
 import torch

 ShapeSubdivides = io.Custom("SHAPE_SUBDIVIDES")
-HighResVoxel = io.Custom("HIGH_RES_VOXEL")

 def prepare_trellis_vae_for_decode(vae, sample_shape):
    memory_required = vae.memory_used_decode(sample_shape, vae.vae_dtype)
@ -297,7 +296,7 @@ class Trellis2UpsampleCascade(IO.ComfyNode):
                            ))
            ],
            outputs=[
-                HighResVoxel.Output(
+                IO.Voxel.Output(
                "high_res_voxel",
                tooltip=(
                    "High-resolution sparse coordinates produced after cascade upsampling. "
@ -389,11 +388,11 @@ class Trellis2UpsampleCascade(IO.ComfyNode):
            final_coords_list.append(final_coords_i)
            output_coord_counts.append(int(final_coords_i.shape[0]))

-        output = {
-            "coords": torch.cat(final_coords_list, dim=0),
-            "coord_counts": torch.tensor(output_coord_counts, dtype=torch.int64),
-            "resolutions": torch.full((len(final_coords_list),), int(hr_resolution), dtype=torch.int64),
-        }
+        coords = torch.cat(final_coords_list, dim=0)
+        output = Types.VOXEL(coords)
+        output.coord_counts = torch.tensor(output_coord_counts, dtype=torch.int64)
+        output.resolutions = torch.full((len(final_coords_list),), int(hr_resolution), dtype=torch.int64)
+        output.upsampled = True

        return IO.NodeOutput(output,)

@ -537,9 +536,8 @@ class EmptyTrellis2ShapeLatent(IO.ComfyNode):
            node_id="EmptyTrellis2ShapeLatent",
            category="latent/3d",
            inputs=[
-                IO.MultiType.Input(
+                IO.Voxel.Input(
                    "voxel",
-                    types=[IO.Voxel, HighResVoxel],
                    tooltip=(
                        "Shape structure input. Accepts either a voxel structure "
                        "or upsampled voxel coordinates from a previous cascade stage."
@ -555,20 +553,18 @@ class EmptyTrellis2ShapeLatent(IO.ComfyNode):
    def execute(cls, voxel):
        # to accept the upscaled coords
        is_512_pass = False
+        upsampled = hasattr(voxel, "upsampled")
+        if upsampled:
+            voxel = voxel.data

-        if isinstance(voxel, dict):
-            voxel = voxel["coords"]
-
-        if hasattr(voxel, "data") and voxel.data.ndim == 4:
+        if not upsampled:
            decoded = voxel.data.unsqueeze(1)
            coords = torch.argwhere(decoded.bool())[:, [0, 2, 3, 4]].int()
            is_512_pass = True

-        elif isinstance(voxel, torch.Tensor) and voxel.ndim == 2:
+        else:
            coords = voxel.int()
            is_512_pass = False
-        else:
-            raise ValueError(f"Invalid input to EmptyTrellis2ShapeLatent: {type(voxel)}")

        batch_size, counts, max_tokens = infer_batched_coord_layout(coords)
        in_channels = 32
@ -589,9 +585,8 @@ class EmptyTrellis2LatentTexture(IO.ComfyNode):
            node_id="EmptyTrellis2LatentTexture",
            category="latent/3d",
            inputs=[
-                IO.MultiType.Input(
+                IO.Voxel.Input(
                    "voxel",
-                    types=[IO.Voxel, HighResVoxel],
                    tooltip=(
                        "Shape structure input. Accepts either a voxel structure "
                        "or upsampled voxel coordinates from a previous cascade stage."
@ -607,13 +602,14 @@ class EmptyTrellis2LatentTexture(IO.ComfyNode):
    @classmethod
    def execute(cls, voxel, shape_latent):
        channels = 32
-        if isinstance(voxel, dict):
-            voxel = voxel["coords"]
-        if hasattr(voxel, "data") and voxel.data.ndim == 4:
+        upsampled = hasattr(voxel, "upsampled")
+        if upsampled:
+            voxel = voxel.data
+
+        if not upsampled:
            decoded = voxel.data.unsqueeze(1)
            coords = torch.argwhere(decoded.bool())[:, [0, 2, 3, 4]].int()
-
-        elif isinstance(voxel, torch.Tensor) and voxel.ndim == 2:
+        else:
            coords = voxel.int()

        batch_size, counts, max_tokens = infer_batched_coord_layout(coords)
--- a/comfy_extras/nodes_video_model.py
+++ b/comfy_extras/nodes_video_model.py
@ -65,7 +65,7 @@ class VideoLinearCFGGuidance:
    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"

-    CATEGORY = "sampling/video_models"
+    CATEGORY = "sampling/guiders"

    def patch(self, model, min_cfg):
        def linear_cfg(args):
@ -89,7 +89,7 @@ class VideoTriangleCFGGuidance:
    RETURN_TYPES = ("MODEL",)
    FUNCTION = "patch"

-    CATEGORY = "sampling/video_models"
+    CATEGORY = "sampling/guiders"

    def patch(self, model, min_cfg):
        def linear_cfg(args):
@ -157,5 +157,7 @@ NODE_CLASS_MAPPINGS = {
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
-    "ImageOnlyCheckpointLoader": "Image Only Checkpoint Loader (img2vid model)",
+    "ImageOnlyCheckpointLoader": "Load Checkpoint Image Only (img2vid model)",
+    "VideoLinearCFGGuidance": "Video Linear CFG Guidance",
+    "VideoTriangleCFGGuidance": "Video Triangle CFG Guidance",
 }
--- a/comfy_extras/nodes_void.py
+++ b/comfy_extras/nodes_void.py
@ -122,7 +122,8 @@ class VOIDQuadmaskPreprocess(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="VOIDQuadmaskPreprocess",
-            category="mask/video",
+            display_name="VOID Quadmask Preprocessor",
+            category="image/mask",
            inputs=[
                io.Mask.Input("mask"),
                io.Int.Input("dilate_width", default=0, min=0, max=50, step=1,
@ -392,7 +393,7 @@ class VOIDWarpedNoiseSource(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="VOIDWarpedNoiseSource",
-            category="sampling/custom_sampling/noise",
+            category="sampling/noise",
            inputs=[
                io.Latent.Input("warped_noise",
                    tooltip="Warped noise latent from VOIDWarpedNoise"),
@ -454,7 +455,7 @@ class VOIDSampler(io.ComfyNode):
    def define_schema(cls):
        return io.Schema(
            node_id="VOIDSampler",
-            category="sampling/custom_sampling/samplers",
+            category="sampling/samplers",
            inputs=[],
            outputs=[io.Sampler.Output()],
        )
--- a/execution.py
+++ b/execution.py
@ -626,7 +626,7 @@ async def execute(server, dynprompt, caches, current_item, extra_data, executed,

        if comfy.model_management.is_oom(ex):
            tips = "This error means you ran out of memory on your GPU.\n\nTIPS: If the workflow worked before you might have accidentally set the batch_size to a large number."
-            logging.info("Memory summary: {}".format(comfy.model_management.debug_memory_summary()))
+            logging.info("Memory summary:\n{}".format(comfy.model_management.debug_memory_summary()))
            logging.error("Got an OOM, unloading all loaded models.")
            comfy.model_management.unload_all_models()
        elif isinstance(ex, RuntimeError) and ("mat1 and mat2 shapes" in str(ex)) and "Sampler" in class_type:
--- a/nodes.py
+++ b/nodes.py
@ -691,7 +691,7 @@ class LoraLoader:
    FUNCTION = "load_lora"

    CATEGORY = "loaders"
-    DESCRIPTION = "LoRAs are used to modify diffusion and CLIP models, altering the way in which latents are denoised such as applying styles. Multiple LoRA nodes can be linked together."
+    DESCRIPTION = "This LoRA loader is used to modify both diffusion and CLIP models, altering the way in which latents are denoised such as applying styles. Multiple LoRA nodes can be linked together."
    SEARCH_ALIASES = ["lora", "load lora", "apply lora", "lora loader", "lora model"]

    def load_lora(self, model, clip, lora_name, strength_model, strength_clip):
@ -723,6 +723,7 @@ class LoraLoaderModelOnly(LoraLoader):
                              "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01}),
                              }}
    RETURN_TYPES = ("MODEL",)
+    DESCRIPTION = "This LoRA loader is used to modify the diffusion model, altering the way in which latents are denoised such as applying styles. Multiple LoRA nodes can be linked together."
    FUNCTION = "load_lora_model_only"

    def load_lora_model_only(self, model, lora_name, strength_model):
@ -1524,7 +1525,7 @@ class SetLatentNoiseMask:

 def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
    latent_image = latent["samples"]
-    latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None))
+    latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None), latent.get("downscale_ratio_temporal", None))

    if disable_noise:
        noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
@ -1547,6 +1548,7 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive,
                                  force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
    out = latent.copy()
    out.pop("downscale_ratio_spacial", None)
+    out.pop("downscale_ratio_temporal", None)
    out["samples"] = samples
    return (out, )

@ -1779,7 +1781,7 @@ class LoadImageMask(LoadImage):
            }
        }

-    CATEGORY = "mask"
+    CATEGORY = "image"
    RETURN_TYPES = ("MASK",)
    FUNCTION = "load_image_mask"

--- a/openapi.yaml
+++ b/openapi.yaml
@ -485,8 +485,15 @@ paths:
    post:
      operationId: uploadMask
      tags: [upload]
-      summary: Upload a mask image
-      description: Uploads a mask image associated with a previously-uploaded reference image.
+      deprecated: true
+      summary: Upload a mask image (deprecated)
+      description: |
+        Deprecated. Clients should composite the mask onto the source image
+        client-side and upload the resulting image via POST /api/upload/image
+        instead. This endpoint will continue to function for older clients,
+        but will not receive new features.
+
+        Uploads a mask image associated with a previously-uploaded reference image.
      requestBody:
        required: true
        content:
@ -4153,6 +4160,10 @@ paths:
                name:
                  type: string
                  description: Display name for the API key
+                description:
+                  type: string
+                  description: User-provided description of the key's purpose
+                  maxLength: 5000
      responses:
        "201":
          description: API key created
@ -6344,14 +6355,6 @@ components:
          type: integer
          format: int64
          description: Size of the asset in bytes
-        width:
-          type: integer
-          nullable: true
-          description: "Original image width in pixels. Null for non-image assets or assets ingested before dimension extraction."
-        height:
-          type: integer
-          nullable: true
-          description: "Original image height in pixels. Null for non-image assets or assets ingested before dimension extraction."
        mime_type:
          type: string
          description: MIME type of the asset
@ -7678,11 +7681,16 @@ components:
      required:
        - id
        - name
+        - description
      properties:
        id:
          type: string
        name:
          type: string
+        description:
+          type: string
+          maxLength: 5000
+          description: User-provided description of the key's purpose. Always present in responses; empty string when no description was supplied on create.
        prefix:
          type: string
          description: First few characters of the key for identification
@ -7703,12 +7711,17 @@ components:
      required:
        - id
        - name
+        - description
        - key
      properties:
        id:
          type: string
        name:
          type: string
+        description:
+          type: string
+          maxLength: 5000
+          description: User-provided description of the key's purpose. Always present in responses; empty string when no description was supplied on create.
        key:
          type: string
          description: Full API key value (only returned on creation)
Author	SHA1	Message	Date
Yousef Rafat	60f0ec8d69	Merge branch 'trellis2' of https://github.com/yousef-rafat/ComfyUI into pr/12183	2026-05-20 17:15:56 +03:00
Yousef Rafat	2b2a1a3cd0	remove triton, custom datatype, split mesh postpro	2026-05-20 17:15:33 +03:00
Yousef R. Gamaleldin	32e2fa9630	Merge branch 'master' into trellis2	2026-05-20 10:56:30 +03:00
comfyanonymous	72e3f6081c	Add downscale ratio to empty ltxv latent. (#13999 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-19 20:28:06 -07:00
Pauan	7ec7b6ffe9	Adding new StringFormat node (#13997 )	2026-05-20 10:25:49 +08:00
Matt Miller	6887165a9d	docs(openapi): tighten workspace API key description field (BE-1004) (#13996 ) Aligns the OSS spec with the cloud-side BE-1004 contract: - createWorkspaceApiKey request body: add maxLength: 5000 to the description property (matches cloud's hub_profile.description MaxLen(5000) convention; enforced cloud-side via handler check). - WorkspaceApiKey + WorkspaceApiKeyCreated response schemas: mark description as required (cloud's handler always populates the field, defaulting to empty string when not supplied on create), drop nullable: true, add maxLength: 5000 for symmetry, and clarify the doc string ("Always present in responses; empty string when no description was supplied on create"). Both schemas are tagged x-runtime: [cloud] at the schema level so the tightening is correctly scoped — OSS-only implementations are not required to honor the workspace API keys endpoints at all. Related cloud PR: Comfy-Org/cloud#3747	2026-05-19 16:55:04 -07:00
Matt Miller	cc4d711eb1	feat(openapi): add optional description field to workspace API key schemas (#13993 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details * feat(openapi): add optional description field to workspace API key schemas Add an optional `description` property (type: string) to three workspace API key schemas in openapi.yaml: - Inline request body of createWorkspaceApiKey (POST /api/workspace/api-keys) - WorkspaceApiKey (list/info schema) - WorkspaceApiKeyCreated (creation response schema) The field is not added to any `required` array, making it fully backward-compatible with existing clients. 
Refs: BE-1005, BE-1004 Co-authored-by: Matt Miller <mattmillerai@users.noreply.github.com> * fix(openapi): mark description nullable in workspace API key response schemas Per CodeRabbit review on PR #13993: the underlying DB column is nullable varchar (default ''), so the response schemas should permit null to match stored data reality. Without nullable: true the OpenAPI contract would require coercion on the handler side or risk a contract violation. Request schema unchanged — clients shouldn't be sending null on create.	2026-05-19 14:48:47 -07:00
yy	626b082838	Fix typo in ops.py (#11925 )	2026-05-20 05:45:04 +08:00
Matt Miller	d0328b442d	docs(openapi): remove top-level width/height fields on Asset schema (#13973 ) These two fields were added recently to the Asset schema as nullable integers, with the intent of exposing original image dimensions for FE consumers (cloud-side thumbnailing makes naturalWidth/Height return the wrong size for an image card's dimension label). The implementation effort that consumes them subsequently converged on a different shape — dimensions nested under the existing free-form `metadata` JSON field as `{kind: "image", width, height}` — to avoid introducing type-specific flat fields on the canonical Asset shape, and to leave room for forward-compatible additions (video duration, fps, etc.) without further schema churn. This removes the now-unused top-level fields so the spec reflects the agreed direction. No other schema definitions reference these fields directly: AssetCreated, AssetUpdated, etc. inherit Asset via allOf and do not redefine them. The runtime ingest implementation that would have populated these fields was not yet shipped, so no clients are relying on the top-level shape. Co-authored-by: Alexis Rolland <alexisrolland@hotmail.com>	2026-05-19 10:00:26 -07:00
Matt Miller	6b61918a16	docs(openapi): deprecate /api/upload/mask in favor of /api/upload/image (#13968 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details Mark the uploadMask operation as deprecated and point clients at /api/upload/image. The mask-compositing behavior the endpoint provides (alpha-compositing the supplied mask onto an original_ref image) is now expected to happen client-side, with the composited result uploaded through the unified /api/upload/image path. The endpoint continues to function for older clients; no runtime behavior changes ship with this commit. Only the OpenAPI annotation and the human-facing description are updated.	2026-05-19 12:19:51 +08:00
comfyanonymous	a4382e056e	Use temporal downscale to make empty audio latent nodes more reusable. (#13975 )	2026-05-19 00:14:30 -04:00
Alexis Rolland	d71cc1c8f2	chore: Various QoL updates of nodes display names, descriptions and categories (CORE-190, CORE-191) (#13830 ) * Move detection category under image category * Add missing categories * Move detection nodes to detection category * Move save nodes to image root category * Rename postprocessors * Move mask category under image * Move guiders category to parent level at root of sampling category * Move custom_sampling category to parent level at the root of sampling category * Modify description of LoRA loaders * Fix node id SolidMask * Move VOID Quadmask under image/mask * Group compositing nodes under image/compositing * Move load image as mask to image category for consistency with other load image nodes * Align display name with Load Checkpoint * Move dataset category under training category * Rename Number Convert to Convert Number (verb first) * Rename Canny node * Revert wanBlockSwap + description * Add description to RemoveBackground node * Revert category update of dataset	2026-05-19 00:13:48 -04:00
comfyanonymous	990a7ae7f2	Initial work to make downscale_ratio_temporal work. (#13972 )	2026-05-18 23:01:43 -04:00
Jedrzej Kosinski	df2454b47e	Reduce min for Batch Image/Mask/Latent nodes from 2 to 1 (#13721 )	2026-05-19 09:50:14 +08:00
drozbay	292814c31e	feat: Add optional attention_mask input to LTXVAddGuide (CORE-220) (#13965 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details	2026-05-19 05:07:04 +08:00
Yousef R. Gamaleldin	187e5237e1	Fix BiRefNet issue (#13966 )	2026-05-19 05:03:22 +08:00
Alexander Piskun	164a9d4bbb	[Partner Nodes] add ByteDance Seed LLM node (#13919 ) Signed-off-by: bigcat88 <bigcat88@icloud.com>	2026-05-18 13:06:13 -07:00
rattus	16f862f02a	implement dynamic clip saving (#13959 ) Fix clip saving by doing the same patching process as diffusion models.	2026-05-18 11:46:40 -07:00
Alvin Tang	d4c6c9eff8	fix(FeatherMask): correct negative zero indexing for right/bottom feathering (#12881 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-18 20:22:15 +08:00
Alexander Piskun	264b003286	[Partner Nodes] fix Opus 4.7 sending deprecated temperature parameter (#13955 )	2026-05-18 09:53:31 +03:00
Jukka Seppänen	971c9e3518	HiDream-O1: support area conditioning (#13944 )	2026-05-18 01:17:05 -04:00
Jukka Seppänen	b39af210d0	Fix Qwen3.5 text generation with multiple input images (#13943 )	2026-05-18 01:16:42 -04:00
apophis	aeadb7acaa	correct OOM format (#13950 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-18 12:06:45 +08:00
comfyanonymous	f48d2a017e	Log which quant ops are enabled/emulated. (#13946 ) Some checks are pending Python Linting / Run Ruff (push) Waiting to run Details Python Linting / Run Pylint (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.10, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.11, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-stable (12.1, , linux, 3.12, [self-hosted Linux], stable) (push) Waiting to run Details Full Comfy CI Workflow Runs / test-unix-nightly (12.1, , linux, 3.11, [self-hosted Linux], nightly) (push) Waiting to run Details Execution Tests / test (macos-latest) (push) Waiting to run Details Execution Tests / test (ubuntu-latest) (push) Waiting to run Details Execution Tests / test (windows-latest) (push) Waiting to run Details Test server launches without errors / test (push) Waiting to run Details Unit Tests / test (macos-latest) (push) Waiting to run Details Unit Tests / test (ubuntu-latest) (push) Waiting to run Details Unit Tests / test (windows-2022) (push) Waiting to run Details	2026-05-17 16:30:54 -04:00