From b1c7291569daaeec34ccf6df25d57886eb73d98e Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 30 Apr 2025 11:18:20 -0700 Subject: [PATCH 01/16] Test updater in the windows release workflow. (#7886) --- .github/workflows/windows_release_package.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/windows_release_package.yml b/.github/workflows/windows_release_package.yml index 80a45b321..3926a65f3 100644 --- a/.github/workflows/windows_release_package.yml +++ b/.github/workflows/windows_release_package.yml @@ -88,6 +88,8 @@ jobs: cd ComfyUI_windows_portable python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu + python_embeded/python.exe -s ./update/update.py ComfyUI/ + ls - name: Upload binaries to release From 39c27a37052b5e18850a6c63cd0a4b92131db3ba Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Wed, 30 Apr 2025 11:42:18 -0700 Subject: [PATCH 02/16] Add updater test to stable release workflow. (#7887) --- .github/workflows/stable-release.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/stable-release.yml b/.github/workflows/stable-release.yml index c4302cdd6..a046ff9ea 100644 --- a/.github/workflows/stable-release.yml +++ b/.github/workflows/stable-release.yml @@ -91,6 +91,8 @@ jobs: cd ComfyUI_windows_portable python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu + python_embeded/python.exe -s ./update/update.py ComfyUI/ + ls - name: Upload binaries to release From 4ca3d842774421968cc19c319ee96daa58a98fbb Mon Sep 17 00:00:00 2001 From: Silver <65376327+silveroxides@users.noreply.github.com> Date: Thu, 1 May 2025 02:57:00 +0200 Subject: [PATCH 03/16] Support for Chroma - Flux1 Schnell distilled with CFG (#7355) * Upload files for Chroma Implementation * Remove trailing whitespace * trim more trailing whitespace..oops * remove unused imports * Add supported_inference_dtypes * Set min_length to 0 and remove attention_mask=True * Set min_length to 1 * get_mdulations added from blepping and minor changes * Add lora conversion if statement in lora.py * Update supported_models.py * update model_base.py * add uptream commits * set modelType.FLOW, will cause beta scheduler to work properly * Adjust memory usage factor and remove unnecessary code * fix mistake * reduce code duplication * remove unused imports * refactor for upstream sync * sync chroma-support with upstream via syncbranch patch * Update sd.py * Add Chroma as option for the OptimalStepsScheduler node --- comfy/ldm/chroma/layers.py | 183 +++++++++++++++++++ comfy/ldm/chroma/math.py | 44 +++++ comfy/ldm/chroma/model.py | 271 +++++++++++++++++++++++++++++ comfy/lora.py | 2 +- comfy/model_base.py | 62 +++++++ comfy/model_detection.py | 26 +++ comfy/sd.py | 5 + comfy/supported_models.py | 30 +++- comfy/text_encoders/chroma.py | 43 +++++ comfy_extras/nodes_optimalsteps.py | 3 +- nodes.py | 2 +- 11 files changed, 667 insertions(+), 4 deletions(-) create mode 100644 comfy/ldm/chroma/layers.py create mode 100644 comfy/ldm/chroma/math.py create mode 100644 comfy/ldm/chroma/model.py create mode 100644 comfy/text_encoders/chroma.py diff --git a/comfy/ldm/chroma/layers.py b/comfy/ldm/chroma/layers.py new file mode 100644 index 000000000..dd0b72f70 --- /dev/null +++ b/comfy/ldm/chroma/layers.py @@ -0,0 +1,183 @@ +import torch +from torch import Tensor, nn + +from .math import attention +from comfy.ldm.flux.layers import ( + MLPEmbedder, + RMSNorm, + 
QKNorm, + SelfAttention, + ModulationOut, +) + + + +class ChromaModulationOut(ModulationOut): + @classmethod + def from_offset(cls, tensor: torch.Tensor, offset: int = 0) -> ModulationOut: + return cls( + shift=tensor[:, offset : offset + 1, :], + scale=tensor[:, offset + 1 : offset + 2, :], + gate=tensor[:, offset + 2 : offset + 3, :], + ) + + + + +class Approximator(nn.Module): + def __init__(self, in_dim: int, out_dim: int, hidden_dim: int, n_layers = 5, dtype=None, device=None, operations=None): + super().__init__() + self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device) + self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)]) + self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)]) + self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device) + + @property + def device(self): + # Get the device of the module (assumes all parameters are on the same device) + return next(self.parameters()).device + + def forward(self, x: Tensor) -> Tensor: + x = self.in_proj(x) + + for layer, norms in zip(self.layers, self.norms): + x = x + layer(norms(x)) + + x = self.out_proj(x) + + return x + + +class DoubleStreamBlock(nn.Module): + def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None): + super().__init__() + + mlp_hidden_dim = int(hidden_size * mlp_ratio) + self.num_heads = num_heads + self.hidden_size = hidden_size + self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations) + + self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.img_mlp = nn.Sequential( + operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device), + nn.GELU(approximate="tanh"), + operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), + ) + + self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations) + + self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.txt_mlp = nn.Sequential( + operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device), + nn.GELU(approximate="tanh"), + operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), + ) + self.flipped_img_txt = flipped_img_txt + + def forward(self, img: Tensor, txt: Tensor, pe: Tensor, vec: Tensor, attn_mask=None): + (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec + + # prepare image for attention + img_modulated = self.img_norm1(img) + img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift + img_qkv = self.img_attn.qkv(img_modulated) + img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + img_q, img_k = self.img_attn.norm(img_q, img_k, img_v) + + # prepare txt for attention + txt_modulated = self.txt_norm1(txt) + txt_modulated = (1 + 
txt_mod1.scale) * txt_modulated + txt_mod1.shift + txt_qkv = self.txt_attn.qkv(txt_modulated) + txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v) + + # run actual attention + attn = attention(torch.cat((txt_q, img_q), dim=2), + torch.cat((txt_k, img_k), dim=2), + torch.cat((txt_v, img_v), dim=2), + pe=pe, mask=attn_mask) + + txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :] + + # calculate the img bloks + img = img + img_mod1.gate * self.img_attn.proj(img_attn) + img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift) + + # calculate the txt bloks + txt += txt_mod1.gate * self.txt_attn.proj(txt_attn) + txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift) + + if txt.dtype == torch.float16: + txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504) + + return img, txt + + +class SingleStreamBlock(nn.Module): + """ + A DiT block with parallel linear layers as described in + https://arxiv.org/abs/2302.05442 and adapted modulation interface. + """ + + def __init__( + self, + hidden_size: int, + num_heads: int, + mlp_ratio: float = 4.0, + qk_scale: float = None, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.hidden_dim = hidden_size + self.num_heads = num_heads + head_dim = hidden_size // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.mlp_hidden_dim = int(hidden_size * mlp_ratio) + # qkv and mlp_in + self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device) + # proj and mlp_out + self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device) + + self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations) + + self.hidden_size = hidden_size + self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + + self.mlp_act = nn.GELU(approximate="tanh") + + def forward(self, x: Tensor, pe: Tensor, vec: Tensor, attn_mask=None) -> Tensor: + mod = vec + x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift + qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1) + + q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k = self.norm(q, k, v) + + # compute attention + attn = attention(q, k, v, pe=pe, mask=attn_mask) + # compute activation in mlp stream, cat again and run second linear layer + output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2)) + x += mod.gate * output + if x.dtype == torch.float16: + x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504) + return x + + +class LastLayer(nn.Module): + def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None): + super().__init__() + self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device) + self.linear = operations.Linear(hidden_size, out_channels, bias=True, dtype=dtype, device=device) + + def forward(self, x: Tensor, vec: Tensor) -> Tensor: + shift, scale = vec + shift = shift.squeeze(1) + scale = scale.squeeze(1) + x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] + x = self.linear(x) + return x diff --git a/comfy/ldm/chroma/math.py b/comfy/ldm/chroma/math.py new 
file mode 100644 index 000000000..36b67931c --- /dev/null +++ b/comfy/ldm/chroma/math.py @@ -0,0 +1,44 @@ +import torch +from einops import rearrange +from torch import Tensor + +from comfy.ldm.modules.attention import optimized_attention +import comfy.model_management + + +def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor: + q_shape = q.shape + k_shape = k.shape + + q = q.float().reshape(*q.shape[:-1], -1, 1, 2) + k = k.float().reshape(*k.shape[:-1], -1, 1, 2) + q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v) + k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v) + + heads = q.shape[1] + x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask) + return x + + +def rope(pos: Tensor, dim: int, theta: int) -> Tensor: + assert dim % 2 == 0 + if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled(): + device = torch.device("cpu") + else: + device = pos.device + + scale = torch.linspace(0, (dim - 2) / dim, steps=dim//2, dtype=torch.float64, device=device) + omega = 1.0 / (theta**scale) + out = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega) + out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1) + out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2) + return out.to(dtype=torch.float32, device=pos.device) + + +def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor): + xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) + xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) + xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] + xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] + return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) + diff --git a/comfy/ldm/chroma/model.py b/comfy/ldm/chroma/model.py new file mode 100644 index 000000000..636748fc5 --- /dev/null +++ b/comfy/ldm/chroma/model.py @@ -0,0 +1,271 @@ +#Original code can be found on: https://github.com/black-forest-labs/flux + +from dataclasses import dataclass + +import torch +from torch import Tensor, nn +from einops import rearrange, repeat +import comfy.ldm.common_dit + +from comfy.ldm.flux.layers import ( + EmbedND, + timestep_embedding, +) + +from .layers import ( + DoubleStreamBlock, + LastLayer, + SingleStreamBlock, + Approximator, + ChromaModulationOut, +) + + +@dataclass +class ChromaParams: + in_channels: int + out_channels: int + context_in_dim: int + hidden_size: int + mlp_ratio: float + num_heads: int + depth: int + depth_single_blocks: int + axes_dim: list + theta: int + patch_size: int + qkv_bias: bool + in_dim: int + out_dim: int + hidden_dim: int + n_layers: int + + + + +class Chroma(nn.Module): + """ + Transformer model for flow matching on sequences. 
+ """ + + def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs): + super().__init__() + self.dtype = dtype + params = ChromaParams(**kwargs) + self.params = params + self.patch_size = params.patch_size + self.in_channels = params.in_channels + self.out_channels = params.out_channels + if params.hidden_size % params.num_heads != 0: + raise ValueError( + f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}" + ) + pe_dim = params.hidden_size // params.num_heads + if sum(params.axes_dim) != pe_dim: + raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}") + self.hidden_size = params.hidden_size + self.num_heads = params.num_heads + self.in_dim = params.in_dim + self.out_dim = params.out_dim + self.hidden_dim = params.hidden_dim + self.n_layers = params.n_layers + self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim) + self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) + self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device) + # set as nn identity for now, will overwrite it later. + self.distilled_guidance_layer = Approximator( + in_dim=self.in_dim, + hidden_dim=self.hidden_dim, + out_dim=self.out_dim, + n_layers=self.n_layers, + dtype=dtype, device=device, operations=operations + ) + + + self.double_blocks = nn.ModuleList( + [ + DoubleStreamBlock( + self.hidden_size, + self.num_heads, + mlp_ratio=params.mlp_ratio, + qkv_bias=params.qkv_bias, + dtype=dtype, device=device, operations=operations + ) + for _ in range(params.depth) + ] + ) + + self.single_blocks = nn.ModuleList( + [ + SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations) + for _ in range(params.depth_single_blocks) + ] + ) + + if final_layer: + self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations) + + self.skip_mmdit = [] + self.skip_dit = [] + self.lite = False + + def get_modulations(self, tensor: torch.Tensor, block_type: str, *, idx: int = 0): + # This function slices up the modulations tensor which has the following layout: + # single : num_single_blocks * 3 elements + # double_img : num_double_blocks * 6 elements + # double_txt : num_double_blocks * 6 elements + # final : 2 elements + if block_type == "final": + return (tensor[:, -2:-1, :], tensor[:, -1:, :]) + single_block_count = self.params.depth_single_blocks + double_block_count = self.params.depth + offset = 3 * idx + if block_type == "single": + return ChromaModulationOut.from_offset(tensor, offset) + # Double block modulations are 6 elements so we double 3 * idx. + offset *= 2 + if block_type in {"double_img", "double_txt"}: + # Advance past the single block modulations. + offset += 3 * single_block_count + if block_type == "double_txt": + # Advance past the double block img modulations. 
+ offset += 6 * double_block_count + return ( + ChromaModulationOut.from_offset(tensor, offset), + ChromaModulationOut.from_offset(tensor, offset + 3), + ) + raise ValueError("Bad block_type") + + + def forward_orig( + self, + img: Tensor, + img_ids: Tensor, + txt: Tensor, + txt_ids: Tensor, + timesteps: Tensor, + guidance: Tensor = None, + control = None, + transformer_options={}, + attn_mask: Tensor = None, + ) -> Tensor: + patches_replace = transformer_options.get("patches_replace", {}) + if img.ndim != 3 or txt.ndim != 3: + raise ValueError("Input img and txt tensors must have 3 dimensions.") + + # running on sequences img + img = self.img_in(img) + + # distilled vector guidance + mod_index_length = 344 + distill_timestep = timestep_embedding(timesteps.detach().clone(), 16).to(img.device, img.dtype) + # guidance = guidance * + distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype) + + # get all modulation index + modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype) + # we need to broadcast the modulation index here so each batch has all of the index + modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype) + # and we need to broadcast timestep and guidance along too + timestep_guidance = torch.cat([distill_timestep, distil_guidance], dim=1).unsqueeze(1).repeat(1, mod_index_length, 1).to(img.dtype).to(img.device, img.dtype) + # then and only then we could concatenate it together + input_vec = torch.cat([timestep_guidance, modulation_index], dim=-1).to(img.device, img.dtype) + + mod_vectors = self.distilled_guidance_layer(input_vec) + + txt = self.txt_in(txt) + + ids = torch.cat((txt_ids, img_ids), dim=1) + pe = self.pe_embedder(ids) + + blocks_replace = patches_replace.get("dit", {}) + for i, block in enumerate(self.double_blocks): + if i not in self.skip_mmdit: + double_mod = ( + self.get_modulations(mod_vectors, "double_img", idx=i), + self.get_modulations(mod_vectors, "double_txt", idx=i), + ) + if ("double_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"], out["txt"] = block(img=args["img"], + txt=args["txt"], + vec=args["vec"], + pe=args["pe"], + attn_mask=args.get("attn_mask")) + return out + + out = blocks_replace[("double_block", i)]({"img": img, + "txt": txt, + "vec": double_mod, + "pe": pe, + "attn_mask": attn_mask}, + {"original_block": block_wrap}) + txt = out["txt"] + img = out["img"] + else: + img, txt = block(img=img, + txt=txt, + vec=double_mod, + pe=pe, + attn_mask=attn_mask) + + if control is not None: # Controlnet + control_i = control.get("input") + if i < len(control_i): + add = control_i[i] + if add is not None: + img += add + + img = torch.cat((txt, img), 1) + + for i, block in enumerate(self.single_blocks): + if i not in self.skip_dit: + single_mod = self.get_modulations(mod_vectors, "single", idx=i) + if ("single_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"] = block(args["img"], + vec=args["vec"], + pe=args["pe"], + attn_mask=args.get("attn_mask")) + return out + + out = blocks_replace[("single_block", i)]({"img": img, + "vec": single_mod, + "pe": pe, + "attn_mask": attn_mask}, + {"original_block": block_wrap}) + img = out["img"] + else: + img = block(img, vec=single_mod, pe=pe, attn_mask=attn_mask) + + if control is not None: # Controlnet + control_o = control.get("output") + if i < len(control_o): + add = control_o[i] + if add is not None: + img[:, txt.shape[1] :, ...] 
+= add + + img = img[:, txt.shape[1] :, ...] + final_mod = self.get_modulations(mod_vectors, "final") + img = self.final_layer(img, vec=final_mod) # (N, T, patch_size ** 2 * out_channels) + return img + + def forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs): + bs, c, h, w = x.shape + patch_size = 2 + x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size)) + + img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) + + h_len = ((h + (patch_size // 2)) // patch_size) + w_len = ((w + (patch_size // 2)) // patch_size) + img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype) + img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1) + img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0) + img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs) + + txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype) + out = self.forward_orig(img, img_ids, context, txt_ids, timestep, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None)) + return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:,:,:h,:w] diff --git a/comfy/lora.py b/comfy/lora.py index fff524be2..dbabd3335 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -252,7 +252,7 @@ def model_lora_keys_unet(model, key_map={}): key_lora = k[len("diffusion_model."):-len(".weight")] key_map["base_model.model.{}".format(key_lora)] = k #official hunyuan lora format - if isinstance(model, comfy.model_base.Flux): #Diffusers lora Flux + if isinstance(model, comfy.model_base.Flux) or isinstance(model, comfy.model_base.Chroma): #Diffusers lora Flux or a diffusers lora Chroma diffusers_keys = comfy.utils.flux_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.") for k in diffusers_keys: if k.endswith(".weight"): diff --git a/comfy/model_base.py b/comfy/model_base.py index d2aa4ce7a..1a06bb503 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -38,6 +38,7 @@ import comfy.ldm.lumina.model import comfy.ldm.wan.model import comfy.ldm.hunyuan3d.model import comfy.ldm.hidream.model +import comfy.ldm.chroma.model import comfy.model_management import comfy.patcher_extension @@ -1108,3 +1109,64 @@ class HiDream(BaseModel): if image_cond is not None: out['image_cond'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_cond)) return out + +class Chroma(BaseModel): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.chroma.model.Chroma) + + def concat_cond(self, **kwargs): + try: + #Handle Flux control loras dynamically changing the img_in weight. 
+ num_channels = self.diffusion_model.img_in.weight.shape[1] + except: + #Some cases like tensorrt might not have the weights accessible + num_channels = self.model_config.unet_config["in_channels"] + + out_channels = self.model_config.unet_config["out_channels"] + + if num_channels <= out_channels: + return None + + image = kwargs.get("concat_latent_image", None) + noise = kwargs.get("noise", None) + device = kwargs["device"] + + if image is None: + image = torch.zeros_like(noise) + + image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center") + image = utils.resize_to_batch_size(image, noise.shape[0]) + image = self.process_latent_in(image) + if num_channels <= out_channels * 2: + return image + + #inpaint model + mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None)) + if mask is None: + mask = torch.ones_like(noise)[:, :1] + + mask = torch.mean(mask, dim=1, keepdim=True) + mask = utils.common_upscale(mask.to(device), noise.shape[-1] * 8, noise.shape[-2] * 8, "bilinear", "center") + mask = mask.view(mask.shape[0], mask.shape[2] // 8, 8, mask.shape[3] // 8, 8).permute(0, 2, 4, 1, 3).reshape(mask.shape[0], -1, mask.shape[2] // 8, mask.shape[3] // 8) + mask = utils.resize_to_batch_size(mask, noise.shape[0]) + return torch.cat((image, mask), dim=1) + + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) + # upscale the attention mask, since now we + attention_mask = kwargs.get("attention_mask", None) + if attention_mask is not None: + shape = kwargs["noise"].shape + mask_ref_size = kwargs["attention_mask_img_shape"] + # the model will pad to the patch size, and then divide + # essentially dividing and rounding up + (h_tok, w_tok) = (math.ceil(shape[2] / self.diffusion_model.patch_size), math.ceil(shape[3] / self.diffusion_model.patch_size)) + attention_mask = utils.upscale_dit_mask(attention_mask, mask_ref_size, (h_tok, w_tok)) + out['attention_mask'] = comfy.conds.CONDRegular(attention_mask) + guidance = 0.0 + out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor((guidance,))) + return out diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 76de78a8a..daf6d04e7 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -154,6 +154,32 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["guidance_embed"] = len(guidance_keys) > 0 return dit_config + if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma + dit_config = {} + dit_config["image_model"] = "chroma" + dit_config["depth"] = 48 + dit_config["in_channels"] = 64 + patch_size = 2 + dit_config["patch_size"] = patch_size + in_key = "{}img_in.weight".format(key_prefix) + if in_key in state_dict_keys: + dit_config["in_channels"] = state_dict[in_key].shape[1] + dit_config["out_channels"] = 64 + dit_config["context_in_dim"] = 4096 + dit_config["hidden_size"] = 3072 + dit_config["mlp_ratio"] = 4.0 + dit_config["num_heads"] = 24 + dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.') + dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.') + dit_config["axes_dim"] = [16, 56, 56] + dit_config["theta"] = 10000 + dit_config["qkv_bias"] = True + 
dit_config["in_dim"] = 64 + dit_config["out_dim"] = 3072 + dit_config["hidden_dim"] = 5120 + dit_config["n_layers"] = 5 + return dit_config + if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and '{}img_in.weight'.format(key_prefix) in state_dict_keys: #Flux dit_config = {} dit_config["image_model"] = "flux" diff --git a/comfy/sd.py b/comfy/sd.py index 748f6c1ec..454b5929a 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -42,6 +42,7 @@ import comfy.text_encoders.cosmos import comfy.text_encoders.lumina2 import comfy.text_encoders.wan import comfy.text_encoders.hidream +import comfy.text_encoders.chroma import comfy.model_patcher import comfy.lora @@ -714,6 +715,7 @@ class CLIPType(Enum): LUMINA2 = 12 WAN = 13 HIDREAM = 14 + CHROMA = 15 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}): @@ -829,6 +831,9 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**t5xxl_detect(clip_data), clip_l=False, clip_g=False, t5=True, llama=False, dtype_llama=None, llama_scaled_fp8=None) clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer + elif clip_type == CLIPType.CHROMA: + clip_target.clip = comfy.text_encoders.chroma.chroma_te(**t5xxl_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.chroma.ChromaT5Tokenizer else: #CLIPType.MOCHI clip_target.clip = comfy.text_encoders.genmo.mochi_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.genmo.MochiT5Tokenizer diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 69bcee1f7..f03f2790e 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -17,6 +17,7 @@ import comfy.text_encoders.hunyuan_video import comfy.text_encoders.cosmos import comfy.text_encoders.lumina2 import comfy.text_encoders.wan +import comfy.text_encoders.chroma from . import supported_models_base from . 
import latent_formats @@ -1068,7 +1069,34 @@ class HiDream(supported_models_base.BASE): def clip_target(self, state_dict={}): return None # TODO +class Chroma(supported_models_base.BASE): + unet_config = { + "image_model": "chroma", + } -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream] + unet_extra_config = { + } + + sampling_settings = { + "multiplier": 1.0, + } + + latent_format = comfy.latent_formats.Flux + + memory_usage_factor = 3.2 + + supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.Chroma(self, device=device) + return out + + def clip_target(self, state_dict={}): + pref = self.text_encoder_key_prefix[0] + t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref)) + return supported_models_base.ClipTarget(comfy.text_encoders.chroma.ChromaTokenizer, comfy.text_encoders.chroma.chroma_te(**t5_detect)) + +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma] models += [SVD_img2vid] diff --git a/comfy/text_encoders/chroma.py b/comfy/text_encoders/chroma.py new file mode 100644 index 000000000..aa8dffb25 --- /dev/null +++ b/comfy/text_encoders/chroma.py @@ -0,0 +1,43 @@ +from comfy import sd1_clip +import comfy.text_encoders.t5 +import os +from transformers import T5TokenizerFast + + +class T5XXLModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=False, model_options={}): + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_config_xxl.json") + t5xxl_scaled_fp8 = model_options.get("t5xxl_scaled_fp8", None) + if t5xxl_scaled_fp8 is not None: + model_options = model_options.copy() + model_options["scaled_fp8"] = t5xxl_scaled_fp8 + + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, "pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + + +class ChromaT5XXL(sd1_clip.SD1ClipModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + super().__init__(device=device, dtype=dtype, name="t5xxl", clip_model=T5XXLModel, model_options=model_options) + + +class T5XXLTokenizer(sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 
"t5_tokenizer") + super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_data=tokenizer_data) + + +class ChromaT5Tokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5xxl", tokenizer=T5XXLTokenizer) + + +def chroma_te(dtype_t5=None, t5xxl_scaled_fp8=None): + class ChromaTEModel_(ChromaT5XXL): + def __init__(self, device="cpu", dtype=None, model_options={}): + if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options: + model_options = model_options.copy() + model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8 + if dtype is None: + dtype = dtype_t5 + super().__init__(device=device, dtype=dtype, model_options=model_options) + return ChromaTEModel_ diff --git a/comfy_extras/nodes_optimalsteps.py b/comfy_extras/nodes_optimalsteps.py index f6928199b..102958734 100644 --- a/comfy_extras/nodes_optimalsteps.py +++ b/comfy_extras/nodes_optimalsteps.py @@ -20,13 +20,14 @@ def loglinear_interp(t_steps, num_steps): NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0.8287, 0.5512, 0.2808, 0.001], "Wan":[1.0, 0.997, 0.995, 0.993, 0.991, 0.989, 0.987, 0.985, 0.98, 0.975, 0.973, 0.968, 0.96, 0.946, 0.927, 0.902, 0.864, 0.776, 0.539, 0.208, 0.001], +"Chroma": [0.9919999837875366, 0.9900000095367432, 0.9879999756813049, 0.9850000143051147, 0.9819999933242798, 0.9779999852180481, 0.9729999899864197, 0.9679999947547913, 0.9610000252723694, 0.953000009059906, 0.9430000185966492, 0.9309999942779541, 0.9169999957084656, 0.8999999761581421, 0.8809999823570251, 0.8579999804496765, 0.8320000171661377, 0.8019999861717224, 0.7689999938011169, 0.7310000061988831, 0.6899999976158142, 0.6460000276565552, 0.5989999771118164, 0.550000011920929, 0.5009999871253967, 0.45100000500679016, 0.4020000100135803, 0.35499998927116394, 0.3109999895095825, 0.27000001072883606, 0.23199999332427979, 0.19900000095367432, 0.16899999976158142, 0.14300000667572021, 0.11999999731779099, 0.10100000351667404, 0.08399999886751175, 0.07000000029802322, 0.057999998331069946, 0.04800000041723251, 0.0], } class OptimalStepsScheduler: @classmethod def INPUT_TYPES(s): return {"required": - {"model_type": (["FLUX", "Wan"], ), + {"model_type": (["FLUX", "Wan", "Chroma"], ), "steps": ("INT", {"default": 20, "min": 3, "max": 1000}), "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), } diff --git a/nodes.py b/nodes.py index 73a62d930..f2ced2c35 100644 --- a/nodes.py +++ b/nodes.py @@ -917,7 +917,7 @@ class CLIPLoader: @classmethod def INPUT_TYPES(s): return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ), - "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream"], ), + "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma"], ), }, "optional": { "device": (["default", "cpu"], {"advanced": True}), From 08ff5fa08a92e0b3f23b9abec979a830a6cffb03 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 30 Apr 2025 20:57:30 -0400 Subject: [PATCH 04/16] Cleanup chroma PR. 
--- comfy/ldm/chroma/layers.py | 2 +- comfy/ldm/chroma/math.py | 44 --------------------- comfy/lora.py | 2 +- comfy/model_base.py | 63 ++++-------------------------- comfy/model_detection.py | 41 ++++++------------- comfy/sd.py | 6 +-- comfy/supported_models.py | 3 +- comfy/text_encoders/chroma.py | 43 -------------------- comfy_extras/nodes_optimalsteps.py | 2 +- 9 files changed, 25 insertions(+), 181 deletions(-) delete mode 100644 comfy/ldm/chroma/math.py delete mode 100644 comfy/text_encoders/chroma.py diff --git a/comfy/ldm/chroma/layers.py b/comfy/ldm/chroma/layers.py index dd0b72f70..35da91ee2 100644 --- a/comfy/ldm/chroma/layers.py +++ b/comfy/ldm/chroma/layers.py @@ -1,7 +1,7 @@ import torch from torch import Tensor, nn -from .math import attention +from comfy.ldm.flux.math import attention from comfy.ldm.flux.layers import ( MLPEmbedder, RMSNorm, diff --git a/comfy/ldm/chroma/math.py b/comfy/ldm/chroma/math.py deleted file mode 100644 index 36b67931c..000000000 --- a/comfy/ldm/chroma/math.py +++ /dev/null @@ -1,44 +0,0 @@ -import torch -from einops import rearrange -from torch import Tensor - -from comfy.ldm.modules.attention import optimized_attention -import comfy.model_management - - -def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor: - q_shape = q.shape - k_shape = k.shape - - q = q.float().reshape(*q.shape[:-1], -1, 1, 2) - k = k.float().reshape(*k.shape[:-1], -1, 1, 2) - q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v) - k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v) - - heads = q.shape[1] - x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask) - return x - - -def rope(pos: Tensor, dim: int, theta: int) -> Tensor: - assert dim % 2 == 0 - if comfy.model_management.is_device_mps(pos.device) or comfy.model_management.is_intel_xpu() or comfy.model_management.is_directml_enabled(): - device = torch.device("cpu") - else: - device = pos.device - - scale = torch.linspace(0, (dim - 2) / dim, steps=dim//2, dtype=torch.float64, device=device) - omega = 1.0 / (theta**scale) - out = torch.einsum("...n,d->...nd", pos.to(dtype=torch.float32, device=device), omega) - out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1) - out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2) - return out.to(dtype=torch.float32, device=pos.device) - - -def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor): - xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) - xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) - xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] - xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] - return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) - diff --git a/comfy/lora.py b/comfy/lora.py index dbabd3335..fff524be2 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -252,7 +252,7 @@ def model_lora_keys_unet(model, key_map={}): key_lora = k[len("diffusion_model."):-len(".weight")] key_map["base_model.model.{}".format(key_lora)] = k #official hunyuan lora format - if isinstance(model, comfy.model_base.Flux) or isinstance(model, comfy.model_base.Chroma): #Diffusers lora Flux or a diffusers lora Chroma + if isinstance(model, comfy.model_base.Flux): #Diffusers lora Flux diffusers_keys = comfy.utils.flux_to_diffusers(model.model_config.unet_config, output_prefix="diffusion_model.") for k in diffusers_keys: if k.endswith(".weight"): diff --git 
a/comfy/model_base.py b/comfy/model_base.py index 1a06bb503..3d33086d8 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -787,8 +787,8 @@ class PixArt(BaseModel): return out class Flux(BaseModel): - def __init__(self, model_config, model_type=ModelType.FLUX, device=None): - super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.flux.model.Flux) + def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux): + super().__init__(model_config, model_type, device=device, unet_model=unet_model) def concat_cond(self, **kwargs): try: @@ -1110,63 +1110,14 @@ class HiDream(BaseModel): out['image_cond'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_cond)) return out -class Chroma(BaseModel): +class Chroma(Flux): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.chroma.model.Chroma) - def concat_cond(self, **kwargs): - try: - #Handle Flux control loras dynamically changing the img_in weight. - num_channels = self.diffusion_model.img_in.weight.shape[1] - except: - #Some cases like tensorrt might not have the weights accessible - num_channels = self.model_config.unet_config["in_channels"] - - out_channels = self.model_config.unet_config["out_channels"] - - if num_channels <= out_channels: - return None - - image = kwargs.get("concat_latent_image", None) - noise = kwargs.get("noise", None) - device = kwargs["device"] - - if image is None: - image = torch.zeros_like(noise) - - image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center") - image = utils.resize_to_batch_size(image, noise.shape[0]) - image = self.process_latent_in(image) - if num_channels <= out_channels * 2: - return image - - #inpaint model - mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None)) - if mask is None: - mask = torch.ones_like(noise)[:, :1] - - mask = torch.mean(mask, dim=1, keepdim=True) - mask = utils.common_upscale(mask.to(device), noise.shape[-1] * 8, noise.shape[-2] * 8, "bilinear", "center") - mask = mask.view(mask.shape[0], mask.shape[2] // 8, 8, mask.shape[3] // 8, 8).permute(0, 2, 4, 1, 3).reshape(mask.shape[0], -1, mask.shape[2] // 8, mask.shape[3] // 8) - mask = utils.resize_to_batch_size(mask, noise.shape[0]) - return torch.cat((image, mask), dim=1) - - def extra_conds(self, **kwargs): out = super().extra_conds(**kwargs) - cross_attn = kwargs.get("cross_attn", None) - if cross_attn is not None: - out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) - # upscale the attention mask, since now we - attention_mask = kwargs.get("attention_mask", None) - if attention_mask is not None: - shape = kwargs["noise"].shape - mask_ref_size = kwargs["attention_mask_img_shape"] - # the model will pad to the patch size, and then divide - # essentially dividing and rounding up - (h_tok, w_tok) = (math.ceil(shape[2] / self.diffusion_model.patch_size), math.ceil(shape[3] / self.diffusion_model.patch_size)) - attention_mask = utils.upscale_dit_mask(attention_mask, mask_ref_size, (h_tok, w_tok)) - out['attention_mask'] = comfy.conds.CONDRegular(attention_mask) - guidance = 0.0 - out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor((guidance,))) + + guidance = kwargs.get("guidance", 0) + if guidance is not None: + out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance])) return out diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 
daf6d04e7..9254843ea 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -154,32 +154,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["guidance_embed"] = len(guidance_keys) > 0 return dit_config - if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma - dit_config = {} - dit_config["image_model"] = "chroma" - dit_config["depth"] = 48 - dit_config["in_channels"] = 64 - patch_size = 2 - dit_config["patch_size"] = patch_size - in_key = "{}img_in.weight".format(key_prefix) - if in_key in state_dict_keys: - dit_config["in_channels"] = state_dict[in_key].shape[1] - dit_config["out_channels"] = 64 - dit_config["context_in_dim"] = 4096 - dit_config["hidden_size"] = 3072 - dit_config["mlp_ratio"] = 4.0 - dit_config["num_heads"] = 24 - dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.') - dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.') - dit_config["axes_dim"] = [16, 56, 56] - dit_config["theta"] = 10000 - dit_config["qkv_bias"] = True - dit_config["in_dim"] = 64 - dit_config["out_dim"] = 3072 - dit_config["hidden_dim"] = 5120 - dit_config["n_layers"] = 5 - return dit_config - if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and '{}img_in.weight'.format(key_prefix) in state_dict_keys: #Flux dit_config = {} dit_config["image_model"] = "flux" @@ -190,7 +164,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): if in_key in state_dict_keys: dit_config["in_channels"] = state_dict[in_key].shape[1] // (patch_size * patch_size) dit_config["out_channels"] = 16 - dit_config["vec_in_dim"] = 768 + vec_in_key = '{}vector_in.in_layer.weight'.format(key_prefix) + if vec_in_key in state_dict_keys: + dit_config["vec_in_dim"] = state_dict[vec_in_key].shape[1] dit_config["context_in_dim"] = 4096 dit_config["hidden_size"] = 3072 dit_config["mlp_ratio"] = 4.0 @@ -200,7 +176,16 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["axes_dim"] = [16, 56, 56] dit_config["theta"] = 10000 dit_config["qkv_bias"] = True - dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys + if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma + dit_config["image_model"] = "chroma" + dit_config["in_channels"] = 64 + dit_config["out_channels"] = 64 + dit_config["in_dim"] = 64 + dit_config["out_dim"] = 3072 + dit_config["hidden_dim"] = 5120 + dit_config["n_layers"] = 5 + else: + dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys return dit_config if '{}t5_yproj.weight'.format(key_prefix) in state_dict_keys: #Genmo mochi preview diff --git a/comfy/sd.py b/comfy/sd.py index 454b5929a..da9b36d0e 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -42,7 +42,6 @@ import comfy.text_encoders.cosmos import comfy.text_encoders.lumina2 import comfy.text_encoders.wan import comfy.text_encoders.hidream -import comfy.text_encoders.chroma import comfy.model_patcher import comfy.lora @@ -820,7 +819,7 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip elif clip_type == CLIPType.LTXV: clip_target.clip = 
comfy.text_encoders.lt.ltxv_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.lt.LTXVT5Tokenizer - elif clip_type == CLIPType.PIXART: + elif clip_type == CLIPType.PIXART or clip_type == CLIPType.CHROMA: clip_target.clip = comfy.text_encoders.pixart_t5.pixart_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.pixart_t5.PixArtTokenizer elif clip_type == CLIPType.WAN: @@ -831,9 +830,6 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**t5xxl_detect(clip_data), clip_l=False, clip_g=False, t5=True, llama=False, dtype_llama=None, llama_scaled_fp8=None) clip_target.tokenizer = comfy.text_encoders.hidream.HiDreamTokenizer - elif clip_type == CLIPType.CHROMA: - clip_target.clip = comfy.text_encoders.chroma.chroma_te(**t5xxl_detect(clip_data)) - clip_target.tokenizer = comfy.text_encoders.chroma.ChromaT5Tokenizer else: #CLIPType.MOCHI clip_target.clip = comfy.text_encoders.genmo.mochi_te(**t5xxl_detect(clip_data)) clip_target.tokenizer = comfy.text_encoders.genmo.MochiT5Tokenizer diff --git a/comfy/supported_models.py b/comfy/supported_models.py index f03f2790e..d5210cfac 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -17,7 +17,6 @@ import comfy.text_encoders.hunyuan_video import comfy.text_encoders.cosmos import comfy.text_encoders.lumina2 import comfy.text_encoders.wan -import comfy.text_encoders.chroma from . import supported_models_base from . import latent_formats @@ -1095,7 +1094,7 @@ class Chroma(supported_models_base.BASE): def clip_target(self, state_dict={}): pref = self.text_encoder_key_prefix[0] t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref)) - return supported_models_base.ClipTarget(comfy.text_encoders.chroma.ChromaTokenizer, comfy.text_encoders.chroma.chroma_te(**t5_detect)) + return supported_models_base.ClipTarget(comfy.text_encoders.pixart_t5.PixArtTokenizer, comfy.text_encoders.pixart_t5.pixart_te(**t5_detect)) models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma] diff --git a/comfy/text_encoders/chroma.py b/comfy/text_encoders/chroma.py deleted file mode 100644 index aa8dffb25..000000000 --- a/comfy/text_encoders/chroma.py +++ /dev/null @@ -1,43 +0,0 @@ -from comfy import sd1_clip -import comfy.text_encoders.t5 -import os -from transformers import T5TokenizerFast - - -class T5XXLModel(sd1_clip.SDClipModel): - def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=False, model_options={}): - textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_config_xxl.json") - t5xxl_scaled_fp8 = model_options.get("t5xxl_scaled_fp8", None) - if t5xxl_scaled_fp8 is not None: - model_options = model_options.copy() - model_options["scaled_fp8"] = t5xxl_scaled_fp8 - - super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, special_tokens={"end": 1, 
"pad": 0}, model_class=comfy.text_encoders.t5.T5, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) - - -class ChromaT5XXL(sd1_clip.SD1ClipModel): - def __init__(self, device="cpu", dtype=None, model_options={}): - super().__init__(device=device, dtype=dtype, name="t5xxl", clip_model=T5XXLModel, model_options=model_options) - - -class T5XXLTokenizer(sd1_clip.SDTokenizer): - def __init__(self, embedding_directory=None, tokenizer_data={}): - tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_tokenizer") - super().__init__(tokenizer_path, embedding_directory=embedding_directory, pad_with_end=False, embedding_size=4096, embedding_key='t5xxl', tokenizer_class=T5TokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_data=tokenizer_data) - - -class ChromaT5Tokenizer(sd1_clip.SD1Tokenizer): - def __init__(self, embedding_directory=None, tokenizer_data={}): - super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, clip_name="t5xxl", tokenizer=T5XXLTokenizer) - - -def chroma_te(dtype_t5=None, t5xxl_scaled_fp8=None): - class ChromaTEModel_(ChromaT5XXL): - def __init__(self, device="cpu", dtype=None, model_options={}): - if t5xxl_scaled_fp8 is not None and "t5xxl_scaled_fp8" not in model_options: - model_options = model_options.copy() - model_options["t5xxl_scaled_fp8"] = t5xxl_scaled_fp8 - if dtype is None: - dtype = dtype_t5 - super().__init__(device=device, dtype=dtype, model_options=model_options) - return ChromaTEModel_ diff --git a/comfy_extras/nodes_optimalsteps.py b/comfy_extras/nodes_optimalsteps.py index 102958734..e7c851ca2 100644 --- a/comfy_extras/nodes_optimalsteps.py +++ b/comfy_extras/nodes_optimalsteps.py @@ -20,7 +20,7 @@ def loglinear_interp(t_steps, num_steps): NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0.8287, 0.5512, 0.2808, 0.001], "Wan":[1.0, 0.997, 0.995, 0.993, 0.991, 0.989, 0.987, 0.985, 0.98, 0.975, 0.973, 0.968, 0.96, 0.946, 0.927, 0.902, 0.864, 0.776, 0.539, 0.208, 0.001], -"Chroma": [0.9919999837875366, 0.9900000095367432, 0.9879999756813049, 0.9850000143051147, 0.9819999933242798, 0.9779999852180481, 0.9729999899864197, 0.9679999947547913, 0.9610000252723694, 0.953000009059906, 0.9430000185966492, 0.9309999942779541, 0.9169999957084656, 0.8999999761581421, 0.8809999823570251, 0.8579999804496765, 0.8320000171661377, 0.8019999861717224, 0.7689999938011169, 0.7310000061988831, 0.6899999976158142, 0.6460000276565552, 0.5989999771118164, 0.550000011920929, 0.5009999871253967, 0.45100000500679016, 0.4020000100135803, 0.35499998927116394, 0.3109999895095825, 0.27000001072883606, 0.23199999332427979, 0.19900000095367432, 0.16899999976158142, 0.14300000667572021, 0.11999999731779099, 0.10100000351667404, 0.08399999886751175, 0.07000000029802322, 0.057999998331069946, 0.04800000041723251, 0.0], +"Chroma": [0.992, 0.99, 0.988, 0.985, 0.982, 0.978, 0.973, 0.968, 0.961, 0.953, 0.943, 0.931, 0.917, 0.9, 0.881, 0.858, 0.832, 0.802, 0.769, 0.731, 0.69, 0.646, 0.599, 0.55, 0.501, 0.451, 0.402, 0.355, 0.311, 0.27, 0.232, 0.199, 0.169, 0.143, 0.12, 0.101, 0.084, 0.07, 0.058, 0.048, 0.001], } class OptimalStepsScheduler: From c6c19e99803a2538cee17e2f94314f77b7d48e98 Mon Sep 17 00:00:00 2001 From: Christian Byrne Date: Thu, 1 May 2025 00:24:32 -0700 Subject: [PATCH 05/16] fix bug (#7894) --- comfy_api_nodes/apis/client.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py index 384e559dc..d3cd9ad2f 100644 --- a/comfy_api_nodes/apis/client.py +++ b/comfy_api_nodes/apis/client.py @@ -297,6 +297,10 @@ class SynchronousOperation(Generic[T, R]): # Convert request model to dict, but use None for EmptyRequest request_dict = None if isinstance(self.request, EmptyRequest) else self.request.model_dump(exclude_none=True) + if request_dict: + for key, value in request_dict.items(): + if isinstance(value, Enum): + request_dict[key] = value.value # Debug log for request logging.debug(f"[DEBUG] API Request: {self.endpoint.method.value} {self.endpoint.path}") From aa9d759df36faa2e34cc5722463749a09a7f529b Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Thu, 1 May 2025 03:33:42 -0700 Subject: [PATCH 06/16] Switch ltxv to use the pytorch RMSNorm. (#7897) --- comfy/ldm/lightricks/model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/comfy/ldm/lightricks/model.py b/comfy/ldm/lightricks/model.py index 6e8e06181..056e101a4 100644 --- a/comfy/ldm/lightricks/model.py +++ b/comfy/ldm/lightricks/model.py @@ -1,7 +1,6 @@ import torch from torch import nn import comfy.ldm.modules.attention -from comfy.ldm.genmo.joint_model.layers import RMSNorm import comfy.ldm.common_dit from einops import rearrange import math @@ -262,8 +261,8 @@ class CrossAttention(nn.Module): self.heads = heads self.dim_head = dim_head - self.q_norm = RMSNorm(inner_dim, dtype=dtype, device=device) - self.k_norm = RMSNorm(inner_dim, dtype=dtype, device=device) + self.q_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device) + self.k_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device) self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device) self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device) From 6d32dc049e676a1373dddc4fd912f2ba1311d4cb Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Thu, 1 May 2025 10:57:54 -0400 Subject: [PATCH 07/16] Update frontend to v1.18 (#7898) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f64a05947..74a4ceb02 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.17.11 +comfyui-frontend-package==1.18.5 comfyui-workflow-templates==0.1.3 torch torchsde From 8d0661d0ba35fd54a72e4b49f68d4625febafc10 Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Thu, 1 May 2025 19:32:04 -0400 Subject: [PATCH 08/16] Lint instance methods (#7903) --- comfy_extras/nodes_post_processing.py | 1 + comfy_extras/nodes_webcam.py | 2 +- pyproject.toml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index 5b9542015..cb1a0d883 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -141,6 +141,7 @@ class Quantize: CATEGORY = "image/postprocessing" + @staticmethod def bayer(im, pal_im, order): def normalized_bayer_matrix(n): if n == 0: diff --git a/comfy_extras/nodes_webcam.py b/comfy_extras/nodes_webcam.py index 31eddb2d6..062b15cf8 100644 --- a/comfy_extras/nodes_webcam.py +++ b/comfy_extras/nodes_webcam.py @@ -20,7 +20,7 @@ class WebcamCapture(nodes.LoadImage): CATEGORY = "image" - def load_capture(s, image, **kwargs): + def load_capture(self, image, **kwargs): return 
return super().load_image(folder_paths.get_annotated_filepath(image)) diff --git a/pyproject.toml b/pyproject.toml index eadca662e..a9c028c7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ documentation = "https://docs.comfy.org/" [tool.ruff] lint.select = [ + "N805", # invalid-first-argument-name-for-method "S307", # suspicious-eval-usage "S102", # exec "T", # print-usage From ff99861650a195234f2858b864663920ca811c55 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 2 May 2025 02:15:32 -0700 Subject: [PATCH 09/16] Make clipsave work with more TE models. (#7908) --- comfy_extras/nodes_model_merging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy_extras/nodes_model_merging.py b/comfy_extras/nodes_model_merging.py index 78d284889..f20beab7d 100644 --- a/comfy_extras/nodes_model_merging.py +++ b/comfy_extras/nodes_model_merging.py @@ -276,7 +276,7 @@ class CLIPSave: comfy.model_management.load_models_gpu([clip.load_model()], force_patch_weights=True) clip_sd = clip.get_sd() - for prefix in ["clip_l.", "clip_g.", ""]: + for prefix in ["clip_l.", "clip_g.", "clip_h.", "t5xxl.", "pile_t5xl.", "mt5xl.", "umt5xxl.", "t5base.", "gemma2_2b.", "llama.", "hydit_clip.", ""]: k = list(filter(lambda a: a.startswith(prefix), clip_sd.keys())) current_clip_sd = {} for x in k:
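PATCH 10 below adds an ExtendIntermediateSigmas node whose core is an eased spacing computed once and reused between every qualifying pair of adjacent sigmas. A condensed sketch of that interpolation with hypothetical values (note that, per the lambdas in the diff, the node's 'cosine' option is a sin() ease-out and 'sine' is a cos() ease-in):

import math
import torch

sigma_current, sigma_next = torch.tensor(14.6), torch.tensor(8.0)  # hypothetical adjacent sigmas
steps = 4
x = torch.linspace(0, 1, steps + 1)[1:-1]  # interior points only, as in the node
eased = torch.sin(x * math.pi / 2)         # the node's 'cosine' spacing
print(eased * (sigma_next - sigma_current) + sigma_current)
# approximately tensor([12.0743, 9.9331, 8.5024]): three new sigmas easing toward 8.0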
From 551fe8dceebb07abed486580d8326a6202d0cf7a Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Fri, 2 May 2025 05:28:05 -0400 Subject: [PATCH 10/16] Add node to extend sigmas (#7901) * Add ExpandSigmas node * Rename, add interpolation functions Co-authored-by: liesen * Move computed interpolation outside loop * Add type hints --------- Co-authored-by: liesen --- comfy_extras/nodes_custom_sampler.py | 51 ++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index c9689b745..3e5be3d3c 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -1,3 +1,4 @@ +import math import comfy.samplers import comfy.sample from comfy.k_diffusion import sampling as k_diffusion_sampling @@ -249,6 +250,55 @@ class SetFirstSigma: sigmas[0] = sigma return (sigmas, ) +class ExtendIntermediateSigmas: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"sigmas": ("SIGMAS", ), + "steps": ("INT", {"default": 2, "min": 1, "max": 100}), + "start_at_sigma": ("FLOAT", {"default": -1.0, "min": -1.0, "max": 20000.0, "step": 0.01, "round": False}), + "end_at_sigma": ("FLOAT", {"default": 12.0, "min": 0.0, "max": 20000.0, "step": 0.01, "round": False}), + "spacing": (['linear', 'cosine', 'sine'],), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/sigmas" + + FUNCTION = "extend" + + def extend(self, sigmas: torch.Tensor, steps: int, start_at_sigma: float, end_at_sigma: float, spacing: str): + if start_at_sigma < 0: + start_at_sigma = float("inf") + + interpolator = { + 'linear': lambda x: x, + 'cosine': lambda x: torch.sin(x*math.pi/2), + 'sine': lambda x: 1 - torch.cos(x*math.pi/2) + }[spacing] + + # linear space for our interpolation function + x = torch.linspace(0, 1, steps + 1, device=sigmas.device)[1:-1] + computed_spacing = interpolator(x) + + extended_sigmas = [] + for i in range(len(sigmas) - 1): + sigma_current = sigmas[i] + sigma_next = sigmas[i+1] + + extended_sigmas.append(sigma_current) + + if end_at_sigma <= sigma_current <= start_at_sigma: + interpolated_steps = computed_spacing * (sigma_next - sigma_current) + sigma_current + extended_sigmas.extend(interpolated_steps.tolist()) + + # Add the last sigma value + if len(sigmas) > 0: + extended_sigmas.append(sigmas[-1]) + + extended_sigmas = torch.FloatTensor(extended_sigmas) + + return (extended_sigmas,) + class KSamplerSelect: @classmethod def INPUT_TYPES(s): @@ -735,6 +785,7 @@ NODE_CLASS_MAPPINGS = { "SplitSigmasDenoise": SplitSigmasDenoise, "FlipSigmas": FlipSigmas, "SetFirstSigma": SetFirstSigma, + "ExtendIntermediateSigmas": ExtendIntermediateSigmas, "CFGGuider": CFGGuider, "DualCFGGuider": DualCFGGuider, From d9a87c1e6a390eb0e915b544e35baf4d807919db Mon Sep 17 00:00:00 2001 From: catboxanon <122327233+catboxanon@users.noreply.github.com> Date: Fri, 2 May 2025 05:28:27 -0400 Subject: [PATCH 11/16] Fix outdated comment about Internet connectivity (#7827) --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index f3f56597a..5c21542b3 100644 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ import logging import sys if __name__ == "__main__": - #NOTE: These do not do anything on core ComfyUI which should already have no communication with the internet, they are for custom nodes. + #NOTE: These do not do anything on core ComfyUI, they are for custom nodes. os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1' os.environ['DO_NOT_TRACK'] = '1' From 2ab9618732b738b52c96ae128597371b9b9fff96 Mon Sep 17 00:00:00 2001 From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> Date: Sat, 3 May 2025 01:12:37 +0800 Subject: [PATCH 12/16] Fix the bugs in OFT/BOFT module (#7909) * Correct calculate_weight and load for OFT * Correct calculate_weight and loading for BOFT --- comfy/weight_adapter/boft.py | 34 +++++++++++++++++----------------- comfy/weight_adapter/oft.py | 20 +++++++++++--------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/comfy/weight_adapter/boft.py b/comfy/weight_adapter/boft.py index c85adc7ab..b2a2f1bd4 100644 --- a/comfy/weight_adapter/boft.py +++ b/comfy/weight_adapter/boft.py @@ -24,7 +24,7 @@ class BOFTAdapter(WeightAdapterBase): ) -> Optional["BOFTAdapter"]: if loaded_keys is None: loaded_keys = set() - blocks_name = "{}.boft_blocks".format(x) + blocks_name = "{}.oft_blocks".format(x) rescale_name = "{}.rescale".format(x) blocks = None @@ -32,17 +32,18 @@ class BOFTAdapter(WeightAdapterBase): blocks = lora[blocks_name] if blocks.ndim == 4: loaded_keys.add(blocks_name) + else: + blocks = None + if blocks is None: + return None rescale = None if rescale_name in lora.keys(): rescale = lora[rescale_name] loaded_keys.add(rescale_name) - if blocks is not None: - weights = (blocks, rescale, alpha, dora_scale) - return cls(loaded_keys, weights) - else: - return None + weights = (blocks, rescale, alpha, dora_scale) + return cls(loaded_keys, weights) def calculate_weight( self, @@ -71,7 +72,7 @@ class BOFTAdapter(WeightAdapterBase): # Get r I = torch.eye(boft_b, device=blocks.device, dtype=blocks.dtype) # for Q = -Q^T - q = blocks - blocks.transpose(1, 2) + q = blocks - blocks.transpose(-1, -2) normed_q = q if alpha > 0: # alpha in boft/bboft is for constraint q_norm = torch.norm(q) + 1e-8 @@ -79,9 +80,8 @@ class BOFTAdapter(WeightAdapterBase): normed_q = q * alpha / q_norm # use float() to prevent unsupported type in .inverse() r = (I + normed_q) @ (I - normed_q).float().inverse() - r = r.to(original_weight) - - inp = org = original_weight + r = r.to(weight) + inp = org = weight r_b = boft_b//2
for i in range(boft_m): @@ -91,14 +91,14 @@ class BOFTAdapter(WeightAdapterBase): if strength != 1: bi = bi * strength + (1-strength) * I inp = ( - inp.unflatten(-1, (-1, g, k)) - .transpose(-2, -1) - .flatten(-3) - .unflatten(-1, (-1, boft_b)) + inp.unflatten(0, (-1, g, k)) + .transpose(1, 2) + .flatten(0, 2) + .unflatten(0, (-1, boft_b)) ) - inp = torch.einsum("b n m, b n ... -> b m ...", inp, bi) + inp = torch.einsum("b i j, b j ...-> b i ...", bi, inp) inp = ( - inp.flatten(-2).unflatten(-1, (-1, k, g)).transpose(-2, -1).flatten(-3) + inp.flatten(0, 1).unflatten(0, (-1, k, g)).transpose(1, 2).flatten(0, 2) ) if rescale is not None: @@ -109,7 +109,7 @@ class BOFTAdapter(WeightAdapterBase): if dora_scale is not None: weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + weight += function((strength * lora_diff).type(weight.dtype)) except Exception as e: logging.error("ERROR {} {} {}".format(self.name, key, e)) return weight diff --git a/comfy/weight_adapter/oft.py b/comfy/weight_adapter/oft.py index 0ea229b79..25009eca3 100644 --- a/comfy/weight_adapter/oft.py +++ b/comfy/weight_adapter/oft.py @@ -32,17 +32,18 @@ class OFTAdapter(WeightAdapterBase): blocks = lora[blocks_name] if blocks.ndim == 3: loaded_keys.add(blocks_name) + else: + blocks = None + if blocks is None: + return None rescale = None if rescale_name in lora.keys(): rescale = lora[rescale_name] loaded_keys.add(rescale_name) - if blocks is not None: - weights = (blocks, rescale, alpha, dora_scale) - return cls(loaded_keys, weights) - else: - return None + weights = (blocks, rescale, alpha, dora_scale) + return cls(loaded_keys, weights) def calculate_weight( self, @@ -79,16 +80,17 @@ class OFTAdapter(WeightAdapterBase): normed_q = q * alpha / q_norm # use float() to prevent unsupported type in .inverse() r = (I + normed_q) @ (I - normed_q).float().inverse() - r = r.to(original_weight) + r = r.to(weight) + _, *shape = weight.shape lora_diff = torch.einsum( "k n m, k n ... -> k m ...", (r * strength) - strength * I, - original_weight, - ) + weight.view(block_num, block_size, *shape), + ).view(-1, *shape) if dora_scale is not None: weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + weight += function((strength * lora_diff).type(weight.dtype)) except Exception as e: logging.error("ERROR {} {} {}".format(self.name, key, e)) return weight
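Worth noting why the (I + normed_q) @ (I - normed_q).inverse() construction in both adapters is safe: for a skew-symmetric q (q = -q^T, as built above), this Cayley transform always yields an orthogonal matrix, so the OFT/BOFT update rotates weights without rescaling them; dropping the extra alpha factor from the final weight update is consistent with alpha acting only as a norm constraint on normed_q. A standalone check of the orthogonality property (illustrative code, not part of ComfyUI):

import torch

blocks = torch.randn(8, 8)
q = blocks - blocks.T                    # skew-symmetric, mirroring both adapters
I = torch.eye(8)
r = (I + q) @ torch.linalg.inv(I - q)    # Cayley transform
print(torch.allclose(r @ r.T, I, atol=1e-4))  # True: r is orthogonal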
-> k m ...", (r * strength) - strength * I, - original_weight, - ) + weight.view(block_num, block_size, *shape), + ).view(-1, *shape) if dora_scale is not None: weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function) else: - weight += function(((strength * alpha) * lora_diff).type(weight.dtype)) + weight += function((strength * lora_diff).type(weight.dtype)) except Exception as e: logging.error("ERROR {} {} {}".format(self.name, key, e)) return weight From 530494588d9e58a8bcaf55b471f4c3ce2b97ce65 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 2 May 2025 13:14:52 -0400 Subject: [PATCH 13/16] [BugFix] Update frontend 1.18.6 (#7910) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 74a4ceb02..05ceba00a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.18.5 +comfyui-frontend-package==1.18.6 comfyui-workflow-templates==0.1.3 torch torchsde From 065d855f14968406051a1340e3f2f26461a00e5d Mon Sep 17 00:00:00 2001 From: Terry Jia Date: Fri, 2 May 2025 13:15:54 -0400 Subject: [PATCH 14/16] upstream Preview Any from rgthree-comfy (#7815) * upstream Preview Any from rgthree-comfy * use IO.ANY --- comfy_extras/nodes_preview_any.py | 43 +++++++++++++++++++++++++++++++ nodes.py | 1 + 2 files changed, 44 insertions(+) create mode 100644 comfy_extras/nodes_preview_any.py diff --git a/comfy_extras/nodes_preview_any.py b/comfy_extras/nodes_preview_any.py new file mode 100644 index 000000000..e6805696f --- /dev/null +++ b/comfy_extras/nodes_preview_any.py @@ -0,0 +1,43 @@ +import json +from comfy.comfy_types.node_typing import IO + +# Preview Any - original implement from +# https://github.com/rgthree/rgthree-comfy/blob/main/py/display_any.py +# upstream requested in https://github.com/Kosinkadink/rfcs/blob/main/rfcs/0000-corenodes.md#preview-nodes +class PreviewAny(): + @classmethod + def INPUT_TYPES(cls): + return { + "required": {"source": (IO.ANY, {})}, + } + + RETURN_TYPES = () + FUNCTION = "main" + OUTPUT_NODE = True + + CATEGORY = "utils" + + def main(self, source=None): + value = 'None' + if isinstance(source, str): + value = source + elif isinstance(source, (int, float, bool)): + value = str(source) + elif source is not None: + try: + value = json.dumps(source) + except Exception: + try: + value = str(source) + except Exception: + value = 'source exists, but could not be serialized.' + + return {"ui": {"text": (value,)}} + +NODE_CLASS_MAPPINGS = { + "PreviewAny": PreviewAny, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PreviewAny": "Preview Any", +} diff --git a/nodes.py b/nodes.py index f2ced2c35..92b8ca6ae 100644 --- a/nodes.py +++ b/nodes.py @@ -2258,6 +2258,7 @@ def init_builtin_extra_nodes(): "nodes_optimalsteps.py", "nodes_hidream.py", "nodes_fresca.py", + "nodes_preview_any.py", ] api_nodes_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_api_nodes") From 486ad8fdc55495a705a211c22cc50bec9ec95643 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 2 May 2025 21:28:10 -0700 Subject: [PATCH 15/16] Fix updater issue with newer portable. 
From 486ad8fdc55495a705a211c22cc50bec9ec95643 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Fri, 2 May 2025 21:28:10 -0700 Subject: [PATCH 15/16] Fix updater issue with newer portable. (#7917) --- .ci/update_windows/update.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.ci/update_windows/update.py b/.ci/update_windows/update.py index 731b6bc53..51a263203 100755 --- a/.ci/update_windows/update.py +++ b/.ci/update_windows/update.py @@ -63,7 +63,12 @@ except: print("checking out master branch") # noqa: T201 branch = repo.lookup_branch('master') if branch is None: - ref = repo.lookup_reference('refs/remotes/origin/master') + try: + ref = repo.lookup_reference('refs/remotes/origin/master') + except: + print("pulling.") # noqa: T201 + pull(repo) + ref = repo.lookup_reference('refs/remotes/origin/master') repo.checkout(ref) branch = repo.lookup_branch('master') if branch is None: From 7689917113fe521adfaba2a4fff952ef1805ad2b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 3 May 2025 00:34:01 -0400 Subject: [PATCH 16/16] ComfyUI version 0.3.31 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index 67d27f942..8d2068de7 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.30" +__version__ = "0.3.31" diff --git a/pyproject.toml b/pyproject.toml index a9c028c7e..8b549a0b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.30" +version = "0.3.31" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9"
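For context on the PATCH 15 fix above: on newer portable builds the freshly created clone can lack a resolvable refs/remotes/origin/master, so the first lookup_reference raises and the updater now pulls before retrying. A condensed pygit2 sketch of that flow (the path is hypothetical, update.py's own pull() helper is approximated with a plain fetch, and the patch's bare except is narrowed here to KeyError, which is what pygit2 raises for a missing reference):

import pygit2

repo = pygit2.Repository("./ComfyUI")  # hypothetical clone path
if repo.lookup_branch("master") is None:
    try:
        ref = repo.lookup_reference("refs/remotes/origin/master")
    except KeyError:  # ref missing on a fresh clone
        repo.remotes["origin"].fetch()  # stand-in for update.py's pull(repo)
        ref = repo.lookup_reference("refs/remotes/origin/master")
    repo.checkout(ref)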