mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2025-12-18 02:23:06 +08:00)

Merge branch 'master' into dr-support-pip-cm

This commit is contained in:
commit 28d23a7813
@@ -63,7 +63,12 @@ except:
 print("checking out master branch") # noqa: T201
 branch = repo.lookup_branch('master')
 if branch is None:
-    ref = repo.lookup_reference('refs/remotes/origin/master')
+    try:
+        ref = repo.lookup_reference('refs/remotes/origin/master')
+    except:
+        print("pulling.") # noqa: T201
+        pull(repo)
+        ref = repo.lookup_reference('refs/remotes/origin/master')
     repo.checkout(ref)
 branch = repo.lookup_branch('master')
 if branch is None:
.github/workflows/stable-release.yml (vendored, 2 changes)

@@ -91,6 +91,8 @@ jobs:
           cd ComfyUI_windows_portable
           python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu
 
+          python_embeded/python.exe -s ./update/update.py ComfyUI/
+
           ls
 
       - name: Upload binaries to release

@@ -88,6 +88,8 @@ jobs:
           cd ComfyUI_windows_portable
           python_embeded/python.exe -s ComfyUI/main.py --quick-test-for-ci --cpu
 
+          python_embeded/python.exe -s ./update/update.py ComfyUI/
+
           ls
 
       - name: Upload binaries to release
comfy/ldm/chroma/layers.py (new file, 183 lines)

@@ -0,0 +1,183 @@
import torch
from torch import Tensor, nn

from comfy.ldm.flux.math import attention
from comfy.ldm.flux.layers import (
    MLPEmbedder,
    RMSNorm,
    QKNorm,
    SelfAttention,
    ModulationOut,
)


class ChromaModulationOut(ModulationOut):
    @classmethod
    def from_offset(cls, tensor: torch.Tensor, offset: int = 0) -> ModulationOut:
        return cls(
            shift=tensor[:, offset : offset + 1, :],
            scale=tensor[:, offset + 1 : offset + 2, :],
            gate=tensor[:, offset + 2 : offset + 3, :],
        )


class Approximator(nn.Module):
    def __init__(self, in_dim: int, out_dim: int, hidden_dim: int, n_layers=5, dtype=None, device=None, operations=None):
        super().__init__()
        self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device)
        self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range(n_layers)])
        self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range(n_layers)])
        self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device)

    @property
    def device(self):
        # Get the device of the module (assumes all parameters are on the same device)
        return next(self.parameters()).device

    def forward(self, x: Tensor) -> Tensor:
        x = self.in_proj(x)

        for layer, norms in zip(self.layers, self.norms):
            x = x + layer(norms(x))

        x = self.out_proj(x)

        return x


class DoubleStreamBlock(nn.Module):
    def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None):
        super().__init__()

        mlp_hidden_dim = int(hidden_size * mlp_ratio)
        self.num_heads = num_heads
        self.hidden_size = hidden_size
        self.img_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)

        self.img_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.img_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )

        self.txt_norm1 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_attn = SelfAttention(dim=hidden_size, num_heads=num_heads, qkv_bias=qkv_bias, dtype=dtype, device=device, operations=operations)

        self.txt_norm2 = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.txt_mlp = nn.Sequential(
            operations.Linear(hidden_size, mlp_hidden_dim, bias=True, dtype=dtype, device=device),
            nn.GELU(approximate="tanh"),
            operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device),
        )
        self.flipped_img_txt = flipped_img_txt

    def forward(self, img: Tensor, txt: Tensor, pe: Tensor, vec: Tensor, attn_mask=None):
        (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec

        # prepare image for attention
        img_modulated = self.img_norm1(img)
        img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift
        img_qkv = self.img_attn.qkv(img_modulated)
        img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        img_q, img_k = self.img_attn.norm(img_q, img_k, img_v)

        # prepare txt for attention
        txt_modulated = self.txt_norm1(txt)
        txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift
        txt_qkv = self.txt_attn.qkv(txt_modulated)
        txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v)

        # run actual attention
        attn = attention(torch.cat((txt_q, img_q), dim=2),
                         torch.cat((txt_k, img_k), dim=2),
                         torch.cat((txt_v, img_v), dim=2),
                         pe=pe, mask=attn_mask)

        txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1] :]

        # calculate the img blocks
        img = img + img_mod1.gate * self.img_attn.proj(img_attn)
        img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift)

        # calculate the txt blocks
        txt += txt_mod1.gate * self.txt_attn.proj(txt_attn)
        txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift)

        if txt.dtype == torch.float16:
            txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504)

        return img, txt


class SingleStreamBlock(nn.Module):
    """
    A DiT block with parallel linear layers as described in
    https://arxiv.org/abs/2302.05442 and adapted modulation interface.
    """

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        mlp_ratio: float = 4.0,
        qk_scale: float = None,
        dtype=None,
        device=None,
        operations=None
    ):
        super().__init__()
        self.hidden_dim = hidden_size
        self.num_heads = num_heads
        head_dim = hidden_size // num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.mlp_hidden_dim = int(hidden_size * mlp_ratio)
        # qkv and mlp_in
        self.linear1 = operations.Linear(hidden_size, hidden_size * 3 + self.mlp_hidden_dim, dtype=dtype, device=device)
        # proj and mlp_out
        self.linear2 = operations.Linear(hidden_size + self.mlp_hidden_dim, hidden_size, dtype=dtype, device=device)

        self.norm = QKNorm(head_dim, dtype=dtype, device=device, operations=operations)

        self.hidden_size = hidden_size
        self.pre_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)

        self.mlp_act = nn.GELU(approximate="tanh")

    def forward(self, x: Tensor, pe: Tensor, vec: Tensor, attn_mask=None) -> Tensor:
        mod = vec
        x_mod = (1 + mod.scale) * self.pre_norm(x) + mod.shift
        qkv, mlp = torch.split(self.linear1(x_mod), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1)

        q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        q, k = self.norm(q, k, v)

        # compute attention
        attn = attention(q, k, v, pe=pe, mask=attn_mask)
        # compute activation in mlp stream, cat again and run second linear layer
        output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2))
        x += mod.gate * output
        if x.dtype == torch.float16:
            x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504)
        return x


class LastLayer(nn.Module):
    def __init__(self, hidden_size: int, patch_size: int, out_channels: int, dtype=None, device=None, operations=None):
        super().__init__()
        self.norm_final = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6, dtype=dtype, device=device)
        self.linear = operations.Linear(hidden_size, out_channels, bias=True, dtype=dtype, device=device)

    def forward(self, x: Tensor, vec: Tensor) -> Tensor:
        shift, scale = vec
        shift = shift.squeeze(1)
        scale = scale.squeeze(1)
        x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :]
        x = self.linear(x)
        return x
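Note: a minimal sketch (not part of the diff) of how ChromaModulationOut.from_offset slices one (shift, scale, gate) triplet out of the packed modulation tensor; the batch and feature sizes here are illustrative only:

    import torch

    # Illustrative: batch of 2, 344 packed modulation vectors of width 3072.
    mods = torch.randn(2, 344, 3072)

    offset = 6  # third triplet in the packed layout
    shift = mods[:, offset : offset + 1, :]        # (2, 1, 3072)
    scale = mods[:, offset + 1 : offset + 2, :]    # (2, 1, 3072)
    gate  = mods[:, offset + 2 : offset + 3, :]    # (2, 1, 3072)
    assert shift.shape == scale.shape == gate.shape == (2, 1, 3072)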
comfy/ldm/chroma/model.py (new file, 271 lines)

@@ -0,0 +1,271 @@
#Original code can be found on: https://github.com/black-forest-labs/flux

from dataclasses import dataclass

import torch
from torch import Tensor, nn
from einops import rearrange, repeat
import comfy.ldm.common_dit

from comfy.ldm.flux.layers import (
    EmbedND,
    timestep_embedding,
)

from .layers import (
    DoubleStreamBlock,
    LastLayer,
    SingleStreamBlock,
    Approximator,
    ChromaModulationOut,
)


@dataclass
class ChromaParams:
    in_channels: int
    out_channels: int
    context_in_dim: int
    hidden_size: int
    mlp_ratio: float
    num_heads: int
    depth: int
    depth_single_blocks: int
    axes_dim: list
    theta: int
    patch_size: int
    qkv_bias: bool
    in_dim: int
    out_dim: int
    hidden_dim: int
    n_layers: int


class Chroma(nn.Module):
    """
    Transformer model for flow matching on sequences.
    """

    def __init__(self, image_model=None, final_layer=True, dtype=None, device=None, operations=None, **kwargs):
        super().__init__()
        self.dtype = dtype
        params = ChromaParams(**kwargs)
        self.params = params
        self.patch_size = params.patch_size
        self.in_channels = params.in_channels
        self.out_channels = params.out_channels
        if params.hidden_size % params.num_heads != 0:
            raise ValueError(
                f"Hidden size {params.hidden_size} must be divisible by num_heads {params.num_heads}"
            )
        pe_dim = params.hidden_size // params.num_heads
        if sum(params.axes_dim) != pe_dim:
            raise ValueError(f"Got {params.axes_dim} but expected positional dim {pe_dim}")
        self.hidden_size = params.hidden_size
        self.num_heads = params.num_heads
        self.in_dim = params.in_dim
        self.out_dim = params.out_dim
        self.hidden_dim = params.hidden_dim
        self.n_layers = params.n_layers
        self.pe_embedder = EmbedND(dim=pe_dim, theta=params.theta, axes_dim=params.axes_dim)
        self.img_in = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device)
        self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, dtype=dtype, device=device)
        # set as nn identity for now, will overwrite it later.
        self.distilled_guidance_layer = Approximator(
            in_dim=self.in_dim,
            hidden_dim=self.hidden_dim,
            out_dim=self.out_dim,
            n_layers=self.n_layers,
            dtype=dtype, device=device, operations=operations
        )

        self.double_blocks = nn.ModuleList(
            [
                DoubleStreamBlock(
                    self.hidden_size,
                    self.num_heads,
                    mlp_ratio=params.mlp_ratio,
                    qkv_bias=params.qkv_bias,
                    dtype=dtype, device=device, operations=operations
                )
                for _ in range(params.depth)
            ]
        )

        self.single_blocks = nn.ModuleList(
            [
                SingleStreamBlock(self.hidden_size, self.num_heads, mlp_ratio=params.mlp_ratio, dtype=dtype, device=device, operations=operations)
                for _ in range(params.depth_single_blocks)
            ]
        )

        if final_layer:
            self.final_layer = LastLayer(self.hidden_size, 1, self.out_channels, dtype=dtype, device=device, operations=operations)

        self.skip_mmdit = []
        self.skip_dit = []
        self.lite = False

    def get_modulations(self, tensor: torch.Tensor, block_type: str, *, idx: int = 0):
        # This function slices up the modulations tensor which has the following layout:
        #   single     : num_single_blocks * 3 elements
        #   double_img : num_double_blocks * 6 elements
        #   double_txt : num_double_blocks * 6 elements
        #   final      : 2 elements
        if block_type == "final":
            return (tensor[:, -2:-1, :], tensor[:, -1:, :])
        single_block_count = self.params.depth_single_blocks
        double_block_count = self.params.depth
        offset = 3 * idx
        if block_type == "single":
            return ChromaModulationOut.from_offset(tensor, offset)
        # Double block modulations are 6 elements so we double 3 * idx.
        offset *= 2
        if block_type in {"double_img", "double_txt"}:
            # Advance past the single block modulations.
            offset += 3 * single_block_count
            if block_type == "double_txt":
                # Advance past the double block img modulations.
                offset += 6 * double_block_count
            return (
                ChromaModulationOut.from_offset(tensor, offset),
                ChromaModulationOut.from_offset(tensor, offset + 3),
            )
        raise ValueError("Bad block_type")

    def forward_orig(
        self,
        img: Tensor,
        img_ids: Tensor,
        txt: Tensor,
        txt_ids: Tensor,
        timesteps: Tensor,
        guidance: Tensor = None,
        control=None,
        transformer_options={},
        attn_mask: Tensor = None,
    ) -> Tensor:
        patches_replace = transformer_options.get("patches_replace", {})
        if img.ndim != 3 or txt.ndim != 3:
            raise ValueError("Input img and txt tensors must have 3 dimensions.")

        # running on sequences img
        img = self.img_in(img)

        # distilled vector guidance
        mod_index_length = 344
        distill_timestep = timestep_embedding(timesteps.detach().clone(), 16).to(img.device, img.dtype)
        # guidance = guidance *
        distil_guidance = timestep_embedding(guidance.detach().clone(), 16).to(img.device, img.dtype)

        # get all modulation index
        modulation_index = timestep_embedding(torch.arange(mod_index_length), 32).to(img.device, img.dtype)
        # we need to broadcast the modulation index here so each batch has all of the index
        modulation_index = modulation_index.unsqueeze(0).repeat(img.shape[0], 1, 1).to(img.device, img.dtype)
        # and we need to broadcast timestep and guidance along too
        timestep_guidance = torch.cat([distill_timestep, distil_guidance], dim=1).unsqueeze(1).repeat(1, mod_index_length, 1).to(img.dtype).to(img.device, img.dtype)
        # then and only then we could concatenate it together
        input_vec = torch.cat([timestep_guidance, modulation_index], dim=-1).to(img.device, img.dtype)

        mod_vectors = self.distilled_guidance_layer(input_vec)

        txt = self.txt_in(txt)

        ids = torch.cat((txt_ids, img_ids), dim=1)
        pe = self.pe_embedder(ids)

        blocks_replace = patches_replace.get("dit", {})
        for i, block in enumerate(self.double_blocks):
            if i not in self.skip_mmdit:
                double_mod = (
                    self.get_modulations(mod_vectors, "double_img", idx=i),
                    self.get_modulations(mod_vectors, "double_txt", idx=i),
                )
                if ("double_block", i) in blocks_replace:
                    def block_wrap(args):
                        out = {}
                        out["img"], out["txt"] = block(img=args["img"],
                                                       txt=args["txt"],
                                                       vec=args["vec"],
                                                       pe=args["pe"],
                                                       attn_mask=args.get("attn_mask"))
                        return out

                    out = blocks_replace[("double_block", i)]({"img": img,
                                                               "txt": txt,
                                                               "vec": double_mod,
                                                               "pe": pe,
                                                               "attn_mask": attn_mask},
                                                              {"original_block": block_wrap})
                    txt = out["txt"]
                    img = out["img"]
                else:
                    img, txt = block(img=img,
                                     txt=txt,
                                     vec=double_mod,
                                     pe=pe,
                                     attn_mask=attn_mask)

                if control is not None:  # Controlnet
                    control_i = control.get("input")
                    if i < len(control_i):
                        add = control_i[i]
                        if add is not None:
                            img += add

        img = torch.cat((txt, img), 1)

        for i, block in enumerate(self.single_blocks):
            if i not in self.skip_dit:
                single_mod = self.get_modulations(mod_vectors, "single", idx=i)
                if ("single_block", i) in blocks_replace:
                    def block_wrap(args):
                        out = {}
                        out["img"] = block(args["img"],
                                           vec=args["vec"],
                                           pe=args["pe"],
                                           attn_mask=args.get("attn_mask"))
                        return out

                    out = blocks_replace[("single_block", i)]({"img": img,
                                                               "vec": single_mod,
                                                               "pe": pe,
                                                               "attn_mask": attn_mask},
                                                              {"original_block": block_wrap})
                    img = out["img"]
                else:
                    img = block(img, vec=single_mod, pe=pe, attn_mask=attn_mask)

                if control is not None:  # Controlnet
                    control_o = control.get("output")
                    if i < len(control_o):
                        add = control_o[i]
                        if add is not None:
                            img[:, txt.shape[1] :, ...] += add

        img = img[:, txt.shape[1] :, ...]
        final_mod = self.get_modulations(mod_vectors, "final")
        img = self.final_layer(img, vec=final_mod)  # (N, T, patch_size ** 2 * out_channels)
        return img

    def forward(self, x, timestep, context, guidance, control=None, transformer_options={}, **kwargs):
        bs, c, h, w = x.shape
        patch_size = 2
        x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))

        img = rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size)

        h_len = ((h + (patch_size // 2)) // patch_size)
        w_len = ((w + (patch_size // 2)) // patch_size)
        img_ids = torch.zeros((h_len, w_len, 3), device=x.device, dtype=x.dtype)
        img_ids[:, :, 1] = img_ids[:, :, 1] + torch.linspace(0, h_len - 1, steps=h_len, device=x.device, dtype=x.dtype).unsqueeze(1)
        img_ids[:, :, 2] = img_ids[:, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).unsqueeze(0)
        img_ids = repeat(img_ids, "h w c -> b (h w) c", b=bs)

        txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype)
        out = self.forward_orig(img, img_ids, context, txt_ids, timestep, guidance, control, transformer_options, attn_mask=kwargs.get("attention_mask", None))
        return rearrange(out, "b (h w) (c ph pw) -> b c (h ph) (w pw)", h=h_len, w=w_len, ph=2, pw=2)[:, :, :h, :w]
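Note: the hardcoded mod_index_length = 344 is consistent with the packed layout documented in get_modulations if the model uses Flux-style depths of 19 double blocks and 38 single blocks; those depths are an assumption of this note, not stated in the diff:

    depth = 19                 # assumed number of double blocks
    depth_single_blocks = 38   # assumed number of single blocks

    single     = depth_single_blocks * 3   # 114: one shift/scale/gate triplet per single block
    double_img = depth * 6                 # 114: two triplets per double block (img stream)
    double_txt = depth * 6                 # 114: two triplets per double block (txt stream)
    final      = 2                         # shift and scale for LastLayer

    assert single + double_img + double_txt + final == 344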
@@ -1,7 +1,6 @@
 import torch
 from torch import nn
 import comfy.ldm.modules.attention
-from comfy.ldm.genmo.joint_model.layers import RMSNorm
 import comfy.ldm.common_dit
 from einops import rearrange
 import math

@@ -262,8 +261,8 @@ class CrossAttention(nn.Module):
         self.heads = heads
         self.dim_head = dim_head
 
-        self.q_norm = RMSNorm(inner_dim, dtype=dtype, device=device)
-        self.k_norm = RMSNorm(inner_dim, dtype=dtype, device=device)
+        self.q_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)
+        self.k_norm = operations.RMSNorm(inner_dim, dtype=dtype, device=device)
 
         self.to_q = operations.Linear(query_dim, inner_dim, bias=True, dtype=dtype, device=device)
         self.to_k = operations.Linear(context_dim, inner_dim, bias=True, dtype=dtype, device=device)
@@ -38,6 +38,7 @@ import comfy.ldm.lumina.model
 import comfy.ldm.wan.model
 import comfy.ldm.hunyuan3d.model
 import comfy.ldm.hidream.model
+import comfy.ldm.chroma.model
 
 import comfy.model_management
 import comfy.patcher_extension

@@ -786,8 +787,8 @@ class PixArt(BaseModel):
         return out
 
 class Flux(BaseModel):
-    def __init__(self, model_config, model_type=ModelType.FLUX, device=None):
-        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.flux.model.Flux)
+    def __init__(self, model_config, model_type=ModelType.FLUX, device=None, unet_model=comfy.ldm.flux.model.Flux):
+        super().__init__(model_config, model_type, device=device, unet_model=unet_model)
 
     def concat_cond(self, **kwargs):
         try:

@@ -1108,3 +1109,15 @@ class HiDream(BaseModel):
         if image_cond is not None:
             out['image_cond'] = comfy.conds.CONDNoiseShape(self.process_latent_in(image_cond))
         return out
+
+class Chroma(Flux):
+    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
+        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.chroma.model.Chroma)
+
+    def extra_conds(self, **kwargs):
+        out = super().extra_conds(**kwargs)
+
+        guidance = kwargs.get("guidance", 0)
+        if guidance is not None:
+            out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance]))
+        return out
@@ -164,7 +164,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         if in_key in state_dict_keys:
             dit_config["in_channels"] = state_dict[in_key].shape[1] // (patch_size * patch_size)
         dit_config["out_channels"] = 16
-        dit_config["vec_in_dim"] = 768
+        vec_in_key = '{}vector_in.in_layer.weight'.format(key_prefix)
+        if vec_in_key in state_dict_keys:
+            dit_config["vec_in_dim"] = state_dict[vec_in_key].shape[1]
         dit_config["context_in_dim"] = 4096
         dit_config["hidden_size"] = 3072
         dit_config["mlp_ratio"] = 4.0

@@ -174,7 +176,16 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["axes_dim"] = [16, 56, 56]
         dit_config["theta"] = 10000
         dit_config["qkv_bias"] = True
-        dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
+        if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma
+            dit_config["image_model"] = "chroma"
+            dit_config["in_channels"] = 64
+            dit_config["out_channels"] = 64
+            dit_config["in_dim"] = 64
+            dit_config["out_dim"] = 3072
+            dit_config["hidden_dim"] = 5120
+            dit_config["n_layers"] = 5
+        else:
+            dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys
         return dit_config
 
     if '{}t5_yproj.weight'.format(key_prefix) in state_dict_keys: #Genmo mochi preview
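Note: the detect_unet_config change above identifies a Chroma checkpoint purely by the presence of its distilled_guidance_layer norm weights in the state dict. A minimal sketch of the idea; the state dict keys below are fabricated for illustration:

    state_dict_keys = {"model.diffusion_model.distilled_guidance_layer.norms.0.scale"}
    key_prefix = "model.diffusion_model."

    is_chroma = (
        '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys
        or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys
    )
    print(is_chroma)  # True -> dit_config["image_model"] = "chroma"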
@@ -714,6 +714,7 @@ class CLIPType(Enum):
     LUMINA2 = 12
     WAN = 13
     HIDREAM = 14
+    CHROMA = 15
 
 
 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):

@@ -818,7 +819,7 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
         elif clip_type == CLIPType.LTXV:
             clip_target.clip = comfy.text_encoders.lt.ltxv_te(**t5xxl_detect(clip_data))
             clip_target.tokenizer = comfy.text_encoders.lt.LTXVT5Tokenizer
-        elif clip_type == CLIPType.PIXART:
+        elif clip_type == CLIPType.PIXART or clip_type == CLIPType.CHROMA:
            clip_target.clip = comfy.text_encoders.pixart_t5.pixart_te(**t5xxl_detect(clip_data))
            clip_target.tokenizer = comfy.text_encoders.pixart_t5.PixArtTokenizer
         elif clip_type == CLIPType.WAN:
@@ -1068,7 +1068,34 @@ class HiDream(supported_models_base.BASE):
     def clip_target(self, state_dict={}):
         return None # TODO
 
+class Chroma(supported_models_base.BASE):
+    unet_config = {
+        "image_model": "chroma",
+    }
+
+    unet_extra_config = {
+    }
+
+    sampling_settings = {
+        "multiplier": 1.0,
+    }
+
+    latent_format = comfy.latent_formats.Flux
+
+    memory_usage_factor = 3.2
+
+    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
+
+
+    def get_model(self, state_dict, prefix="", device=None):
+        out = model_base.Chroma(self, device=device)
+        return out
+
+    def clip_target(self, state_dict={}):
+        pref = self.text_encoder_key_prefix[0]
+        t5_detect = comfy.text_encoders.sd3_clip.t5_xxl_detect(state_dict, "{}t5xxl.transformer.".format(pref))
+        return supported_models_base.ClipTarget(comfy.text_encoders.pixart_t5.PixArtTokenizer, comfy.text_encoders.pixart_t5.pixart_te(**t5_detect))
 
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, Hunyuan3Dv2mini, Hunyuan3Dv2, HiDream, Chroma]
 
 models += [SVD_img2vid]
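Note: the Chroma class only declares "image_model": "chroma" in its unet_config. A simplified sketch of how a supported-model class gets selected from the detected config — an assumption of this note; the real matching logic lives in supported_models_base, not in this diff:

    detected = {"image_model": "chroma", "in_channels": 64, "hidden_size": 3072}

    def matches(unet_config, detected):
        # Simplified: every key the model class declares must match the detected value.
        return all(detected.get(k) == v for k, v in unet_config.items())

    print(matches({"image_model": "chroma"}, detected))  # True -> Chroma is picked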
@@ -24,7 +24,7 @@ class BOFTAdapter(WeightAdapterBase):
     ) -> Optional["BOFTAdapter"]:
         if loaded_keys is None:
             loaded_keys = set()
-        blocks_name = "{}.boft_blocks".format(x)
+        blocks_name = "{}.oft_blocks".format(x)
         rescale_name = "{}.rescale".format(x)
 
         blocks = None

@@ -32,17 +32,18 @@ class BOFTAdapter(WeightAdapterBase):
             blocks = lora[blocks_name]
             if blocks.ndim == 4:
                 loaded_keys.add(blocks_name)
+            else:
+                blocks = None
+        if blocks is None:
+            return None
 
         rescale = None
         if rescale_name in lora.keys():
             rescale = lora[rescale_name]
             loaded_keys.add(rescale_name)
 
-        if blocks is not None:
-            weights = (blocks, rescale, alpha, dora_scale)
-            return cls(loaded_keys, weights)
-        else:
-            return None
+        weights = (blocks, rescale, alpha, dora_scale)
+        return cls(loaded_keys, weights)
 
     def calculate_weight(
         self,

@@ -71,7 +72,7 @@ class BOFTAdapter(WeightAdapterBase):
         # Get r
         I = torch.eye(boft_b, device=blocks.device, dtype=blocks.dtype)
         # for Q = -Q^T
-        q = blocks - blocks.transpose(1, 2)
+        q = blocks - blocks.transpose(-1, -2)
         normed_q = q
         if alpha > 0: # alpha in boft/bboft is for constraint
             q_norm = torch.norm(q) + 1e-8

@@ -79,9 +80,8 @@ class BOFTAdapter(WeightAdapterBase):
             normed_q = q * alpha / q_norm
         # use float() to prevent unsupported type in .inverse()
         r = (I + normed_q) @ (I - normed_q).float().inverse()
-        r = r.to(original_weight)
-        inp = org = original_weight
+        r = r.to(weight)
+        inp = org = weight
 
         r_b = boft_b//2
         for i in range(boft_m):

@@ -91,14 +91,14 @@ class BOFTAdapter(WeightAdapterBase):
             if strength != 1:
                 bi = bi * strength + (1-strength) * I
             inp = (
-                inp.unflatten(-1, (-1, g, k))
-                .transpose(-2, -1)
-                .flatten(-3)
-                .unflatten(-1, (-1, boft_b))
+                inp.unflatten(0, (-1, g, k))
+                .transpose(1, 2)
+                .flatten(0, 2)
+                .unflatten(0, (-1, boft_b))
             )
-            inp = torch.einsum("b n m, b n ... -> b m ...", inp, bi)
+            inp = torch.einsum("b i j, b j ...-> b i ...", bi, inp)
             inp = (
-                inp.flatten(-2).unflatten(-1, (-1, k, g)).transpose(-2, -1).flatten(-3)
+                inp.flatten(0, 1).unflatten(0, (-1, k, g)).transpose(1, 2).flatten(0, 2)
             )
 
             if rescale is not None:

@@ -109,7 +109,7 @@ class BOFTAdapter(WeightAdapterBase):
         if dora_scale is not None:
             weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function)
         else:
-            weight += function(((strength * alpha) * lora_diff).type(weight.dtype))
+            weight += function((strength * lora_diff).type(weight.dtype))
     except Exception as e:
         logging.error("ERROR {} {} {}".format(self.name, key, e))
     return weight
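Note: a standalone check (not part of the diff) that the Cayley transform used in calculate_weight maps a skew-symmetric q to an orthogonal r, which is why the BOFT blocks rotate rather than rescale the weight:

    import torch

    blocks = torch.randn(4, 8, 8)             # boft_m=4 blocks of size boft_b=8
    q = blocks - blocks.transpose(-1, -2)      # skew-symmetric: q == -q^T
    I = torch.eye(8).expand(4, 8, 8)
    r = (I + q) @ (I - q).float().inverse()    # Cayley transform
    err = (r @ r.transpose(-1, -2) - I).abs().max()
    print(err)  # near zero: each r block is orthogonal up to numerical error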
@@ -32,17 +32,18 @@ class OFTAdapter(WeightAdapterBase):
             blocks = lora[blocks_name]
             if blocks.ndim == 3:
                 loaded_keys.add(blocks_name)
+            else:
+                blocks = None
+        if blocks is None:
+            return None
 
         rescale = None
         if rescale_name in lora.keys():
             rescale = lora[rescale_name]
             loaded_keys.add(rescale_name)
 
-        if blocks is not None:
-            weights = (blocks, rescale, alpha, dora_scale)
-            return cls(loaded_keys, weights)
-        else:
-            return None
+        weights = (blocks, rescale, alpha, dora_scale)
+        return cls(loaded_keys, weights)
 
     def calculate_weight(
         self,

@@ -79,16 +80,17 @@ class OFTAdapter(WeightAdapterBase):
             normed_q = q * alpha / q_norm
         # use float() to prevent unsupported type in .inverse()
         r = (I + normed_q) @ (I - normed_q).float().inverse()
-        r = r.to(original_weight)
+        r = r.to(weight)
+        _, *shape = weight.shape
         lora_diff = torch.einsum(
             "k n m, k n ... -> k m ...",
             (r * strength) - strength * I,
-            original_weight,
-        )
+            weight.view(block_num, block_size, *shape),
+        ).view(-1, *shape)
         if dora_scale is not None:
             weight = weight_decompose(dora_scale, weight, lora_diff, alpha, strength, intermediate_dtype, function)
         else:
-            weight += function(((strength * alpha) * lora_diff).type(weight.dtype))
+            weight += function((strength * lora_diff).type(weight.dtype))
     except Exception as e:
         logging.error("ERROR {} {} {}".format(self.name, key, e))
     return weight
@@ -297,6 +297,10 @@ class SynchronousOperation(Generic[T, R]):
 
             # Convert request model to dict, but use None for EmptyRequest
             request_dict = None if isinstance(self.request, EmptyRequest) else self.request.model_dump(exclude_none=True)
+            if request_dict:
+                for key, value in request_dict.items():
+                    if isinstance(value, Enum):
+                        request_dict[key] = value.value
 
             # Debug log for request
             logging.debug(f"[DEBUG] API Request: {self.endpoint.method.value} {self.endpoint.path}")
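Note: what the added Enum normalization does in isolation — Enum members are replaced by their underlying values so the request dict serializes cleanly. Quality is a hypothetical request field invented for this sketch, not part of the diff:

    from enum import Enum

    class Quality(Enum):  # hypothetical request field
        HIGH = "high"

    request_dict = {"quality": Quality.HIGH, "seed": 3}
    for key, value in request_dict.items():
        if isinstance(value, Enum):
            request_dict[key] = value.value
    print(request_dict)  # {'quality': 'high', 'seed': 3} -- JSON-serializable now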
@@ -1,3 +1,4 @@
+import math
 import comfy.samplers
 import comfy.sample
 from comfy.k_diffusion import sampling as k_diffusion_sampling

@@ -249,6 +250,55 @@ class SetFirstSigma:
         sigmas[0] = sigma
         return (sigmas, )
 
+class ExtendIntermediateSigmas:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required":
+                    {"sigmas": ("SIGMAS", ),
+                     "steps": ("INT", {"default": 2, "min": 1, "max": 100}),
+                     "start_at_sigma": ("FLOAT", {"default": -1.0, "min": -1.0, "max": 20000.0, "step": 0.01, "round": False}),
+                     "end_at_sigma": ("FLOAT", {"default": 12.0, "min": 0.0, "max": 20000.0, "step": 0.01, "round": False}),
+                     "spacing": (['linear', 'cosine', 'sine'],),
+                     }
+                }
+    RETURN_TYPES = ("SIGMAS",)
+    CATEGORY = "sampling/custom_sampling/sigmas"
+
+    FUNCTION = "extend"
+
+    def extend(self, sigmas: torch.Tensor, steps: int, start_at_sigma: float, end_at_sigma: float, spacing: str):
+        if start_at_sigma < 0:
+            start_at_sigma = float("inf")
+
+        interpolator = {
+            'linear': lambda x: x,
+            'cosine': lambda x: torch.sin(x*math.pi/2),
+            'sine': lambda x: 1 - torch.cos(x*math.pi/2)
+        }[spacing]
+
+        # linear space for our interpolation function
+        x = torch.linspace(0, 1, steps + 1, device=sigmas.device)[1:-1]
+        computed_spacing = interpolator(x)
+
+        extended_sigmas = []
+        for i in range(len(sigmas) - 1):
+            sigma_current = sigmas[i]
+            sigma_next = sigmas[i+1]
+
+            extended_sigmas.append(sigma_current)
+
+            if end_at_sigma <= sigma_current <= start_at_sigma:
+                interpolated_steps = computed_spacing * (sigma_next - sigma_current) + sigma_current
+                extended_sigmas.extend(interpolated_steps.tolist())
+
+        # Add the last sigma value
+        if len(sigmas) > 0:
+            extended_sigmas.append(sigmas[-1])
+
+        extended_sigmas = torch.FloatTensor(extended_sigmas)
+
+        return (extended_sigmas,)
+
 class KSamplerSelect:
     @classmethod
     def INPUT_TYPES(s):

@@ -735,6 +785,7 @@ NODE_CLASS_MAPPINGS = {
     "SplitSigmasDenoise": SplitSigmasDenoise,
     "FlipSigmas": FlipSigmas,
     "SetFirstSigma": SetFirstSigma,
+    "ExtendIntermediateSigmas": ExtendIntermediateSigmas,
 
     "CFGGuider": CFGGuider,
     "DualCFGGuider": DualCFGGuider,
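Note: a standalone sketch of the spacing math inside ExtendIntermediateSigmas.extend, with made-up sigma values. With steps=2 exactly one intermediate sigma is inserted between each eligible pair:

    import math
    import torch

    sigmas = torch.tensor([14.0, 8.0, 4.0, 1.0, 0.0])
    steps = 2
    x = torch.linspace(0, 1, steps + 1)[1:-1]  # interior points only: tensor([0.5000])
    cosine = torch.sin(x * math.pi / 2)        # front-loaded spacing: tensor([0.7071])

    # One intermediate sigma between 14.0 and 8.0 (assuming it is within start/end bounds):
    inserted = cosine * (8.0 - 14.0) + 14.0    # tensor([9.7574])
    print(inserted)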
@@ -276,7 +276,7 @@ class CLIPSave:
         comfy.model_management.load_models_gpu([clip.load_model()], force_patch_weights=True)
         clip_sd = clip.get_sd()
 
-        for prefix in ["clip_l.", "clip_g.", ""]:
+        for prefix in ["clip_l.", "clip_g.", "clip_h.", "t5xxl.", "pile_t5xl.", "mt5xl.", "umt5xxl.", "t5base.", "gemma2_2b.", "llama.", "hydit_clip.", ""]:
             k = list(filter(lambda a: a.startswith(prefix), clip_sd.keys()))
             current_clip_sd = {}
             for x in k:
@@ -20,13 +20,14 @@ def loglinear_interp(t_steps, num_steps):
 
 NOISE_LEVELS = {"FLUX": [0.9968, 0.9886, 0.9819, 0.975, 0.966, 0.9471, 0.9158, 0.8287, 0.5512, 0.2808, 0.001],
                 "Wan":[1.0, 0.997, 0.995, 0.993, 0.991, 0.989, 0.987, 0.985, 0.98, 0.975, 0.973, 0.968, 0.96, 0.946, 0.927, 0.902, 0.864, 0.776, 0.539, 0.208, 0.001],
+                "Chroma": [0.992, 0.99, 0.988, 0.985, 0.982, 0.978, 0.973, 0.968, 0.961, 0.953, 0.943, 0.931, 0.917, 0.9, 0.881, 0.858, 0.832, 0.802, 0.769, 0.731, 0.69, 0.646, 0.599, 0.55, 0.501, 0.451, 0.402, 0.355, 0.311, 0.27, 0.232, 0.199, 0.169, 0.143, 0.12, 0.101, 0.084, 0.07, 0.058, 0.048, 0.001],
                 }
 
 class OptimalStepsScheduler:
     @classmethod
     def INPUT_TYPES(s):
         return {"required":
-                    {"model_type": (["FLUX", "Wan"], ),
+                    {"model_type": (["FLUX", "Wan", "Chroma"], ),
                      "steps": ("INT", {"default": 20, "min": 3, "max": 1000}),
                      "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                      }
@@ -141,6 +141,7 @@ class Quantize:
 
     CATEGORY = "image/postprocessing"
 
+    @staticmethod
     def bayer(im, pal_im, order):
         def normalized_bayer_matrix(n):
             if n == 0:
comfy_extras/nodes_preview_any.py (new file, 43 lines)

@@ -0,0 +1,43 @@
import json

from comfy.comfy_types.node_typing import IO

# Preview Any - original implement from
# https://github.com/rgthree/rgthree-comfy/blob/main/py/display_any.py
# upstream requested in https://github.com/Kosinkadink/rfcs/blob/main/rfcs/0000-corenodes.md#preview-nodes
class PreviewAny():
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {"source": (IO.ANY, {})},
        }

    RETURN_TYPES = ()
    FUNCTION = "main"
    OUTPUT_NODE = True

    CATEGORY = "utils"

    def main(self, source=None):
        value = 'None'
        if isinstance(source, str):
            value = source
        elif isinstance(source, (int, float, bool)):
            value = str(source)
        elif source is not None:
            try:
                value = json.dumps(source)
            except Exception:
                try:
                    value = str(source)
                except Exception:
                    value = 'source exists, but could not be serialized.'

        return {"ui": {"text": (value,)}}

NODE_CLASS_MAPPINGS = {
    "PreviewAny": PreviewAny,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "PreviewAny": "Preview Any",
}
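Note: a sketch of the serialization fallback chain in PreviewAny.main, assuming the class above is in scope:

    node = PreviewAny()
    print(node.main("hello"))    # {'ui': {'text': ('hello',)}}
    print(node.main(3.5))        # {'ui': {'text': ('3.5',)}}
    print(node.main({"a": 1}))   # json.dumps path: {'ui': {'text': ('{"a": 1}',)}}
    print(node.main(None))       # {'ui': {'text': ('None',)}}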
@@ -20,7 +20,7 @@ class WebcamCapture(nodes.LoadImage):
 
     CATEGORY = "image"
 
-    def load_capture(s, image, **kwargs):
+    def load_capture(self, image, **kwargs):
         return super().load_image(folder_paths.get_annotated_filepath(image))
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.30"
+__version__ = "0.3.31"
main.py (2 changes)

@@ -16,7 +16,7 @@ if not args.disable_manager:
     import comfyui_manager
 
 if __name__ == "__main__":
-    #NOTE: These do not do anything on core ComfyUI which should already have no communication with the internet, they are for custom nodes.
+    #NOTE: These do not do anything on core ComfyUI, they are for custom nodes.
     os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1'
     os.environ['DO_NOT_TRACK'] = '1'
nodes.py (3 changes)

@@ -920,7 +920,7 @@ class CLIPLoader:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": { "clip_name": (folder_paths.get_filename_list("text_encoders"), ),
-                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream"], ),
+                              "type": (["stable_diffusion", "stable_cascade", "sd3", "stable_audio", "mochi", "ltxv", "pixart", "cosmos", "lumina2", "wan", "hidream", "chroma"], ),
                              },
                 "optional": {
                              "device": (["default", "cpu"], {"advanced": True}),

@@ -2267,6 +2267,7 @@ def init_builtin_extra_nodes():
         "nodes_optimalsteps.py",
         "nodes_hidream.py",
         "nodes_fresca.py",
+        "nodes_preview_any.py",
     ]
 
     api_nodes_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_api_nodes")
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.30"
+version = "0.3.31"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

@@ -12,6 +12,7 @@ documentation = "https://docs.comfy.org/"
 
 [tool.ruff]
 lint.select = [
+  "N805", # invalid-first-argument-name-for-method
   "S307", # suspicious-eval-usage
   "S102", # exec
   "T",    # print-usage
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.17.11
+comfyui-frontend-package==1.18.6
 comfyui-workflow-templates==0.1.3
 comfyui_manager
 torch