Merge branch 'comfyanonymous:master' into master

Jake D 2023-04-25 15:47:37 -04:00 committed by GitHub
commit 397477e877
29 changed files with 1293 additions and 183 deletions


@ -0,0 +1,30 @@
name: "Windows Release cu118 dependencies 2"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10.9'
- shell: bash
run: |
python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers==0.0.19.dev516 --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir cu118_python_deps
tar cf cu118_python_deps.tar cu118_python_deps
- uses: actions/cache/save@v3
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118


@ -17,6 +17,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models.
- Embeddings/Textual inversion
- [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/)
- [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/)
- Loading full workflows (with seeds) from generated PNG files.
- Saving/Loading workflows as Json files.
- Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones.
@ -25,6 +26,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
- [ControlNet and T2I-Adapter](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/)
- [Upscale Models (ESRGAN, ESRGAN variants, SwinIR, Swin2SR, etc...)](https://comfyanonymous.github.io/ComfyUI_examples/upscale_models/)
- [unCLIP Models](https://comfyanonymous.github.io/ComfyUI_examples/unclip/)
- [GLIGEN](https://comfyanonymous.github.io/ComfyUI_examples/gligen/)
- Starts up very fast.
- Works fully offline: will never download anything.
- [Config file](extra_model_paths.yaml.example) to set the search paths for models.
@ -83,7 +85,7 @@ Put your VAE in: models/vae
At the time of writing this, pytorch has issues with python versions higher than 3.10, so make sure your python/pip versions are 3.10.
### AMD (Linux only)
### AMD GPUs (Linux only)
AMD users can install rocm and pytorch with pip if they aren't already installed; this is the command to install the stable version:
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.4.2```

comfy/gligen.py (new file, 343 lines)

@ -0,0 +1,343 @@
import math
import torch
from torch import nn, einsum
from ldm.modules.attention import CrossAttention
from inspect import isfunction
def exists(val):
return val is not None
def uniq(arr):
return{el: True for el in arr}.keys()
def default(val, d):
if exists(val):
return val
return d() if isfunction(d) else d
# feedforward
class GEGLU(nn.Module):
def __init__(self, dim_in, dim_out):
super().__init__()
self.proj = nn.Linear(dim_in, dim_out * 2)
def forward(self, x):
x, gate = self.proj(x).chunk(2, dim=-1)
return x * torch.nn.functional.gelu(gate)
class FeedForward(nn.Module):
def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
super().__init__()
inner_dim = int(dim * mult)
dim_out = default(dim_out, dim)
project_in = nn.Sequential(
nn.Linear(dim, inner_dim),
nn.GELU()
) if not glu else GEGLU(dim, inner_dim)
self.net = nn.Sequential(
project_in,
nn.Dropout(dropout),
nn.Linear(inner_dim, dim_out)
)
def forward(self, x):
return self.net(x)
class GatedCrossAttentionDense(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, d_head):
super().__init__()
self.attn = CrossAttention(
query_dim=query_dim,
context_dim=context_dim,
heads=n_heads,
dim_head=d_head)
self.ff = FeedForward(query_dim, glu=True)
self.norm1 = nn.LayerNorm(query_dim)
self.norm2 = nn.LayerNorm(query_dim)
self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
# this can be useful: we can externally change the magnitude of tanh(alpha)
# for example, when it is set to 0, the entire model is the same as the
# original one
self.scale = 1
def forward(self, x, objs):
x = x + self.scale * \
torch.tanh(self.alpha_attn) * self.attn(self.norm1(x), objs, objs)
x = x + self.scale * \
torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
return x
class GatedSelfAttentionDense(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, d_head):
super().__init__()
# we need a linear projection since we concatenate the visual feature and
# the object feature
self.linear = nn.Linear(context_dim, query_dim)
self.attn = CrossAttention(
query_dim=query_dim,
context_dim=query_dim,
heads=n_heads,
dim_head=d_head)
self.ff = FeedForward(query_dim, glu=True)
self.norm1 = nn.LayerNorm(query_dim)
self.norm2 = nn.LayerNorm(query_dim)
self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
# this can be useful: we can externally change the magnitude of tanh(alpha)
# for example, when it is set to 0, the entire model is the same as the
# original one
self.scale = 1
def forward(self, x, objs):
N_visual = x.shape[1]
objs = self.linear(objs)
x = x + self.scale * torch.tanh(self.alpha_attn) * self.attn(
self.norm1(torch.cat([x, objs], dim=1)))[:, 0:N_visual, :]
x = x + self.scale * \
torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
return x
class GatedSelfAttentionDense2(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, d_head):
super().__init__()
# we need a linear projection since we concatenate the visual feature and
# the object feature
self.linear = nn.Linear(context_dim, query_dim)
self.attn = CrossAttention(
query_dim=query_dim, context_dim=query_dim, dim_head=d_head)
self.ff = FeedForward(query_dim, glu=True)
self.norm1 = nn.LayerNorm(query_dim)
self.norm2 = nn.LayerNorm(query_dim)
self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
# this can be useful: we can externally change the magnitude of tanh(alpha)
# for example, when it is set to 0, the entire model is the same as the
# original one
self.scale = 1
def forward(self, x, objs):
B, N_visual, _ = x.shape
B, N_ground, _ = objs.shape
objs = self.linear(objs)
# sanity check
size_v = math.sqrt(N_visual)
size_g = math.sqrt(N_ground)
assert int(size_v) == size_v, "Visual tokens must be square rootable"
assert int(size_g) == size_g, "Grounding tokens must be square rootable"
size_v = int(size_v)
size_g = int(size_g)
# select grounding token and resize it to visual token size as residual
out = self.attn(self.norm1(torch.cat([x, objs], dim=1)))[
:, N_visual:, :]
out = out.permute(0, 2, 1).reshape(B, -1, size_g, size_g)
out = torch.nn.functional.interpolate(
out, (size_v, size_v), mode='bicubic')
residual = out.reshape(B, -1, N_visual).permute(0, 2, 1)
# add residual to visual feature
x = x + self.scale * torch.tanh(self.alpha_attn) * residual
x = x + self.scale * \
torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
return x
class FourierEmbedder():
def __init__(self, num_freqs=64, temperature=100):
self.num_freqs = num_freqs
self.temperature = temperature
self.freq_bands = temperature ** (torch.arange(num_freqs) / num_freqs)
@torch.no_grad()
def __call__(self, x, cat_dim=-1):
"x: arbitrary shape of tensor. dim: cat dim"
out = []
for freq in self.freq_bands:
out.append(torch.sin(freq * x))
out.append(torch.cos(freq * x))
return torch.cat(out, cat_dim)
class PositionNet(nn.Module):
def __init__(self, in_dim, out_dim, fourier_freqs=8):
super().__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.fourier_embedder = FourierEmbedder(num_freqs=fourier_freqs)
self.position_dim = fourier_freqs * 2 * 4 # 2 is sin&cos, 4 is xyxy
self.linears = nn.Sequential(
nn.Linear(self.in_dim + self.position_dim, 512),
nn.SiLU(),
nn.Linear(512, 512),
nn.SiLU(),
nn.Linear(512, out_dim),
)
self.null_positive_feature = torch.nn.Parameter(
torch.zeros([self.in_dim]))
self.null_position_feature = torch.nn.Parameter(
torch.zeros([self.position_dim]))
def forward(self, boxes, masks, positive_embeddings):
B, N, _ = boxes.shape
masks = masks.unsqueeze(-1)
# embed the position (it may include padding as a placeholder)
xyxy_embedding = self.fourier_embedder(boxes) # B*N*4 --> B*N*C
# learnable null embedding
positive_null = self.null_positive_feature.view(1, 1, -1)
xyxy_null = self.null_position_feature.view(1, 1, -1)
# replace padding with learnable null embedding
positive_embeddings = positive_embeddings * \
masks + (1 - masks) * positive_null
xyxy_embedding = xyxy_embedding * masks + (1 - masks) * xyxy_null
objs = self.linears(
torch.cat([positive_embeddings, xyxy_embedding], dim=-1))
assert objs.shape == torch.Size([B, N, self.out_dim])
return objs
class Gligen(nn.Module):
def __init__(self, modules, position_net, key_dim):
super().__init__()
self.module_list = nn.ModuleList(modules)
self.position_net = position_net
self.key_dim = key_dim
self.max_objs = 30
def _set_position(self, boxes, masks, positive_embeddings):
objs = self.position_net(boxes, masks, positive_embeddings)
def func(key, x):
module = self.module_list[key]
return module(x, objs)
return func
def set_position(self, latent_image_shape, position_params, device):
batch, c, h, w = latent_image_shape
masks = torch.zeros([self.max_objs], device="cpu")
boxes = []
positive_embeddings = []
for p in position_params:
x1 = (p[4]) / w
y1 = (p[3]) / h
x2 = (p[4] + p[2]) / w
y2 = (p[3] + p[1]) / h
masks[len(boxes)] = 1.0
boxes += [torch.tensor((x1, y1, x2, y2)).unsqueeze(0)]
positive_embeddings += [p[0]]
append_boxes = []
append_conds = []
if len(boxes) < self.max_objs:
append_boxes = [torch.zeros(
[self.max_objs - len(boxes), 4], device="cpu")]
append_conds = [torch.zeros(
[self.max_objs - len(boxes), self.key_dim], device="cpu")]
box_out = torch.cat(
boxes + append_boxes).unsqueeze(0).repeat(batch, 1, 1)
masks = masks.unsqueeze(0).repeat(batch, 1)
conds = torch.cat(positive_embeddings +
append_conds).unsqueeze(0).repeat(batch, 1, 1)
return self._set_position(
box_out.to(device),
masks.to(device),
conds.to(device))
def set_empty(self, latent_image_shape, device):
batch, c, h, w = latent_image_shape
masks = torch.zeros([self.max_objs], device="cpu").repeat(batch, 1)
box_out = torch.zeros([self.max_objs, 4],
device="cpu").repeat(batch, 1, 1)
conds = torch.zeros([self.max_objs, self.key_dim],
device="cpu").repeat(batch, 1, 1)
return self._set_position(
box_out.to(device),
masks.to(device),
conds.to(device))
def cleanup(self):
pass
def get_models(self):
return [self]
def load_gligen(sd):
sd_k = sd.keys()
output_list = []
key_dim = 768
for a in ["input_blocks", "middle_block", "output_blocks"]:
for b in range(20):
k_temp = filter(lambda k: "{}.{}.".format(a, b)
in k and ".fuser." in k, sd_k)
k_temp = map(lambda k: (k, k.split(".fuser.")[-1]), k_temp)
n_sd = {}
for k in k_temp:
n_sd[k[1]] = sd[k[0]]
if len(n_sd) > 0:
query_dim = n_sd["linear.weight"].shape[0]
key_dim = n_sd["linear.weight"].shape[1]
if key_dim == 768: # SD1.x
n_heads = 8
d_head = query_dim // n_heads
else:
d_head = 64
n_heads = query_dim // d_head
gated = GatedSelfAttentionDense(
query_dim, key_dim, n_heads, d_head)
gated.load_state_dict(n_sd, strict=False)
output_list.append(gated)
if "position_net.null_positive_feature" in sd_k:
in_dim = sd["position_net.null_positive_feature"].shape[0]
out_dim = sd["position_net.linears.4.weight"].shape[0]
class WeightsLoader(torch.nn.Module):
pass
w = WeightsLoader()
w.position_net = PositionNet(in_dim, out_dim)
w.load_state_dict(sd, strict=False)
gligen = Gligen(output_list, w.position_net, key_dim)
return gligen
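
The loader above returns a `Gligen` module whose `set_position` method produces the patch function consumed by the sampler (see the comfy/samplers.py and nodes.py hunks further down). A minimal usage sketch, assuming a downloaded GLIGEN checkpoint and placeholder tensors:
```
import torch
import comfy.sd

# Path is a placeholder; the notebook cell further down downloads this file.
gligen_model = comfy.sd.load_gligen("models/gligen/gligen_sd14_textbox_pruned_fp16.safetensors")

# GLIGENTextBoxApply stores one tuple per text box:
# (pooled CLIP embedding, height // 8, width // 8, y // 8, x // 8)
cond_pooled = torch.zeros(1, 768)  # stand-in for clip.encode_from_tokens(..., return_pooled=True)[1]
position_params = [(cond_pooled, 64 // 8, 64 // 8, 0, 0)]

latent_shape = (1, 4, 64, 64)  # batch, channels, latent height, latent width
patch = gligen_model.set_position(latent_shape, position_params, torch.device("cpu"))

# The sampler registers `patch` as transformer_options["patches"]["middle_patch"];
# each transformer block then calls patch(current_index, hidden_states).
```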


@ -163,13 +163,17 @@ class CrossAttentionBirchSan(nn.Module):
nn.Dropout(dropout)
)
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
h = self.heads
query = self.to_q(x)
context = default(context, x)
key = self.to_k(context)
value = self.to_v(context)
if value is not None:
value = self.to_v(value)
else:
value = self.to_v(context)
del context, x
query = query.unflatten(-1, (self.heads, -1)).transpose(1,2).flatten(end_dim=1)
@ -256,13 +260,17 @@ class CrossAttentionDoggettx(nn.Module):
nn.Dropout(dropout)
)
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
h = self.heads
q_in = self.to_q(x)
context = default(context, x)
k_in = self.to_k(context)
v_in = self.to_v(context)
if value is not None:
v_in = self.to_v(value)
del value
else:
v_in = self.to_v(context)
del context, x
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in))
@ -350,13 +358,17 @@ class CrossAttention(nn.Module):
nn.Dropout(dropout)
)
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
h = self.heads
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if value is not None:
v = self.to_v(value)
del value
else:
v = self.to_v(context)
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))
@ -402,11 +414,15 @@ class MemoryEfficientCrossAttention(nn.Module):
self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout))
self.attention_op: Optional[Any] = None
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if value is not None:
v = self.to_v(value)
del value
else:
v = self.to_v(context)
b, _, _ = q.shape
q, k, v = map(
@ -447,19 +463,19 @@ class CrossAttentionPytorch(nn.Module):
self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout))
self.attention_op: Optional[Any] = None
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if value is not None:
v = self.to_v(value)
del value
else:
v = self.to_v(context)
b, _, _ = q.shape
q, k, v = map(
lambda t: t.unsqueeze(3)
.reshape(b, t.shape[1], self.heads, self.dim_head)
.permute(0, 2, 1, 3)
.reshape(b * self.heads, t.shape[1], self.dim_head)
.contiguous(),
lambda t: t.view(b, -1, self.heads, self.dim_head).transpose(1, 2),
(q, k, v),
)
@ -468,10 +484,7 @@ class CrossAttentionPytorch(nn.Module):
if exists(mask):
raise NotImplementedError
out = (
out.unsqueeze(0)
.reshape(b, self.heads, out.shape[1], self.dim_head)
.permute(0, 2, 1, 3)
.reshape(b, out.shape[1], self.heads * self.dim_head)
out.transpose(1, 2).reshape(b, -1, self.heads * self.dim_head)
)
return self.to_out(out)
@ -510,19 +523,58 @@ class BasicTransformerBlock(nn.Module):
return checkpoint(self._forward, (x, context, transformer_options), self.parameters(), self.checkpoint)
def _forward(self, x, context=None, transformer_options={}):
current_index = None
if "current_index" in transformer_options:
current_index = transformer_options["current_index"]
if "patches" in transformer_options:
transformer_patches = transformer_options["patches"]
else:
transformer_patches = {}
n = self.norm1(x)
if self.disable_self_attn:
context_attn1 = context
else:
context_attn1 = None
value_attn1 = None
if "attn1_patch" in transformer_patches:
patch = transformer_patches["attn1_patch"]
if context_attn1 is None:
context_attn1 = n
value_attn1 = context_attn1
for p in patch:
n, context_attn1, value_attn1 = p(current_index, n, context_attn1, value_attn1)
if "tomesd" in transformer_options:
m, u = tomesd.get_functions(x, transformer_options["tomesd"]["ratio"], transformer_options["original_shape"])
n = u(self.attn1(m(n), context=context if self.disable_self_attn else None))
n = u(self.attn1(m(n), context=context_attn1, value=value_attn1))
else:
n = self.attn1(n, context=context if self.disable_self_attn else None)
n = self.attn1(n, context=context_attn1, value=value_attn1)
x += n
if "middle_patch" in transformer_patches:
patch = transformer_patches["middle_patch"]
for p in patch:
x = p(current_index, x)
n = self.norm2(x)
n = self.attn2(n, context=context)
context_attn2 = context
value_attn2 = None
if "attn2_patch" in transformer_patches:
patch = transformer_patches["attn2_patch"]
value_attn2 = context_attn2
for p in patch:
n, context_attn2, value_attn2 = p(current_index, n, context_attn2, value_attn2)
n = self.attn2(n, context=context_attn2, value=value_attn2)
x += n
x = self.ff(self.norm3(x)) + x
if current_index is not None:
transformer_options["current_index"] += 1
return x
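
The new `attn1_patch`/`attn2_patch` hooks above call each registered patch as `p(current_index, q, k, v)`, where the three tensors are what gets fed to `to_q`, `to_k` and `to_v`, and expect the (possibly modified) triple back. A minimal sketch of a custom patch, with a purely illustrative scaling factor:
```
def dampen_cross_attention(current_index, q, k, v):
    # Illustrative only: scale the tensor that will be projected by to_v.
    return q, k, v * 0.9

# Registered through the ModelPatcher API added in comfy/sd.py further down:
# patched = model.clone()
# patched.set_model_attn2_patch(dampen_cross_attention)
```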


@ -782,6 +782,8 @@ class UNetModel(nn.Module):
:return: an [N x C x ...] Tensor of outputs.
"""
transformer_options["original_shape"] = list(x.shape)
transformer_options["current_index"] = 0
assert (y is not None) == (
self.num_classes is not None
), "must specify y if and only if the model is class-conditional"


@ -133,6 +133,7 @@ def unload_model():
#never unload models from GPU on high vram
if vram_state != VRAMState.HIGH_VRAM:
current_loaded_model.model.cpu()
current_loaded_model.model_patches_to("cpu")
current_loaded_model.unpatch_model()
current_loaded_model = None
@ -156,6 +157,8 @@ def load_model_gpu(model):
except Exception as e:
model.unpatch_model()
raise e
model.model_patches_to(get_torch_device())
current_loaded_model = model
if vram_state == VRAMState.CPU:
pass
@ -176,7 +179,7 @@ def load_model_gpu(model):
model_accelerated = True
return current_loaded_model
def load_controlnet_gpu(models):
def load_controlnet_gpu(control_models):
global current_gpu_controlnets
global vram_state
if vram_state == VRAMState.CPU:
@ -186,6 +189,10 @@ def load_controlnet_gpu(models):
#don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after
return
models = []
for m in control_models:
models += m.get_models()
for m in current_gpu_controlnets:
if m not in models:
m.cpu()

comfy/sample.py (new file, 83 lines)

@ -0,0 +1,83 @@
import torch
import comfy.model_management
import comfy.samplers
import math
def prepare_noise(latent_image, seed, skip=0):
"""
creates random noise given a latent image and a seed.
optional arg skip can be used to skip and discard the first `skip` noise generations for a given seed
"""
generator = torch.manual_seed(seed)
for _ in range(skip):
noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
return noise
def prepare_mask(noise_mask, shape, device):
"""ensures noise mask is of proper dimensions"""
noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear")
noise_mask = noise_mask.round()
noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
if noise_mask.shape[0] < shape[0]:
noise_mask = noise_mask.repeat(math.ceil(shape[0] / noise_mask.shape[0]), 1, 1, 1)[:shape[0]]
noise_mask = noise_mask.to(device)
return noise_mask
def broadcast_cond(cond, batch, device):
"""broadcasts conditioning to the batch size"""
copy = []
for p in cond:
t = p[0]
if t.shape[0] < batch:
t = torch.cat([t] * batch)
t = t.to(device)
copy += [[t] + p[1:]]
return copy
def get_models_from_cond(cond, model_type):
models = []
for c in cond:
if model_type in c[1]:
models += [c[1][model_type]]
return models
def load_additional_models(positive, negative):
"""loads additional models in positive and negative conditioning"""
control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control")
gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen")
gligen = [x[1] for x in gligen]
models = control_nets + gligen
comfy.model_management.load_controlnet_gpu(models)
return models
def cleanup_additional_models(models):
"""cleanup additional models that were loaded"""
for m in models:
m.cleanup()
def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None):
device = comfy.model_management.get_torch_device()
if noise_mask is not None:
noise_mask = prepare_mask(noise_mask, noise.shape, device)
real_model = None
comfy.model_management.load_model_gpu(model)
real_model = model.model
noise = noise.to(device)
latent_image = latent_image.to(device)
positive_copy = broadcast_cond(positive, noise.shape[0], device)
negative_copy = broadcast_cond(negative, noise.shape[0], device)
models = load_additional_models(positive, negative)
sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas)
samples = samples.cpu()
cleanup_additional_models(models)
return samples
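
A sketch of calling these helpers directly, mirroring the rewritten `common_ksampler` in nodes.py further down; `model`, `positive`, `negative` and `latent` are assumed to come from the usual loader/encode nodes:
```
import comfy.sample

latent_image = latent["samples"]
skip = latent.get("batch_index", 0)
noise = comfy.sample.prepare_noise(latent_image, seed=42, skip=skip)

samples = comfy.sample.sample(model, noise, steps=20, cfg=8.0,
                              sampler_name="euler", scheduler="normal",
                              positive=positive, negative=negative,
                              latent_image=latent_image, denoise=1.0,
                              noise_mask=latent.get("noise_mask"))
```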


@ -7,23 +7,6 @@ from comfy import model_management
from .ldm.models.diffusion.ddim import DDIMSampler
from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
class CFGDenoiser(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.inner_model = model
def forward(self, x, sigma, uncond, cond, cond_scale):
if len(uncond[0]) == len(cond[0]) and x.shape[0] * x.shape[2] * x.shape[3] < (96 * 96): #TODO check memory instead
x_in = torch.cat([x] * 2)
sigma_in = torch.cat([sigma] * 2)
cond_in = torch.cat([uncond, cond])
uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
else:
cond = self.inner_model(x, sigma, cond=cond)
uncond = self.inner_model(x, sigma, cond=uncond)
return uncond + (cond - uncond) * cond_scale
#The main sampling function shared by all the samplers
#Returns predicted noise
def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None, model_options={}):
@ -36,8 +19,8 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
strength = cond[1]['strength']
adm_cond = None
if 'adm' in cond[1]:
adm_cond = cond[1]['adm']
if 'adm_encoded' in cond[1]:
adm_cond = cond[1]['adm_encoded']
input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
mult = torch.ones_like(input_x) * strength
@ -70,7 +53,21 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
control = None
if 'control' in cond[1]:
control = cond[1]['control']
return (input_x, mult, conditionning, area, control)
patches = None
if 'gligen' in cond[1]:
gligen = cond[1]['gligen']
patches = {}
gligen_type = gligen[0]
gligen_model = gligen[1]
if gligen_type == "position":
gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device)
else:
gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device)
patches['middle_patch'] = [gligen_patch]
return (input_x, mult, conditionning, area, control, patches)
def cond_equal_size(c1, c2):
if c1 is c2:
@ -91,12 +88,21 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
def can_concat_cond(c1, c2):
if c1[0].shape != c2[0].shape:
return False
#control
if (c1[4] is None) != (c2[4] is None):
return False
if c1[4] is not None:
if c1[4] is not c2[4]:
return False
#patches
if (c1[5] is None) != (c2[5] is None):
return False
if (c1[5] is not None):
if c1[5] is not c2[5]:
return False
return cond_equal_size(c1[2], c2[2])
def cond_cat(c_list):
@ -166,6 +172,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
cond_or_uncond = []
area = []
control = None
patches = None
for x in to_batch:
o = to_run.pop(x)
p = o[0]
@ -175,6 +182,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
area += [p[3]]
cond_or_uncond += [o[1]]
control = p[4]
patches = p[5]
batch_chunks = len(cond_or_uncond)
input_x = torch.cat(input_x)
@ -184,8 +192,22 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
if control is not None:
c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'], len(cond_or_uncond))
transformer_options = {}
if 'transformer_options' in model_options:
c['transformer_options'] = model_options['transformer_options']
transformer_options = model_options['transformer_options'].copy()
if patches is not None:
if "patches" in transformer_options:
cur_patches = transformer_options["patches"].copy()
for p in patches:
if p in cur_patches:
cur_patches[p] = cur_patches[p] + patches[p]
else:
cur_patches[p] = patches[p]
else:
transformer_options["patches"] = patches
c['transformer_options'] = transformer_options
output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
del input_x
@ -211,7 +233,10 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
max_total_area = model_management.maximum_batch_area()
cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
return uncond + (cond - uncond) * cond_scale
if "sampler_cfg_function" in model_options:
return model_options["sampler_cfg_function"](cond, uncond, cond_scale)
else:
return uncond + (cond - uncond) * cond_scale
class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser):
@ -306,8 +331,7 @@ def create_cond_with_same_area_if_none(conds, c):
n = c[1].copy()
conds += [[smallest[0], n]]
def apply_control_net_to_equal_area(conds, uncond):
def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
cond_cnets = []
cond_other = []
uncond_cnets = []
@ -315,15 +339,15 @@ def apply_control_net_to_equal_area(conds, uncond):
for t in range(len(conds)):
x = conds[t]
if 'area' not in x[1]:
if 'control' in x[1] and x[1]['control'] is not None:
cond_cnets.append(x[1]['control'])
if name in x[1] and x[1][name] is not None:
cond_cnets.append(x[1][name])
else:
cond_other.append((x, t))
for t in range(len(uncond)):
x = uncond[t]
if 'area' not in x[1]:
if 'control' in x[1] and x[1]['control'] is not None:
uncond_cnets.append(x[1]['control'])
if name in x[1] and x[1][name] is not None:
uncond_cnets.append(x[1][name])
else:
uncond_other.append((x, t))
@ -333,15 +357,16 @@ def apply_control_net_to_equal_area(conds, uncond):
for x in range(len(cond_cnets)):
temp = uncond_other[x % len(uncond_other)]
o = temp[0]
if 'control' in o[1] and o[1]['control'] is not None:
if name in o[1] and o[1][name] is not None:
n = o[1].copy()
n['control'] = cond_cnets[x]
n[name] = uncond_fill_func(cond_cnets, x)
uncond += [[o[0], n]]
else:
n = o[1].copy()
n['control'] = cond_cnets[x]
n[name] = uncond_fill_func(cond_cnets, x)
uncond[temp[1]] = [o[0], n]
def encode_adm(noise_augmentor, conds, batch_size, device):
for t in range(len(conds)):
x = conds[t]
@ -371,10 +396,11 @@ def encode_adm(noise_augmentor, conds, batch_size, device):
else:
adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device)
x[1] = x[1].copy()
x[1]["adm"] = torch.cat([adm_out] * batch_size)
x[1]["adm_encoded"] = torch.cat([adm_out] * batch_size)
return conds
class KSampler:
SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"]
SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
@ -403,7 +429,7 @@ class KSampler:
self.denoise = denoise
self.model_options = model_options
def _calculate_sigmas(self, steps):
def calculate_sigmas(self, steps):
sigmas = None
discard_penultimate_sigma = False
@ -412,13 +438,13 @@ class KSampler:
discard_penultimate_sigma = True
if self.scheduler == "karras":
sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max, device=self.device)
sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
elif self.scheduler == "normal":
sigmas = self.model_wrap.get_sigmas(steps).to(self.device)
sigmas = self.model_wrap.get_sigmas(steps)
elif self.scheduler == "simple":
sigmas = simple_scheduler(self.model_wrap, steps).to(self.device)
sigmas = simple_scheduler(self.model_wrap, steps)
elif self.scheduler == "ddim_uniform":
sigmas = ddim_scheduler(self.model_wrap, steps).to(self.device)
sigmas = ddim_scheduler(self.model_wrap, steps)
else:
print("error invalid scheduler", self.scheduler)
@ -429,15 +455,16 @@ class KSampler:
def set_steps(self, steps, denoise=None):
self.steps = steps
if denoise is None or denoise > 0.9999:
self.sigmas = self._calculate_sigmas(steps)
self.sigmas = self.calculate_sigmas(steps).to(self.device)
else:
new_steps = int(steps/denoise)
sigmas = self._calculate_sigmas(new_steps)
sigmas = self.calculate_sigmas(new_steps).to(self.device)
self.sigmas = sigmas[-(steps + 1):]
def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None):
sigmas = self.sigmas
def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None):
if sigmas is None:
sigmas = self.sigmas
sigma_min = self.sigma_min
if last_step is not None and last_step < (len(sigmas) - 1):
@ -463,7 +490,8 @@ class KSampler:
for c in negative:
create_cond_with_same_area_if_none(positive, c)
apply_control_net_to_equal_area(positive, negative)
apply_empty_x_to_equal_area(positive, negative, 'control', lambda cond_cnets, x: cond_cnets[x])
apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])
if self.model.model.diffusion_model.dtype == torch.float16:
precision_scope = torch.autocast


@ -13,6 +13,7 @@ from .t2i_adapter import adapter
from . import utils
from . import clip_vision
from . import gligen
def load_model_weights(model, sd, verbose=False, load_state_dict_to=[]):
m, u = model.load_state_dict(sd, strict=False)
@ -250,6 +251,32 @@ class ModelPatcher:
def set_model_tomesd(self, ratio):
self.model_options["transformer_options"]["tomesd"] = {"ratio": ratio}
def set_model_sampler_cfg_function(self, sampler_cfg_function):
self.model_options["sampler_cfg_function"] = sampler_cfg_function
def set_model_patch(self, patch, name):
to = self.model_options["transformer_options"]
if "patches" not in to:
to["patches"] = {}
to["patches"][name] = to["patches"].get(name, []) + [patch]
def set_model_attn1_patch(self, patch):
self.set_model_patch(patch, "attn1_patch")
def set_model_attn2_patch(self, patch):
self.set_model_patch(patch, "attn2_patch")
def model_patches_to(self, device):
to = self.model_options["transformer_options"]
if "patches" in to:
patches = to["patches"]
for name in patches:
patch_list = patches[name]
for i in range(len(patch_list)):
if hasattr(patch_list[i], "to"):
patch_list[i] = patch_list[i].to(device)
def model_dtype(self):
return self.model.diffusion_model.dtype
@ -375,7 +402,7 @@ class CLIP:
def tokenize(self, text, return_word_ids=False):
return self.tokenizer.tokenize_with_weights(text, return_word_ids)
def encode_from_tokens(self, tokens):
def encode_from_tokens(self, tokens, return_pooled=False):
if self.layer_idx is not None:
self.cond_stage_model.clip_layer(self.layer_idx)
try:
@ -385,6 +412,10 @@ class CLIP:
except Exception as e:
self.patcher.unpatch_model()
raise e
if return_pooled:
eos_token_index = max(range(len(tokens[0])), key=tokens[0].__getitem__)
pooled = cond[:, eos_token_index]
return cond, pooled
return cond
def encode(self, text):
@ -561,10 +592,10 @@ class ControlNet:
c.strength = self.strength
return c
def get_control_models(self):
def get_models(self):
out = []
if self.previous_controlnet is not None:
out += self.previous_controlnet.get_control_models()
out += self.previous_controlnet.get_models()
out.append(self.control_model)
return out
@ -734,10 +765,10 @@ class T2IAdapter:
del self.cond_hint
self.cond_hint = None
def get_control_models(self):
def get_models(self):
out = []
if self.previous_controlnet is not None:
out += self.previous_controlnet.get_control_models()
out += self.previous_controlnet.get_models()
return out
def load_t2i_adapter(t2i_data):
@ -784,6 +815,13 @@ def load_clip(ckpt_path, embedding_directory=None):
clip.load_from_state_dict(clip_data)
return clip
def load_gligen(ckpt_path):
data = utils.load_torch_file(ckpt_path)
model = gligen.load_gligen(data)
if model_management.should_use_fp16():
model = model.half()
return model
def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=None):
with open(config_path, 'r') as stream:
config = yaml.safe_load(stream)
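
Among the ModelPatcher additions in this file, `set_model_sampler_cfg_function` plugs into the `sampler_cfg_function` branch of `sampling_function` shown earlier: the callback receives the already-computed `cond` and `uncond` predictions plus `cond_scale` and returns the combined noise prediction. A minimal sketch with an illustrative rescale factor:
```
def rescaled_cfg(cond, uncond, cond_scale):
    # Default behaviour is: uncond + (cond - uncond) * cond_scale
    return uncond + (cond - uncond) * (cond_scale * 0.7)

# patched = model.clone()
# patched.set_model_sampler_cfg_function(rescaled_cfg)
```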


@ -1,11 +1,14 @@
import torch
def load_torch_file(ckpt):
def load_torch_file(ckpt, safe_load=False):
if ckpt.lower().endswith(".safetensors"):
import safetensors.torch
sd = safetensors.torch.load_file(ckpt, device="cpu")
else:
pl_sd = torch.load(ckpt, map_location="cpu")
if safe_load:
pl_sd = torch.load(ckpt, map_location="cpu", weights_only=True)
else:
pl_sd = torch.load(ckpt, map_location="cpu")
if "global_step" in pl_sd:
print(f"Global Step: {pl_sd['global_step']}")
if "state_dict" in pl_sd:


@ -4,7 +4,10 @@
from __future__ import annotations
from collections import OrderedDict
from typing import Literal
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
import torch
import torch.nn as nn


@ -0,0 +1,109 @@
import comfy.utils
import folder_paths
import torch
def load_hypernetwork_patch(path, strength):
sd = comfy.utils.load_torch_file(path, safe_load=True)
activation_func = sd.get('activation_func', 'linear')
is_layer_norm = sd.get('is_layer_norm', False)
use_dropout = sd.get('use_dropout', False)
activate_output = sd.get('activate_output', False)
last_layer_dropout = sd.get('last_layer_dropout', False)
valid_activation = {
"linear": torch.nn.Identity,
"relu": torch.nn.ReLU,
"leakyrelu": torch.nn.LeakyReLU,
"elu": torch.nn.ELU,
"swish": torch.nn.Hardswish,
"tanh": torch.nn.Tanh,
"sigmoid": torch.nn.Sigmoid,
}
if activation_func not in valid_activation:
print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout)
return None
out = {}
for d in sd:
try:
dim = int(d)
except:
continue
output = []
for index in [0, 1]:
attn_weights = sd[dim][index]
keys = attn_weights.keys()
linears = filter(lambda a: a.endswith(".weight"), keys)
linears = list(map(lambda a: a[:-len(".weight")], linears))
layers = []
for i in range(len(linears)):
lin_name = linears[i]
last_layer = (i == (len(linears) - 1))
penultimate_layer = (i == (len(linears) - 2))
lin_weight = attn_weights['{}.weight'.format(lin_name)]
lin_bias = attn_weights['{}.bias'.format(lin_name)]
layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0])
layer.load_state_dict({"weight": lin_weight, "bias": lin_bias})
layers.append(layer)
if activation_func != "linear":
if (not last_layer) or (activate_output):
layers.append(valid_activation[activation_func]())
if is_layer_norm:
layers.append(torch.nn.LayerNorm(lin_weight.shape[0]))
if use_dropout:
if (not last_layer) and (not penultimate_layer or last_layer_dropout):
layers.append(torch.nn.Dropout(p=0.3))
output.append(torch.nn.Sequential(*layers))
out[dim] = torch.nn.ModuleList(output)
class hypernetwork_patch:
def __init__(self, hypernet, strength):
self.hypernet = hypernet
self.strength = strength
def __call__(self, current_index, q, k, v):
dim = k.shape[-1]
if dim in self.hypernet:
hn = self.hypernet[dim]
k = k + hn[0](k) * self.strength
v = v + hn[1](v) * self.strength
return q, k, v
def to(self, device):
for d in self.hypernet.keys():
self.hypernet[d] = self.hypernet[d].to(device)
return self
return hypernetwork_patch(out, strength)
class HypernetworkLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "model": ("MODEL",),
"hypernetwork_name": (folder_paths.get_filename_list("hypernetworks"), ),
"strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
}}
RETURN_TYPES = ("MODEL",)
FUNCTION = "load_hypernetwork"
CATEGORY = "loaders"
def load_hypernetwork(self, model, hypernetwork_name, strength):
hypernetwork_path = folder_paths.get_full_path("hypernetworks", hypernetwork_name)
model_hypernetwork = model.clone()
patch = load_hypernetwork_patch(hypernetwork_path, strength)
if patch is not None:
model_hypernetwork.set_model_attn1_patch(patch)
model_hypernetwork.set_model_attn2_patch(patch)
return (model_hypernetwork,)
NODE_CLASS_MAPPINGS = {
"HypernetworkLoader": HypernetworkLoader
}


@ -40,15 +40,13 @@ def get_input_data(inputs, class_def, unique_id, outputs={}, prompt={}, extra_da
input_data_all[x] = unique_id
return input_data_all
def recursive_execute(server, prompt, outputs, current_item, extra_data={}):
def recursive_execute(server, prompt, outputs, current_item, extra_data, executed):
unique_id = current_item
inputs = prompt[unique_id]['inputs']
class_type = prompt[unique_id]['class_type']
class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
if unique_id in outputs:
return []
executed = []
return
for x in inputs:
input_data = inputs[x]
@ -57,7 +55,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}):
input_unique_id = input_data[0]
output_index = input_data[1]
if input_unique_id not in outputs:
executed += recursive_execute(server, prompt, outputs, input_unique_id, extra_data)
recursive_execute(server, prompt, outputs, input_unique_id, extra_data, executed)
input_data_all = get_input_data(inputs, class_def, unique_id, outputs, prompt, extra_data)
if server.client_id is not None:
@ -72,7 +70,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}):
server.send_sync("executed", { "node": unique_id, "output": outputs[unique_id]["ui"] }, server.client_id)
if "result" in outputs[unique_id]:
outputs[unique_id] = outputs[unique_id]["result"]
return executed + [unique_id]
executed.add(unique_id)
def recursive_will_execute(prompt, outputs, current_item):
unique_id = current_item
@ -158,7 +156,7 @@ class PromptExecutor:
recursive_output_delete_if_changed(prompt, self.old_prompt, self.outputs, x)
current_outputs = set(self.outputs.keys())
executed = []
executed = set()
try:
to_execute = []
for x in prompt:
@ -181,12 +179,12 @@ class PromptExecutor:
except:
valid = False
if valid:
executed += recursive_execute(self.server, prompt, self.outputs, x, extra_data)
recursive_execute(self.server, prompt, self.outputs, x, extra_data, executed)
except Exception as e:
print(traceback.format_exc())
to_delete = []
for o in self.outputs:
if o not in current_outputs:
if (o not in current_outputs) and (o not in executed):
to_delete += [o]
if o in self.old_prompt:
d = self.old_prompt.pop(o)
@ -194,11 +192,9 @@ class PromptExecutor:
for o in to_delete:
d = self.outputs.pop(o)
del d
else:
executed = set(executed)
finally:
for x in executed:
self.old_prompt[x] = copy.deepcopy(prompt[x])
finally:
self.server.last_node_id = None
if self.server.client_id is not None:
self.server.send_sync("executing", { "node": None }, self.server.client_id)
@ -249,9 +245,15 @@ def validate_inputs(prompt, item):
if "max" in info[1] and val > info[1]["max"]:
return (False, "Value bigger than max. {}, {}".format(class_type, x))
if isinstance(type_input, list):
if val not in type_input:
return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input))
if hasattr(obj_class, "VALIDATE_INPUTS"):
input_data_all = get_input_data(inputs, obj_class, unique_id)
ret = obj_class.VALIDATE_INPUTS(**input_data_all)
if ret != True:
return (False, "{}, {}".format(class_type, ret))
else:
if isinstance(type_input, list):
if val not in type_input:
return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input))
return (True, "")
def validate_prompt(prompt):
@ -273,7 +275,8 @@ def validate_prompt(prompt):
m = validate_inputs(prompt, o)
valid = m[0]
reason = m[1]
except:
except Exception as e:
print(traceback.format_exc())
valid = False
reason = "Parsing error"
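
The validation change above lets a node class define `VALIDATE_INPUTS`: it is called with the node's input values, returning `True` accepts them, and any other return value is reported as the failure reason. A sketch with a hypothetical node (compare `LoadImage.VALIDATE_INPUTS` further down):
```
class ExampleNode:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"divisor": ("INT", {"default": 8, "min": 1, "max": 64})}}

    RETURN_TYPES = ("INT",)
    FUNCTION = "run"
    CATEGORY = "utils"

    @classmethod
    def VALIDATE_INPUTS(s, divisor):
        if 64 % divisor != 0:
            return "divisor must evenly divide 64, got {}".format(divisor)
        return True

    def run(self, divisor):
        return (64 // divisor,)
```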


@ -13,11 +13,13 @@ a111:
models/ESRGAN
models/SwinIR
embeddings: embeddings
hypernetworks: models/hypernetworks
controlnet: models/ControlNet
#other_ui:
# base_path: path/to/ui
# checkpoints: models/checkpoints
# gligen: models/gligen
# custom_nodes: path/custom_nodes


@ -26,10 +26,13 @@ folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")]
folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"])
folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions)
folder_names_and_paths["gligen"] = ([os.path.join(models_dir, "gligen")], supported_pt_extensions)
folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions)
folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], [])
folder_names_and_paths["hypernetworks"] = ([os.path.join(models_dir, "hypernetworks")], supported_pt_extensions)
output_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output")
temp_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp")
@ -66,6 +69,46 @@ def get_directory_by_type(type_name):
return None
# determine base_dir based on the annotation if name is in 'filename.ext [annotation]' format
# otherwise use default_dir as base_dir
def annotated_filepath(name):
if name.endswith("[output]"):
base_dir = get_output_directory()
name = name[:-9]
elif name.endswith("[input]"):
base_dir = get_input_directory()
name = name[:-8]
elif name.endswith("[temp]"):
base_dir = get_temp_directory()
name = name[:-7]
else:
return name, None
return name, base_dir
def get_annotated_filepath(name, default_dir=None):
name, base_dir = annotated_filepath(name)
if base_dir is None:
if default_dir is not None:
base_dir = default_dir
else:
base_dir = get_input_directory() # fallback path
return os.path.join(base_dir, name)
def exists_annotated_filepath(name):
name, base_dir = annotated_filepath(name)
if base_dir is None:
base_dir = get_input_directory() # fallback path
filepath = os.path.join(base_dir, name)
return os.path.exists(filepath)
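# Illustration (assumed file name) of the annotated-filename convention handled
# by the two helpers above:
#   get_annotated_filepath("photo.png [output]")  ->  <output dir>/photo.png
#   get_annotated_filepath("photo.png [temp]")    ->  <temp dir>/photo.png
#   get_annotated_filepath("photo.png")           ->  <default or input dir>/photo.png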
def add_model_folder_path(folder_name, full_folder_path):
global folder_names_and_paths
if folder_name in folder_names_and_paths:


nodes.py (173 lines changed)

@ -16,6 +16,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "co
import comfy.diffusers_convert
import comfy.samplers
import comfy.sample
import comfy.sd
import comfy.utils
@ -171,24 +172,24 @@ class VAEEncodeForInpaint:
def encode(self, vae, pixels, mask):
x = (pixels.shape[1] // 64) * 64
y = (pixels.shape[2] // 64) * 64
mask = torch.nn.functional.interpolate(mask[None,None,], size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")[0][0]
mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
pixels = pixels.clone()
if pixels.shape[1] != x or pixels.shape[2] != y:
pixels = pixels[:,:x,:y,:]
mask = mask[:x,:y]
mask = mask[:,:,:x,:y]
#grow mask by a few pixels to keep things seamless in latent space
kernel_tensor = torch.ones((1, 1, 6, 6))
mask_erosion = torch.clamp(torch.nn.functional.conv2d((mask.round())[None], kernel_tensor, padding=3), 0, 1)
m = (1.0 - mask.round())
mask_erosion = torch.clamp(torch.nn.functional.conv2d(mask.round(), kernel_tensor, padding=3), 0, 1)
m = (1.0 - mask.round()).squeeze(1)
for i in range(3):
pixels[:,:,:,i] -= 0.5
pixels[:,:,:,i] *= m
pixels[:,:,:,i] += 0.5
t = vae.encode(pixels)
return ({"samples":t, "noise_mask": (mask_erosion[0][:x,:y].round())}, )
return ({"samples":t, "noise_mask": (mask_erosion[:,:,:x,:y].round())}, )
class CheckpointLoader:
@classmethod
@ -490,6 +491,51 @@ class unCLIPConditioning:
c.append(n)
return (c, )
class GLIGENLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "gligen_name": (folder_paths.get_filename_list("gligen"), )}}
RETURN_TYPES = ("GLIGEN",)
FUNCTION = "load_gligen"
CATEGORY = "loaders"
def load_gligen(self, gligen_name):
gligen_path = folder_paths.get_full_path("gligen", gligen_name)
gligen = comfy.sd.load_gligen(gligen_path)
return (gligen,)
class GLIGENTextBoxApply:
@classmethod
def INPUT_TYPES(s):
return {"required": {"conditioning_to": ("CONDITIONING", ),
"clip": ("CLIP", ),
"gligen_textbox_model": ("GLIGEN", ),
"text": ("STRING", {"multiline": True}),
"width": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
"height": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
"x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
"y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
CATEGORY = "conditioning/gligen"
def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y):
c = []
cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True)
for t in conditioning_to:
n = [t[0], t[1].copy()]
position_params = [(cond_pooled, height // 8, width // 8, y // 8, x // 8)]
prev = []
if "gligen" in n[1]:
prev = n[1]['gligen'][2]
n[1]['gligen'] = ("position", gligen_textbox_model, prev + position_params)
c.append(n)
return (c, )
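# Worked example (assumed 512x512 image) of the parameters stored above: with
# width=128, height=64, x=256, y=0, the tuple appended to 'gligen' is
# (cond_pooled, 64 // 8, 128 // 8, 0 // 8, 256 // 8) == (cond_pooled, 8, 16, 0, 32);
# comfy/gligen.py later normalizes these latent-space boxes against the latent size.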
class EmptyLatentImage:
def __init__(self, device="cpu"):
@ -510,6 +556,24 @@ class EmptyLatentImage:
return ({"samples":latent}, )
class LatentFromBatch:
@classmethod
def INPUT_TYPES(s):
return {"required": { "samples": ("LATENT",),
"batch_index": ("INT", {"default": 0, "min": 0, "max": 63}),
}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "rotate"
CATEGORY = "latent"
def rotate(self, samples, batch_index):
s = samples.copy()
s_in = samples["samples"]
batch_index = min(s_in.shape[0] - 1, batch_index)
s["samples"] = s_in[batch_index:batch_index + 1].clone()
s["batch_index"] = batch_index
return (s,)
class LatentUpscale:
upscale_methods = ["nearest-exact", "bilinear", "area"]
@ -676,69 +740,23 @@ class SetLatentNoiseMask:
s["noise_mask"] = mask
return (s,)
def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
latent_image = latent["samples"]
noise_mask = None
device = comfy.model_management.get_torch_device()
latent_image = latent["samples"]
if disable_noise:
noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
else:
noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=torch.manual_seed(seed), device="cpu")
skip = latent["batch_index"] if "batch_index" in latent else 0
noise = comfy.sample.prepare_noise(latent_image, seed, skip)
noise_mask = None
if "noise_mask" in latent:
noise_mask = latent['noise_mask']
noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear")
noise_mask = noise_mask.round()
noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1)
noise_mask = torch.cat([noise_mask] * noise.shape[0])
noise_mask = noise_mask.to(device)
real_model = None
comfy.model_management.load_model_gpu(model)
real_model = model.model
noise = noise.to(device)
latent_image = latent_image.to(device)
positive_copy = []
negative_copy = []
control_nets = []
for p in positive:
t = p[0]
if t.shape[0] < noise.shape[0]:
t = torch.cat([t] * noise.shape[0])
t = t.to(device)
if 'control' in p[1]:
control_nets += [p[1]['control']]
positive_copy += [[t] + p[1:]]
for n in negative:
t = n[0]
if t.shape[0] < noise.shape[0]:
t = torch.cat([t] * noise.shape[0])
t = t.to(device)
if 'control' in n[1]:
control_nets += [n[1]['control']]
negative_copy += [[t] + n[1:]]
control_net_models = []
for x in control_nets:
control_net_models += x.get_control_models()
comfy.model_management.load_controlnet_gpu(control_net_models)
if sampler_name in comfy.samplers.KSampler.SAMPLERS:
sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
else:
#other samplers
pass
samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask)
samples = samples.cpu()
for c in control_nets:
c.cleanup()
noise_mask = latent["noise_mask"]
samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step,
force_full_denoise=force_full_denoise, noise_mask=noise_mask)
out = latent.copy()
out["samples"] = samples
return (out, )
@ -901,8 +919,7 @@ class LoadImage:
RETURN_TYPES = ("IMAGE", "MASK")
FUNCTION = "load_image"
def load_image(self, image):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
i = Image.open(image_path)
image = i.convert("RGB")
image = np.array(image).astype(np.float32) / 255.0
@ -916,20 +933,27 @@ class LoadImage:
@classmethod
def IS_CHANGED(s, image):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
m = hashlib.sha256()
with open(image_path, 'rb') as f:
m.update(f.read())
return m.digest().hex()
@classmethod
def VALIDATE_INPUTS(s, image):
if not folder_paths.exists_annotated_filepath(image):
return "Invalid image file: {}".format(image)
return True
class LoadImageMask:
_color_channels = ["alpha", "red", "green", "blue"]
@classmethod
def INPUT_TYPES(s):
input_dir = folder_paths.get_input_directory()
return {"required":
{"image": (sorted(os.listdir(input_dir)), ),
"channel": (["alpha", "red", "green", "blue"], ),}
"channel": (s._color_channels, ),}
}
CATEGORY = "mask"
@ -937,8 +961,7 @@ class LoadImageMask:
RETURN_TYPES = ("MASK",)
FUNCTION = "load_image"
def load_image(self, image, channel):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
i = Image.open(image_path)
if i.getbands() != ("R", "G", "B", "A"):
i = i.convert("RGBA")
@ -955,13 +978,22 @@ class LoadImageMask:
@classmethod
def IS_CHANGED(s, image, channel):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
m = hashlib.sha256()
with open(image_path, 'rb') as f:
m.update(f.read())
return m.digest().hex()
@classmethod
def VALIDATE_INPUTS(s, image, channel):
if not folder_paths.exists_annotated_filepath(image):
return "Invalid image file: {}".format(image)
if channel not in s._color_channels:
return "Invalid color channel: {}".format(channel)
return True
class ImageScale:
upscale_methods = ["nearest-exact", "bilinear", "area"]
crop_methods = ["disabled", "center"]
@ -1073,6 +1105,7 @@ NODE_CLASS_MAPPINGS = {
"VAELoader": VAELoader,
"EmptyLatentImage": EmptyLatentImage,
"LatentUpscale": LatentUpscale,
"LatentFromBatch": LatentFromBatch,
"SaveImage": SaveImage,
"PreviewImage": PreviewImage,
"LoadImage": LoadImage,
@ -1102,6 +1135,9 @@ NODE_CLASS_MAPPINGS = {
"VAEEncodeTiled": VAEEncodeTiled,
"TomePatchModel": TomePatchModel,
"unCLIPCheckpointLoader": unCLIPCheckpointLoader,
"GLIGENLoader": GLIGENLoader,
"GLIGENTextBoxApply": GLIGENTextBoxApply,
"CheckpointLoader": CheckpointLoader,
"DiffusersLoader": DiffusersLoader,
}
@ -1191,6 +1227,7 @@ def load_custom_nodes():
def init_custom_nodes():
load_custom_nodes()
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_hypernetwork.py"))
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_upscale_model.py"))
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_post_processing.py"))
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_mask.py"))


@ -138,6 +138,11 @@
"# Controlnet Preprocessor nodes by Fannovel16\n",
"#!cd custom_nodes && git clone https://github.com/Fannovel16/comfy_controlnet_preprocessors; cd comfy_controlnet_preprocessors && python install.py\n",
"\n",
"\n",
"# GLIGEN\n",
"#!wget -c https://huggingface.co/comfyanonymous/GLIGEN_pruned_safetensors/resolve/main/gligen_sd14_textbox_pruned_fp16.safetensors -P ./models/gligen/\n",
"\n",
"\n",
"# ESRGAN upscale model\n",
"#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth -P ./models/upscale_models/\n",
"#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x4.pth -P ./models/upscale_models/\n",


@ -112,13 +112,20 @@ class PromptServer():
@routes.post("/upload/image")
async def upload_image(request):
upload_dir = folder_paths.get_input_directory()
post = await request.post()
image = post.get("image")
if post.get("type") is None:
upload_dir = folder_paths.get_input_directory()
elif post.get("type") == "input":
upload_dir = folder_paths.get_input_directory()
elif post.get("type") == "temp":
upload_dir = folder_paths.get_temp_directory()
elif post.get("type") == "output":
upload_dir = folder_paths.get_output_directory()
if not os.path.exists(upload_dir):
os.makedirs(upload_dir)
post = await request.post()
image = post.get("image")
if image and image.file:
filename = image.filename


@ -0,0 +1,144 @@
import { app } from "/scripts/app.js";
// Allows you to edit the attention weight by holding ctrl (or cmd) and using the up/down arrow keys
app.registerExtension({
name: "Comfy.EditAttention",
init() {
const editAttentionDelta = app.ui.settings.addSetting({
id: "Comfy.EditAttention.Delta",
name: "Ctrl+up/down precision",
type: "slider",
attrs: {
min: 0.01,
max: 0.5,
step: 0.01,
},
defaultValue: 0.05,
});
function incrementWeight(weight, delta) {
const floatWeight = parseFloat(weight);
if (isNaN(floatWeight)) return weight;
const newWeight = floatWeight + delta;
if (newWeight < 0) return "0";
return String(Number(newWeight.toFixed(10)));
}
function findNearestEnclosure(text, cursorPos) {
let start = cursorPos, end = cursorPos;
let openCount = 0, closeCount = 0;
// Find opening parenthesis before cursor
while (start >= 0) {
start--;
if (text[start] === "(" && openCount === closeCount) break;
if (text[start] === "(") openCount++;
if (text[start] === ")") closeCount++;
}
if (start < 0) return false;
openCount = 0;
closeCount = 0;
// Find closing parenthesis after cursor
while (end < text.length) {
if (text[end] === ")" && openCount === closeCount) break;
if (text[end] === "(") openCount++;
if (text[end] === ")") closeCount++;
end++;
}
if (end === text.length) return false;
return { start: start + 1, end: end };
}
function addWeightToParentheses(text) {
const parenRegex = /^\((.*)\)$/;
const parenMatch = text.match(parenRegex);
const floatRegex = /:([+-]?(\d*\.)?\d+([eE][+-]?\d+)?)/;
const floatMatch = text.match(floatRegex);
if (parenMatch && !floatMatch) {
return `(${parenMatch[1]}:1.0)`;
} else {
return text;
}
};
function editAttention(event) {
const inputField = event.composedPath()[0];
const delta = parseFloat(editAttentionDelta.value);
if (inputField.tagName !== "TEXTAREA") return;
if (!(event.key === "ArrowUp" || event.key === "ArrowDown")) return;
if (!event.ctrlKey && !event.metaKey) return;
event.preventDefault();
let start = inputField.selectionStart;
let end = inputField.selectionEnd;
let selectedText = inputField.value.substring(start, end);
// If there is no selection, attempt to find the nearest enclosure, or select the current word
if (!selectedText) {
const nearestEnclosure = findNearestEnclosure(inputField.value, start);
if (nearestEnclosure) {
start = nearestEnclosure.start;
end = nearestEnclosure.end;
selectedText = inputField.value.substring(start, end);
} else {
// Select the current word, find the start and end of the word
const delimiters = " .,\\/!?%^*;:{}=-_`~()\r\n\t";
while (!delimiters.includes(inputField.value[start - 1]) && start > 0) {
start--;
}
while (!delimiters.includes(inputField.value[end]) && end < inputField.value.length) {
end++;
}
selectedText = inputField.value.substring(start, end);
if (!selectedText) return;
}
}
// If the selection ends with a space, remove it
if (selectedText[selectedText.length - 1] === " ") {
selectedText = selectedText.substring(0, selectedText.length - 1);
end -= 1;
}
// If there are parentheses left and right of the selection, select them
if (inputField.value[start - 1] === "(" && inputField.value[end] === ")") {
start -= 1;
end += 1;
selectedText = inputField.value.substring(start, end);
}
// If the selection is not enclosed in parentheses, add them
if (selectedText[0] !== "(" || selectedText[selectedText.length - 1] !== ")") {
selectedText = `(${selectedText})`;
}
// If the selection does not have a weight, add a weight of 1.0
selectedText = addWeightToParentheses(selectedText);
// Increment the weight
const weightDelta = event.key === "ArrowUp" ? delta : -delta;
const updatedText = selectedText.replace(/\((.*):(\d+(?:\.\d+)?)\)/, (match, text, weight) => {
weight = incrementWeight(weight, weightDelta);
if (weight == 1) {
return text;
} else {
return `(${text}:${weight})`;
}
});
inputField.setRangeText(updatedText, start, end, "select");
}
window.addEventListener("keydown", editAttention);
},
});

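Not part of the extension code: a minimal sketch of the transformation a single Ctrl+Up press performs, assuming the default 0.05 delta and a selection that the helpers above have already wrapped and weighted.

// Hedged illustration: the same replace pattern the handler uses, applied to a sample selection.
const selection = "(cat:1.0)"; // e.g. after "cat" was selected and addWeightToParentheses ran
const bumped = selection.replace(/\((.*):(\d+(?:\.\d+)?)\)/, (match, text, weight) =>
    `(${text}:${(parseFloat(weight) + 0.05).toFixed(2)})`);
console.log(bumped); // "(cat:1.05)"; Ctrl+Down would subtract the delta instead

The real handler goes two steps further: incrementWeight trims floating-point noise, and the parentheses are dropped entirely once the weight comes back to 1.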
View File

@@ -5,12 +5,6 @@ app.registerExtension({
name: id,
init() {
const keybindListener = function(event) {
const target = event.composedPath()[0];
if (target.tagName === "INPUT" || target.tagName === "TEXTAREA") {
return;
}
const modifierPressed = event.ctrlKey || event.metaKey;
// Queue prompt using ctrl or command + enter
@@ -19,6 +13,12 @@ app.registerExtension({
return;
}
const target = event.composedPath()[0];
if (target.tagName === "INPUT" || target.tagName === "TEXTAREA") {
return;
}
const modifierKeyIdMap = {
"s": "#comfy-save-button",
83: "#comfy-save-button",

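The hunk cuts off after the first two modifierKeyIdMap entries, and the code that consumes the map is outside it, so the dispatch below is only an assumed sketch of the usual pattern: Ctrl or Cmd plus a mapped key clicks the corresponding button.

// Assumed dispatch for illustration only; the extension's real handler is not shown in this hunk.
const modifierKeyIdMap = { s: "#comfy-save-button", 83: "#comfy-save-button" };
function handleShortcut(event) {
    if (!(event.ctrlKey || event.metaKey)) return;
    const selector = modifierKeyIdMap[event.key.toLowerCase()] || modifierKeyIdMap[event.keyCode];
    if (selector) {
        event.preventDefault();
        document.querySelector(selector)?.click(); // e.g. Ctrl+S triggers the Save button
    }
}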
View File

@@ -1,21 +1,72 @@
import { app } from "/scripts/app.js";
import { ComfyWidgets } from "/scripts/widgets.js";
// Adds defaults for quickly adding nodes with middle click on the input/output
app.registerExtension({
name: "Comfy.SlotDefaults",
suggestionsNumber: null,
init() {
LiteGraph.middle_click_slot_add_default_node = true;
LiteGraph.slot_types_default_in = {
MODEL: "CheckpointLoaderSimple",
LATENT: "EmptyLatentImage",
VAE: "VAELoader",
};
LiteGraph.slot_types_default_out = {
LATENT: "VAEDecode",
IMAGE: "SaveImage",
CLIP: "CLIPTextEncode",
};
this.suggestionsNumber = app.ui.settings.addSetting({
id: "Comfy.NodeSuggestions.number",
name: "number of nodes suggestions",
type: "slider",
attrs: {
min: 1,
max: 100,
step: 1,
},
defaultValue: 5,
onChange: (newVal, oldVal) => {
this.setDefaults(newVal);
}
});
},
slot_types_default_out: {},
slot_types_default_in: {},
async beforeRegisterNodeDef(nodeType, nodeData, app) {
var nodeId = nodeData.name;
var inputs = [];
inputs = nodeData["input"]["required"]; //only show required inputs to reduce the mess also not logical to create node with optional inputs
for (const inputKey in inputs) {
var input = (inputs[inputKey]);
if (typeof input[0] !== "string") continue;
var type = input[0]
if (type in ComfyWidgets) {
var customProperties = input[1]
if (!(customProperties?.forceInput)) continue; //ignore widgets that don't force input
}
if (!(type in this.slot_types_default_out)) {
this.slot_types_default_out[type] = ["Reroute"];
}
if (this.slot_types_default_out[type].includes(nodeId)) continue;
this.slot_types_default_out[type].push(nodeId);
}
var outputs = nodeData["output"];
for (const key in outputs) {
var type = outputs[key];
if (!(type in this.slot_types_default_in)) {
this.slot_types_default_in[type] = ["Reroute"];// ["Reroute", "Primitive"]; primitive doesn't always work :'()
}
this.slot_types_default_in[type].push(nodeId);
}
var maxNum = this.suggestionsNumber.value;
this.setDefaults(maxNum);
},
setDefaults(maxNum) {
LiteGraph.slot_types_default_out = {};
LiteGraph.slot_types_default_in = {};
for (const type in this.slot_types_default_out) {
LiteGraph.slot_types_default_out[type] = this.slot_types_default_out[type].slice(0, maxNum);
}
for (const type in this.slot_types_default_in) {
LiteGraph.slot_types_default_in[type] = this.slot_types_default_in[type].slice(0, maxNum);
}
}
});

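For orientation (not part of the extension): after beforeRegisterNodeDef has run for the registered nodes and setDefaults has trimmed the lists, LiteGraph ends up with maps shaped roughly like the sketch below. The entries are examples; the real contents depend on the installed node definitions and the "number of nodes suggestions" setting.

// Illustrative shape only; the lists are examples, not the exhaustive result.
LiteGraph.slot_types_default_out = {
    LATENT: ["Reroute", "VAEDecode"], // dragging from a LATENT output suggests nodes with a required LATENT input
};
LiteGraph.slot_types_default_in = {
    LATENT: ["Reroute", "EmptyLatentImage"], // dragging from a LATENT input suggests nodes that produce a LATENT
};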
View File

@@ -9,7 +9,7 @@ app.registerExtension({
app.ui.settings.addSetting({
id: "Comfy.SnapToGrid.GridSize",
name: "Grid Size",
type: "number",
type: "slider",
attrs: {
min: 1,
max: 500,

View File

@@ -35,7 +35,7 @@ class ComfyApi extends EventTarget {
}
let opened = false;
let existingSession = sessionStorage["Comfy.SessionId"] || "";
let existingSession = window.name;
if (existingSession) {
existingSession = "?clientId=" + existingSession;
}
@@ -75,7 +75,7 @@ class ComfyApi extends EventTarget {
case "status":
if (msg.data.sid) {
this.clientId = msg.data.sid;
sessionStorage["Comfy.SessionId"] = this.clientId;
window.name = this.clientId;
}
this.dispatchEvent(new CustomEvent("status", { detail: msg.data.status }));
break;

View File

@@ -20,6 +20,12 @@ export class ComfyApp {
*/
#processingQueue = false;
/**
* Content Clipboard
* @type {serialized node object}
*/
static clipspace = null;
constructor() {
this.ui = new ComfyUI(this);
@@ -130,6 +136,83 @@ export class ComfyApp {
);
}
}
options.push(
{
content: "Copy (Clipspace)",
callback: (obj) => {
var widgets = null;
if(this.widgets) {
widgets = this.widgets.map(({ type, name, value }) => ({ type, name, value }));
}
let img = new Image();
var imgs = undefined;
if(this.imgs != undefined) {
img.src = this.imgs[0].src;
imgs = [img];
}
ComfyApp.clipspace = {
'widgets': widgets,
'imgs': imgs,
'original_imgs': imgs,
'images': this.images
};
}
});
if(ComfyApp.clipspace != null) {
options.push(
{
content: "Paste (Clipspace)",
callback: () => {
if(ComfyApp.clipspace != null) {
if(ComfyApp.clipspace.widgets != null && this.widgets != null) {
ComfyApp.clipspace.widgets.forEach(({ type, name, value }) => {
const prop = Object.values(this.widgets).find(obj => obj.type === type && obj.name === name);
if (prop) {
prop.callback(value);
}
});
}
// image paste
if(ComfyApp.clipspace.imgs != undefined && this.imgs != undefined && this.widgets != null) {
var filename = "";
if(this.images && ComfyApp.clipspace.images) {
this.images = ComfyApp.clipspace.images;
}
if(ComfyApp.clipspace.images != undefined) {
const clip_image = ComfyApp.clipspace.images[0];
if(clip_image.subfolder != '')
filename = `${clip_image.subfolder}/`;
filename += `${clip_image.filename} [${clip_image.type}]`;
}
else if(ComfyApp.clipspace.widgets != undefined) {
const index_in_clip = ComfyApp.clipspace.widgets.findIndex(obj => obj.name === 'image');
if(index_in_clip >= 0) {
filename = `${ComfyApp.clipspace.widgets[index_in_clip].value}`;
}
}
const index = this.widgets.findIndex(obj => obj.name === 'image');
if(index >= 0 && filename != "" && ComfyApp.clipspace.imgs != undefined) {
this.imgs = ComfyApp.clipspace.imgs;
this.widgets[index].value = filename;
if(this.widgets_values != undefined) {
this.widgets_values[index] = filename;
}
}
}
this.trigger('changed');
}
}
}
);
}
};
}

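For reference, a sketch (filenames and values assumed) of what ComfyApp.clipspace holds after "Copy (Clipspace)" on an image node; the keys mirror the copy callback above, and the preview URL shape follows the image widget code elsewhere in this commit.

// Illustrative contents only; every concrete value here is assumed.
const img = new Image();
img.src = "/view?filename=example.png&type=input";
ComfyApp.clipspace = {
    widgets: [{ type: "combo", name: "image", value: "example.png" }],
    imgs: [img],
    original_imgs: [img],
    images: [{ filename: "example.png", subfolder: "", type: "input" }],
};
// Pasting onto another image node then rewrites its "image" widget to "example.png [input]"
// and reuses the copied preview, per the paste callback above.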
View File

@@ -270,6 +270,30 @@ class ComfySettingsDialog extends ComfyDialog {
]),
]);
break;
case "slider":
element = $el("div", [
$el("label", { textContent: name }, [
$el("input", {
type: "range",
value,
oninput: (e) => {
setter(e.target.value);
e.target.nextElementSibling.value = e.target.value;
},
...attrs
}),
$el("input", {
type: "number",
value,
oninput: (e) => {
setter(e.target.value);
e.target.previousElementSibling.value = e.target.value;
},
...attrs
}),
]),
]);
break;
default:
console.warn("Unsupported setting type, defaulting to text");
element = $el("div", [

View File

@@ -272,6 +272,9 @@ export const ComfyWidgets = {
app.graph.setDirtyCanvas(true);
};
img.src = `/view?filename=${name}&type=input`;
if ((node.size[1] - node.imageOffset) < 100) {
node.size[1] = 250 + node.imageOffset;
}
}
// Add our own callback to the combo widget to render an image when it changes

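Worked numbers for the resize guard above (figures assumed for illustration): with node.imageOffset at 70 and a node height of 120, the visible image area is 120 - 70 = 50 px, which is under the 100 px threshold, so the height is reset to 250 + 70 = 320 px.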
View File

@@ -217,6 +217,14 @@ button.comfy-queue-btn {
z-index: 99;
}
.comfy-modal.comfy-settings input[type="range"] {
vertical-align: middle;
}
.comfy-modal.comfy-settings input[type="range"] + input[type="number"] {
width: 3.5em;
}
.comfy-modal input,
.comfy-modal select {
color: var(--input-text);