diff --git a/.ci/update_windows/update.py b/.ci/update_windows/update.py
index 6067d1a12..127247b2f 100755
--- a/.ci/update_windows/update.py
+++ b/.ci/update_windows/update.py
@@ -104,7 +104,7 @@ if self_update and not files_equal(update_py_path, repo_update_py_path) and file
if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
import subprocess
try:
- subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', repo_req_path])
+ subprocess.check_call([sys.executable, '-s', '-m', 'pip', 'install', '-r', repo_req_path])
shutil.copy(repo_req_path, req_path)
except:
pass
diff --git a/.github/workflows/windows_release_dependencies.yml b/.github/workflows/windows_release_dependencies.yml
index 110c49e1a..681602bdd 100644
--- a/.github/workflows/windows_release_dependencies.yml
+++ b/.github/workflows/windows_release_dependencies.yml
@@ -24,7 +24,7 @@ on:
description: 'python patch version'
required: true
type: string
- default: "6"
+ default: "8"
# push:
# branches:
# - master
diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml
index 48fdc4caa..506ab541b 100644
--- a/.github/workflows/windows_release_nightly_pytorch.yml
+++ b/.github/workflows/windows_release_nightly_pytorch.yml
@@ -19,7 +19,7 @@ on:
description: 'python patch version'
required: true
type: string
- default: "1"
+ default: "2"
# push:
# branches:
# - master
@@ -49,7 +49,7 @@ jobs:
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
- python -m pip wheel torch torchvision --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
+ python -m pip wheel torch torchvision mpmath==1.3.0 --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
ls ../temp_wheel_dir
./python.exe -s -m pip install --pre ../temp_wheel_dir/*
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
diff --git a/.github/workflows/windows_release_package.yml b/.github/workflows/windows_release_package.yml
index 87d37c24d..4e3cdabd2 100644
--- a/.github/workflows/windows_release_package.yml
+++ b/.github/workflows/windows_release_package.yml
@@ -19,7 +19,7 @@ on:
description: 'python patch version'
required: true
type: string
- default: "6"
+ default: "8"
# push:
# branches:
# - master
diff --git a/comfy/cmd/cuda_malloc.py b/comfy/cmd/cuda_malloc.py
index 4cae2296f..f41647785 100644
--- a/comfy/cmd/cuda_malloc.py
+++ b/comfy/cmd/cuda_malloc.py
@@ -1,6 +1,7 @@
import os
import importlib.util
from ..cli_args import args
+import subprocess
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
def get_gpu_names():
@@ -34,7 +35,15 @@ def get_gpu_names():
return gpu_names
return enum_display_devices()
else:
- return set()
+        gpu_names = set()
+        try:
+            # One output line per GPU; the text before ' (UUID' is kept as the name.
+            for line in subprocess.check_output(['nvidia-smi', '-L']).split(b'\n'):
+                if line:
+                    gpu_names.add(line.decode('utf-8').split(' (UUID')[0])
+        except (OSError, subprocess.CalledProcessError):
+            pass  # best effort: a missing or failing nvidia-smi just yields the names found so far
+        return gpu_names
blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
"GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
diff --git a/comfy/controlnet.py b/comfy/controlnet.py
index 7e5fd3a02..5c569c31f 100644
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -10,6 +10,7 @@ from . import ops
from .cldm import cldm
from .t2i_adapter import adapter
+from .ldm.cascade import controlnet
def broadcast_image_to(tensor, target_batch_size, batched_number):
@@ -38,6 +39,8 @@ class ControlBase:
self.timestep_percent_range = (0.0, 1.0)
self.global_average_pooling = False
self.timestep_range = None
+ self.compression_ratio = 8
+ self.upscale_algorithm = 'nearest-exact'
if device is None:
device = model_management.get_torch_device()
@@ -78,6 +81,8 @@ class ControlBase:
c.strength = self.strength
c.timestep_percent_range = self.timestep_percent_range
c.global_average_pooling = self.global_average_pooling
+ c.compression_ratio = self.compression_ratio
+ c.upscale_algorithm = self.upscale_algorithm
def inference_memory_requirements(self, dtype):
if self.previous_controlnet is not None:
@@ -159,11 +164,11 @@ class ControlNet(ControlBase):
dtype = self.manual_cast_dtype
output_dtype = x_noisy.dtype
- if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
+ if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
if self.cond_hint is not None:
del self.cond_hint
self.cond_hint = None
- self.cond_hint = utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(dtype).to(self.device)
+ self.cond_hint = utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio, self.upscale_algorithm, "center").to(dtype).to(self.device)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
@@ -288,13 +293,13 @@ class ControlLora(ControlNet):
for k in sd:
weight = sd[k]
try:
- utils.set_attr(self.control_model, k, weight)
+ utils.set_attr_param(self.control_model, k, weight)
except:
pass
for k in self.control_weights:
if k not in {"lora_controlnet"}:
- utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(model_management.get_torch_device()))
+ utils.set_attr_param(self.control_model, k, self.control_weights[k].to(dtype).to(model_management.get_torch_device()))
def copy(self):
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
@@ -433,11 +438,13 @@ def load_controlnet(ckpt_path, model=None):
return control
class T2IAdapter(ControlBase):
- def __init__(self, t2i_model, channels_in, device=None):
+ def __init__(self, t2i_model, channels_in, compression_ratio, upscale_algorithm, device=None):
super().__init__(device)
self.t2i_model = t2i_model
self.channels_in = channels_in
self.control_input = None
+ self.compression_ratio = compression_ratio
+ self.upscale_algorithm = upscale_algorithm
def scale_image_to(self, width, height):
unshuffle_amount = self.t2i_model.unshuffle_amount
@@ -457,13 +464,13 @@ class T2IAdapter(ControlBase):
else:
return None
- if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
+ if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
if self.cond_hint is not None:
del self.cond_hint
self.control_input = None
self.cond_hint = None
- width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8)
- self.cond_hint = utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device)
+ width, height = self.scale_image_to(x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio)
+ self.cond_hint = utils.common_upscale(self.cond_hint_original, width, height, self.upscale_algorithm, "center").float().to(self.device)
if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
@@ -482,11 +489,14 @@ class T2IAdapter(ControlBase):
return self.control_merge(control_input, mid, control_prev, x_noisy.dtype)
def copy(self):
- c = T2IAdapter(self.t2i_model, self.channels_in)
+ c = T2IAdapter(self.t2i_model, self.channels_in, self.compression_ratio, self.upscale_algorithm)
self.copy_to(c)
return c
def load_t2i_adapter(t2i_data):
+ compression_ratio = 8
+ upscale_algorithm = 'nearest-exact'
+
if 'adapter' in t2i_data:
t2i_data = t2i_data['adapter']
if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format
@@ -514,8 +524,17 @@ def load_t2i_adapter(t2i_data):
if cin == 256 or cin == 768:
xl = True
model_ad = adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
+ elif "backbone.0.0.weight" in keys:
+ model_ad = controlnet.ControlNet(c_in=t2i_data['backbone.0.0.weight'].shape[1], proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
+ compression_ratio = 32
+ upscale_algorithm = 'bilinear'
+ elif "backbone.10.blocks.0.weight" in keys:
+ model_ad = controlnet.ControlNet(c_in=t2i_data['backbone.0.weight'].shape[1], bottleneck_mode="large", proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
+ compression_ratio = 1
+ upscale_algorithm = 'nearest-exact'
else:
return None
+
missing, unexpected = model_ad.load_state_dict(t2i_data)
if len(missing) > 0:
print("t2i missing", missing)
@@ -523,4 +542,4 @@ def load_t2i_adapter(t2i_data):
if len(unexpected) > 0:
print("t2i unexpected", unexpected)
- return T2IAdapter(model_ad, model_ad.input_channels)
+ return T2IAdapter(model_ad, model_ad.input_channels, compression_ratio, upscale_algorithm)
diff --git a/comfy/ldm/cascade/controlnet.py b/comfy/ldm/cascade/controlnet.py
new file mode 100644
index 000000000..5dac59394
--- /dev/null
+++ b/comfy/ldm/cascade/controlnet.py
@@ -0,0 +1,115 @@
+"""
+    This file is part of ComfyUI.
+    Copyright (C) 2024 Stability AI
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+
+import torch
+import torchvision
+from torch import nn
+from .common import LayerNorm2d_op
+
+
+class CNetResBlock(nn.Module):
+    """Residual block: two (LayerNorm2d, GELU, 3x3 conv) stages added back onto the input."""
+
+    def __init__(self, c, dtype=None, device=None, operations=None):
+        super().__init__()
+        self.blocks = nn.Sequential(
+            LayerNorm2d_op(operations)(c, dtype=dtype, device=device),
+            nn.GELU(),
+            # dtype/device are forwarded to the convolutions too, like every other layer here.
+            operations.Conv2d(c, c, kernel_size=3, padding=1, dtype=dtype, device=device),
+            LayerNorm2d_op(operations)(c, dtype=dtype, device=device),
+            nn.GELU(),
+            operations.Conv2d(c, c, kernel_size=3, padding=1, dtype=dtype, device=device),
+        )
+
+    def forward(self, x):
+        return x + self.blocks(x)
+
+
+class ControlNet(nn.Module):
+    """Control adapter: a backbone plus one 1x1-conv projection head per entry of `proj_blocks`.
+
+    Args:
+        c_in: channel count of the control input.
+        c_proj: output channels of every projection head.
+        proj_blocks: positions in the list returned by `forward` that receive a
+            projection; all other positions hold None. None means no projections.
+        bottleneck_mode: 'effnet' (used when None), 'simple' or 'large'.
+        dtype, device: forwarded to the layers created through `operations`.
+        operations: namespace providing `Conv2d`; defaults to `torch.nn`.
+
+    Raises:
+        ValueError: if `bottleneck_mode` is not one of the three known modes.
+    """
+
+    def __init__(self, c_in=3, c_proj=2048, proj_blocks=None, bottleneck_mode=None, dtype=None, device=None, operations=nn):
+        super().__init__()
+        if bottleneck_mode is None:
+            bottleneck_mode = 'effnet'
+        # The default proj_blocks=None would fail in len()/max(); treat it as "no projections".
+        self.proj_blocks = [] if proj_blocks is None else proj_blocks
+        if bottleneck_mode == 'effnet':
+            embd_channels = 1280
+            self.backbone = torchvision.models.efficientnet_v2_s().features.eval()
+            if c_in != 3:
+                # Swap the stem conv for one taking c_in channels, reusing the original weights where they fit.
+                in_weights = self.backbone[0][0].weight.data
+                self.backbone[0][0] = operations.Conv2d(c_in, 24, kernel_size=3, stride=2, bias=False, dtype=dtype, device=device)
+                if c_in > 3:
+                    # nn.init.constant_(self.backbone[0][0].weight, 0)
+                    self.backbone[0][0].weight.data[:, :3] = in_weights[:, :3].clone()
+                else:
+                    self.backbone[0][0].weight.data = in_weights[:, :c_in].clone()
+        elif bottleneck_mode == 'simple':
+            embd_channels = c_in
+            self.backbone = nn.Sequential(
+                operations.Conv2d(embd_channels, embd_channels * 4, kernel_size=3, padding=1, dtype=dtype, device=device),
+                nn.LeakyReLU(0.2, inplace=True),
+                operations.Conv2d(embd_channels * 4, embd_channels, kernel_size=3, padding=1, dtype=dtype, device=device),
+            )
+        elif bottleneck_mode == 'large':
+            self.backbone = nn.Sequential(
+                operations.Conv2d(c_in, 4096 * 4, kernel_size=1, dtype=dtype, device=device),
+                nn.LeakyReLU(0.2, inplace=True),
+                operations.Conv2d(4096 * 4, 1024, kernel_size=1, dtype=dtype, device=device),
+                *[CNetResBlock(1024, dtype=dtype, device=device, operations=operations) for _ in range(8)],
+                operations.Conv2d(1024, 1280, kernel_size=1, dtype=dtype, device=device),
+            )
+            embd_channels = 1280
+        else:
+            raise ValueError(f'Unknown bottleneck mode: {bottleneck_mode}')
+        self.projections = nn.ModuleList()
+        for _ in range(len(self.proj_blocks)):
+            self.projections.append(nn.Sequential(
+                operations.Conv2d(embd_channels, embd_channels, kernel_size=1, bias=False, dtype=dtype, device=device),
+                nn.LeakyReLU(0.2, inplace=True),
+                operations.Conv2d(embd_channels, c_proj, kernel_size=1, bias=False, dtype=dtype, device=device),
+            ))
+            # nn.init.constant_(self.projections[-1][-1].weight, 0) # zero output projection
+        # input_channels and unshuffle_amount are read by the T2IAdapter wrapper in comfy/controlnet.py.
+        self.xl = False
+        self.input_channels = c_in
+        self.unshuffle_amount = 8
+
+    def forward(self, x):
+        x = self.backbone(x)
+        # Sparse result list: index -> projection, None where no head is attached.
+        proj_outputs = [None for _ in range(max(self.proj_blocks, default=-1) + 1)]
+        for i, idx in enumerate(self.proj_blocks):
+            proj_outputs[idx] = self.projections[i](x)
+        return proj_outputs
diff --git a/comfy/ldm/cascade/stage_c.py b/comfy/ldm/cascade/stage_c.py
index 08e33aded..67c1e52b6 100644
--- a/comfy/ldm/cascade/stage_c.py
+++ b/comfy/ldm/cascade/stage_c.py
@@ -194,10 +194,10 @@ class StageC(nn.Module):
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
ResBlock)):
if cnet is not None:
- next_cnet = cnet()
+ next_cnet = cnet.pop()
if next_cnet is not None:
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
- align_corners=True)
+ align_corners=True).to(x.dtype)
x = block(x)
elif isinstance(block, AttnBlock) or (
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
@@ -228,10 +228,10 @@ class StageC(nn.Module):
x = torch.nn.functional.interpolate(x, skip.shape[-2:], mode='bilinear',
align_corners=True)
if cnet is not None:
- next_cnet = cnet()
+ next_cnet = cnet.pop()
if next_cnet is not None:
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
- align_corners=True)
+ align_corners=True).to(x.dtype)
x = block(x, skip)
elif isinstance(block, AttnBlock) or (
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
@@ -248,7 +248,7 @@ class StageC(nn.Module):
x = upscaler(x)
return x
- def forward(self, x, r, clip_text, clip_text_pooled, clip_img, cnet=None, **kwargs):
+ def forward(self, x, r, clip_text, clip_text_pooled, clip_img, control=None, **kwargs):
# Process the conditioning embeddings
r_embed = self.gen_r_embedding(r).to(dtype=x.dtype)
for c in self.t_conds:
@@ -256,10 +256,13 @@ class StageC(nn.Module):
r_embed = torch.cat([r_embed, self.gen_r_embedding(t_cond).to(dtype=x.dtype)], dim=1)
clip = self.gen_c_embeddings(clip_text, clip_text_pooled, clip_img)
+ if control is not None:
+ cnet = control.get("input")
+ else:
+ cnet = None
+
# Model Blocks
x = self.embedding(x)
- if cnet is not None:
- cnet = ControlNetDeliverer(cnet)
level_outputs = self._down_encode(x, r_embed, clip, cnet)
x = self._up_decode(level_outputs, r_embed, clip, cnet)
return self.clf(x)
diff --git a/comfy/model_base.py b/comfy/model_base.py
index 2b902fad0..6f4aae681 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -166,6 +166,10 @@ class BaseModel(torch.nn.Module):
if cross_attn_cnet is not None:
out['crossattn_controlnet'] = conds.CONDCrossAttn(cross_attn_cnet)
+        c_concat = kwargs.get("noise_concat", None)
+        if c_concat is not None:
+            out['c_concat'] = conds.CONDNoiseShape(c_concat)  # wrap the tensor supplied as noise_concat
+
return out
def load_model_weights(self, sd, unet_prefix=""):
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 8e79fa03b..9dcffdca8 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -763,7 +763,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
#FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
#when the model doesn't actually fit on the card
#TODO: actually test if GP106 and others have the same type of behavior
- nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"]
+ nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050", "p40", "p100", "p6", "p4"]
for x in nvidia_10_series:
if x in props.name.lower():
fp16_works = True
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
index 814537171..fcc8fe5b0 100644
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -67,6 +67,9 @@ class ModelPatcher:
def set_model_unet_function_wrapper(self, unet_wrapper_function):
self.model_options["model_function_wrapper"] = unet_wrapper_function
+ def set_model_denoise_mask_function(self, denoise_mask_function):
+ self.model_options["denoise_mask_function"] = denoise_mask_function
+
def set_model_patch(self, patch, name):
to = self.model_options["transformer_options"]
if "patches" not in to:
@@ -176,10 +179,9 @@ class ModelPatcher:
def patch_model(self, device_to=None, patch_weights=True):
for k in self.object_patches:
- old = getattr(self.model, k)
+ old = utils.set_attr(self.model, k, self.object_patches[k])
if k not in self.object_patches_backup:
self.object_patches_backup[k] = old
- setattr(self.model, k, self.object_patches[k])
if patch_weights:
model_sd = self.model_state_dict()
@@ -203,7 +205,7 @@ class ModelPatcher:
if inplace_update:
utils.copy_to_param(self.model, key, out_weight)
else:
- utils.set_attr(self.model, key, out_weight)
+ utils.set_attr_param(self.model, key, out_weight)
del temp_weight
if device_to is not None:
@@ -342,7 +344,7 @@ class ModelPatcher:
utils.copy_to_param(self.model, k, self.backup[k])
else:
for k in keys:
- utils.set_attr(self.model, k, self.backup[k])
+ utils.set_attr_param(self.model, k, self.backup[k])
self.backup = {}
@@ -352,6 +354,6 @@ class ModelPatcher:
keys = list(self.object_patches_backup.keys())
for k in keys:
- setattr(self.model, k, self.object_patches_backup[k])
+ utils.set_attr(self.model, k, self.object_patches_backup[k])
self.object_patches_backup = {}
diff --git a/comfy/model_sampling.py b/comfy/model_sampling.py
index e7f8bc6a3..d325f76d9 100644
--- a/comfy/model_sampling.py
+++ b/comfy/model_sampling.py
@@ -11,6 +11,14 @@ class EPS:
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
return model_input - model_output * sigma
+    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
+        """Return noise * sigma (sqrt(1 + sigma^2) when max_denoise), plus latent_image if it is not None."""
+        scale = torch.sqrt(1.0 + sigma ** 2.0) if max_denoise else sigma
+        noise = noise * scale
+        # KSAMPLER.sample defaults latent_image to None; skip the add in that case.
+        if latent_image is not None:
+            noise += latent_image
+        return noise
class V_PREDICTION(EPS):
def calculate_denoised(self, sigma, model_output, model_input):
diff --git a/comfy/samplers.py b/comfy/samplers.py
index 2c6291f9c..30fbc0c96 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -275,15 +275,16 @@ class CFGNoisePredictor(torch.nn.Module):
return self.apply_model(*args, **kwargs)
class KSamplerX0Inpaint(torch.nn.Module):
- def __init__(self, model):
+ def __init__(self, model, sigmas):
super().__init__()
self.inner_model = model
+ self.sigmas = sigmas
def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None):
if denoise_mask is not None:
if "denoise_mask_function" in model_options:
- denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask)
+ denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask, extra_options={"model": self.inner_model, "sigmas": self.sigmas})
latent_mask = 1. - denoise_mask
- x = x * denoise_mask + (self.latent_image + self.noise * sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))) * latent_mask
+ x = x * denoise_mask + self.inner_model.inner_model.model_sampling.noise_scaling(sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1)), self.noise, self.latent_image) * latent_mask
out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, model_options=model_options, seed=seed)
if denoise_mask is not None:
out = out * denoise_mask + self.latent_image * latent_mask
@@ -531,7 +532,7 @@ class KSAMPLER(Sampler):
def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
extra_args["denoise_mask"] = denoise_mask
- model_k = KSamplerX0Inpaint(model_wrap)
+ model_k = KSamplerX0Inpaint(model_wrap, sigmas)
model_k.latent_image = latent_image
if self.inpaint_options.get("random", False): #TODO: Should this be the default?
generator = torch.manual_seed(extra_args.get("seed", 41) + 1)
@@ -539,19 +540,13 @@ class KSAMPLER(Sampler):
else:
model_k.noise = noise
- if self.max_denoise(model_wrap, sigmas):
- noise = noise * torch.sqrt(1.0 + sigmas[0] ** 2.0)
- else:
- noise = noise * sigmas[0]
+ noise = model_wrap.inner_model.model_sampling.noise_scaling(sigmas[0], noise, latent_image, self.max_denoise(model_wrap, sigmas))
k_callback = None
total_steps = len(sigmas) - 1
if callback is not None:
k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps)
- if latent_image is not None:
- noise += latent_image
-
samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options)
return samples
diff --git a/comfy/utils.py b/comfy/utils.py
index 8dc8920c9..80695c0f6 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -296,8 +296,11 @@ def set_attr(obj, attr, value):
for name in attrs[:-1]:
obj = getattr(obj, name)
prev = getattr(obj, attrs[-1])
- setattr(obj, attrs[-1], torch.nn.Parameter(value, requires_grad=False))
- del prev
+ setattr(obj, attrs[-1], value)
+ return prev
+
+def set_attr_param(obj, attr, value):
+ return set_attr(obj, attr, torch.nn.Parameter(value, requires_grad=False))
def copy_to_param(obj, attr, value):
# inplace update tensor instead of replacing it
diff --git a/comfy_extras/nodes/nodes_canny.py b/comfy_extras/nodes/nodes_canny.py
index 730dded5f..8138b5f73 100644
--- a/comfy_extras/nodes/nodes_canny.py
+++ b/comfy_extras/nodes/nodes_canny.py
@@ -5,275 +5,7 @@ import torch
import torch.nn.functional as F
import comfy.model_management
-def get_canny_nms_kernel(device=None, dtype=None):
- """Utility function that returns 3x3 kernels for the Canny Non-maximal suppression."""
- return torch.tensor(
- [
- [[[0.0, 0.0, 0.0], [0.0, 1.0, -1.0], [0.0, 0.0, 0.0]]],
- [[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, -1.0]]],
- [[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, -1.0, 0.0]]],
- [[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [-1.0, 0.0, 0.0]]],
- [[[0.0, 0.0, 0.0], [-1.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
- [[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
- [[[0.0, -1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
- [[[0.0, 0.0, -1.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
- ],
- device=device,
- dtype=dtype,
- )
-
-
-def get_hysteresis_kernel(device=None, dtype=None):
- """Utility function that returns the 3x3 kernels for the Canny hysteresis."""
- return torch.tensor(
- [
- [[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0]]],
- [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]],
- [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 1.0, 0.0]]],
- [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]],
- [[[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
- [[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
- [[[0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
- [[[0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
- ],
- device=device,
- dtype=dtype,
- )
-
-def gaussian_blur_2d(img, kernel_size, sigma):
- ksize_half = (kernel_size - 1) * 0.5
-
- x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size)
-
- pdf = torch.exp(-0.5 * (x / sigma).pow(2))
-
- x_kernel = pdf / pdf.sum()
- x_kernel = x_kernel.to(device=img.device, dtype=img.dtype)
-
- kernel2d = torch.mm(x_kernel[:, None], x_kernel[None, :])
- kernel2d = kernel2d.expand(img.shape[-3], 1, kernel2d.shape[0], kernel2d.shape[1])
-
- padding = [kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2]
-
- img = torch.nn.functional.pad(img, padding, mode="reflect")
- img = torch.nn.functional.conv2d(img, kernel2d, groups=img.shape[-3])
-
- return img
-
-def get_sobel_kernel2d(device=None, dtype=None):
- kernel_x = torch.tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=device, dtype=dtype)
- kernel_y = kernel_x.transpose(0, 1)
- return torch.stack([kernel_x, kernel_y])
-
-def spatial_gradient(input, normalized: bool = True):
- r"""Compute the first order image derivative in both x and y using a Sobel operator.
- .. image:: _static/img/spatial_gradient.png
- Args:
- input: input image tensor with shape :math:`(B, C, H, W)`.
- mode: derivatives modality, can be: `sobel` or `diff`.
- order: the order of the derivatives.
- normalized: whether the output is normalized.
- Return:
- the derivatives of the input feature map. with shape :math:`(B, C, 2, H, W)`.
- .. note::
- See a working example `here `__.
- Examples:
- >>> input = torch.rand(1, 3, 4, 4)
- >>> output = spatial_gradient(input) # 1x3x2x4x4
- >>> output.shape
- torch.Size([1, 3, 2, 4, 4])
- """
- # KORNIA_CHECK_IS_TENSOR(input)
- # KORNIA_CHECK_SHAPE(input, ['B', 'C', 'H', 'W'])
-
- # allocate kernel
- kernel = get_sobel_kernel2d(device=input.device, dtype=input.dtype)
- if normalized:
- kernel = normalize_kernel2d(kernel)
-
- # prepare kernel
- b, c, h, w = input.shape
- tmp_kernel = kernel[:, None, ...]
-
- # Pad with "replicate for spatial dims, but with zeros for channel
- spatial_pad = [kernel.size(1) // 2, kernel.size(1) // 2, kernel.size(2) // 2, kernel.size(2) // 2]
- out_channels: int = 2
- padded_inp = torch.nn.functional.pad(input.reshape(b * c, 1, h, w), spatial_pad, 'replicate')
- out = F.conv2d(padded_inp, tmp_kernel, groups=1, padding=0, stride=1)
- return out.reshape(b, c, out_channels, h, w)
-
-def rgb_to_grayscale(image, rgb_weights = None):
- r"""Convert a RGB image to grayscale version of image.
-
- .. image:: _static/img/rgb_to_grayscale.png
-
- The image data is assumed to be in the range of (0, 1).
-
- Args:
- image: RGB image to be converted to grayscale with shape :math:`(*,3,H,W)`.
- rgb_weights: Weights that will be applied on each channel (RGB).
- The sum of the weights should add up to one.
- Returns:
- grayscale version of the image with shape :math:`(*,1,H,W)`.
-
- .. note::
- See a working example `here `__.
-
- Example:
- >>> input = torch.rand(2, 3, 4, 5)
- >>> gray = rgb_to_grayscale(input) # 2x1x4x5
- """
-
- if len(image.shape) < 3 or image.shape[-3] != 3:
- raise ValueError(f"Input size must have a shape of (*, 3, H, W). Got {image.shape}")
-
- if rgb_weights is None:
- # 8 bit images
- if image.dtype == torch.uint8:
- rgb_weights = torch.tensor([76, 150, 29], device=image.device, dtype=torch.uint8)
- # floating point images
- elif image.dtype in (torch.float16, torch.float32, torch.float64):
- rgb_weights = torch.tensor([0.299, 0.587, 0.114], device=image.device, dtype=image.dtype)
- else:
- raise TypeError(f"Unknown data type: {image.dtype}")
- else:
- # is tensor that we make sure is in the same device/dtype
- rgb_weights = rgb_weights.to(image)
-
- # unpack the color image channels with RGB order
- r: Tensor = image[..., 0:1, :, :]
- g: Tensor = image[..., 1:2, :, :]
- b: Tensor = image[..., 2:3, :, :]
-
- w_r, w_g, w_b = rgb_weights.unbind()
- return w_r * r + w_g * g + w_b * b
-
-def canny(
- input,
- low_threshold = 0.1,
- high_threshold = 0.2,
- kernel_size = 5,
- sigma = 1,
- hysteresis = True,
- eps = 1e-6,
-):
- r"""Find edges of the input image and filters them using the Canny algorithm.
- .. image:: _static/img/canny.png
- Args:
- input: input image tensor with shape :math:`(B,C,H,W)`.
- low_threshold: lower threshold for the hysteresis procedure.
- high_threshold: upper threshold for the hysteresis procedure.
- kernel_size: the size of the kernel for the gaussian blur.
- sigma: the standard deviation of the kernel for the gaussian blur.
- hysteresis: if True, applies the hysteresis edge tracking.
- Otherwise, the edges are divided between weak (0.5) and strong (1) edges.
- eps: regularization number to avoid NaN during backprop.
- Returns:
- - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
- - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.
- .. note::
- See a working example `here `__.
- Example:
- >>> input = torch.rand(5, 3, 4, 4)
- >>> magnitude, edges = canny(input) # 5x3x4x4
- >>> magnitude.shape
- torch.Size([5, 1, 4, 4])
- >>> edges.shape
- torch.Size([5, 1, 4, 4])
- """
- # KORNIA_CHECK_IS_TENSOR(input)
- # KORNIA_CHECK_SHAPE(input, ['B', 'C', 'H', 'W'])
- # KORNIA_CHECK(
- # low_threshold <= high_threshold,
- # "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: "
- # f"{low_threshold}>{high_threshold}",
- # )
- # KORNIA_CHECK(0 < low_threshold < 1, f'Invalid low threshold. Should be in range (0, 1). Got: {low_threshold}')
- # KORNIA_CHECK(0 < high_threshold < 1, f'Invalid high threshold. Should be in range (0, 1). Got: {high_threshold}')
-
- device = input.device
- dtype = input.dtype
-
- # To Grayscale
- if input.shape[1] == 3:
- input = rgb_to_grayscale(input)
-
- # Gaussian filter
- blurred: Tensor = gaussian_blur_2d(input, kernel_size, sigma)
-
- # Compute the gradients
- gradients: Tensor = spatial_gradient(blurred, normalized=False)
-
- # Unpack the edges
- gx: Tensor = gradients[:, :, 0]
- gy: Tensor = gradients[:, :, 1]
-
- # Compute gradient magnitude and angle
- magnitude: Tensor = torch.sqrt(gx * gx + gy * gy + eps)
- angle: Tensor = torch.atan2(gy, gx)
-
- # Radians to Degrees
- angle = 180.0 * angle / math.pi
-
- # Round angle to the nearest 45 degree
- angle = torch.round(angle / 45) * 45
-
- # Non-maximal suppression
- nms_kernels: Tensor = get_canny_nms_kernel(device, dtype)
- nms_magnitude: Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)
-
- # Get the indices for both directions
- positive_idx: Tensor = (angle / 45) % 8
- positive_idx = positive_idx.long()
-
- negative_idx: Tensor = ((angle / 45) + 4) % 8
- negative_idx = negative_idx.long()
-
- # Apply the non-maximum suppression to the different directions
- channel_select_filtered_positive: Tensor = torch.gather(nms_magnitude, 1, positive_idx)
- channel_select_filtered_negative: Tensor = torch.gather(nms_magnitude, 1, negative_idx)
-
- channel_select_filtered: Tensor = torch.stack(
- [channel_select_filtered_positive, channel_select_filtered_negative], 1
- )
-
- is_max: Tensor = channel_select_filtered.min(dim=1)[0] > 0.0
-
- magnitude = magnitude * is_max
-
- # Threshold
- edges: Tensor = F.threshold(magnitude, low_threshold, 0.0)
-
- low: Tensor = magnitude > low_threshold
- high: Tensor = magnitude > high_threshold
-
- edges = low * 0.5 + high * 0.5
- edges = edges.to(dtype)
-
- # Hysteresis
- if hysteresis:
- edges_old: Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
- hysteresis_kernels: Tensor = get_hysteresis_kernel(device, dtype)
-
- while ((edges_old - edges).abs() != 0).any():
- weak: Tensor = (edges == 0.5).float()
- strong: Tensor = (edges == 1).float()
-
- hysteresis_magnitude: Tensor = F.conv2d(
- edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
- )
- hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
- hysteresis_magnitude = hysteresis_magnitude * weak + strong
-
- edges_old = edges.clone()
- edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5
-
- edges = hysteresis_magnitude
-
- return magnitude, edges
+from kornia.filters import canny
class Canny:
diff --git a/comfy_extras/nodes/nodes_mask.py b/comfy_extras/nodes/nodes_mask.py
index 239b69809..4b532c707 100644
--- a/comfy_extras/nodes/nodes_mask.py
+++ b/comfy_extras/nodes/nodes_mask.py
@@ -342,6 +342,24 @@ class GrowMask:
out.append(output)
return (torch.stack(out, dim=0),)
+class ThresholdMask:  # Node that binarizes a MASK input against a cutoff value.
+ @classmethod
+ def INPUT_TYPES(s):  # Declares the two required inputs: "mask" and "value".
+ return {
+ "required": {
+ "mask": ("MASK",),
+ "value": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),  # cutoff; declared range 0.0-1.0 in 0.01 steps, default 0.5
+ }
+ }
+
+ CATEGORY = "mask"
+
+ RETURN_TYPES = ("MASK",)
+ FUNCTION = "image_to_mask"  # same string as the method name below; presumably the node runner dispatches on it - confirm
+
+ def image_to_mask(self, mask, value):  # Despite the name, the input is the "mask" declared above, not an image.
+ mask = (mask > value).float()  # strictly greater than value -> 1.0, everything else -> 0.0
+ return (mask,)  # one-element tuple, lining up with RETURN_TYPES
NODE_CLASS_MAPPINGS = {
@@ -356,6 +374,7 @@ NODE_CLASS_MAPPINGS = {
"MaskComposite": MaskComposite,
"FeatherMask": FeatherMask,
"GrowMask": GrowMask,
+ "ThresholdMask": ThresholdMask,
}
NODE_DISPLAY_NAME_MAPPINGS = {
diff --git a/comfy_extras/nodes/nodes_stable_cascade.py b/comfy_extras/nodes/nodes_stable_cascade.py
index 517e27bfc..e0bca8bf9 100644
--- a/comfy_extras/nodes/nodes_stable_cascade.py
+++ b/comfy_extras/nodes/nodes_stable_cascade.py
@@ -37,7 +37,7 @@ class StableCascade_EmptyLatentImage:
RETURN_NAMES = ("stage_c", "stage_b")
FUNCTION = "generate"
- CATEGORY = "_for_testing/stable_cascade"
+ CATEGORY = "latent/stable_cascade"
def generate(self, width, height, compression, batch_size=1):
c_latent = torch.zeros([batch_size, 16, height // compression, width // compression])
@@ -63,7 +63,7 @@ class StableCascade_StageC_VAEEncode:
RETURN_NAMES = ("stage_c", "stage_b")
FUNCTION = "generate"
- CATEGORY = "_for_testing/stable_cascade"
+ CATEGORY = "latent/stable_cascade"
def generate(self, image, vae, compression):
width = image.shape[-2]
@@ -91,7 +91,7 @@ class StableCascade_StageB_Conditioning:
FUNCTION = "set_prior"
- CATEGORY = "_for_testing/stable_cascade"
+ CATEGORY = "conditioning/stable_cascade"
def set_prior(self, conditioning, stage_c):
c = []
@@ -102,8 +102,39 @@ class StableCascade_StageB_Conditioning:
c.append(n)
return (c, )
+class StableCascade_SuperResolutionControlnet:  # Node: VAE-encodes an image as a controlnet input and allocates zero-filled stage C / stage B latents.
+ def __init__(self, device="cpu"):
+ self.device = device  # stored, but not read anywhere else in this class
+
+ @classmethod
+ def INPUT_TYPES(s):  # Two required inputs: "image" and "vae".
+ return {"required": {
+ "image": ("IMAGE",),
+ "vae": ("VAE", ),
+ }}
+ RETURN_TYPES = ("IMAGE", "LATENT", "LATENT")
+ RETURN_NAMES = ("controlnet_input", "stage_c", "stage_b")  # same order as the tuple returned by generate()
+ FUNCTION = "generate"
+
+ CATEGORY = "_for_testing/stable_cascade"
+
+ def generate(self, image, vae):  # Returns (encoded image, {"samples": stage C zeros}, {"samples": stage B zeros}).
+ width = image.shape[-2]  # assumes image is laid out (batch, height, width, channels) - TODO confirm
+ height = image.shape[-3]
+ batch_size = image.shape[0]
+ controlnet_input = vae.encode(image[:,:,:,:3]).movedim(1, -1)  # only the first 3 entries of the last axis are encoded; axis 1 of the result is moved last
+
+ c_latent = torch.zeros([batch_size, 16, height // 16, width // 16])  # 16 channels, spatial size floor-divided by 16
+ b_latent = torch.zeros([batch_size, 4, height // 2, width // 2])  # 4 channels, spatial size floor-divided by 2
+ return (controlnet_input, {
+ "samples": c_latent,
+ }, {
+ "samples": b_latent,
+ })
+
NODE_CLASS_MAPPINGS = {
"StableCascade_EmptyLatentImage": StableCascade_EmptyLatentImage,
"StableCascade_StageB_Conditioning": StableCascade_StageB_Conditioning,
"StableCascade_StageC_VAEEncode": StableCascade_StageC_VAEEncode,
+ "StableCascade_SuperResolutionControlnet": StableCascade_SuperResolutionControlnet,
}
diff --git a/comfy_extras/nodes_differential_diffusion.py b/comfy_extras/nodes_differential_diffusion.py
new file mode 100644
index 000000000..98dbbf102
--- /dev/null
+++ b/comfy_extras/nodes_differential_diffusion.py
@@ -0,0 +1,42 @@
+# code adapted from https://github.com/exx8/differential-diffusion
+
+import torch
+
+class DifferentialDiffusion():  # Node that installs a per-step denoise-mask function on a cloned model.
+ @classmethod
+ def INPUT_TYPES(s):  # Single required input: "model".
+ return {"required": {"model": ("MODEL", ),
+ }}
+ RETURN_TYPES = ("MODEL",)
+ FUNCTION = "apply"
+ CATEGORY = "_for_testing"
+ INIT = False  # not referenced anywhere else in this file
+
+ def apply(self, model):  # Returns a one-element tuple holding the patched clone.
+ model = model.clone()  # patch a clone rather than the object that was passed in
+ model.set_model_denoise_mask_function(self.forward)  # register forward() as the denoise-mask function
+ return (model,)
+
+ def forward(self, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict):  # Converts denoise_mask into a 0/1 mask for the current sigma.
+ model = extra_options["model"]  # extra_options must carry "model" and "sigmas"
+ step_sigmas = extra_options["sigmas"]
+ sigma_to = model.inner_model.model_sampling.sigma_min  # lower end starts at the model's sigma_min ...
+ if step_sigmas[-1] > sigma_to:
+ sigma_to = step_sigmas[-1]  # ... but is raised to the last scheduled sigma when that is larger
+ sigma_from = step_sigmas[0]  # upper end is the first scheduled sigma
+
+ ts_from = model.inner_model.model_sampling.timestep(sigma_from)  # map the three sigmas through model_sampling.timestep()
+ ts_to = model.inner_model.model_sampling.timestep(sigma_to)
+ current_ts = model.inner_model.model_sampling.timestep(sigma[0])  # only the first element of sigma is used
+
+ threshold = (current_ts - ts_to) / (ts_from - ts_to)  # 1 when current_ts == ts_from, 0 when it == ts_to; NOTE(review): no guard for ts_from == ts_to
+
+ return (denoise_mask >= threshold).to(denoise_mask.dtype)  # comparison result cast back to the mask's own dtype
+
+
+NODE_CLASS_MAPPINGS = {  # node identifier string -> class defined in this file
+ "DifferentialDiffusion": DifferentialDiffusion,
+}
+NODE_DISPLAY_NAME_MAPPINGS = {  # same identifier -> label string (presumably what the UI shows - confirm)
+ "DifferentialDiffusion": "Differential Diffusion",
+}
diff --git a/comfy_extras/nodes_morphology.py b/comfy_extras/nodes_morphology.py
new file mode 100644
index 000000000..071521d87
--- /dev/null
+++ b/comfy_extras/nodes_morphology.py
@@ -0,0 +1,49 @@
+import torch
+import comfy.model_management
+
+from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat
+
+
+class Morphology:  # Node that applies one kornia morphology operator to an image with a square all-ones kernel.
+ @classmethod
+ def INPUT_TYPES(s):  # Required inputs: "image", "operation" (one of the listed names) and "kernel_size".
+ return {"required": {"image": ("IMAGE",),
+ "operation": (["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],),  # exactly the names process() dispatches on
+ "kernel_size": ("INT", {"default": 3, "min": 3, "max": 999, "step": 1}),  # side length of the square kernel
+ }}
+
+ RETURN_TYPES = ("IMAGE",)
+ FUNCTION = "process"
+
+ CATEGORY = "image/postprocessing"
+
+ def process(self, image, operation, kernel_size):  # Returns a one-element tuple with the processed image; raises ValueError on an unknown operation.
+ device = comfy.model_management.get_torch_device()
+ kernel = torch.ones(kernel_size, kernel_size, device=device)  # kernel_size x kernel_size tensor of ones, created on the same device as image_k
+ image_k = image.to(device).movedim(-1, 1)  # last axis moved to position 1 (presumably channels-last -> channels-first - confirm)
+ if operation == "erode":
+ output = erosion(image_k, kernel)
+ elif operation == "dilate":
+ output = dilation(image_k, kernel)
+ elif operation == "open":
+ output = opening(image_k, kernel)
+ elif operation == "close":
+ output = closing(image_k, kernel)
+ elif operation == "gradient":
+ output = gradient(image_k, kernel)
+ elif operation == "top_hat":
+ output = top_hat(image_k, kernel)
+ elif operation == "bottom_hat":
+ output = bottom_hat(image_k, kernel)
+ else:  # the message below spells the names exactly as they are accepted above
+ raise ValueError(f"Invalid operation {operation} for morphology. Must be one of 'erode', 'dilate', 'open', 'close', 'gradient', 'top_hat', 'bottom_hat'")
+ img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1)  # move to the intermediate device and undo the earlier axis move
+ return (img_out,)
+
+NODE_CLASS_MAPPINGS = {  # node identifier string -> class defined in this file
+ "Morphology": Morphology,
+}
+
+NODE_DISPLAY_NAME_MAPPINGS = {  # same identifier -> label string (presumably what the UI shows - confirm)
+ "Morphology": "ImageMorphology",
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 819bf5661..3020cd3d0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,4 +28,6 @@ protobuf
psutil
ConfigArgParse
aio-pika
-pyjwt[crypto]
\ No newline at end of file
+pyjwt[crypto]
+kornia>=0.7.1
+mpmath>=1.0,!=1.4.0a0
\ No newline at end of file