Merge with upstream

doctorpangloss 2024-03-08 15:17:20 -08:00
commit c0d9bc0129
20 changed files with 326 additions and 315 deletions

View File

@@ -104,7 +104,7 @@ if self_update and not files_equal(update_py_path, repo_update_py_path) and file
if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
import subprocess
try:
-subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', repo_req_path])
+subprocess.check_call([sys.executable, '-s', '-m', 'pip', 'install', '-r', repo_req_path])
shutil.copy(repo_req_path, req_path)
except:
pass

View File

@@ -24,7 +24,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "6"
default: "8"
# push:
# branches:
# - master

View File

@@ -19,7 +19,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "1"
default: "2"
# push:
# branches:
# - master
@@ -49,7 +49,7 @@ jobs:
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
./python.exe get-pip.py
-python -m pip wheel torch torchvision --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
+python -m pip wheel torch torchvision mpmath==1.3.0 --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
ls ../temp_wheel_dir
./python.exe -s -m pip install --pre ../temp_wheel_dir/*
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth

View File

@@ -19,7 +19,7 @@ on:
description: 'python patch version'
required: true
type: string
default: "6"
default: "8"
# push:
# branches:
# - master

View File

@@ -1,6 +1,7 @@
import os
import importlib.util
from ..cli_args import args
+import subprocess
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
def get_gpu_names():
@@ -34,7 +35,15 @@ def get_gpu_names():
return gpu_names
return enum_display_devices()
else:
-return set()
+gpu_names = set()
+try:
+out = subprocess.check_output(['nvidia-smi', '-L'])
+for l in out.split(b'\n'):
+if len(l) > 0:
+gpu_names.add(l.decode('utf-8').split(' (UUID')[0])
+except IOError as error:
+pass
+return gpu_names
blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
"GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",

View File

@@ -10,6 +10,7 @@ from . import ops
from .cldm import cldm
from .t2i_adapter import adapter
+from .ldm.cascade import controlnet
def broadcast_image_to(tensor, target_batch_size, batched_number):
@@ -38,6 +39,8 @@ class ControlBase:
self.timestep_percent_range = (0.0, 1.0)
self.global_average_pooling = False
self.timestep_range = None
+self.compression_ratio = 8
+self.upscale_algorithm = 'nearest-exact'
if device is None:
device = model_management.get_torch_device()
@@ -78,6 +81,8 @@ class ControlBase:
c.strength = self.strength
c.timestep_percent_range = self.timestep_percent_range
c.global_average_pooling = self.global_average_pooling
+c.compression_ratio = self.compression_ratio
+c.upscale_algorithm = self.upscale_algorithm
def inference_memory_requirements(self, dtype):
if self.previous_controlnet is not None:
@@ -159,11 +164,11 @@ class ControlNet(ControlBase):
dtype = self.manual_cast_dtype
output_dtype = x_noisy.dtype
-if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
+if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
if self.cond_hint is not None:
del self.cond_hint
self.cond_hint = None
-self.cond_hint = utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(dtype).to(self.device)
+self.cond_hint = utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio, self.upscale_algorithm, "center").to(dtype).to(self.device)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
@@ -288,13 +293,13 @@ class ControlLora(ControlNet):
for k in sd:
weight = sd[k]
try:
-utils.set_attr(self.control_model, k, weight)
+utils.set_attr_param(self.control_model, k, weight)
except:
pass
for k in self.control_weights:
if k not in {"lora_controlnet"}:
-utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(model_management.get_torch_device()))
+utils.set_attr_param(self.control_model, k, self.control_weights[k].to(dtype).to(model_management.get_torch_device()))
def copy(self):
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
@@ -433,11 +438,13 @@ def load_controlnet(ckpt_path, model=None):
return control
class T2IAdapter(ControlBase):
-def __init__(self, t2i_model, channels_in, device=None):
+def __init__(self, t2i_model, channels_in, compression_ratio, upscale_algorithm, device=None):
super().__init__(device)
self.t2i_model = t2i_model
self.channels_in = channels_in
self.control_input = None
+self.compression_ratio = compression_ratio
+self.upscale_algorithm = upscale_algorithm
def scale_image_to(self, width, height):
unshuffle_amount = self.t2i_model.unshuffle_amount
@@ -457,13 +464,13 @@ class T2IAdapter(ControlBase):
else:
return None
-if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
+if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
if self.cond_hint is not None:
del self.cond_hint
self.control_input = None
self.cond_hint = None
-width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8)
-self.cond_hint = utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device)
+width, height = self.scale_image_to(x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio)
+self.cond_hint = utils.common_upscale(self.cond_hint_original, width, height, self.upscale_algorithm, "center").float().to(self.device)
if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
if x_noisy.shape[0] != self.cond_hint.shape[0]:
@@ -482,11 +489,14 @@ class T2IAdapter(ControlBase):
return self.control_merge(control_input, mid, control_prev, x_noisy.dtype)
def copy(self):
-c = T2IAdapter(self.t2i_model, self.channels_in)
+c = T2IAdapter(self.t2i_model, self.channels_in, self.compression_ratio, self.upscale_algorithm)
self.copy_to(c)
return c
def load_t2i_adapter(t2i_data):
+compression_ratio = 8
+upscale_algorithm = 'nearest-exact'
if 'adapter' in t2i_data:
t2i_data = t2i_data['adapter']
if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format
@@ -514,8 +524,17 @@ def load_t2i_adapter(t2i_data):
if cin == 256 or cin == 768:
xl = True
model_ad = adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
elif "backbone.0.0.weight" in keys:
model_ad = controlnet.ControlNet(c_in=t2i_data['backbone.0.0.weight'].shape[1], proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
compression_ratio = 32
upscale_algorithm = 'bilinear'
elif "backbone.10.blocks.0.weight" in keys:
model_ad = controlnet.ControlNet(c_in=t2i_data['backbone.0.weight'].shape[1], bottleneck_mode="large", proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
compression_ratio = 1
upscale_algorithm = 'nearest-exact'
else:
return None
missing, unexpected = model_ad.load_state_dict(t2i_data)
if len(missing) > 0:
print("t2i missing", missing)
@@ -523,4 +542,4 @@ def load_t2i_adapter(t2i_data):
if len(unexpected) > 0:
print("t2i unexpected", unexpected)
-return T2IAdapter(model_ad, model_ad.input_channels)
+return T2IAdapter(model_ad, model_ad.input_channels, compression_ratio, upscale_algorithm)
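This change turns the hard-coded latent-to-pixel factor of 8 into per-model fields on the control objects. A minimal sketch of the resize it drives, using F.interpolate to stand in for utils.common_upscale; the 32/'bilinear' values are the ones the loader above assigns to the Cascade effnet controlnet:

import torch
import torch.nn.functional as F

def scale_hint(hint, latent, compression_ratio, upscale_algorithm):
    # hint: image-space condition (B, C, H, W); latent: (B, C', h, w).
    # The hint must cover h * ratio by w * ratio pixels to line up with the latent.
    size = (latent.shape[2] * compression_ratio, latent.shape[3] * compression_ratio)
    return F.interpolate(hint, size=size, mode=upscale_algorithm)

latent = torch.zeros(1, 16, 24, 24)
hint = torch.zeros(1, 3, 512, 512)
print(scale_hint(hint, latent, 32, 'bilinear').shape)  # torch.Size([1, 3, 768, 768])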

View File

@@ -0,0 +1,93 @@
"""
This file is part of ComfyUI.
Copyright (C) 2024 Stability AI
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
import torch
import torchvision
from torch import nn
from .common import LayerNorm2d_op
class CNetResBlock(nn.Module):
def __init__(self, c, dtype=None, device=None, operations=None):
super().__init__()
self.blocks = nn.Sequential(
LayerNorm2d_op(operations)(c, dtype=dtype, device=device),
nn.GELU(),
operations.Conv2d(c, c, kernel_size=3, padding=1),
LayerNorm2d_op(operations)(c, dtype=dtype, device=device),
nn.GELU(),
operations.Conv2d(c, c, kernel_size=3, padding=1),
)
def forward(self, x):
return x + self.blocks(x)
class ControlNet(nn.Module):
def __init__(self, c_in=3, c_proj=2048, proj_blocks=None, bottleneck_mode=None, dtype=None, device=None, operations=nn):
super().__init__()
if bottleneck_mode is None:
bottleneck_mode = 'effnet'
self.proj_blocks = proj_blocks
if bottleneck_mode == 'effnet':
embd_channels = 1280
self.backbone = torchvision.models.efficientnet_v2_s().features.eval()
if c_in != 3:
in_weights = self.backbone[0][0].weight.data
self.backbone[0][0] = operations.Conv2d(c_in, 24, kernel_size=3, stride=2, bias=False, dtype=dtype, device=device)
if c_in > 3:
# nn.init.constant_(self.backbone[0][0].weight, 0)
self.backbone[0][0].weight.data[:, :3] = in_weights[:, :3].clone()
else:
self.backbone[0][0].weight.data = in_weights[:, :c_in].clone()
elif bottleneck_mode == 'simple':
embd_channels = c_in
self.backbone = nn.Sequential(
operations.Conv2d(embd_channels, embd_channels * 4, kernel_size=3, padding=1, dtype=dtype, device=device),
nn.LeakyReLU(0.2, inplace=True),
operations.Conv2d(embd_channels * 4, embd_channels, kernel_size=3, padding=1, dtype=dtype, device=device),
)
elif bottleneck_mode == 'large':
self.backbone = nn.Sequential(
operations.Conv2d(c_in, 4096 * 4, kernel_size=1, dtype=dtype, device=device),
nn.LeakyReLU(0.2, inplace=True),
operations.Conv2d(4096 * 4, 1024, kernel_size=1, dtype=dtype, device=device),
*[CNetResBlock(1024, dtype=dtype, device=device, operations=operations) for _ in range(8)],
operations.Conv2d(1024, 1280, kernel_size=1, dtype=dtype, device=device),
)
embd_channels = 1280
else:
raise ValueError(f'Unknown bottleneck mode: {bottleneck_mode}')
self.projections = nn.ModuleList()
for _ in range(len(proj_blocks)):
self.projections.append(nn.Sequential(
operations.Conv2d(embd_channels, embd_channels, kernel_size=1, bias=False, dtype=dtype, device=device),
nn.LeakyReLU(0.2, inplace=True),
operations.Conv2d(embd_channels, c_proj, kernel_size=1, bias=False, dtype=dtype, device=device),
))
# nn.init.constant_(self.projections[-1][-1].weight, 0) # zero output projection
self.xl = False
self.input_channels = c_in
self.unshuffle_amount = 8
def forward(self, x):
x = self.backbone(x)
proj_outputs = [None for _ in range(max(self.proj_blocks) + 1)]
for i, idx in enumerate(self.proj_blocks):
proj_outputs[idx] = self.projections[i](x)
return proj_outputs
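A quick shape check of the new module; this sketch assumes torchvision is installed and the ControlNet class defined above is in scope, and the proj_blocks list matches what load_t2i_adapter passes:

import torch

# assumes the ControlNet class defined above is importable in this scope
cn = ControlNet(c_in=3, proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
with torch.no_grad():
    outputs = cn(torch.zeros(1, 3, 256, 256))
print(len(outputs))                          # 64 slots (max(proj_blocks) + 1)
print(sum(o is not None for o in outputs))   # 8 populated projections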

View File

@@ -194,10 +194,10 @@ class StageC(nn.Module):
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
ResBlock)):
if cnet is not None:
-next_cnet = cnet()
+next_cnet = cnet.pop()
if next_cnet is not None:
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
-align_corners=True)
+align_corners=True).to(x.dtype)
x = block(x)
elif isinstance(block, AttnBlock) or (
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
@@ -228,10 +228,10 @@ class StageC(nn.Module):
x = torch.nn.functional.interpolate(x, skip.shape[-2:], mode='bilinear',
align_corners=True)
if cnet is not None:
-next_cnet = cnet()
+next_cnet = cnet.pop()
if next_cnet is not None:
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
-align_corners=True)
+align_corners=True).to(x.dtype)
x = block(x, skip)
elif isinstance(block, AttnBlock) or (
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
@@ -248,7 +248,7 @@ class StageC(nn.Module):
x = upscaler(x)
return x
-def forward(self, x, r, clip_text, clip_text_pooled, clip_img, cnet=None, **kwargs):
+def forward(self, x, r, clip_text, clip_text_pooled, clip_img, control=None, **kwargs):
# Process the conditioning embeddings
r_embed = self.gen_r_embedding(r).to(dtype=x.dtype)
for c in self.t_conds:
@@ -256,10 +256,13 @@ class StageC(nn.Module):
r_embed = torch.cat([r_embed, self.gen_r_embedding(t_cond).to(dtype=x.dtype)], dim=1)
clip = self.gen_c_embeddings(clip_text, clip_text_pooled, clip_img)
+if control is not None:
+cnet = control.get("input")
+else:
+cnet = None
# Model Blocks
x = self.embedding(x)
-if cnet is not None:
-cnet = ControlNetDeliverer(cnet)
level_outputs = self._down_encode(x, r_embed, clip, cnet)
x = self._up_decode(level_outputs, r_embed, clip, cnet)
return self.clf(x)
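The `control` argument follows ComfyUI's controlnet output convention: a dict whose "input" list the blocks drain with pop(). A toy illustration with made-up shapes:

import torch

control = {"input": [torch.zeros(1, 2048, 8, 8) for _ in range(8)]}
cnet = control.get("input")
next_cnet = cnet.pop()   # list.pop() hands entries out from the end
print(len(cnet))         # 7 left for the remaining ResBlocks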

View File

@@ -166,6 +166,10 @@ class BaseModel(torch.nn.Module):
if cross_attn_cnet is not None:
out['crossattn_controlnet'] = conds.CONDCrossAttn(cross_attn_cnet)
c_concat = kwargs.get("noise_concat", None)
if c_concat is not None:
+out['c_concat'] = comfy.conds.CONDNoiseShape(c_concat)
return out
def load_model_weights(self, sd, unet_prefix=""):

View File

@@ -763,7 +763,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
#FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
#when the model doesn't actually fit on the card
#TODO: actually test if GP106 and others have the same type of behavior
nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"]
nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050", "p40", "p100", "p6", "p4"]
for x in nvidia_10_series:
if x in props.name.lower():
fp16_works = True

View File

@@ -67,6 +67,9 @@ class ModelPatcher:
def set_model_unet_function_wrapper(self, unet_wrapper_function):
self.model_options["model_function_wrapper"] = unet_wrapper_function
+def set_model_denoise_mask_function(self, denoise_mask_function):
+self.model_options["denoise_mask_function"] = denoise_mask_function
def set_model_patch(self, patch, name):
to = self.model_options["transformer_options"]
if "patches" not in to:
@@ -176,10 +179,9 @@ class ModelPatcher:
def patch_model(self, device_to=None, patch_weights=True):
for k in self.object_patches:
-old = getattr(self.model, k)
+old = utils.set_attr(self.model, k, self.object_patches[k])
if k not in self.object_patches_backup:
self.object_patches_backup[k] = old
-setattr(self.model, k, self.object_patches[k])
if patch_weights:
model_sd = self.model_state_dict()
@@ -203,7 +205,7 @@
if inplace_update:
utils.copy_to_param(self.model, key, out_weight)
else:
-utils.set_attr(self.model, key, out_weight)
+utils.set_attr_param(self.model, key, out_weight)
del temp_weight
if device_to is not None:
@@ -342,7 +344,7 @@
utils.copy_to_param(self.model, k, self.backup[k])
else:
for k in keys:
-utils.set_attr(self.model, k, self.backup[k])
+utils.set_attr_param(self.model, k, self.backup[k])
self.backup = {}
@@ -352,6 +354,6 @@
keys = list(self.object_patches_backup.keys())
for k in keys:
-setattr(self.model, k, self.object_patches_backup[k])
+utils.set_attr(self.model, k, self.object_patches_backup[k])
self.object_patches_backup = {}

View File

@@ -11,6 +11,14 @@ class EPS:
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
return model_input - model_output * sigma
+def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
+if max_denoise:
+noise = noise * torch.sqrt(1.0 + sigma ** 2.0)
+else:
+noise = noise * sigma
+noise += latent_image
+return noise
class V_PREDICTION(EPS):
def calculate_denoised(self, sigma, model_output, model_input):
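noise_scaling moves the samplers' start-noise formula into the model sampling object. For EPS the start latent is noise * sigma + latent_image; with max_denoise the noise is scaled by sqrt(1 + sigma^2) instead, so a from-scratch sample (empty latent) carries the sigma^2 + 1 total variance a DDPM-style x_T is assumed to have. A numeric sketch with an illustrative sigma:

import torch

sigma = torch.tensor(14.6146)           # roughly SD's sigma_max, illustrative
noise = torch.randn(1_000_000)
empty_latent = torch.zeros(1_000_000)   # txt2img start

start = noise * torch.sqrt(1.0 + sigma ** 2.0) + empty_latent  # max_denoise=True
print(start.var().item())               # ~ sigma**2 + 1 ~ 214.6
img2img = noise * sigma + empty_latent  # max_denoise=False: plain sigma scaling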

View File

@@ -275,15 +275,16 @@ class CFGNoisePredictor(torch.nn.Module):
return self.apply_model(*args, **kwargs)
class KSamplerX0Inpaint(torch.nn.Module):
-def __init__(self, model):
+def __init__(self, model, sigmas):
super().__init__()
self.inner_model = model
+self.sigmas = sigmas
def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None):
if denoise_mask is not None:
if "denoise_mask_function" in model_options:
denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask)
denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask, extra_options={"model": self.inner_model, "sigmas": self.sigmas})
latent_mask = 1. - denoise_mask
-x = x * denoise_mask + (self.latent_image + self.noise * sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))) * latent_mask
+x = x * denoise_mask + self.inner_model.inner_model.model_sampling.noise_scaling(sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1)), self.noise, self.latent_image) * latent_mask
out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, model_options=model_options, seed=seed)
if denoise_mask is not None:
out = out * denoise_mask + self.latent_image * latent_mask
@@ -531,7 +532,7 @@ class KSAMPLER(Sampler):
def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
extra_args["denoise_mask"] = denoise_mask
-model_k = KSamplerX0Inpaint(model_wrap)
+model_k = KSamplerX0Inpaint(model_wrap, sigmas)
model_k.latent_image = latent_image
if self.inpaint_options.get("random", False): #TODO: Should this be the default?
generator = torch.manual_seed(extra_args.get("seed", 41) + 1)
@@ -539,19 +540,13 @@
else:
model_k.noise = noise
-if self.max_denoise(model_wrap, sigmas):
-noise = noise * torch.sqrt(1.0 + sigmas[0] ** 2.0)
-else:
-noise = noise * sigmas[0]
+noise = model_wrap.inner_model.model_sampling.noise_scaling(sigmas[0], noise, latent_image, self.max_denoise(model_wrap, sigmas))
k_callback = None
total_steps = len(sigmas) - 1
if callback is not None:
k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps)
-if latent_image is not None:
-noise += latent_image
samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options)
return samples

View File

@@ -296,8 +296,11 @@ def set_attr(obj, attr, value):
for name in attrs[:-1]:
obj = getattr(obj, name)
prev = getattr(obj, attrs[-1])
-setattr(obj, attrs[-1], torch.nn.Parameter(value, requires_grad=False))
-del prev
+setattr(obj, attrs[-1], value)
+return prev
+def set_attr_param(obj, attr, value):
+return set_attr(obj, attr, torch.nn.Parameter(value, requires_grad=False))
def copy_to_param(obj, attr, value):
# inplace update tensor instead of replacing it
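set_attr previously always wrapped the value in an nn.Parameter; now it assigns the raw value and returns whatever it displaced, while set_attr_param keeps the old wrapping for weights (this is what ModelPatcher now calls for parameters, and what lets object patches be restored through the same helper). A behavior sketch, assuming both helpers above are in scope:

import torch

class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.sub = torch.nn.Linear(2, 2)
        self.sampler_name = "euler"      # an arbitrary non-parameter attribute

m = Model()
# assumes set_attr / set_attr_param from above are in scope
prev = set_attr(m, "sampler_name", "dpmpp_2m")              # raw assignment, returns "euler"
old_w = set_attr_param(m, "sub.weight", torch.zeros(2, 2))  # wrapped in nn.Parameter
print(prev, isinstance(m.sub.weight, torch.nn.Parameter))   # euler True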

View File

@@ -5,275 +5,7 @@ import torch
import torch.nn.functional as F
import comfy.model_management
def get_canny_nms_kernel(device=None, dtype=None):
"""Utility function that returns 3x3 kernels for the Canny Non-maximal suppression."""
return torch.tensor(
[
[[[0.0, 0.0, 0.0], [0.0, 1.0, -1.0], [0.0, 0.0, 0.0]]],
[[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, -1.0]]],
[[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, -1.0, 0.0]]],
[[[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [-1.0, 0.0, 0.0]]],
[[[0.0, 0.0, 0.0], [-1.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
[[[-1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
[[[0.0, -1.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
[[[0.0, 0.0, -1.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]],
],
device=device,
dtype=dtype,
)
def get_hysteresis_kernel(device=None, dtype=None):
"""Utility function that returns the 3x3 kernels for the Canny hysteresis."""
return torch.tensor(
[
[[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0]]],
[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]],
[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 1.0, 0.0]]],
[[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]],
[[[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
[[[1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
[[[0.0, 1.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
[[[0.0, 0.0, 1.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]],
],
device=device,
dtype=dtype,
)
def gaussian_blur_2d(img, kernel_size, sigma):
ksize_half = (kernel_size - 1) * 0.5
x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size)
pdf = torch.exp(-0.5 * (x / sigma).pow(2))
x_kernel = pdf / pdf.sum()
x_kernel = x_kernel.to(device=img.device, dtype=img.dtype)
kernel2d = torch.mm(x_kernel[:, None], x_kernel[None, :])
kernel2d = kernel2d.expand(img.shape[-3], 1, kernel2d.shape[0], kernel2d.shape[1])
padding = [kernel_size // 2, kernel_size // 2, kernel_size // 2, kernel_size // 2]
img = torch.nn.functional.pad(img, padding, mode="reflect")
img = torch.nn.functional.conv2d(img, kernel2d, groups=img.shape[-3])
return img
def get_sobel_kernel2d(device=None, dtype=None):
kernel_x = torch.tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=device, dtype=dtype)
kernel_y = kernel_x.transpose(0, 1)
return torch.stack([kernel_x, kernel_y])
def spatial_gradient(input, normalized: bool = True):
r"""Compute the first order image derivative in both x and y using a Sobel operator.
.. image:: _static/img/spatial_gradient.png
Args:
input: input image tensor with shape :math:`(B, C, H, W)`.
mode: derivatives modality, can be: `sobel` or `diff`.
order: the order of the derivatives.
normalized: whether the output is normalized.
Return:
the derivatives of the input feature map. with shape :math:`(B, C, 2, H, W)`.
.. note::
See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/
filtering_edges.html>`__.
Examples:
>>> input = torch.rand(1, 3, 4, 4)
>>> output = spatial_gradient(input) # 1x3x2x4x4
>>> output.shape
torch.Size([1, 3, 2, 4, 4])
"""
# KORNIA_CHECK_IS_TENSOR(input)
# KORNIA_CHECK_SHAPE(input, ['B', 'C', 'H', 'W'])
# allocate kernel
kernel = get_sobel_kernel2d(device=input.device, dtype=input.dtype)
if normalized:
kernel = normalize_kernel2d(kernel)
# prepare kernel
b, c, h, w = input.shape
tmp_kernel = kernel[:, None, ...]
# Pad with "replicate for spatial dims, but with zeros for channel
spatial_pad = [kernel.size(1) // 2, kernel.size(1) // 2, kernel.size(2) // 2, kernel.size(2) // 2]
out_channels: int = 2
padded_inp = torch.nn.functional.pad(input.reshape(b * c, 1, h, w), spatial_pad, 'replicate')
out = F.conv2d(padded_inp, tmp_kernel, groups=1, padding=0, stride=1)
return out.reshape(b, c, out_channels, h, w)
def rgb_to_grayscale(image, rgb_weights = None):
r"""Convert a RGB image to grayscale version of image.
.. image:: _static/img/rgb_to_grayscale.png
The image data is assumed to be in the range of (0, 1).
Args:
image: RGB image to be converted to grayscale with shape :math:`(*,3,H,W)`.
rgb_weights: Weights that will be applied on each channel (RGB).
The sum of the weights should add up to one.
Returns:
grayscale version of the image with shape :math:`(*,1,H,W)`.
.. note::
See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/
color_conversions.html>`__.
Example:
>>> input = torch.rand(2, 3, 4, 5)
>>> gray = rgb_to_grayscale(input) # 2x1x4x5
"""
if len(image.shape) < 3 or image.shape[-3] != 3:
raise ValueError(f"Input size must have a shape of (*, 3, H, W). Got {image.shape}")
if rgb_weights is None:
# 8 bit images
if image.dtype == torch.uint8:
rgb_weights = torch.tensor([76, 150, 29], device=image.device, dtype=torch.uint8)
# floating point images
elif image.dtype in (torch.float16, torch.float32, torch.float64):
rgb_weights = torch.tensor([0.299, 0.587, 0.114], device=image.device, dtype=image.dtype)
else:
raise TypeError(f"Unknown data type: {image.dtype}")
else:
# is tensor that we make sure is in the same device/dtype
rgb_weights = rgb_weights.to(image)
# unpack the color image channels with RGB order
r: Tensor = image[..., 0:1, :, :]
g: Tensor = image[..., 1:2, :, :]
b: Tensor = image[..., 2:3, :, :]
w_r, w_g, w_b = rgb_weights.unbind()
return w_r * r + w_g * g + w_b * b
def canny(
input,
low_threshold = 0.1,
high_threshold = 0.2,
kernel_size = 5,
sigma = 1,
hysteresis = True,
eps = 1e-6,
):
r"""Find edges of the input image and filters them using the Canny algorithm.
.. image:: _static/img/canny.png
Args:
input: input image tensor with shape :math:`(B,C,H,W)`.
low_threshold: lower threshold for the hysteresis procedure.
high_threshold: upper threshold for the hysteresis procedure.
kernel_size: the size of the kernel for the gaussian blur.
sigma: the standard deviation of the kernel for the gaussian blur.
hysteresis: if True, applies the hysteresis edge tracking.
Otherwise, the edges are divided between weak (0.5) and strong (1) edges.
eps: regularization number to avoid NaN during backprop.
Returns:
- the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
- the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.
.. note::
See a working example `here <https://kornia-tutorials.readthedocs.io/en/latest/
canny.html>`__.
Example:
>>> input = torch.rand(5, 3, 4, 4)
>>> magnitude, edges = canny(input) # 5x3x4x4
>>> magnitude.shape
torch.Size([5, 1, 4, 4])
>>> edges.shape
torch.Size([5, 1, 4, 4])
"""
# KORNIA_CHECK_IS_TENSOR(input)
# KORNIA_CHECK_SHAPE(input, ['B', 'C', 'H', 'W'])
# KORNIA_CHECK(
# low_threshold <= high_threshold,
# "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: "
# f"{low_threshold}>{high_threshold}",
# )
# KORNIA_CHECK(0 < low_threshold < 1, f'Invalid low threshold. Should be in range (0, 1). Got: {low_threshold}')
# KORNIA_CHECK(0 < high_threshold < 1, f'Invalid high threshold. Should be in range (0, 1). Got: {high_threshold}')
device = input.device
dtype = input.dtype
# To Grayscale
if input.shape[1] == 3:
input = rgb_to_grayscale(input)
# Gaussian filter
blurred: Tensor = gaussian_blur_2d(input, kernel_size, sigma)
# Compute the gradients
gradients: Tensor = spatial_gradient(blurred, normalized=False)
# Unpack the edges
gx: Tensor = gradients[:, :, 0]
gy: Tensor = gradients[:, :, 1]
# Compute gradient magnitude and angle
magnitude: Tensor = torch.sqrt(gx * gx + gy * gy + eps)
angle: Tensor = torch.atan2(gy, gx)
# Radians to Degrees
angle = 180.0 * angle / math.pi
# Round angle to the nearest 45 degree
angle = torch.round(angle / 45) * 45
# Non-maximal suppression
nms_kernels: Tensor = get_canny_nms_kernel(device, dtype)
nms_magnitude: Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)
# Get the indices for both directions
positive_idx: Tensor = (angle / 45) % 8
positive_idx = positive_idx.long()
negative_idx: Tensor = ((angle / 45) + 4) % 8
negative_idx = negative_idx.long()
# Apply the non-maximum suppression to the different directions
channel_select_filtered_positive: Tensor = torch.gather(nms_magnitude, 1, positive_idx)
channel_select_filtered_negative: Tensor = torch.gather(nms_magnitude, 1, negative_idx)
channel_select_filtered: Tensor = torch.stack(
[channel_select_filtered_positive, channel_select_filtered_negative], 1
)
is_max: Tensor = channel_select_filtered.min(dim=1)[0] > 0.0
magnitude = magnitude * is_max
# Threshold
edges: Tensor = F.threshold(magnitude, low_threshold, 0.0)
low: Tensor = magnitude > low_threshold
high: Tensor = magnitude > high_threshold
edges = low * 0.5 + high * 0.5
edges = edges.to(dtype)
# Hysteresis
if hysteresis:
edges_old: Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
hysteresis_kernels: Tensor = get_hysteresis_kernel(device, dtype)
while ((edges_old - edges).abs() != 0).any():
weak: Tensor = (edges == 0.5).float()
strong: Tensor = (edges == 1).float()
hysteresis_magnitude: Tensor = F.conv2d(
edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
)
hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
hysteresis_magnitude = hysteresis_magnitude * weak + strong
edges_old = edges.clone()
edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5
edges = hysteresis_magnitude
return magnitude, edges
+from kornia.filters import canny
class Canny:
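The vendored Canny code above is deleted in favor of kornia's implementation (hence the kornia>=0.7.1 pin added to requirements.txt below). The node's call reduces to roughly this, with illustrative thresholds:

import torch
from kornia.filters import canny

img = torch.rand(1, 3, 64, 64)        # (B, C, H, W), values in [0, 1]
magnitude, edges = canny(img, low_threshold=0.4, high_threshold=0.8)
print(magnitude.shape, edges.shape)   # torch.Size([1, 1, 64, 64]) twice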

View File

@@ -342,6 +342,24 @@ class GrowMask:
out.append(output)
return (torch.stack(out, dim=0),)
+class ThresholdMask:
+@classmethod
+def INPUT_TYPES(s):
+return {
+"required": {
+"mask": ("MASK",),
+"value": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01}),
+}
+}
+CATEGORY = "mask"
+RETURN_TYPES = ("MASK",)
+FUNCTION = "image_to_mask"
+def image_to_mask(self, mask, value):
+mask = (mask > value).float()
+return (mask,)
NODE_CLASS_MAPPINGS = {
@@ -356,6 +374,7 @@ NODE_CLASS_MAPPINGS = {
"MaskComposite": MaskComposite,
"FeatherMask": FeatherMask,
"GrowMask": GrowMask,
"ThresholdMask": ThresholdMask,
}
NODE_DISPLAY_NAME_MAPPINGS = {

View File

@@ -37,7 +37,7 @@ class StableCascade_EmptyLatentImage:
RETURN_NAMES = ("stage_c", "stage_b")
FUNCTION = "generate"
CATEGORY = "_for_testing/stable_cascade"
CATEGORY = "latent/stable_cascade"
def generate(self, width, height, compression, batch_size=1):
c_latent = torch.zeros([batch_size, 16, height // compression, width // compression])
@@ -63,7 +63,7 @@ class StableCascade_StageC_VAEEncode:
RETURN_NAMES = ("stage_c", "stage_b")
FUNCTION = "generate"
CATEGORY = "_for_testing/stable_cascade"
CATEGORY = "latent/stable_cascade"
def generate(self, image, vae, compression):
width = image.shape[-2]
@@ -91,7 +91,7 @@ class StableCascade_StageB_Conditioning:
FUNCTION = "set_prior"
CATEGORY = "_for_testing/stable_cascade"
CATEGORY = "conditioning/stable_cascade"
def set_prior(self, conditioning, stage_c):
c = []
@@ -102,8 +102,39 @@ class StableCascade_StageB_Conditioning:
c.append(n)
return (c, )
+class StableCascade_SuperResolutionControlnet:
+def __init__(self, device="cpu"):
+self.device = device
+@classmethod
+def INPUT_TYPES(s):
+return {"required": {
+"image": ("IMAGE",),
+"vae": ("VAE", ),
+}}
+RETURN_TYPES = ("IMAGE", "LATENT", "LATENT")
+RETURN_NAMES = ("controlnet_input", "stage_c", "stage_b")
+FUNCTION = "generate"
+CATEGORY = "_for_testing/stable_cascade"
+def generate(self, image, vae):
+width = image.shape[-2]
+height = image.shape[-3]
+batch_size = image.shape[0]
+controlnet_input = vae.encode(image[:,:,:,:3]).movedim(1, -1)
+c_latent = torch.zeros([batch_size, 16, height // 16, width // 16])
+b_latent = torch.zeros([batch_size, 4, height // 2, width // 2])
+return (controlnet_input, {
+"samples": c_latent,
+}, {
+"samples": b_latent,
+})
NODE_CLASS_MAPPINGS = {
"StableCascade_EmptyLatentImage": StableCascade_EmptyLatentImage,
"StableCascade_StageB_Conditioning": StableCascade_StageB_Conditioning,
"StableCascade_StageC_VAEEncode": StableCascade_StageC_VAEEncode,
"StableCascade_SuperResolutionControlnet": StableCascade_SuperResolutionControlnet,
}
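For scale, the empty latents the new super-resolution node allocates derive directly from the pixel size; for a 1024x1024 input:

import torch

batch_size, height, width = 1, 1024, 1024
c_latent = torch.zeros([batch_size, 16, height // 16, width // 16])  # (1, 16, 64, 64)
b_latent = torch.zeros([batch_size, 4, height // 2, width // 2])     # (1, 4, 512, 512)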

View File

@@ -0,0 +1,42 @@
# code adapted from https://github.com/exx8/differential-diffusion
import torch
class DifferentialDiffusion():
@classmethod
def INPUT_TYPES(s):
return {"required": {"model": ("MODEL", ),
}}
RETURN_TYPES = ("MODEL",)
FUNCTION = "apply"
CATEGORY = "_for_testing"
INIT = False
def apply(self, model):
model = model.clone()
model.set_model_denoise_mask_function(self.forward)
return (model,)
def forward(self, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict):
model = extra_options["model"]
step_sigmas = extra_options["sigmas"]
sigma_to = model.inner_model.model_sampling.sigma_min
if step_sigmas[-1] > sigma_to:
sigma_to = step_sigmas[-1]
sigma_from = step_sigmas[0]
ts_from = model.inner_model.model_sampling.timestep(sigma_from)
ts_to = model.inner_model.model_sampling.timestep(sigma_to)
current_ts = model.inner_model.model_sampling.timestep(sigma[0])
threshold = (current_ts - ts_to) / (ts_from - ts_to)
return (denoise_mask >= threshold).to(denoise_mask.dtype)
NODE_CLASS_MAPPINGS = {
"DifferentialDiffusion": DifferentialDiffusion,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"DifferentialDiffusion": "Differential Diffusion",
}
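The denoise mask is re-binarized at every step: a pixel whose mask value is m is only denoised once the remaining fraction of the schedule drops to m or below (a mask of 1.0 passes from the first step). A worked example with illustrative timesteps:

import torch

ts_from, ts_to = 999.0, 0.0            # timesteps of the first/last step sigmas
mask = torch.tensor([0.1, 0.3, 0.9])   # per-pixel denoise strengths
for current_ts in (999.0, 700.0, 299.0):
    threshold = (current_ts - ts_to) / (ts_from - ts_to)
    print(round(threshold, 2), (mask >= threshold).float().tolist())
# 1.0 [0.0, 0.0, 0.0]
# 0.7 [0.0, 0.0, 1.0]
# 0.3 [0.0, 1.0, 1.0]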

View File

@@ -0,0 +1,49 @@
import torch
import comfy.model_management
from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat
class Morphology:
@classmethod
def INPUT_TYPES(s):
return {"required": {"image": ("IMAGE",),
"operation": (["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],),
"kernel_size": ("INT", {"default": 3, "min": 3, "max": 999, "step": 1}),
}}
RETURN_TYPES = ("IMAGE",)
FUNCTION = "process"
CATEGORY = "image/postprocessing"
def process(self, image, operation, kernel_size):
device = comfy.model_management.get_torch_device()
kernel = torch.ones(kernel_size, kernel_size, device=device)
image_k = image.to(device).movedim(-1, 1)
if operation == "erode":
output = erosion(image_k, kernel)
elif operation == "dilate":
output = dilation(image_k, kernel)
elif operation == "open":
output = opening(image_k, kernel)
elif operation == "close":
output = closing(image_k, kernel)
elif operation == "gradient":
output = gradient(image_k, kernel)
elif operation == "top_hat":
output = top_hat(image_k, kernel)
elif operation == "bottom_hat":
output = bottom_hat(image_k, kernel)
else:
raise ValueError(f"Invalid operation {operation} for morphology. Must be one of 'erode', 'dilate', 'open', 'close', 'gradient', 'top_hat', 'bottom_hat'")
img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1)
return (img_out,)
NODE_CLASS_MAPPINGS = {
"Morphology": Morphology,
}
NODE_DISPLAY_NAME_MAPPINGS = {
"Morphology": "ImageMorphology",
}
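The node delegates to kornia.morphology. The "dilate" branch is equivalent to this direct call; input is (B, C, H, W) and the kernel is a (kH, kW) tensor of ones:

import torch
from kornia.morphology import dilation

img = torch.zeros(1, 1, 5, 5)
img[0, 0, 2, 2] = 1.0                  # single lit pixel
out = dilation(img, torch.ones(3, 3))
print(out[0, 0])                       # the pixel grows into a 3x3 block of ones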

View File

@@ -28,4 +28,6 @@ protobuf
psutil
ConfigArgParse
aio-pika
-pyjwt[crypto]
+pyjwt[crypto]
+kornia>=0.7.1
+mpmath>=1.0,!=1.4.0a0