mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-11 14:50:49 +08:00
Merge with upstream
This commit is contained in:
commit
c0d9bc0129
@ -104,7 +104,7 @@ if self_update and not files_equal(update_py_path, repo_update_py_path) and file
|
|||||||
if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
|
if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
|
||||||
import subprocess
|
import subprocess
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-r', repo_req_path])
|
subprocess.check_call([sys.executable, '-s', '-m', 'pip', 'install', '-r', repo_req_path])
|
||||||
shutil.copy(repo_req_path, req_path)
|
shutil.copy(repo_req_path, req_path)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|||||||
@ -24,7 +24,7 @@ on:
|
|||||||
description: 'python patch version'
|
description: 'python patch version'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
default: "6"
|
default: "8"
|
||||||
# push:
|
# push:
|
||||||
# branches:
|
# branches:
|
||||||
# - master
|
# - master
|
||||||
|
|||||||
@ -19,7 +19,7 @@ on:
|
|||||||
description: 'python patch version'
|
description: 'python patch version'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
default: "1"
|
default: "2"
|
||||||
# push:
|
# push:
|
||||||
# branches:
|
# branches:
|
||||||
# - master
|
# - master
|
||||||
@ -49,7 +49,7 @@ jobs:
|
|||||||
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
|
echo 'import site' >> ./python3${{ inputs.python_minor }}._pth
|
||||||
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
|
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
|
||||||
./python.exe get-pip.py
|
./python.exe get-pip.py
|
||||||
python -m pip wheel torch torchvision --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
|
python -m pip wheel torch torchvision mpmath==1.3.0 --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu${{ inputs.cu }} -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir
|
||||||
ls ../temp_wheel_dir
|
ls ../temp_wheel_dir
|
||||||
./python.exe -s -m pip install --pre ../temp_wheel_dir/*
|
./python.exe -s -m pip install --pre ../temp_wheel_dir/*
|
||||||
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
|
sed -i '1i../ComfyUI' ./python3${{ inputs.python_minor }}._pth
|
||||||
|
|||||||
@ -19,7 +19,7 @@ on:
|
|||||||
description: 'python patch version'
|
description: 'python patch version'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
default: "6"
|
default: "8"
|
||||||
# push:
|
# push:
|
||||||
# branches:
|
# branches:
|
||||||
# - master
|
# - master
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import importlib.util
|
import importlib.util
|
||||||
from ..cli_args import args
|
from ..cli_args import args
|
||||||
|
import subprocess
|
||||||
|
|
||||||
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
|
#Can't use pytorch to get the GPU names because the cuda malloc has to be set before the first import.
|
||||||
def get_gpu_names():
|
def get_gpu_names():
|
||||||
@ -34,7 +35,15 @@ def get_gpu_names():
|
|||||||
return gpu_names
|
return gpu_names
|
||||||
return enum_display_devices()
|
return enum_display_devices()
|
||||||
else:
|
else:
|
||||||
return set()
|
gpu_names = set()
|
||||||
|
try:
|
||||||
|
out = subprocess.check_output(['nvidia-smi', '-L'])
|
||||||
|
for l in out.split(b'\n'):
|
||||||
|
if len(l) > 0:
|
||||||
|
gpu_names.add(l.decode('utf-8').split(' (UUID')[0])
|
||||||
|
except IOError as error:
|
||||||
|
pass
|
||||||
|
return gpu_names
|
||||||
|
|
||||||
blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
|
blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
|
||||||
"GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
|
"GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
|
||||||
|
|||||||
@ -10,6 +10,7 @@ from . import ops
|
|||||||
|
|
||||||
from .cldm import cldm
|
from .cldm import cldm
|
||||||
from .t2i_adapter import adapter
|
from .t2i_adapter import adapter
|
||||||
|
from .ldm.cascade import controlnet
|
||||||
|
|
||||||
|
|
||||||
def broadcast_image_to(tensor, target_batch_size, batched_number):
|
def broadcast_image_to(tensor, target_batch_size, batched_number):
|
||||||
@ -38,6 +39,8 @@ class ControlBase:
|
|||||||
self.timestep_percent_range = (0.0, 1.0)
|
self.timestep_percent_range = (0.0, 1.0)
|
||||||
self.global_average_pooling = False
|
self.global_average_pooling = False
|
||||||
self.timestep_range = None
|
self.timestep_range = None
|
||||||
|
self.compression_ratio = 8
|
||||||
|
self.upscale_algorithm = 'nearest-exact'
|
||||||
|
|
||||||
if device is None:
|
if device is None:
|
||||||
device = model_management.get_torch_device()
|
device = model_management.get_torch_device()
|
||||||
@ -78,6 +81,8 @@ class ControlBase:
|
|||||||
c.strength = self.strength
|
c.strength = self.strength
|
||||||
c.timestep_percent_range = self.timestep_percent_range
|
c.timestep_percent_range = self.timestep_percent_range
|
||||||
c.global_average_pooling = self.global_average_pooling
|
c.global_average_pooling = self.global_average_pooling
|
||||||
|
c.compression_ratio = self.compression_ratio
|
||||||
|
c.upscale_algorithm = self.upscale_algorithm
|
||||||
|
|
||||||
def inference_memory_requirements(self, dtype):
|
def inference_memory_requirements(self, dtype):
|
||||||
if self.previous_controlnet is not None:
|
if self.previous_controlnet is not None:
|
||||||
@ -159,11 +164,11 @@ class ControlNet(ControlBase):
|
|||||||
dtype = self.manual_cast_dtype
|
dtype = self.manual_cast_dtype
|
||||||
|
|
||||||
output_dtype = x_noisy.dtype
|
output_dtype = x_noisy.dtype
|
||||||
if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
|
if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
|
||||||
if self.cond_hint is not None:
|
if self.cond_hint is not None:
|
||||||
del self.cond_hint
|
del self.cond_hint
|
||||||
self.cond_hint = None
|
self.cond_hint = None
|
||||||
self.cond_hint = utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(dtype).to(self.device)
|
self.cond_hint = utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio, self.upscale_algorithm, "center").to(dtype).to(self.device)
|
||||||
if x_noisy.shape[0] != self.cond_hint.shape[0]:
|
if x_noisy.shape[0] != self.cond_hint.shape[0]:
|
||||||
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
|
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
|
||||||
|
|
||||||
@ -288,13 +293,13 @@ class ControlLora(ControlNet):
|
|||||||
for k in sd:
|
for k in sd:
|
||||||
weight = sd[k]
|
weight = sd[k]
|
||||||
try:
|
try:
|
||||||
utils.set_attr(self.control_model, k, weight)
|
utils.set_attr_param(self.control_model, k, weight)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
for k in self.control_weights:
|
for k in self.control_weights:
|
||||||
if k not in {"lora_controlnet"}:
|
if k not in {"lora_controlnet"}:
|
||||||
utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(model_management.get_torch_device()))
|
utils.set_attr_param(self.control_model, k, self.control_weights[k].to(dtype).to(model_management.get_torch_device()))
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
|
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
|
||||||
@ -433,11 +438,13 @@ def load_controlnet(ckpt_path, model=None):
|
|||||||
return control
|
return control
|
||||||
|
|
||||||
class T2IAdapter(ControlBase):
|
class T2IAdapter(ControlBase):
|
||||||
def __init__(self, t2i_model, channels_in, device=None):
|
def __init__(self, t2i_model, channels_in, compression_ratio, upscale_algorithm, device=None):
|
||||||
super().__init__(device)
|
super().__init__(device)
|
||||||
self.t2i_model = t2i_model
|
self.t2i_model = t2i_model
|
||||||
self.channels_in = channels_in
|
self.channels_in = channels_in
|
||||||
self.control_input = None
|
self.control_input = None
|
||||||
|
self.compression_ratio = compression_ratio
|
||||||
|
self.upscale_algorithm = upscale_algorithm
|
||||||
|
|
||||||
def scale_image_to(self, width, height):
|
def scale_image_to(self, width, height):
|
||||||
unshuffle_amount = self.t2i_model.unshuffle_amount
|
unshuffle_amount = self.t2i_model.unshuffle_amount
|
||||||
@ -457,13 +464,13 @@ class T2IAdapter(ControlBase):
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:
|
if self.cond_hint is None or x_noisy.shape[2] * self.compression_ratio != self.cond_hint.shape[2] or x_noisy.shape[3] * self.compression_ratio != self.cond_hint.shape[3]:
|
||||||
if self.cond_hint is not None:
|
if self.cond_hint is not None:
|
||||||
del self.cond_hint
|
del self.cond_hint
|
||||||
self.control_input = None
|
self.control_input = None
|
||||||
self.cond_hint = None
|
self.cond_hint = None
|
||||||
width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8)
|
width, height = self.scale_image_to(x_noisy.shape[3] * self.compression_ratio, x_noisy.shape[2] * self.compression_ratio)
|
||||||
self.cond_hint = utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device)
|
self.cond_hint = utils.common_upscale(self.cond_hint_original, width, height, self.upscale_algorithm, "center").float().to(self.device)
|
||||||
if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
|
if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
|
||||||
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
|
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
|
||||||
if x_noisy.shape[0] != self.cond_hint.shape[0]:
|
if x_noisy.shape[0] != self.cond_hint.shape[0]:
|
||||||
@ -482,11 +489,14 @@ class T2IAdapter(ControlBase):
|
|||||||
return self.control_merge(control_input, mid, control_prev, x_noisy.dtype)
|
return self.control_merge(control_input, mid, control_prev, x_noisy.dtype)
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
c = T2IAdapter(self.t2i_model, self.channels_in)
|
c = T2IAdapter(self.t2i_model, self.channels_in, self.compression_ratio, self.upscale_algorithm)
|
||||||
self.copy_to(c)
|
self.copy_to(c)
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def load_t2i_adapter(t2i_data):
|
def load_t2i_adapter(t2i_data):
|
||||||
|
compression_ratio = 8
|
||||||
|
upscale_algorithm = 'nearest-exact'
|
||||||
|
|
||||||
if 'adapter' in t2i_data:
|
if 'adapter' in t2i_data:
|
||||||
t2i_data = t2i_data['adapter']
|
t2i_data = t2i_data['adapter']
|
||||||
if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format
|
if 'adapter.body.0.resnets.0.block1.weight' in t2i_data: #diffusers format
|
||||||
@ -514,8 +524,17 @@ def load_t2i_adapter(t2i_data):
|
|||||||
if cin == 256 or cin == 768:
|
if cin == 256 or cin == 768:
|
||||||
xl = True
|
xl = True
|
||||||
model_ad = adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
|
model_ad = adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
|
||||||
|
elif "backbone.0.0.weight" in keys:
|
||||||
|
model_ad = controlnet.ControlNet(c_in=t2i_data['backbone.0.0.weight'].shape[1], proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
|
||||||
|
compression_ratio = 32
|
||||||
|
upscale_algorithm = 'bilinear'
|
||||||
|
elif "backbone.10.blocks.0.weight" in keys:
|
||||||
|
model_ad = controlnet.ControlNet(c_in=t2i_data['backbone.0.weight'].shape[1], bottleneck_mode="large", proj_blocks=[0, 4, 8, 12, 51, 55, 59, 63])
|
||||||
|
compression_ratio = 1
|
||||||
|
upscale_algorithm = 'nearest-exact'
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
missing, unexpected = model_ad.load_state_dict(t2i_data)
|
missing, unexpected = model_ad.load_state_dict(t2i_data)
|
||||||
if len(missing) > 0:
|
if len(missing) > 0:
|
||||||
print("t2i missing", missing)
|
print("t2i missing", missing)
|
||||||
@ -523,4 +542,4 @@ def load_t2i_adapter(t2i_data):
|
|||||||
if len(unexpected) > 0:
|
if len(unexpected) > 0:
|
||||||
print("t2i unexpected", unexpected)
|
print("t2i unexpected", unexpected)
|
||||||
|
|
||||||
return T2IAdapter(model_ad, model_ad.input_channels)
|
return T2IAdapter(model_ad, model_ad.input_channels, compression_ratio, upscale_algorithm)
|
||||||
|
|||||||
93
comfy/ldm/cascade/controlnet.py
Normal file
93
comfy/ldm/cascade/controlnet.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
"""
|
||||||
|
This file is part of ComfyUI.
|
||||||
|
Copyright (C) 2024 Stability AI
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchvision
|
||||||
|
from torch import nn
|
||||||
|
from .common import LayerNorm2d_op
|
||||||
|
|
||||||
|
|
||||||
|
class CNetResBlock(nn.Module):
    """Residual block: two (LayerNorm2d -> GELU -> 3x3 conv) stages plus a skip.

    Args:
        c: Channel count; every layer in the block maps ``c`` -> ``c`` channels.
        dtype: Forwarded to every parameterised layer built here.
        device: Forwarded to every parameterised layer built here.
        operations: Namespace that provides ``Conv2d`` and is handed to
            ``LayerNorm2d_op`` to obtain the normalisation layer class.
    """

    def __init__(self, c, dtype=None, device=None, operations=None):
        super().__init__()
        self.blocks = nn.Sequential(
            LayerNorm2d_op(operations)(c, dtype=dtype, device=device),
            nn.GELU(),
            # dtype/device are forwarded to the convolutions as well, so they
            # are created consistently with the norm layers above instead of
            # silently falling back to the global defaults.
            operations.Conv2d(c, c, kernel_size=3, padding=1, dtype=dtype, device=device),
            LayerNorm2d_op(operations)(c, dtype=dtype, device=device),
            nn.GELU(),
            operations.Conv2d(c, c, kernel_size=3, padding=1, dtype=dtype, device=device),
        )

    def forward(self, x):
        """Return ``x`` plus the output of the stacked layers applied to ``x``."""
        return x + self.blocks(x)
|
||||||
|
|
||||||
|
|
||||||
|
class ControlNet(nn.Module):
    """Backbone plus per-block 1x1 projection heads producing control features.

    Args:
        c_in: Number of channels of the input image/hint.
        c_proj: Output channels of every projection head.
        proj_blocks: Indices of the output slots that receive a projection.
            ``None`` (the default) is treated as "no projections".
        bottleneck_mode: ``'effnet'`` (default when ``None``), ``'simple'``
            or ``'large'``; selects how the backbone is built.
        dtype: Forwarded to the layers created through ``operations``.
        device: Forwarded to the layers created through ``operations``.
        operations: Namespace providing ``Conv2d`` (defaults to ``torch.nn``).

    Raises:
        ValueError: If ``bottleneck_mode`` is not one of the known modes.
    """

    def __init__(self, c_in=3, c_proj=2048, proj_blocks=None, bottleneck_mode=None, dtype=None, device=None, operations=nn):
        super().__init__()
        if bottleneck_mode is None:
            bottleneck_mode = 'effnet'
        # The signature advertises proj_blocks=None, but len(None) used to
        # raise a TypeError; normalise to an empty list instead.
        if proj_blocks is None:
            proj_blocks = []
        self.proj_blocks = proj_blocks

        if bottleneck_mode == 'effnet':
            embd_channels = 1280
            self.backbone = torchvision.models.efficientnet_v2_s().features.eval()
            if c_in != 3:
                # Replace the stem convolution so it accepts c_in channels,
                # reusing as much of the original stem weights as fits.
                in_weights = self.backbone[0][0].weight.data
                self.backbone[0][0] = operations.Conv2d(c_in, 24, kernel_size=3, stride=2, bias=False, dtype=dtype, device=device)
                if c_in > 3:
                    # nn.init.constant_(self.backbone[0][0].weight, 0)
                    self.backbone[0][0].weight.data[:, :3] = in_weights[:, :3].clone()
                else:
                    self.backbone[0][0].weight.data = in_weights[:, :c_in].clone()
        elif bottleneck_mode == 'simple':
            embd_channels = c_in
            self.backbone = nn.Sequential(
                operations.Conv2d(embd_channels, embd_channels * 4, kernel_size=3, padding=1, dtype=dtype, device=device),
                nn.LeakyReLU(0.2, inplace=True),
                operations.Conv2d(embd_channels * 4, embd_channels, kernel_size=3, padding=1, dtype=dtype, device=device),
            )
        elif bottleneck_mode == 'large':
            self.backbone = nn.Sequential(
                operations.Conv2d(c_in, 4096 * 4, kernel_size=1, dtype=dtype, device=device),
                nn.LeakyReLU(0.2, inplace=True),
                operations.Conv2d(4096 * 4, 1024, kernel_size=1, dtype=dtype, device=device),
                *[CNetResBlock(1024, dtype=dtype, device=device, operations=operations) for _ in range(8)],
                operations.Conv2d(1024, 1280, kernel_size=1, dtype=dtype, device=device),
            )
            embd_channels = 1280
        else:
            raise ValueError(f'Unknown bottleneck mode: {bottleneck_mode}')

        # One projection head per requested block index.
        self.projections = nn.ModuleList()
        for _ in range(len(proj_blocks)):
            self.projections.append(nn.Sequential(
                operations.Conv2d(embd_channels, embd_channels, kernel_size=1, bias=False, dtype=dtype, device=device),
                nn.LeakyReLU(0.2, inplace=True),
                operations.Conv2d(embd_channels, c_proj, kernel_size=1, bias=False, dtype=dtype, device=device),
            ))
            # nn.init.constant_(self.projections[-1][-1].weight, 0)  # zero output projection

        # Plain attributes exposed for code that wraps this model.
        # NOTE(review): presumably these mirror the attribute set of the other
        # adapter models so a shared wrapper can read them - confirm.
        self.xl = False
        self.input_channels = c_in
        self.unshuffle_amount = 8

    def forward(self, x):
        """Run the backbone once and project its output for each block index.

        Returns:
            A list of length ``max(proj_blocks) + 1`` whose entry ``idx`` is
            the projection for that block index and ``None`` for indices not
            listed in ``proj_blocks``; an empty list when there are no
            projection blocks.
        """
        x = self.backbone(x)
        if not self.proj_blocks:
            # max() of an empty sequence would raise; nothing to project.
            return []
        proj_outputs = [None for _ in range(max(self.proj_blocks) + 1)]
        for i, idx in enumerate(self.proj_blocks):
            proj_outputs[idx] = self.projections[i](x)
        return proj_outputs
|
||||||
@ -194,10 +194,10 @@ class StageC(nn.Module):
|
|||||||
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
|
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
|
||||||
ResBlock)):
|
ResBlock)):
|
||||||
if cnet is not None:
|
if cnet is not None:
|
||||||
next_cnet = cnet()
|
next_cnet = cnet.pop()
|
||||||
if next_cnet is not None:
|
if next_cnet is not None:
|
||||||
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
|
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
|
||||||
align_corners=True)
|
align_corners=True).to(x.dtype)
|
||||||
x = block(x)
|
x = block(x)
|
||||||
elif isinstance(block, AttnBlock) or (
|
elif isinstance(block, AttnBlock) or (
|
||||||
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
|
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
|
||||||
@ -228,10 +228,10 @@ class StageC(nn.Module):
|
|||||||
x = torch.nn.functional.interpolate(x, skip.shape[-2:], mode='bilinear',
|
x = torch.nn.functional.interpolate(x, skip.shape[-2:], mode='bilinear',
|
||||||
align_corners=True)
|
align_corners=True)
|
||||||
if cnet is not None:
|
if cnet is not None:
|
||||||
next_cnet = cnet()
|
next_cnet = cnet.pop()
|
||||||
if next_cnet is not None:
|
if next_cnet is not None:
|
||||||
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
|
x = x + nn.functional.interpolate(next_cnet, size=x.shape[-2:], mode='bilinear',
|
||||||
align_corners=True)
|
align_corners=True).to(x.dtype)
|
||||||
x = block(x, skip)
|
x = block(x, skip)
|
||||||
elif isinstance(block, AttnBlock) or (
|
elif isinstance(block, AttnBlock) or (
|
||||||
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
|
hasattr(block, '_fsdp_wrapped_module') and isinstance(block._fsdp_wrapped_module,
|
||||||
@ -248,7 +248,7 @@ class StageC(nn.Module):
|
|||||||
x = upscaler(x)
|
x = upscaler(x)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def forward(self, x, r, clip_text, clip_text_pooled, clip_img, cnet=None, **kwargs):
|
def forward(self, x, r, clip_text, clip_text_pooled, clip_img, control=None, **kwargs):
|
||||||
# Process the conditioning embeddings
|
# Process the conditioning embeddings
|
||||||
r_embed = self.gen_r_embedding(r).to(dtype=x.dtype)
|
r_embed = self.gen_r_embedding(r).to(dtype=x.dtype)
|
||||||
for c in self.t_conds:
|
for c in self.t_conds:
|
||||||
@ -256,10 +256,13 @@ class StageC(nn.Module):
|
|||||||
r_embed = torch.cat([r_embed, self.gen_r_embedding(t_cond).to(dtype=x.dtype)], dim=1)
|
r_embed = torch.cat([r_embed, self.gen_r_embedding(t_cond).to(dtype=x.dtype)], dim=1)
|
||||||
clip = self.gen_c_embeddings(clip_text, clip_text_pooled, clip_img)
|
clip = self.gen_c_embeddings(clip_text, clip_text_pooled, clip_img)
|
||||||
|
|
||||||
|
if control is not None:
|
||||||
|
cnet = control.get("input")
|
||||||
|
else:
|
||||||
|
cnet = None
|
||||||
|
|
||||||
# Model Blocks
|
# Model Blocks
|
||||||
x = self.embedding(x)
|
x = self.embedding(x)
|
||||||
if cnet is not None:
|
|
||||||
cnet = ControlNetDeliverer(cnet)
|
|
||||||
level_outputs = self._down_encode(x, r_embed, clip, cnet)
|
level_outputs = self._down_encode(x, r_embed, clip, cnet)
|
||||||
x = self._up_decode(level_outputs, r_embed, clip, cnet)
|
x = self._up_decode(level_outputs, r_embed, clip, cnet)
|
||||||
return self.clf(x)
|
return self.clf(x)
|
||||||
|
|||||||
@ -166,6 +166,10 @@ class BaseModel(torch.nn.Module):
|
|||||||
if cross_attn_cnet is not None:
|
if cross_attn_cnet is not None:
|
||||||
out['crossattn_controlnet'] = conds.CONDCrossAttn(cross_attn_cnet)
|
out['crossattn_controlnet'] = conds.CONDCrossAttn(cross_attn_cnet)
|
||||||
|
|
||||||
|
c_concat = kwargs.get("noise_concat", None)
|
||||||
|
if c_concat is not None:
|
||||||
|
out['c_concat'] = comfy.conds.CONDNoiseShape(data)
|
||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def load_model_weights(self, sd, unet_prefix=""):
|
def load_model_weights(self, sd, unet_prefix=""):
|
||||||
|
|||||||
@ -763,7 +763,7 @@ def should_use_fp16(device=None, model_params=0, prioritize_performance=True, ma
|
|||||||
#FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
|
#FP16 is confirmed working on a 1080 (GP104) but it's a bit slower than FP32 so it should only be enabled
|
||||||
#when the model doesn't actually fit on the card
|
#when the model doesn't actually fit on the card
|
||||||
#TODO: actually test if GP106 and others have the same type of behavior
|
#TODO: actually test if GP106 and others have the same type of behavior
|
||||||
nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050"]
|
nvidia_10_series = ["1080", "1070", "titan x", "p3000", "p3200", "p4000", "p4200", "p5000", "p5200", "p6000", "1060", "1050", "p40", "p100", "p6", "p4"]
|
||||||
for x in nvidia_10_series:
|
for x in nvidia_10_series:
|
||||||
if x in props.name.lower():
|
if x in props.name.lower():
|
||||||
fp16_works = True
|
fp16_works = True
|
||||||
|
|||||||
@ -67,6 +67,9 @@ class ModelPatcher:
|
|||||||
def set_model_unet_function_wrapper(self, unet_wrapper_function):
|
def set_model_unet_function_wrapper(self, unet_wrapper_function):
|
||||||
self.model_options["model_function_wrapper"] = unet_wrapper_function
|
self.model_options["model_function_wrapper"] = unet_wrapper_function
|
||||||
|
|
||||||
|
def set_model_denoise_mask_function(self, denoise_mask_function):
    """Store the callable in ``model_options`` under ``"denoise_mask_function"``."""
    options = self.model_options
    options["denoise_mask_function"] = denoise_mask_function
|
||||||
|
|
||||||
def set_model_patch(self, patch, name):
|
def set_model_patch(self, patch, name):
|
||||||
to = self.model_options["transformer_options"]
|
to = self.model_options["transformer_options"]
|
||||||
if "patches" not in to:
|
if "patches" not in to:
|
||||||
@ -176,10 +179,9 @@ class ModelPatcher:
|
|||||||
|
|
||||||
def patch_model(self, device_to=None, patch_weights=True):
|
def patch_model(self, device_to=None, patch_weights=True):
|
||||||
for k in self.object_patches:
|
for k in self.object_patches:
|
||||||
old = getattr(self.model, k)
|
old = utils.set_attr(self.model, k, self.object_patches[k])
|
||||||
if k not in self.object_patches_backup:
|
if k not in self.object_patches_backup:
|
||||||
self.object_patches_backup[k] = old
|
self.object_patches_backup[k] = old
|
||||||
setattr(self.model, k, self.object_patches[k])
|
|
||||||
|
|
||||||
if patch_weights:
|
if patch_weights:
|
||||||
model_sd = self.model_state_dict()
|
model_sd = self.model_state_dict()
|
||||||
@ -203,7 +205,7 @@ class ModelPatcher:
|
|||||||
if inplace_update:
|
if inplace_update:
|
||||||
utils.copy_to_param(self.model, key, out_weight)
|
utils.copy_to_param(self.model, key, out_weight)
|
||||||
else:
|
else:
|
||||||
utils.set_attr(self.model, key, out_weight)
|
utils.set_attr_param(self.model, key, out_weight)
|
||||||
del temp_weight
|
del temp_weight
|
||||||
|
|
||||||
if device_to is not None:
|
if device_to is not None:
|
||||||
@ -342,7 +344,7 @@ class ModelPatcher:
|
|||||||
utils.copy_to_param(self.model, k, self.backup[k])
|
utils.copy_to_param(self.model, k, self.backup[k])
|
||||||
else:
|
else:
|
||||||
for k in keys:
|
for k in keys:
|
||||||
utils.set_attr(self.model, k, self.backup[k])
|
utils.set_attr_param(self.model, k, self.backup[k])
|
||||||
|
|
||||||
self.backup = {}
|
self.backup = {}
|
||||||
|
|
||||||
@ -352,6 +354,6 @@ class ModelPatcher:
|
|||||||
|
|
||||||
keys = list(self.object_patches_backup.keys())
|
keys = list(self.object_patches_backup.keys())
|
||||||
for k in keys:
|
for k in keys:
|
||||||
setattr(self.model, k, self.object_patches_backup[k])
|
utils.set_attr(self.model, k, self.object_patches_backup[k])
|
||||||
|
|
||||||
self.object_patches_backup = {}
|
self.object_patches_backup = {}
|
||||||
|
|||||||
@ -11,6 +11,14 @@ class EPS:
|
|||||||
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
|
sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1))
|
||||||
return model_input - model_output * sigma
|
return model_input - model_output * sigma
|
||||||
|
|
||||||
|
def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
    """Scale ``noise`` for the given ``sigma`` and add the latent image.

    Args:
        sigma: Noise level to scale by (tensor).
        noise: Noise tensor; the caller's tensor is not modified.
        latent_image: Tensor added to the scaled noise, or ``None`` to
            return the scaled noise alone.
        max_denoise: When true, scale by ``sqrt(1 + sigma**2)`` instead of
            ``sigma``.

    Returns:
        The scaled noise, plus ``latent_image`` when one is given.
    """
    if max_denoise:
        noise = noise * torch.sqrt(1.0 + sigma ** 2.0)
    else:
        noise = noise * sigma

    # latent_image is optional for samplers; adding None would raise.
    if latent_image is not None:
        # In-place on the freshly created product, not on the caller's tensor.
        noise += latent_image
    return noise
|
||||||
|
|
||||||
class V_PREDICTION(EPS):
|
class V_PREDICTION(EPS):
|
||||||
def calculate_denoised(self, sigma, model_output, model_input):
|
def calculate_denoised(self, sigma, model_output, model_input):
|
||||||
|
|||||||
@ -275,15 +275,16 @@ class CFGNoisePredictor(torch.nn.Module):
|
|||||||
return self.apply_model(*args, **kwargs)
|
return self.apply_model(*args, **kwargs)
|
||||||
|
|
||||||
class KSamplerX0Inpaint(torch.nn.Module):
|
class KSamplerX0Inpaint(torch.nn.Module):
|
||||||
def __init__(self, model):
|
def __init__(self, model, sigmas):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.inner_model = model
|
self.inner_model = model
|
||||||
|
self.sigmas = sigmas
|
||||||
def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None):
|
def forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None):
|
||||||
if denoise_mask is not None:
|
if denoise_mask is not None:
|
||||||
if "denoise_mask_function" in model_options:
|
if "denoise_mask_function" in model_options:
|
||||||
denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask)
|
denoise_mask = model_options["denoise_mask_function"](sigma, denoise_mask, extra_options={"model": self.inner_model, "sigmas": self.sigmas})
|
||||||
latent_mask = 1. - denoise_mask
|
latent_mask = 1. - denoise_mask
|
||||||
x = x * denoise_mask + (self.latent_image + self.noise * sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1))) * latent_mask
|
x = x * denoise_mask + self.inner_model.inner_model.model_sampling.noise_scaling(sigma.reshape([sigma.shape[0]] + [1] * (len(self.noise.shape) - 1)), self.noise, self.latent_image) * latent_mask
|
||||||
out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, model_options=model_options, seed=seed)
|
out = self.inner_model(x, sigma, cond=cond, uncond=uncond, cond_scale=cond_scale, model_options=model_options, seed=seed)
|
||||||
if denoise_mask is not None:
|
if denoise_mask is not None:
|
||||||
out = out * denoise_mask + self.latent_image * latent_mask
|
out = out * denoise_mask + self.latent_image * latent_mask
|
||||||
@ -531,7 +532,7 @@ class KSAMPLER(Sampler):
|
|||||||
|
|
||||||
def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
|
def sample(self, model_wrap, sigmas, extra_args, callback, noise, latent_image=None, denoise_mask=None, disable_pbar=False):
|
||||||
extra_args["denoise_mask"] = denoise_mask
|
extra_args["denoise_mask"] = denoise_mask
|
||||||
model_k = KSamplerX0Inpaint(model_wrap)
|
model_k = KSamplerX0Inpaint(model_wrap, sigmas)
|
||||||
model_k.latent_image = latent_image
|
model_k.latent_image = latent_image
|
||||||
if self.inpaint_options.get("random", False): #TODO: Should this be the default?
|
if self.inpaint_options.get("random", False): #TODO: Should this be the default?
|
||||||
generator = torch.manual_seed(extra_args.get("seed", 41) + 1)
|
generator = torch.manual_seed(extra_args.get("seed", 41) + 1)
|
||||||
@ -539,19 +540,13 @@ class KSAMPLER(Sampler):
|
|||||||
else:
|
else:
|
||||||
model_k.noise = noise
|
model_k.noise = noise
|
||||||
|
|
||||||
if self.max_denoise(model_wrap, sigmas):
|
noise = model_wrap.inner_model.model_sampling.noise_scaling(sigmas[0], noise, latent_image, self.max_denoise(model_wrap, sigmas))
|
||||||
noise = noise * torch.sqrt(1.0 + sigmas[0] ** 2.0)
|
|
||||||
else:
|
|
||||||
noise = noise * sigmas[0]
|
|
||||||
|
|
||||||
k_callback = None
|
k_callback = None
|
||||||
total_steps = len(sigmas) - 1
|
total_steps = len(sigmas) - 1
|
||||||
if callback is not None:
|
if callback is not None:
|
||||||
k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps)
|
k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps)
|
||||||
|
|
||||||
if latent_image is not None:
|
|
||||||
noise += latent_image
|
|
||||||
|
|
||||||
samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options)
|
samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options)
|
||||||
return samples
|
return samples
|
||||||
|
|
||||||
|
|||||||
@ -296,8 +296,11 @@ def set_attr(obj, attr, value):
|
|||||||
for name in attrs[:-1]:
|
for name in attrs[:-1]:
|
||||||
obj = getattr(obj, name)
|
obj = getattr(obj, name)
|
||||||
prev = getattr(obj, attrs[-1])
|
prev = getattr(obj, attrs[-1])
|
||||||
setattr(obj, attrs[-1], torch.nn.Parameter(value, requires_grad=False))
|
setattr(obj, attrs[-1], value)
|
||||||
del prev
|
return prev
|
||||||
|
|
||||||
|
def set_attr_param(obj, attr, value):
    """Wrap ``value`` in a non-trainable ``torch.nn.Parameter`` and assign it via ``set_attr``.

    Returns whatever ``set_attr`` returns for the same ``obj``/``attr``.
    """
    frozen = torch.nn.Parameter(value, requires_grad=False)
    return set_attr(obj, attr, frozen)
|
||||||
|
|
||||||
def copy_to_param(obj, attr, value):
|
def copy_to_param(obj, attr, value):
|
||||||
# inplace update tensor instead of replacing it
|
# inplace update tensor instead of replacing it
|
||||||
|
|||||||
@ -5,275 +5,7 @@ import torch
|
|||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import comfy.model_management
|
import comfy.model_management
|
||||||
|
|
||||||
# Right-hand (post-commit) side of this diff row: the file imports kornia's canny.
from kornia.filters import canny


def get_canny_nms_kernel(device=None, dtype=None):
    """Build the eight 3x3 kernels used for Canny non-maximal suppression.

    Every kernel holds +1.0 at the centre and -1.0 at exactly one of the
    eight neighbours; all other entries are zero. The result has shape
    (8, 1, 3, 3) and is created on ``device`` with ``dtype``.
    """
    # (row, col) of the -1.0 entry for each kernel, in output order.
    neighbour_cells = [(1, 2), (2, 2), (2, 1), (2, 0), (1, 0), (0, 0), (0, 1), (0, 2)]
    kernels = []
    for row, col in neighbour_cells:
        cell = [[0.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]
        cell[row][col] = -1.0
        kernels.append([cell])
    return torch.tensor(kernels, device=device, dtype=dtype)
|
|
||||||
|
|
||||||
|
|
||||||
def get_hysteresis_kernel(device=None, dtype=None):
    """Build the eight 3x3 kernels used by the Canny hysteresis step.

    Every kernel is all zeros except for a single 1.0 at one of the eight
    neighbour positions (the centre is always zero). The result has shape
    (8, 1, 3, 3) and is created on ``device`` with ``dtype``.
    """
    # (row, col) of the 1.0 entry for each kernel, in output order.
    neighbour_cells = [(1, 2), (2, 2), (2, 1), (2, 0), (1, 0), (0, 0), (0, 1), (0, 2)]
    kernels = []
    for row, col in neighbour_cells:
        cell = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
        cell[row][col] = 1.0
        kernels.append([cell])
    return torch.tensor(kernels, device=device, dtype=dtype)
|
|
||||||
|
|
||||||
def gaussian_blur_2d(img, kernel_size, sigma):
    """Blur ``img`` with a separable Gaussian applied per channel.

    A 1D Gaussian of length ``kernel_size`` and standard deviation ``sigma``
    is normalised to sum to one, turned into a 2D kernel via an outer
    product, and convolved depthwise (one group per channel, channel count
    taken from ``img.shape[-3]``). The image is reflect-padded by
    ``kernel_size // 2`` on each side before the convolution.
    """
    half_extent = (kernel_size - 1) * 0.5
    taps = torch.linspace(-half_extent, half_extent, steps=kernel_size)
    weights = torch.exp(-0.5 * (taps / sigma).pow(2))
    kernel_1d = (weights / weights.sum()).to(device=img.device, dtype=img.dtype)

    # Outer product of the 1D kernel with itself, replicated for every channel.
    kernel_2d = torch.mm(kernel_1d[:, None], kernel_1d[None, :])
    channels = img.shape[-3]
    kernel_2d = kernel_2d.expand(channels, 1, kernel_2d.shape[0], kernel_2d.shape[1])

    border = kernel_size // 2
    padded = torch.nn.functional.pad(img, [border, border, border, border], mode="reflect")
    return torch.nn.functional.conv2d(padded, kernel_2d, groups=padded.shape[-3])
|
|
||||||
|
|
||||||
def get_sobel_kernel2d(device=None, dtype=None):
    """Return the x and y 3x3 Sobel kernels stacked into a (2, 3, 3) tensor."""
    kernel_x = torch.tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=device, dtype=dtype)
    kernel_y = kernel_x.transpose(0, 1)
    return torch.stack([kernel_x, kernel_y])


def spatial_gradient(input, normalized: bool = True):
    r"""Compute the first order image derivative in both x and y using a Sobel operator.

    Args:
        input: input image tensor with shape :math:`(B, C, H, W)`.
        normalized: whether each Sobel kernel is divided by the sum of the
            absolute values of its entries before being applied.

    Return:
        the derivatives of the input feature map, with shape :math:`(B, C, 2, H, W)`;
        index 0 along the third axis is the x derivative, index 1 the y derivative.

    Examples:
        >>> input = torch.rand(1, 3, 4, 4)
        >>> output = spatial_gradient(input)  # 1x3x2x4x4
        >>> output.shape
        torch.Size([1, 3, 2, 4, 4])
    """
    kernel = get_sobel_kernel2d(device=input.device, dtype=input.dtype)
    if normalized:
        # The previous code called ``normalize_kernel2d``, which this file never
        # defines, so the default path raised NameError. Normalise in place:
        # scale each kernel by the L1 norm of its entries.
        kernel = kernel / kernel.abs().sum(dim=(-2, -1), keepdim=True)

    b, c, h, w = input.shape
    # One output channel per kernel (x, y), a single input channel each.
    tmp_kernel = kernel[:, None, ...]

    # Replicate-pad the spatial dims so the output keeps the input's H and W.
    spatial_pad = [kernel.size(1) // 2, kernel.size(1) // 2, kernel.size(2) // 2, kernel.size(2) // 2]
    out_channels = 2
    # Fold channels into the batch so every channel is filtered independently.
    padded_inp = torch.nn.functional.pad(input.reshape(b * c, 1, h, w), spatial_pad, 'replicate')
    out = F.conv2d(padded_inp, tmp_kernel, groups=1, padding=0, stride=1)
    return out.reshape(b, c, out_channels, h, w)
|
|
||||||
|
|
||||||
def rgb_to_grayscale(image, rgb_weights = None):
    r"""Convert an RGB image to its grayscale version.

    Args:
        image: RGB image with shape :math:`(*,3,H,W)`.
        rgb_weights: optional tensor of three per-channel weights (R, G, B).
            When given it is moved to the image's device and dtype. When
            omitted, ``[76, 150, 29]`` is used for ``uint8`` images and
            ``[0.299, 0.587, 0.114]`` for float16/32/64 images.

    Returns:
        grayscale version of the image with shape :math:`(*,1,H,W)`.

    Raises:
        ValueError: if the image has fewer than three dimensions or its
            third-from-last dimension is not 3.
        TypeError: if no weights are given and the image dtype is neither
            ``uint8`` nor one of the supported floating point types.

    Example:
        >>> input = torch.rand(2, 3, 4, 5)
        >>> gray = rgb_to_grayscale(input) # 2x1x4x5
    """
    if len(image.shape) < 3 or image.shape[-3] != 3:
        raise ValueError(f"Input size must have a shape of (*, 3, H, W). Got {image.shape}")

    if rgb_weights is not None:
        # Caller-supplied weights: align device and dtype with the image.
        rgb_weights = rgb_weights.to(image)
    elif image.dtype == torch.uint8:
        rgb_weights = torch.tensor([76, 150, 29], device=image.device, dtype=torch.uint8)
    elif image.dtype in (torch.float16, torch.float32, torch.float64):
        rgb_weights = torch.tensor([0.299, 0.587, 0.114], device=image.device, dtype=image.dtype)
    else:
        raise TypeError(f"Unknown data type: {image.dtype}")

    # Keep the channel axis (size 1) so the result broadcasts to (*, 1, H, W).
    red, green, blue = (image[..., idx:idx + 1, :, :] for idx in range(3))
    w_r, w_g, w_b = rgb_weights.unbind()
    return w_r * red + w_g * green + w_b * blue
|
|
||||||
|
|
||||||
def canny(
    input,
    low_threshold = 0.1,
    high_threshold = 0.2,
    kernel_size = 5,
    sigma = 1,
    hysteresis = True,
    eps = 1e-6,
):
    r"""Find edges of the input image and filter them using the Canny algorithm.

    Args:
        input: input image tensor with shape :math:`(B,C,H,W)`. A 3-channel
            input is converted to grayscale first.
        low_threshold: lower threshold for the hysteresis procedure.
        high_threshold: upper threshold for the hysteresis procedure.
        kernel_size: the size of the kernel for the gaussian blur.
        sigma: the standard deviation of the kernel for the gaussian blur.
        hysteresis: if True, applies the hysteresis edge tracking.
            Otherwise, the edges are divided between weak (0.5) and strong (1) edges.
        eps: regularization number added under the square root of the
            gradient magnitude.

    Returns:
        - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
        - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.

    Example:
        >>> input = torch.rand(5, 3, 4, 4)
        >>> magnitude, edges = canny(input)  # 5x3x4x4
        >>> magnitude.shape
        torch.Size([5, 1, 4, 4])
        >>> edges.shape
        torch.Size([5, 1, 4, 4])
    """
    device = input.device
    dtype = input.dtype

    # To grayscale
    if input.shape[1] == 3:
        input = rgb_to_grayscale(input)

    # Gaussian filter, then unnormalised Sobel gradients
    blurred = gaussian_blur_2d(input, kernel_size, sigma)
    gradients = spatial_gradient(blurred, normalized=False)
    gx = gradients[:, :, 0]
    gy = gradients[:, :, 1]

    # Gradient magnitude and direction (degrees, snapped to multiples of 45)
    magnitude = torch.sqrt(gx * gx + gy * gy + eps)
    angle = torch.atan2(gy, gx)
    angle = 180.0 * angle / math.pi
    angle = torch.round(angle / 45) * 45

    # Non-maximal suppression: each of the 8 channels is centre minus one neighbour
    nms_kernels = get_canny_nms_kernel(device, dtype)
    nms_magnitude = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)

    # Channel index along the gradient direction and the opposite direction
    positive_idx = ((angle / 45) % 8).long()
    negative_idx = (((angle / 45) + 4) % 8).long()

    channel_select_filtered_positive = torch.gather(nms_magnitude, 1, positive_idx)
    channel_select_filtered_negative = torch.gather(nms_magnitude, 1, negative_idx)
    channel_select_filtered = torch.stack(
        [channel_select_filtered_positive, channel_select_filtered_negative], 1
    )

    # Keep a pixel only if it exceeds both neighbours along its gradient direction
    is_max = channel_select_filtered.min(dim=1)[0] > 0.0
    magnitude = magnitude * is_max

    # Double threshold: 0.5 marks a weak edge, 1.0 a strong edge.
    # (A previous F.threshold call here was a dead store and has been removed.)
    low = magnitude > low_threshold
    high = magnitude > high_threshold
    edges = low * 0.5 + high * 0.5
    edges = edges.to(dtype)

    # Hysteresis: iterate until the edge map stops changing
    if hysteresis:
        edges_old = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
        hysteresis_kernels = get_hysteresis_kernel(device, dtype)

        while ((edges_old - edges).abs() != 0).any():
            # Stay in the input dtype; casting to float32 here promoted ``edges``
            # and made the next conv2d fail against kernels of another dtype.
            weak = (edges == 0.5).to(dtype)
            strong = (edges == 1).to(dtype)

            hysteresis_magnitude = F.conv2d(
                edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
            )
            # A weak pixel is promoted when any of its 8 neighbours is a strong edge
            hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
            hysteresis_magnitude = hysteresis_magnitude * weak + strong

            edges_old = edges.clone()
            edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5

        edges = hysteresis_magnitude

    return magnitude, edges
|
|
||||||
|
|
||||||
|
|
||||||
class Canny:
|
class Canny:
|
||||||
|
|||||||
@ -342,6 +342,24 @@ class GrowMask:
|
|||||||
out.append(output)
|
out.append(output)
|
||||||
return (torch.stack(out, dim=0),)
|
return (torch.stack(out, dim=0),)
|
||||||
|
|
||||||
|
class ThresholdMask:
    """Node that binarises a mask: entries strictly above ``value`` become 1.0, the rest 0.0."""

    CATEGORY = "mask"

    RETURN_TYPES = ("MASK",)
    FUNCTION = "image_to_mask"

    @classmethod
    def INPUT_TYPES(s):
        threshold_spec = ("FLOAT", {"default": 0.5, "min": 0.0, "max": 1.0, "step": 0.01})
        required = {
            "mask": ("MASK",),
            "value": threshold_spec,
        }
        return {"required": required}

    def image_to_mask(self, mask, value):
        # Strict comparison, then cast the boolean result to float32.
        binary = torch.gt(mask, value).to(torch.float32)
        return (binary,)
|
||||||
|
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
@ -356,6 +374,7 @@ NODE_CLASS_MAPPINGS = {
|
|||||||
"MaskComposite": MaskComposite,
|
"MaskComposite": MaskComposite,
|
||||||
"FeatherMask": FeatherMask,
|
"FeatherMask": FeatherMask,
|
||||||
"GrowMask": GrowMask,
|
"GrowMask": GrowMask,
|
||||||
|
"ThresholdMask": ThresholdMask,
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
|
|||||||
@ -37,7 +37,7 @@ class StableCascade_EmptyLatentImage:
|
|||||||
RETURN_NAMES = ("stage_c", "stage_b")
|
RETURN_NAMES = ("stage_c", "stage_b")
|
||||||
FUNCTION = "generate"
|
FUNCTION = "generate"
|
||||||
|
|
||||||
CATEGORY = "_for_testing/stable_cascade"
|
CATEGORY = "latent/stable_cascade"
|
||||||
|
|
||||||
def generate(self, width, height, compression, batch_size=1):
|
def generate(self, width, height, compression, batch_size=1):
|
||||||
c_latent = torch.zeros([batch_size, 16, height // compression, width // compression])
|
c_latent = torch.zeros([batch_size, 16, height // compression, width // compression])
|
||||||
@ -63,7 +63,7 @@ class StableCascade_StageC_VAEEncode:
|
|||||||
RETURN_NAMES = ("stage_c", "stage_b")
|
RETURN_NAMES = ("stage_c", "stage_b")
|
||||||
FUNCTION = "generate"
|
FUNCTION = "generate"
|
||||||
|
|
||||||
CATEGORY = "_for_testing/stable_cascade"
|
CATEGORY = "latent/stable_cascade"
|
||||||
|
|
||||||
def generate(self, image, vae, compression):
|
def generate(self, image, vae, compression):
|
||||||
width = image.shape[-2]
|
width = image.shape[-2]
|
||||||
@ -91,7 +91,7 @@ class StableCascade_StageB_Conditioning:
|
|||||||
|
|
||||||
FUNCTION = "set_prior"
|
FUNCTION = "set_prior"
|
||||||
|
|
||||||
CATEGORY = "_for_testing/stable_cascade"
|
CATEGORY = "conditioning/stable_cascade"
|
||||||
|
|
||||||
def set_prior(self, conditioning, stage_c):
|
def set_prior(self, conditioning, stage_c):
|
||||||
c = []
|
c = []
|
||||||
@ -102,8 +102,39 @@ class StableCascade_StageB_Conditioning:
|
|||||||
c.append(n)
|
c.append(n)
|
||||||
return (c, )
|
return (c, )
|
||||||
|
|
||||||
|
class StableCascade_SuperResolutionControlnet:
    """Node producing a controlnet input plus empty stage C / stage B latents for an image.

    The image is encoded with the supplied VAE (first three channels only) and
    two zero-filled latents are sized from the image's height and width.
    """

    RETURN_TYPES = ("IMAGE", "LATENT", "LATENT")
    RETURN_NAMES = ("controlnet_input", "stage_c", "stage_b")
    FUNCTION = "generate"

    CATEGORY = "_for_testing/stable_cascade"

    def __init__(self, device="cpu"):
        self.device = device

    @classmethod
    def INPUT_TYPES(s):
        required = {
            "image": ("IMAGE",),
            "vae": ("VAE", ),
        }
        return {"required": required}

    def generate(self, image, vae):
        # Height and width are read from the third- and second-to-last axes.
        batch_size = image.shape[0]
        height = image.shape[-3]
        width = image.shape[-2]

        # Drop any channel past the third before encoding, then move the
        # encoder's axis 1 to the end.
        encoded = vae.encode(image[:, :, :, :3])
        controlnet_input = encoded.movedim(1, -1)

        stage_c = {"samples": torch.zeros([batch_size, 16, height // 16, width // 16])}
        stage_b = {"samples": torch.zeros([batch_size, 4, height // 2, width // 2])}
        return (controlnet_input, stage_c, stage_b)
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
"StableCascade_EmptyLatentImage": StableCascade_EmptyLatentImage,
|
"StableCascade_EmptyLatentImage": StableCascade_EmptyLatentImage,
|
||||||
"StableCascade_StageB_Conditioning": StableCascade_StageB_Conditioning,
|
"StableCascade_StageB_Conditioning": StableCascade_StageB_Conditioning,
|
||||||
"StableCascade_StageC_VAEEncode": StableCascade_StageC_VAEEncode,
|
"StableCascade_StageC_VAEEncode": StableCascade_StageC_VAEEncode,
|
||||||
|
"StableCascade_SuperResolutionControlnet": StableCascade_SuperResolutionControlnet,
|
||||||
}
|
}
|
||||||
|
|||||||
42
comfy_extras/nodes_differential_diffusion.py
Normal file
42
comfy_extras/nodes_differential_diffusion.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# code adapted from https://github.com/exx8/differential-diffusion
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
class DifferentialDiffusion():
    """Node that installs a per-step denoise-mask thresholding function on a model."""

    RETURN_TYPES = ("MODEL",)
    FUNCTION = "apply"
    CATEGORY = "_for_testing"
    INIT = False

    @classmethod
    def INPUT_TYPES(s):
        required = {"model": ("MODEL", )}
        return {"required": required}

    def apply(self, model):
        # Work on a clone so the incoming model object is left untouched.
        patched = model.clone()
        patched.set_model_denoise_mask_function(self.forward)
        return (patched,)

    def forward(self, sigma: torch.Tensor, denoise_mask: torch.Tensor, extra_options: dict):
        """Binarise ``denoise_mask`` against a threshold derived from sampling progress.

        The threshold is the current timestep's position between the timestep
        of the last sigma (floored at the model's ``sigma_min``) and the
        timestep of the first sigma. Mask entries greater than or equal to the
        threshold become 1, the others 0, in the mask's own dtype.
        """
        model = extra_options["model"]
        step_sigmas = extra_options["sigmas"]
        sampling = model.inner_model.model_sampling

        # End point: the last scheduled sigma, but never below sigma_min.
        sigma_to = sampling.sigma_min
        last_sigma = step_sigmas[-1]
        if last_sigma > sigma_to:
            sigma_to = last_sigma
        sigma_from = step_sigmas[0]

        ts_from = sampling.timestep(sigma_from)
        ts_to = sampling.timestep(sigma_to)
        current_ts = sampling.timestep(sigma[0])

        threshold = (current_ts - ts_to) / (ts_from - ts_to)

        keep = denoise_mask >= threshold
        return keep.to(denoise_mask.dtype)
|
||||||
|
|
||||||
|
|
||||||
|
# Node registration tables for this module: node id -> implementing class,
# and node id -> display name.
NODE_CLASS_MAPPINGS = {"DifferentialDiffusion": DifferentialDiffusion}

NODE_DISPLAY_NAME_MAPPINGS = {"DifferentialDiffusion": "Differential Diffusion"}
|
||||||
49
comfy_extras/nodes_morphology.py
Normal file
49
comfy_extras/nodes_morphology.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import torch
|
||||||
|
import comfy.model_management
|
||||||
|
|
||||||
|
from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat
|
||||||
|
|
||||||
|
|
||||||
|
class Morphology:
    """Node applying a kornia morphological operation to an image with a square kernel."""

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"image": ("IMAGE",),
                             "operation": (["erode", "dilate", "open", "close", "gradient", "bottom_hat", "top_hat"],),
                             "kernel_size": ("INT", {"default": 3, "min": 3, "max": 999, "step": 1}),
                             }}

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "process"

    CATEGORY = "image/postprocessing"

    def process(self, image, operation, kernel_size):
        """Run ``operation`` on ``image`` using an all-ones ``kernel_size`` x ``kernel_size`` kernel.

        The image's last axis is moved to position 1 before the kornia call
        and moved back afterwards; the result is transferred to the
        intermediate device.

        Raises:
            ValueError: if ``operation`` is not one of the supported names.
        """
        operations = {
            "erode": erosion,
            "dilate": dilation,
            "open": opening,
            "close": closing,
            "gradient": gradient,
            "top_hat": top_hat,
            "bottom_hat": bottom_hat,
        }
        # Validate first so a bad name fails before any data is moved to the device.
        if operation not in operations:
            raise ValueError(f"Invalid operation {operation} for morphology. Must be one of 'erode', 'dilate', 'open', 'close', 'gradient', 'top_hat', 'bottom_hat'")

        device = comfy.model_management.get_torch_device()
        kernel = torch.ones(kernel_size, kernel_size, device=device)
        image_k = image.to(device).movedim(-1, 1)
        output = operations[operation](image_k, kernel)
        img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1)
        return (img_out,)
|
||||||
|
|
||||||
|
# Node registration tables for this module: node id -> implementing class,
# and node id -> display name.
NODE_CLASS_MAPPINGS = {"Morphology": Morphology}

NODE_DISPLAY_NAME_MAPPINGS = {"Morphology": "ImageMorphology"}
|
||||||
@ -28,4 +28,6 @@ protobuf
|
|||||||
psutil
|
psutil
|
||||||
ConfigArgParse
|
ConfigArgParse
|
||||||
aio-pika
|
aio-pika
|
||||||
pyjwt[crypto]
|
pyjwt[crypto]
|
||||||
|
kornia>=0.7.1
|
||||||
|
mpmath>=1.0,!=1.4.0a0
|
||||||
Loading…
Reference in New Issue
Block a user