Conflict Resolved

root 2023-08-17 18:39:00 +00:00
commit 3849a6bf10
28 changed files with 630 additions and 291 deletions

View File

@@ -2,6 +2,13 @@ name: "Windows Release cu118 dependencies 2"
 on:
   workflow_dispatch:
+    inputs:
+      xformers:
+        description: 'xformers version'
+        required: true
+        type: string
+        default: "xformers"
+
 #  push:
 #    branches:
 #      - master
@@ -17,7 +24,7 @@ jobs:
     - shell: bash
       run: |
-        python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
+        python -m pip wheel --no-cache-dir torch torchvision torchaudio ${{ inputs.xformers }} --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
         python -m pip install --no-cache-dir ./temp_wheel_dir/*
         echo installed basic
         ls -lah temp_wheel_dir

CODEOWNERS (new file)
View File

@@ -0,0 +1 @@
+* @comfyanonymous

View File

@@ -47,6 +47,7 @@ Workflow examples can be found on the [Examples page](https://comfyanonymous.git
 | Ctrl + O | Load workflow |
 | Ctrl + A | Select all nodes |
 | Ctrl + M | Mute/unmute selected nodes |
+| Ctrl + B | Bypass selected nodes (acts like the node was removed from the graph and the wires reconnected through) |
 | Delete/Backspace | Delete selected nodes |
 | Ctrl + Delete/Backspace | Delete the current graph |
 | Space | Move the canvas around when held and moving the cursor |

View File

@@ -1,5 +1,5 @@
-MC1: #5d6c2c - darkolivegreen, MC2: #c0b1ab - darkgray, MC3: #73299d - darkslateblue, MC4: #323416 - darkslategray
-AN1: #3d6c2c - darkolivegreen, AN2: #6c5b2c - darkolivegreen
-T1: #2c5d6c - darkslategray, T2: #6c2c5d - purple
-C1: #3b2c6c - darkslateblue, C2: #abbac0 - silver
-MO1: #686c59 - dimgray, MO2: #c0bbba - silver, MO3: #907a9d - lightslategray, MO4: #33342b - darkslategray
+MC1: #323416 - darkslategray, MC2: #bfb1ab - darkgray, MC3: #73299d - darkslateblue, MC4: #5d6c2c - darkolivegreen
+AN1: #233416 - darkgreen, AN2: #342716 - black
+T1: #163234 - darkslategray, T2: #341632 - darkslategray
+C1: #181634 - midnightblue, C2: #abb9bf - silver
+MO1: #33342b - darkslategray, MO2: #bfbbb9 - silver, MO3: #907a9d - lightslategray, MO4: #686c59 - dimgray

View File

@ -38,6 +38,7 @@ parser.add_argument("--port", type=int, default=8188, help="Set the listen port.
parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.") parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).")
parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.") parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.") parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
@ -81,6 +82,9 @@ vram_group.add_argument("--novram", action="store_true", help="When lowvram isn'
vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.")
parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build: Enable convenient things that most people using the standalone windows build will probably enjoy (like auto opening the page on startup).") parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build: Enable convenient things that most people using the standalone windows build will probably enjoy (like auto opening the page on startup).")

View File

@@ -24,8 +24,9 @@ class ClipVisionModel():
         return self.model.load_state_dict(sd, strict=False)

     def encode_image(self, image):
-        img = torch.clip((255. * image[0]), 0, 255).round().int()
-        inputs = self.processor(images=[img], return_tensors="pt")
+        img = torch.clip((255. * image), 0, 255).round().int()
+        img = list(map(lambda a: a, img))
+        inputs = self.processor(images=img, return_tensors="pt")
         outputs = self.model(**inputs)
         return outputs
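
Note: encode_image previously embedded only image[0]; it now feeds the whole batch to the CLIP processor. A rough sketch of the new batching, assuming a [B, H, W, C] image tensor in the 0..1 range:

import torch

image = torch.rand(4, 224, 224, 3)                    # batch of 4 images
img = torch.clip(255. * image, 0, 255).round().int()  # scale to 0..255 ints
imgs = list(img)  # same effect as list(map(lambda a: a, img)): a list of [H, W, C] tensors
# processor(images=imgs, return_tensors="pt") now embeds all 4 images, not just the first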

View File

@@ -244,30 +244,15 @@ class Gligen(nn.Module):
         self.position_net = position_net
         self.key_dim = key_dim
         self.max_objs = 30
-        self.lowvram = False
+        self.current_device = torch.device("cpu")

     def _set_position(self, boxes, masks, positive_embeddings):
-        if self.lowvram == True:
-            self.position_net.to(boxes.device)
-
         objs = self.position_net(boxes, masks, positive_embeddings)
-
-        if self.lowvram == True:
-            self.position_net.cpu()
-            def func_lowvram(x, extra_options):
-                key = extra_options["transformer_index"]
-                module = self.module_list[key]
-                module.to(x.device)
-                r = module(x, objs)
-                module.cpu()
-                return r
-            return func_lowvram
-        else:
-            def func(x, extra_options):
-                key = extra_options["transformer_index"]
-                module = self.module_list[key]
-                return module(x, objs)
-            return func
+        def func(x, extra_options):
+            key = extra_options["transformer_index"]
+            module = self.module_list[key]
+            return module(x, objs)
+        return func

     def set_position(self, latent_image_shape, position_params, device):
         batch, c, h, w = latent_image_shape
@@ -312,14 +297,6 @@ class Gligen(nn.Module):
                    masks.to(device),
                    conds.to(device))

-    def set_lowvram(self, value=True):
-        self.lowvram = value
-
-    def cleanup(self):
-        self.lowvram = False
-
-    def get_models(self):
-        return [self]

 def load_gligen(sd):
     sd_k = sd.keys()
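
Note: the lowvram branch is gone because device placement is now handled by the model management layer; load_gligen (see the sd.py changes below) wraps the module in a ModelPatcher, so callers reach it through .model. A sketch using names from this diff:

patcher = load_gligen(state_dict)  # now returns ModelPatcher(gligen_module, ...)
patch = patcher.model.set_position(latent_shape, position_params, device)
# patch(x, {"transformer_index": i}) routes x through the GLIGEN module for block i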

View File

@@ -631,23 +631,78 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
         elif solver_type == 'midpoint':
             x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)

-        x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise
+        if eta:
+            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise

         old_denoised = denoised
         h_last = h
     return x

+@torch.no_grad()
+def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
+    """DPM-Solver++(3M) SDE."""
+
+    seed = extra_args.get("seed", None)
+    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
+    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
+    extra_args = {} if extra_args is None else extra_args
+    s_in = x.new_ones([x.shape[0]])
+
+    denoised_1, denoised_2 = None, None
+    h_1, h_2 = None, None
+
+    for i in trange(len(sigmas) - 1, disable=disable):
+        denoised = model(x, sigmas[i] * s_in, **extra_args)
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
+        if sigmas[i + 1] == 0:
+            # Denoising step
+            x = denoised
+        else:
+            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = s - t
+            h_eta = h * (eta + 1)
+
+            x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised
+
+            if h_2 is not None:
+                r0 = h_1 / h
+                r1 = h_2 / h
+                d1_0 = (denoised - denoised_1) / r0
+                d1_1 = (denoised_1 - denoised_2) / r1
+                d1 = d1_0 + (d1_0 - d1_1) * r0 / (r0 + r1)
+                d2 = (d1_0 - d1_1) / (r0 + r1)
+                phi_2 = h_eta.neg().expm1() / h_eta + 1
+                phi_3 = phi_2 / h_eta - 0.5
+                x = x + phi_2 * d1 - phi_3 * d2
+            elif h_1 is not None:
+                r = h_1 / h
+                d = (denoised - denoised_1) / r
+                phi_2 = h_eta.neg().expm1() / h_eta + 1
+                x = x + phi_2 * d
+
+            if eta:
+                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise
+
+        denoised_1, denoised_2 = denoised, denoised_1
+        h_1, h_2 = h, h_1
+    return x
+
+@torch.no_grad()
+def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
+    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
+    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
+    return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler)
+
 @torch.no_grad()
 def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
     sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
     noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
     return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type)

 @torch.no_grad()
 def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
     sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
     noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
     return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r)
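
Note: the new `if eta:` guards make eta=0 skip the Brownian noise injection entirely, turning the SDE samplers into deterministic updates. A quick check of the noise scale the guard protects, assuming eta_h = eta * h as in the 2M sampler:

import torch

h, eta = torch.tensor(0.5), 0.0
eta_h = eta * h
noise_scale = (-2 * eta_h).expm1().neg().sqrt()  # sqrt(1 - exp(-2*eta*h))
print(noise_scale)  # tensor(0.) -> the old code added a zero tensor; the guard also avoids the noise_sampler call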

View File

@@ -105,6 +105,29 @@ class BaseModel(torch.nn.Module):
         return {**unet_state_dict, **vae_state_dict, **clip_state_dict}

+def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0):
+    adm_inputs = []
+    weights = []
+    noise_aug = []
+    for unclip_cond in unclip_conditioning:
+        for adm_cond in unclip_cond["clip_vision_output"].image_embeds:
+            weight = unclip_cond["strength"]
+            noise_augment = unclip_cond["noise_augmentation"]
+            noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
+            c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
+            adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
+            weights.append(weight)
+            noise_aug.append(noise_augment)
+            adm_inputs.append(adm_out)
+
+    if len(noise_aug) > 1:
+        adm_out = torch.stack(adm_inputs).sum(0)
+        noise_augment = noise_augment_merge
+        noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
+        c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
+        adm_out = torch.cat((c_adm, noise_level_emb), 1)
+
+    return adm_out
+
 class SD21UNCLIP(BaseModel):
     def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None):
@@ -114,33 +137,11 @@ class SD21UNCLIP(BaseModel):
     def encode_adm(self, **kwargs):
         unclip_conditioning = kwargs.get("unclip_conditioning", None)
         device = kwargs["device"]
-
-        if unclip_conditioning is not None:
-            adm_inputs = []
-            weights = []
-            noise_aug = []
-            for unclip_cond in unclip_conditioning:
-                adm_cond = unclip_cond["clip_vision_output"].image_embeds
-                weight = unclip_cond["strength"]
-                noise_augment = unclip_cond["noise_augmentation"]
-                noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
-                c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
-                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
-                weights.append(weight)
-                noise_aug.append(noise_augment)
-                adm_inputs.append(adm_out)
-
-            if len(noise_aug) > 1:
-                adm_out = torch.stack(adm_inputs).sum(0)
-                #TODO: add a way to control this
-                noise_augment = 0.05
-                noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
-                c_adm, noise_level_emb = self.noise_augmentor(adm_out[:, :self.noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
-                adm_out = torch.cat((c_adm, noise_level_emb), 1)
+        if unclip_conditioning is None:
+            return torch.zeros((1, self.adm_channels))
         else:
-            adm_out = torch.zeros((1, self.adm_channels))
-        return adm_out
+            return unclip_adm(unclip_conditioning, device, self.noise_augmentor, kwargs.get("unclip_noise_augment_merge", 0.05))

 class SDInpaint(BaseModel):
     def __init__(self, model_config, model_type=ModelType.EPS, device=None):
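
Note: the hard-coded merge strength 0.05 (the old #TODO) is now the default for a new `unclip_noise_augment_merge` kwarg. A worked example of the noise_level computation above, assuming a noise augmentor with max_noise_level == 1000:

max_noise_level = 1000
noise_augment = 0.05  # the default merge strength
noise_level = round((max_noise_level - 1) * noise_augment)
print(noise_level)  # 50, i.e. roughly 5% of the augmentor's noise range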

View File

@@ -113,6 +113,7 @@ def model_config_from_unet_config(unet_config):
         if model_config.matches(unet_config):
             return model_config(unet_config)

+    print("no match", unet_config)
     return None

 def model_config_from_unet(state_dict, unet_key_prefix, use_fp16):
@@ -120,9 +121,20 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_fp16):
     return model_config_from_unet_config(unet_config)

-def model_config_from_diffusers_unet(state_dict, use_fp16):
+def unet_config_from_diffusers_unet(state_dict, use_fp16):
     match = {}
-    match["context_dim"] = state_dict["down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight"].shape[1]
+    attention_resolutions = []
+
+    attn_res = 1
+    for i in range(5):
+        k = "down_blocks.{}.attentions.1.transformer_blocks.0.attn2.to_k.weight".format(i)
+        if k in state_dict:
+            match["context_dim"] = state_dict[k].shape[1]
+            attention_resolutions.append(attn_res)
+        attn_res *= 2
+
+    match["attention_resolutions"] = attention_resolutions
+
     match["model_channels"] = state_dict["conv_in.weight"].shape[0]
     match["in_channels"] = state_dict["conv_in.weight"].shape[1]
     match["adm_in_channels"] = None
@@ -134,22 +146,22 @@ def model_config_from_diffusers_unet(state_dict, use_fp16):
     SDXL = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
            'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320,
            'num_res_blocks': 2, 'attention_resolutions': [2, 4], 'transformer_depth': [0, 2, 10], 'channel_mult': [1, 2, 4],
-           'transformer_depth_middle': 10, 'use_linear_in_transformer': True, 'context_dim': 2048}
+           'transformer_depth_middle': 10, 'use_linear_in_transformer': True, 'context_dim': 2048, "num_head_channels": 64}

     SDXL_refiner = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
            'num_classes': 'sequential', 'adm_in_channels': 2560, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 384,
            'num_res_blocks': 2, 'attention_resolutions': [2, 4], 'transformer_depth': [0, 4, 4, 0], 'channel_mult': [1, 2, 4, 4],
-           'transformer_depth_middle': 4, 'use_linear_in_transformer': True, 'context_dim': 1280}
+           'transformer_depth_middle': 4, 'use_linear_in_transformer': True, 'context_dim': 1280, "num_head_channels": 64}

     SD21 = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
            'adm_in_channels': None, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2,
            'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4],
-           'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024}
+           'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, "num_head_channels": 64}

     SD21_uncliph = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
            'num_classes': 'sequential', 'adm_in_channels': 2048, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320,
            'num_res_blocks': 2, 'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4],
-           'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024}
+           'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, "num_head_channels": 64}

     SD21_unclipl = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
            'num_classes': 'sequential', 'adm_in_channels': 1536, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320,
@@ -159,9 +171,20 @@ def model_config_from_diffusers_unet(state_dict, use_fp16):
     SD15 = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
            'adm_in_channels': None, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2,
            'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4],
-           'transformer_depth_middle': 1, 'use_linear_in_transformer': False, 'context_dim': 768}
+           'transformer_depth_middle': 1, 'use_linear_in_transformer': False, 'context_dim': 768, "num_heads": 8}
+
+    SDXL_mid_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
+           'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320,
+           'num_res_blocks': 2, 'attention_resolutions': [4], 'transformer_depth': [0, 0, 1], 'channel_mult': [1, 2, 4],
+           'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 2048, "num_head_channels": 64}
+
+    SDXL_small_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False,
+           'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320,
+           'num_res_blocks': 2, 'attention_resolutions': [], 'transformer_depth': [0, 0, 0], 'channel_mult': [1, 2, 4],
+           'transformer_depth_middle': 0, 'use_linear_in_transformer': True, "num_head_channels": 64, 'context_dim': 1}

-    supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl]
+    supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mid_cnet, SDXL_small_cnet]

     for unet_config in supported_models:
         matches = True
@@ -170,5 +193,11 @@ def model_config_from_diffusers_unet(state_dict, use_fp16):
                 matches = False
                 break
         if matches:
-            return model_config_from_unet_config(unet_config)
+            return unet_config
+    return None
+
+def model_config_from_diffusers_unet(state_dict, use_fp16):
+    unet_config = unet_config_from_diffusers_unet(state_dict, use_fp16)
+    if unet_config is not None:
+        return model_config_from_unet_config(unet_config)
     return None
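
Note: attention_resolutions is now detected per down block instead of assumed. A worked example of the loop above for an SDXL diffusers UNet, which has cross-attention only in down_blocks 1 and 2:

attention_resolutions, attn_res = [], 1
for i in range(5):
    has_attn = i in (1, 2)  # stand-in for the state_dict key check
    if has_attn:
        attention_resolutions.append(attn_res)
    attn_res *= 2
print(attention_resolutions)  # [2, 4] -> matches the SDXL config above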

View File

@@ -2,6 +2,7 @@ import psutil
 from enum import Enum
 from comfy.cli_args import args
 import torch
+import sys

 class VRAMState(Enum):
     DISABLED = 0    #No vram present: no need to move models to vram
@@ -201,6 +202,10 @@ if cpu_state == CPUState.MPS:

 print(f"Set vram state to: {vram_state.name}")

+DISABLE_SMART_MEMORY = args.disable_smart_memory
+
+if DISABLE_SMART_MEMORY:
+    print("Disabling smart memory management")

 def get_torch_device_name(device):
     if hasattr(device, 'type'):
@@ -221,132 +226,164 @@ except:
     print("Could not pick default device.")

-current_loaded_model = None
-current_gpu_controlnets = []
-
-model_accelerated = False
-
-def unload_model():
-    global current_loaded_model
-    global model_accelerated
-    global current_gpu_controlnets
-    global vram_state
-
-    if current_loaded_model is not None:
-        if model_accelerated:
-            accelerate.hooks.remove_hook_from_submodules(current_loaded_model.model)
-            model_accelerated = False
-
-        current_loaded_model.unpatch_model()
-        current_loaded_model.model.to(current_loaded_model.offload_device)
-        current_loaded_model.model_patches_to(current_loaded_model.offload_device)
-        current_loaded_model = None
-        if vram_state != VRAMState.HIGH_VRAM:
-            soft_empty_cache()
-
-    if vram_state != VRAMState.HIGH_VRAM:
-        if len(current_gpu_controlnets) > 0:
-            for n in current_gpu_controlnets:
-                n.cpu()
-            current_gpu_controlnets = []
+current_loaded_models = []
+
+class LoadedModel:
+    def __init__(self, model):
+        self.model = model
+        self.model_accelerated = False
+        self.device = model.load_device
+
+    def model_memory(self):
+        return self.model.model_size()
+
+    def model_memory_required(self, device):
+        if device == self.model.current_device:
+            return 0
+        else:
+            return self.model_memory()
+
+    def model_load(self, lowvram_model_memory=0):
+        patch_model_to = None
+        if lowvram_model_memory == 0:
+            patch_model_to = self.device
+
+        self.model.model_patches_to(self.device)
+        self.model.model_patches_to(self.model.model_dtype())
+
+        try:
+            self.real_model = self.model.patch_model(device_to=patch_model_to) #TODO: do something with loras and offloading to CPU
+        except Exception as e:
+            self.model.unpatch_model(self.model.offload_device)
+            self.model_unload()
+            raise e
+
+        if lowvram_model_memory > 0:
+            print("loading in lowvram mode", lowvram_model_memory/(1024 * 1024))
+            device_map = accelerate.infer_auto_device_map(self.real_model, max_memory={0: "{}MiB".format(lowvram_model_memory // (1024 * 1024)), "cpu": "16GiB"})
+            accelerate.dispatch_model(self.real_model, device_map=device_map, main_device=self.device)
+            self.model_accelerated = True
+        return self.real_model
+
+    def model_unload(self):
+        if self.model_accelerated:
+            accelerate.hooks.remove_hook_from_submodules(self.real_model)
+            self.model_accelerated = False
+
+        self.model.unpatch_model(self.model.offload_device)
+        self.model.model_patches_to(self.model.offload_device)
+
+    def __eq__(self, other):
+        return self.model is other.model

 def minimum_inference_memory():
-    return (768 * 1024 * 1024)
+    return (1024 * 1024 * 1024)
+
+def unload_model_clones(model):
+    to_unload = []
+    for i in range(len(current_loaded_models)):
+        if model.is_clone(current_loaded_models[i].model):
+            to_unload = [i] + to_unload
+
+    for i in to_unload:
+        print("unload clone", i)
+        current_loaded_models.pop(i).model_unload()
+
+def free_memory(memory_required, device, keep_loaded=[]):
+    unloaded_model = False
+    for i in range(len(current_loaded_models) -1, -1, -1):
+        if DISABLE_SMART_MEMORY:
+            current_free_mem = 0
+        else:
+            current_free_mem = get_free_memory(device)
+        if current_free_mem > memory_required:
+            break
+        shift_model = current_loaded_models[i]
+        if shift_model.device == device:
+            if shift_model not in keep_loaded:
+                current_loaded_models.pop(i).model_unload()
+                unloaded_model = True
+
+    if unloaded_model:
+        soft_empty_cache()
+
+def load_models_gpu(models, memory_required=0):
+    global vram_state
+
+    inference_memory = minimum_inference_memory()
+    extra_mem = max(inference_memory, memory_required)
+
+    models_to_load = []
+    models_already_loaded = []
+    for x in models:
+        loaded_model = LoadedModel(x)
+
+        if loaded_model in current_loaded_models:
+            index = current_loaded_models.index(loaded_model)
+            current_loaded_models.insert(0, current_loaded_models.pop(index))
+            models_already_loaded.append(loaded_model)
+        else:
+            models_to_load.append(loaded_model)
+
+    if len(models_to_load) == 0:
+        devs = set(map(lambda a: a.device, models_already_loaded))
+        for d in devs:
+            if d != torch.device("cpu"):
+                free_memory(extra_mem, d, models_already_loaded)
+        return
+
+    print("loading new")
+
+    total_memory_required = {}
+    for loaded_model in models_to_load:
+        unload_model_clones(loaded_model.model)
+        total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device)
+
+    for device in total_memory_required:
+        if device != torch.device("cpu"):
+            free_memory(total_memory_required[device] * 1.3 + extra_mem, device, models_already_loaded)
+
+    for loaded_model in models_to_load:
+        model = loaded_model.model
+        torch_dev = model.load_device
+        if is_device_cpu(torch_dev):
+            vram_set_state = VRAMState.DISABLED
+        else:
+            vram_set_state = vram_state
+
+        lowvram_model_memory = 0
+        if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
+            model_size = loaded_model.model_memory_required(torch_dev)
+            current_free_mem = get_free_memory(torch_dev)
+            lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
+            if model_size > (current_free_mem - inference_memory): #only switch to lowvram if really necessary
+                vram_set_state = VRAMState.LOW_VRAM
+            else:
+                lowvram_model_memory = 0
+
+        if vram_set_state == VRAMState.NO_VRAM:
+            lowvram_model_memory = 256 * 1024 * 1024
+
+        cur_loaded_model = loaded_model.model_load(lowvram_model_memory)
+        current_loaded_models.insert(0, loaded_model)
+    return

 def load_model_gpu(model):
-    global current_loaded_model
-    global vram_state
-    global model_accelerated
-
-    if model is current_loaded_model:
-        return
-    unload_model()
-
-    torch_dev = model.load_device
-    model.model_patches_to(torch_dev)
-    model.model_patches_to(model.model_dtype())
-    current_loaded_model = model
-
-    if is_device_cpu(torch_dev):
-        vram_set_state = VRAMState.DISABLED
-    else:
-        vram_set_state = vram_state
-
-    if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
-        model_size = model.model_size()
-        current_free_mem = get_free_memory(torch_dev)
-        lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
-        if model_size > (current_free_mem - minimum_inference_memory()): #only switch to lowvram if really necessary
-            vram_set_state = VRAMState.LOW_VRAM
-
-    real_model = model.model
-    patch_model_to = None
-    if vram_set_state == VRAMState.DISABLED:
-        pass
-    elif vram_set_state == VRAMState.NORMAL_VRAM or vram_set_state == VRAMState.HIGH_VRAM or vram_set_state == VRAMState.SHARED:
-        model_accelerated = False
-        patch_model_to = torch_dev
-
-    try:
-        real_model = model.patch_model(device_to=patch_model_to)
-    except Exception as e:
-        model.unpatch_model()
-        unload_model()
-        raise e
-
-    if patch_model_to is not None:
-        real_model.to(torch_dev)
-
-    if vram_set_state == VRAMState.NO_VRAM:
-        device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"})
-        accelerate.dispatch_model(real_model, device_map=device_map, main_device=torch_dev)
-        model_accelerated = True
-    elif vram_set_state == VRAMState.LOW_VRAM:
-        device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(lowvram_model_memory // (1024 * 1024)), "cpu": "16GiB"})
-        accelerate.dispatch_model(real_model, device_map=device_map, main_device=torch_dev)
-        model_accelerated = True
-
-    return current_loaded_model
-
-def load_controlnet_gpu(control_models):
-    global current_gpu_controlnets
-    global vram_state
-    if vram_state == VRAMState.DISABLED:
-        return
-
-    if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM:
-        for m in control_models:
-            if hasattr(m, 'set_lowvram'):
-                m.set_lowvram(True)
-        #don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after
-        return
-
-    models = []
-    for m in control_models:
-        models += m.get_models()
-
-    for m in current_gpu_controlnets:
-        if m not in models:
-            m.cpu()
-
-    device = get_torch_device()
-    current_gpu_controlnets = []
-    for m in models:
-        current_gpu_controlnets.append(m.to(device))
-
-def load_if_low_vram(model):
-    global vram_state
-    if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM:
-        return model.to(get_torch_device())
-    return model
-
-def unload_if_low_vram(model):
-    global vram_state
-    if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM:
-        return model.cpu()
-    return model
+    return load_models_gpu([model])
+
+def cleanup_models():
+    to_delete = []
+    for i in range(len(current_loaded_models)):
+        print(sys.getrefcount(current_loaded_models[i].model))
+        if sys.getrefcount(current_loaded_models[i].model) <= 2:
+            to_delete = [i] + to_delete
+
+    for i in to_delete:
+        x = current_loaded_models.pop(i)
+        x.model_unload()
+        del x

 def unet_offload_device():
     if vram_state == VRAMState.HIGH_VRAM:
@@ -354,6 +391,25 @@ def unet_offload_device():
     else:
         return torch.device("cpu")

+def unet_inital_load_device(parameters, dtype):
+    torch_dev = get_torch_device()
+    if vram_state == VRAMState.HIGH_VRAM:
+        return torch_dev
+
+    cpu_dev = torch.device("cpu")
+    dtype_size = 4
+    if dtype == torch.float16 or dtype == torch.bfloat16:
+        dtype_size = 2
+
+    model_size = dtype_size * parameters
+
+    mem_dev = get_free_memory(torch_dev)
+    mem_cpu = get_free_memory(cpu_dev)
+    if mem_dev > mem_cpu and model_size < mem_dev:
+        return torch_dev
+    else:
+        return cpu_dev
+
 def text_encoder_offload_device():
     if args.gpu_only:
         return get_torch_device()
@@ -456,6 +512,13 @@ def get_free_memory(dev=None, torch_free_too=False):
     else:
         return mem_free_total

+def batch_area_memory(area):
+    if xformers_enabled() or pytorch_attention_flash_attention():
+        #TODO: these formulas are copied from maximum_batch_area below
+        return (area / 20) * (1024 * 1024)
+    else:
+        return (((area * 0.6) / 0.9) + 1024) * (1024 * 1024)
+
 def maximum_batch_area():
     global vram_state
     if vram_state == VRAMState.NO_VRAM:
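
Note: current_loaded_models is kept in most-recently-used order: loads insert at index 0 and free_memory walks from the tail, so the least recently used models on the target device are unloaded first. A minimal sketch of that policy (simplified, ignoring devices and memory accounting):

loaded = []  # stands in for current_loaded_models

def touch(model):
    if model in loaded:
        loaded.insert(0, loaded.pop(loaded.index(model)))  # move hit to the front
    else:
        loaded.insert(0, model)

def evict_one():
    if loaded:
        loaded.pop()  # the tail is the least recently used entry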

View File

@@ -51,19 +51,24 @@ def get_models_from_cond(cond, model_type):
             models += [c[1][model_type]]
     return models

-def load_additional_models(positive, negative, dtype):
+def get_additional_models(positive, negative):
     """loads additional models in positive and negative conditioning"""
-    control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control")
+    control_nets = set(get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control"))
+
+    control_models = []
+    for m in control_nets:
+        control_models += m.get_models()
+
     gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen")
-    gligen = [x[1].to(dtype) for x in gligen]
-    models = control_nets + gligen
-    comfy.model_management.load_controlnet_gpu(models)
+    gligen = [x[1] for x in gligen]
+    models = control_models + gligen
     return models

 def cleanup_additional_models(models):
     """cleanup additional models that were loaded"""
     for m in models:
-        m.cleanup()
+        if hasattr(m, 'cleanup'):
+            m.cleanup()

 def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None, callback=None, disable_pbar=False, seed=None):
     device = comfy.model_management.get_torch_device()
@@ -72,7 +77,8 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative
         noise_mask = prepare_mask(noise_mask, noise.shape, device)

     real_model = None
-    comfy.model_management.load_model_gpu(model)
+    models = get_additional_models(positive, negative)
+    comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise.shape[2] * noise.shape[3]))
     real_model = model.model

     noise = noise.to(device)
@@ -81,7 +87,6 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative
     positive_copy = broadcast_cond(positive, noise.shape[0], device)
     negative_copy = broadcast_cond(negative, noise.shape[0], device)

-    models = load_additional_models(positive, negative, model.model_dtype())

     sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
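
Note: sampling now reserves memory up front via batch_area_memory over the latent area instead of loading and then discovering it doesn't fit. A worked example for a 512x512 image (64x64 latent) with xformers enabled, using the formula from the model_management diff above:

area = 64 * 64                        # noise.shape[2] * noise.shape[3]
memory = (area / 20) * (1024 * 1024)  # xformers branch of batch_area_memory
print(memory / (1024 * 1024))         # ~204.8 MiB requested before sampling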

View File

@@ -88,9 +88,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
         gligen_type = gligen[0]
         gligen_model = gligen[1]
         if gligen_type == "position":
-            gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device)
+            gligen_patch = gligen_model.model.set_position(input_x.shape, gligen[2], input_x.device)
         else:
-            gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device)
+            gligen_patch = gligen_model.model.set_empty(input_x.shape, input_x.device)

         patches['middle_patch'] = [gligen_patch]
@@ -189,12 +189,13 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
                 continue
            to_run += [(p, COND)]
-        for x in uncond:
-            p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
-            if p is None:
-                continue
-
-            to_run += [(p, UNCOND)]
+        if uncond is not None:
+            for x in uncond:
+                p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
+                if p is None:
+                    continue
+
+                to_run += [(p, UNCOND)]

         while len(to_run) > 0:
             first = to_run[0]
@@ -282,6 +283,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
         max_total_area = model_management.maximum_batch_area()

+        if math.isclose(cond_scale, 1.0):
+            uncond = None
+
         cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
         if "sampler_cfg_function" in model_options:
             args = {"cond": cond, "uncond": uncond, "cond_scale": cond_scale, "timestep": timestep}
@@ -343,6 +347,17 @@ def ddim_scheduler(model, steps):
     sigs += [0.0]
     return torch.FloatTensor(sigs)

+def sgm_scheduler(model, steps):
+    sigs = []
+    timesteps = torch.linspace(model.inner_model.inner_model.num_timesteps - 1, 0, steps + 1)[:-1].type(torch.int)
+    for x in range(len(timesteps)):
+        ts = timesteps[x]
+        if ts > 999:
+            ts = 999
+        sigs.append(model.t_to_sigma(torch.tensor(ts)))
+    sigs += [0.0]
+    return torch.FloatTensor(sigs)
+
 def blank_inpaint_image_like(latent_image):
     blank_image = torch.ones_like(latent_image)
     # these are the values for "zero" in pixel space translated to latent space
@@ -521,10 +536,10 @@ def encode_adm(model, conds, batch_size, width, height, device, prompt_type):

 class KSampler:
-    SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"]
+    SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"]
     SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                 "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
-                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"]
+                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"]

     def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
         self.model = model
@@ -566,6 +581,8 @@ class KSampler:
             sigmas = simple_scheduler(self.model_wrap, steps)
         elif self.scheduler == "ddim_uniform":
             sigmas = ddim_scheduler(self.model_wrap, steps)
+        elif self.scheduler == "sgm_uniform":
+            sigmas = sgm_scheduler(self.model_wrap, steps)
         else:
             print("error invalid scheduler", self.scheduler)

View File

@@ -72,6 +72,7 @@ def load_lora(lora, to_load):
         regular_lora = "{}.lora_up.weight".format(x)
         diffusers_lora = "{}_lora.up.weight".format(x)
+        transformers_lora = "{}.lora_linear_layer.up.weight".format(x)
         A_name = None

         if regular_lora in lora.keys():
@@ -82,6 +83,10 @@ def load_lora(lora, to_load):
             A_name = diffusers_lora
             B_name = "{}_lora.down.weight".format(x)
             mid_name = None
+        elif transformers_lora in lora.keys():
+            A_name = transformers_lora
+            B_name = "{}.lora_linear_layer.down.weight".format(x)
+            mid_name = None

         if A_name is not None:
             mid = None
@@ -181,20 +186,29 @@ def model_lora_keys_clip(model, key_map={}):
             key_map[lora_key] = k
             lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c])
             key_map[lora_key] = k
+            lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+            key_map[lora_key] = k

         k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
         if k in sdk:
             lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
             key_map[lora_key] = k
             clip_l_present = True
+            lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+            key_map[lora_key] = k

         k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
         if k in sdk:
             if clip_l_present:
                 lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
                 key_map[lora_key] = k
+                lora_key = "text_encoder_2.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+                key_map[lora_key] = k
             else:
                 lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner
                 key_map[lora_key] = k
+                lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+                key_map[lora_key] = k

     return key_map
@@ -209,13 +223,16 @@ def model_lora_keys_unet(model, key_map={}):
     diffusers_keys = utils.unet_to_diffusers(model.model_config.unet_config)
     for k in diffusers_keys:
         if k.endswith(".weight"):
+            unet_key = "diffusion_model.{}".format(diffusers_keys[k])
             key_lora = k[:-len(".weight")].replace(".", "_")
-            key_map["lora_unet_{}".format(key_lora)] = "diffusion_model.{}".format(diffusers_keys[k])
+            key_map["lora_unet_{}".format(key_lora)] = unet_key

-            diffusers_lora_key = "unet.{}".format(k[:-len(".weight")].replace(".to_", ".processor.to_"))
-            if diffusers_lora_key.endswith(".to_out.0"):
-                diffusers_lora_key = diffusers_lora_key[:-2]
-            key_map[diffusers_lora_key] = "diffusion_model.{}".format(diffusers_keys[k])
+            diffusers_lora_prefix = ["", "unet."]
+            for p in diffusers_lora_prefix:
+                diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_"))
+                if diffusers_lora_key.endswith(".to_out.0"):
+                    diffusers_lora_key = diffusers_lora_key[:-2]
+                key_map[diffusers_lora_key] = unet_key
     return key_map

 def set_attr(obj, attr, value):
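
Note: LoRA key generation now emits diffusers-style keys both with and without the "unet." prefix. An example of what the prefix loop produces for one weight name (illustrative module path):

k = "down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight"
stem = k[:-len(".weight")].replace(".to_", ".processor.to_")
for p in ["", "unet."]:
    print(p + stem)
# down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor.to_q
# unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor.to_q
# (keys ending in ".to_out.0" additionally get the trailing ".0" stripped)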
@@ -227,7 +244,7 @@ def set_attr(obj, attr, value):
     del prev

 class ModelPatcher:
-    def __init__(self, model, load_device, offload_device, size=0):
+    def __init__(self, model, load_device, offload_device, size=0, current_device=None):
         self.size = size
         self.model = model
         self.patches = {}
@@ -236,6 +253,10 @@ class ModelPatcher:
         self.model_size()
         self.load_device = load_device
         self.offload_device = offload_device
+        if current_device is None:
+            self.current_device = self.offload_device
+        else:
+            self.current_device = current_device

     def model_size(self):
         if self.size > 0:
@@ -250,7 +271,7 @@ class ModelPatcher:
         return size

     def clone(self):
-        n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size)
+        n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size, self.current_device)
         n.patches = {}
         for k in self.patches:
             n.patches[k] = self.patches[k][:]
@@ -259,6 +280,11 @@ class ModelPatcher:
         n.model_keys = self.model_keys
         return n

+    def is_clone(self, other):
+        if hasattr(other, 'model') and self.model is other.model:
+            return True
+        return False
+
     def set_model_sampler_cfg_function(self, sampler_cfg_function):
         if len(inspect.signature(sampler_cfg_function).parameters) == 3:
             self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way
@@ -373,6 +399,11 @@ class ModelPatcher:
                 out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype)
                 set_attr(self.model, key, out_weight)
                 del temp_weight
+
+        if device_to is not None:
+            self.model.to(device_to)
+            self.current_device = device_to
+
         return self.model

     def calculate_weight(self, patches, weight, key):
@@ -465,7 +496,7 @@ class ModelPatcher:

         return weight

-    def unpatch_model(self):
+    def unpatch_model(self, device_to=None):
         keys = list(self.backup.keys())
         for k in keys:
@@ -473,6 +504,11 @@ class ModelPatcher:

         self.backup = {}

+        if device_to is not None:
+            self.model.to(device_to)
+            self.current_device = device_to
+
 def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
     key_map = model_lora_keys_unet(model.model)
     key_map = model_lora_keys_clip(clip.cond_stage_model, key_map)
@@ -538,7 +574,7 @@ class CLIP:
         else:
             self.cond_stage_model.reset_clip_layer()

-        model_management.load_model_gpu(self.patcher)
+        self.load_model()
         cond, pooled = self.cond_stage_model.encode_token_weights(tokens)
         if return_pooled:
             return cond, pooled
@@ -554,11 +590,9 @@ class CLIP:
     def get_sd(self):
         return self.cond_stage_model.state_dict()

-    def patch_model(self):
-        self.patcher.patch_model()
-
-    def unpatch_model(self):
-        self.patcher.unpatch_model()
+    def load_model(self):
+        model_management.load_model_gpu(self.patcher)
+        return self.patcher

     def get_key_patches(self):
         return self.patcher.get_key_patches()
@@ -613,11 +647,12 @@ class VAE:
         return samples

     def decode(self, samples_in):
-        model_management.unload_model()
         self.first_stage_model = self.first_stage_model.to(self.device)
         try:
+            memory_used = (2562 * samples_in.shape[2] * samples_in.shape[3] * 64) * 1.4
+            model_management.free_memory(memory_used, self.device)
             free_memory = model_management.get_free_memory(self.device)
-            batch_number = int((free_memory * 0.7) / (2562 * samples_in.shape[2] * samples_in.shape[3] * 64))
+            batch_number = int(free_memory / memory_used)
             batch_number = max(1, batch_number)
             pixel_samples = torch.empty((samples_in.shape[0], 3, round(samples_in.shape[2] * 8), round(samples_in.shape[3] * 8)), device="cpu")
@@ -633,19 +668,19 @@ class VAE:
         return pixel_samples

     def decode_tiled(self, samples, tile_x=64, tile_y=64, overlap = 16):
-        model_management.unload_model()
         self.first_stage_model = self.first_stage_model.to(self.device)
         output = self.decode_tiled_(samples, tile_x, tile_y, overlap)
         self.first_stage_model = self.first_stage_model.to(self.offload_device)
         return output.movedim(1,-1)

     def encode(self, pixel_samples):
-        model_management.unload_model()
         self.first_stage_model = self.first_stage_model.to(self.device)
         pixel_samples = pixel_samples.movedim(-1,1)
         try:
+            memory_used = (2078 * pixel_samples.shape[2] * pixel_samples.shape[3]) * 1.4 #NOTE: this constant along with the one in the decode above are estimated from the mem usage for the VAE and could change.
+            model_management.free_memory(memory_used, self.device)
             free_memory = model_management.get_free_memory(self.device)
-            batch_number = int((free_memory * 0.7) / (2078 * pixel_samples.shape[2] * pixel_samples.shape[3])) #NOTE: this constant along with the one in the decode above are estimated from the mem usage for the VAE and could change.
+            batch_number = int(free_memory / memory_used)
             batch_number = max(1, batch_number)
             samples = torch.empty((pixel_samples.shape[0], 4, round(pixel_samples.shape[2] // 8), round(pixel_samples.shape[3] // 8)), device="cpu")
             for x in range(0, pixel_samples.shape[0], batch_number):
@@ -660,7 +695,6 @@ class VAE:
         return samples

     def encode_tiled(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
-        model_management.unload_model()
         self.first_stage_model = self.first_stage_model.to(self.device)
         pixel_samples = pixel_samples.movedim(-1,1)
         samples = self.encode_tiled_(pixel_samples, tile_x=tile_x, tile_y=tile_y, overlap=overlap)
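
Note: the VAE now asks the model manager to free an explicit memory estimate instead of unloading everything first. A worked example of the decode estimate for a 64x64 latent (512x512 output):

memory_used = (2562 * 64 * 64 * 64) * 1.4  # bytes; constant is empirical per the NOTE above
print(memory_used / (1024 ** 3))           # ~0.88 GiB freed before decoding
# batch_number = int(free_memory / memory_used) then decodes in chunks that fit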
@ -740,6 +774,7 @@ class ControlNet(ControlBase):
def __init__(self, control_model, global_average_pooling=False, device=None): def __init__(self, control_model, global_average_pooling=False, device=None):
super().__init__(device) super().__init__(device)
self.control_model = control_model self.control_model = control_model
self.control_model_wrapped = ModelPatcher(self.control_model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())
self.global_average_pooling = global_average_pooling self.global_average_pooling = global_average_pooling
def get_control(self, x_noisy, t, cond, batched_number): def get_control(self, x_noisy, t, cond, batched_number):
@ -769,11 +804,9 @@ class ControlNet(ControlBase):
precision_scope = contextlib.nullcontext precision_scope = contextlib.nullcontext
with precision_scope(model_management.get_autocast_device(self.device)): with precision_scope(model_management.get_autocast_device(self.device)):
self.control_model = model_management.load_if_low_vram(self.control_model)
context = torch.cat(cond['c_crossattn'], 1) context = torch.cat(cond['c_crossattn'], 1)
y = cond.get('c_adm', None) y = cond.get('c_adm', None)
control = self.control_model(x=x_noisy, hint=self.cond_hint, timesteps=t, context=context, y=y) control = self.control_model(x=x_noisy, hint=self.cond_hint, timesteps=t, context=context, y=y)
self.control_model = model_management.unload_if_low_vram(self.control_model)
out = {'middle':[], 'output': []} out = {'middle':[], 'output': []}
autocast_enabled = torch.is_autocast_enabled() autocast_enabled = torch.is_autocast_enabled()
@ -808,7 +841,7 @@ class ControlNet(ControlBase):
def get_models(self): def get_models(self):
out = super().get_models() out = super().get_models()
out.append(self.control_model) out.append(self.control_model_wrapped)
return out return out
@ -818,7 +851,7 @@ def load_controlnet(ckpt_path, model=None):
controlnet_config = None controlnet_config = None
if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format
use_fp16 = model_management.should_use_fp16() use_fp16 = model_management.should_use_fp16()
controlnet_config = model_detection.model_config_from_diffusers_unet(controlnet_data, use_fp16).unet_config controlnet_config = model_detection.unet_config_from_diffusers_unet(controlnet_data, use_fp16)
diffusers_keys = utils.unet_to_diffusers(controlnet_config) diffusers_keys = utils.unet_to_diffusers(controlnet_config)
diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight" diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight"
diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias" diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias"
@ -857,6 +890,9 @@ def load_controlnet(ckpt_path, model=None):
if k in controlnet_data: if k in controlnet_data:
new_sd[diffusers_keys[k]] = controlnet_data.pop(k) new_sd[diffusers_keys[k]] = controlnet_data.pop(k)
leftover_keys = controlnet_data.keys()
if len(leftover_keys) > 0:
print("leftover keys:", leftover_keys)
controlnet_data = new_sd controlnet_data = new_sd
pth_key = 'control_model.zero_convs.0.0.weight' pth_key = 'control_model.zero_convs.0.0.weight'
@ -884,8 +920,8 @@ def load_controlnet(ckpt_path, model=None):
if pth: if pth:
if 'difference' in controlnet_data: if 'difference' in controlnet_data:
if model is not None: if model is not None:
m = model.patch_model() model_management.load_models_gpu([model])
model_sd = m.state_dict() model_sd = model.model_state_dict()
for x in controlnet_data: for x in controlnet_data:
c_m = "control_model." c_m = "control_model."
if x.startswith(c_m): if x.startswith(c_m):
@ -893,7 +929,6 @@ def load_controlnet(ckpt_path, model=None):
if sd_key in model_sd: if sd_key in model_sd:
cd = controlnet_data[x] cd = controlnet_data[x]
cd += model_sd[sd_key].type(cd.dtype).to(cd.device) cd += model_sd[sd_key].type(cd.dtype).to(cd.device)
model.unpatch_model()
else: else:
print("WARNING: Loaded a diff controlnet without a model. It will very likely not work.") print("WARNING: Loaded a diff controlnet without a model. It will very likely not work.")
@ -984,7 +1019,6 @@ class T2IAdapter(ControlBase):
self.copy_to(c) self.copy_to(c)
return c return c
def load_t2i_adapter(t2i_data): def load_t2i_adapter(t2i_data):
keys = t2i_data.keys() keys = t2i_data.keys()
if 'adapter' in keys: if 'adapter' in keys:
@ -1070,7 +1104,7 @@ def load_gligen(ckpt_path):
model = gligen.load_gligen(data) model = gligen.load_gligen(data)
if model_management.should_use_fp16(): if model_management.should_use_fp16():
model = model.half() model = model.half()
return model return ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())
def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None): def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None):
#TODO: this function is a mess and should be removed eventually #TODO: this function is a mess and should be removed eventually
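Aside: throughout this file, bare modules are now handed back wrapped in ModelPatcher, so the model manager sees one uniform handle with explicit load and offload devices. A toy sketch of the idea (TinyPatcher is a stand-in; only the constructor arguments mirror the real calls above):

    import torch

    class TinyPatcher:
        # Toy illustration: track where weights live when active
        # (load_device) versus when idle (offload_device).
        def __init__(self, model, load_device, offload_device):
            self.model = model
            self.load_device = load_device
            self.offload_device = offload_device

        def load(self):
            self.model.to(self.load_device)

        def offload(self):
            self.model.to(self.offload_device)

    net = torch.nn.Linear(4, 4)
    patcher = TinyPatcher(net, load_device="cpu", offload_device="cpu")
    patcher.load()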
@@ -1182,8 +1216,13 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
         if output_clipvision:
             clipvision = clip_vision.load_clipvision_from_sd(sd, model_config.clip_vision_prefix, True)

+    dtype = torch.float32
+    if fp16:
+        dtype = torch.float16
+
+    inital_load_device = model_management.unet_inital_load_device(parameters, dtype)
     offload_device = model_management.unet_offload_device()
-    model = model_config.get_model(sd, "model.diffusion_model.", device=offload_device)
+    model = model_config.get_model(sd, "model.diffusion_model.", device=inital_load_device)
     model.load_model_weights(sd, "model.diffusion_model.")

     if output_vae:
@@ -1204,7 +1243,12 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     if len(left_over) > 0:
         print("left over keys:", left_over)

-    return (ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae, clipvision)
+    model_patcher = ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device(), current_device=inital_load_device)
+    if inital_load_device != torch.device("cpu"):
+        print("loaded straight to GPU")
+        model_management.load_model_gpu(model_patcher)
+
+    return (model_patcher, clip, vae, clipvision)

 def load_unet(unet_path): #load unet in diffusers format
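Aside: unet_inital_load_device (spelling as in the source) decides whether the UNet weights can be materialized on the GPU immediately instead of staging through CPU RAM. A rough sketch of that kind of decision; the 1.3 headroom margin is an assumption here, not the real heuristic:

    import torch

    def pick_initial_device(free_vram_bytes, model_bytes, torch_device):
        # Load weights straight to the GPU when the model fits with
        # headroom, otherwise stage on CPU first.
        if free_vram_bytes > model_bytes * 1.3:
            return torch_device
        return torch.device("cpu")

    print(pick_initial_device(24 * 1024**3, 5 * 1024**3, torch.device("cuda")))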
@@ -1232,14 +1276,6 @@ def load_unet(unet_path): #load unet in diffusers format
     return ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device)

 def save_checkpoint(output_path, model, clip, vae, metadata=None):
-    try:
-        model.patch_model()
-        clip.patch_model()
-        sd = model.model.state_dict_for_saving(clip.get_sd(), vae.get_sd())
-        utils.save_torch_file(sd, output_path, metadata=metadata)
-        model.unpatch_model()
-        clip.unpatch_model()
-    except Exception as e:
-        model.unpatch_model()
-        clip.unpatch_model()
-        raise e
+    model_management.load_models_gpu([model, clip.load_model()])
+    sd = model.model.state_dict_for_saving(clip.get_sd(), vae.get_sd())
+    utils.save_torch_file(sd, output_path, metadata=metadata)
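Aside: the retired try/except block existed only so unpatch_model ran even when saving failed; load_models_gpu now owns that lifecycle, so the body becomes three straight-line calls. The old shape was the usual cleanup idiom, sketched here with stand-in functions:

    def patch():
        print("patch")

    def unpatch():
        print("unpatch")

    def save():
        raise RuntimeError("simulated failure")

    try:
        try:
            patch()
            save()
        finally:
            unpatch()  # runs on success and on failure alike
    except RuntimeError as err:
        print("save failed:", err)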

==== next file ====

@@ -1,7 +1,40 @@
+import numpy as np
+from scipy.ndimage import grey_dilation
 import torch

 from nodes import MAX_RESOLUTION

+def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
+    if resize_source:
+        source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")
+
+    x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier))
+    y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier))
+
+    left, top = (x // multiplier, y // multiplier)
+    right, bottom = (left + source.shape[3], top + source.shape[2],)
+
+    if mask is None:
+        mask = torch.ones_like(source)
+    else:
+        mask = mask.clone()
+        mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear")
+        mask = mask.repeat((source.shape[0], source.shape[1], 1, 1))
+
+    # calculate the bounds of the source that will be overlapping the destination
+    # this prevents the source trying to overwrite latent pixels that are out of bounds
+    # of the destination
+    visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),)
+
+    mask = mask[:, :, :visible_height, :visible_width]
+    inverse_mask = torch.ones_like(mask) - mask
+
+    source_portion = mask * source[:, :, :visible_height, :visible_width]
+    destination_portion = inverse_mask * destination[:, :, top:bottom, left:right]
+
+    destination[:, :, top:bottom, left:right] = source_portion + destination_portion
+    return destination
+
 class LatentCompositeMasked:
     @classmethod
     def INPUT_TYPES(s):
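Aside: the helper's core is a per-pixel linear blend over the overlapping window. A self-contained demonstration on toy tensors (shapes are (batch, channels, height, width)):

    import torch

    # out = mask * source + (1 - mask) * destination, applied only on the
    # overlapping window.
    dst = torch.zeros(1, 1, 4, 4)
    src = torch.ones(1, 1, 2, 2)
    mask = torch.full((1, 1, 2, 2), 0.5)

    dst[:, :, 0:2, 0:2] = mask * src + (1 - mask) * dst[:, :, 0:2, 0:2]
    print(dst[0, 0])  # top-left 2x2 block is 0.5, the rest stays 0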
@@ -11,6 +44,7 @@ class LatentCompositeMasked:
                 "source": ("LATENT",),
                 "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
                 "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
+                "resize_source": ("BOOLEAN", {"default": False}),
             },
             "optional": {
                 "mask": ("MASK",),
@@ -21,40 +55,36 @@ class LatentCompositeMasked:
     CATEGORY = "latent"

-    def composite(self, destination, source, x, y, mask = None):
+    def composite(self, destination, source, x, y, resize_source, mask = None):
         output = destination.copy()
         destination = destination["samples"].clone()
         source = source["samples"]
-
-        x = max(-source.shape[3] * 8, min(x, destination.shape[3] * 8))
-        y = max(-source.shape[2] * 8, min(y, destination.shape[2] * 8))
-
-        left, top = (x // 8, y // 8)
-        right, bottom = (left + source.shape[3], top + source.shape[2],)
-
-        if mask is None:
-            mask = torch.ones_like(source)
-        else:
-            mask = mask.clone()
-            mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear")
-            mask = mask.repeat((source.shape[0], source.shape[1], 1, 1))
-
-        # calculate the bounds of the source that will be overlapping the destination
-        # this prevents the source trying to overwrite latent pixels that are out of bounds
-        # of the destination
-        visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),)
-
-        mask = mask[:, :, :visible_height, :visible_width]
-        inverse_mask = torch.ones_like(mask) - mask
-
-        source_portion = mask * source[:, :, :visible_height, :visible_width]
-        destination_portion = inverse_mask * destination[:, :, top:bottom, left:right]
-
-        destination[:, :, top:bottom, left:right] = source_portion + destination_portion
-
-        output["samples"] = destination
-
+        output["samples"] = composite(destination, source, x, y, mask, 8, resize_source)
         return (output,)

+class ImageCompositeMasked:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "destination": ("IMAGE",),
+                "source": ("IMAGE",),
+                "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}),
+                "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}),
+                "resize_source": ("BOOLEAN", {"default": False}),
+            },
+            "optional": {
+                "mask": ("MASK",),
+            }
+        }
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "composite"
+
+    CATEGORY = "image"
+
+    def composite(self, destination, source, x, y, resize_source, mask = None):
+        destination = destination.clone().movedim(-1, 1)
+        output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1)
+        return (output,)

 class MaskToImage:
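Aside: one helper now serves both spaces, with multiplier=8 for latents (one latent cell spans an 8x8 pixel patch in SD) and multiplier=1 for images, so node inputs stay in pixel units either way:

    # The only difference between the latent and image nodes is the unit
    # conversion; x and y arrive in pixels in both cases.
    multiplier = 8            # latent composite; ImageCompositeMasked uses 1
    x = 72                    # pixels
    print(x // multiplier)    # 9: column offset in latent cells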
@@ -249,10 +279,40 @@ class FeatherMask:

         return (output,)

+class GrowMask:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "mask": ("MASK",),
+                "expand": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}),
+                "tapered_corners": ("BOOLEAN", {"default": True}),
+            },
+        }
+
+    CATEGORY = "mask"
+
+    RETURN_TYPES = ("MASK",)
+
+    FUNCTION = "expand_mask"
+
+    def expand_mask(self, mask, expand, tapered_corners):
+        c = 0 if tapered_corners else 1
+        kernel = np.array([[c, 1, c],
+                           [1, 1, 1],
+                           [c, 1, c]])
+        output = mask.numpy().copy()
+        while expand > 0:
+            output = grey_dilation(output, footprint=kernel)
+            expand -= 1
+        output = torch.from_numpy(output)
+        return (output,)
+
 NODE_CLASS_MAPPINGS = {
     "LatentCompositeMasked": LatentCompositeMasked,
+    "ImageCompositeMasked": ImageCompositeMasked,
     "MaskToImage": MaskToImage,
     "ImageToMask": ImageToMask,
     "SolidMask": SolidMask,
@@ -260,6 +320,7 @@ NODE_CLASS_MAPPINGS = {
     "CropMask": CropMask,
     "MaskComposite": MaskComposite,
     "FeatherMask": FeatherMask,
+    "GrowMask": GrowMask,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {

==== next file ====

@@ -59,8 +59,8 @@ class Blend:
     def g(self, x):
         return torch.where(x <= 0.25, ((16 * x - 12) * x + 4) * x, torch.sqrt(x))

-def gaussian_kernel(kernel_size: int, sigma: float):
-    x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size), torch.linspace(-1, 1, kernel_size), indexing="ij")
+def gaussian_kernel(kernel_size: int, sigma: float, device=None):
+    x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size, device=device), torch.linspace(-1, 1, kernel_size, device=device), indexing="ij")
     d = torch.sqrt(x * x + y * y)
     g = torch.exp(-(d * d) / (2.0 * sigma * sigma))
     return g / g.sum()
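Aside: building the kernel on the image's device skips a CPU allocation and the device-mismatch error that convolving a CPU kernel against a CUDA image would raise:

    import torch

    # The kernel lands wherever the image already lives.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    kernel_1d = torch.linspace(-1, 1, 5, device=device)
    print(kernel_1d.device)  # matches the image tensor's device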
@@ -101,7 +101,7 @@ class Blur:
         batch_size, height, width, channels = image.shape

         kernel_size = blur_radius * 2 + 1
-        kernel = gaussian_kernel(kernel_size, sigma).repeat(channels, 1, 1).unsqueeze(1)
+        kernel = gaussian_kernel(kernel_size, sigma, device=image.device).repeat(channels, 1, 1).unsqueeze(1)

         image = image.permute(0, 3, 1, 2) # Torch wants (B, C, H, W) we use (B, H, W, C)
         padded_image = F.pad(image, (blur_radius,blur_radius,blur_radius,blur_radius), 'reflect')

==== next file ====

@@ -36,13 +36,15 @@ def get_gpu_names():
     else:
         return set()

-def cuda_malloc_supported():
-    blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
-                 "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
-                 "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
-                 "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000",
-                 "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M"}
+blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
+             "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
+             "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
+             "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000",
+             "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M",
+             "GeForce GTX 1650", "GeForce GTX 1630"
+             }
+
+def cuda_malloc_supported():
     try:
         names = get_gpu_names()
     except:
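Aside: with the blacklist hoisted to module scope it can be imported elsewhere (main.py does exactly that below). The membership test it supports is a substring scan over the device name, equivalent to:

    blacklist = {"GeForce GTX 1650", "GeForce GTX 1630"}  # abridged
    device_name = "NVIDIA GeForce GTX 1650 : cudaMallocAsync"
    if any(bad in device_name for bad in blacklist):
        print("cuda-malloc likely unsupported; run with --disable-cuda-malloc")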

==== next file ====

@@ -51,9 +51,10 @@ class Example:
                     "default": 0,
                     "min": 0, #Minimum value
                     "max": 4096, #Maximum value
-                    "step": 64 #Slider's step
+                    "step": 64, #Slider's step
+                    "display": "number" # Cosmetic only: display as "number" or "slider"
                 }),
-                "float_field": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                "float_field": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "display": "number"}),
                 "print_to_screen": (["enable", "disable"],),
                 "string_field": ("STRING", {
                     "multiline": False, #True if you want the field to look like the one on the ClipTextEncode node

==== next file ====

@@ -354,6 +354,7 @@ class PromptExecutor:
                 d = self.outputs_ui.pop(x)
                 del d

+        comfy.model_management.cleanup_models()
         if self.server.client_id is not None:
             self.server.send_sync("execution_cached", { "nodes": list(current_outputs) , "prompt_id": prompt_id}, self.server.client_id)

         executed = set()

==== next file ====

@@ -43,6 +43,10 @@ def set_output_directory(output_dir):
     global output_directory
     output_directory = output_dir

+def set_temp_directory(temp_dir):
+    global temp_directory
+    temp_directory = temp_dir
+
 def get_output_directory():
     global output_directory
     return output_directory
@@ -111,6 +115,8 @@ def add_model_folder_path(folder_name, full_folder_path):
     global folder_names_and_paths
     if folder_name in folder_names_and_paths:
         folder_names_and_paths[folder_name][0].append(full_folder_path)
+    else:
+        folder_names_and_paths[folder_name] = ([full_folder_path], set())

 def get_folder_paths(folder_name):
     return folder_names_and_paths[folder_name][0][:]
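Aside: the new else branch makes registration an upsert, so unknown folder names get a fresh (paths, extensions) entry instead of raising KeyError. Reproducing the data shape in isolation:

    folder_names_and_paths = {}

    def add_model_folder_path(folder_name, full_folder_path):
        if folder_name in folder_names_and_paths:
            folder_names_and_paths[folder_name][0].append(full_folder_path)
        else:
            folder_names_and_paths[folder_name] = ([full_folder_path], set())

    add_model_folder_path("loras", "/models/loras")   # creates the entry
    add_model_folder_path("loras", "/extra/loras")    # appends to it
    print(folder_names_and_paths["loras"][0])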

==== main.py ====

@@ -72,6 +72,17 @@ from server import BinaryEventTypes
 from nodes import init_custom_nodes
 import comfy.model_management

+def cuda_malloc_warning():
+    device = comfy.model_management.get_torch_device()
+    device_name = comfy.model_management.get_torch_device_name(device)
+    cuda_malloc_warning = False
+    if "cudaMallocAsync" in device_name:
+        for b in cuda_malloc.blacklist:
+            if b in device_name:
+                cuda_malloc_warning = True
+        if cuda_malloc_warning:
+            print("\nWARNING: this card most likely does not support cuda-malloc, if you get \"CUDA error\" please run ComfyUI with: --disable-cuda-malloc\n")
+
 def prompt_worker(q, server):
     e = execution.PromptExecutor(server)
     while True:
@@ -100,7 +111,7 @@ def hijack_progress(server):

 def cleanup_temp():
-    temp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp")
+    temp_dir = folder_paths.get_temp_directory()
     if os.path.exists(temp_dir):
         shutil.rmtree(temp_dir, ignore_errors=True)
@@ -127,6 +138,10 @@ def load_extra_path_config(yaml_path):

 if __name__ == "__main__":
+    if args.temp_directory:
+        temp_dir = os.path.join(os.path.abspath(args.temp_directory), "temp")
+        print(f"Setting temp directory to: {temp_dir}")
+        folder_paths.set_temp_directory(temp_dir)
     cleanup_temp()

     loop = asyncio.new_event_loop()
@@ -143,6 +158,9 @@ if __name__ == "__main__":
             load_extra_path_config(config_path)

     init_custom_nodes()
+
+    cuda_malloc_warning()
+
     server.add_routes()
     hijack_progress(server)
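Aside: assuming the flag is spelled --temp-directory (which args.temp_directory suggests), a launch like `python main.py --temp-directory scratch/comfy` resolves through the path handling above to:

    import os

    user_dir = "scratch/comfy"  # hypothetical CLI value
    print(os.path.join(os.path.abspath(user_dir), "temp"))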

==== next file ====

@@ -771,7 +771,7 @@ class StyleModelApply:
     CATEGORY = "conditioning/style_model"

     def apply_stylemodel(self, clip_vision_output, style_model, conditioning):
-        cond = style_model.get_cond(clip_vision_output)
+        cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
         c = []
         for t in conditioning:
             n = [torch.cat((t[0], cond), dim=1), t[1].copy()]
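Aside: the flatten/unsqueeze pair folds a batched CLIP-vision output into a single token sequence so it concatenates cleanly onto the conditioning along dim=1. A quick shape check (the 257/768 sizes are illustrative):

    import torch

    # (B, T, C) -> (B*T, C) -> (1, B*T, C)
    cond = torch.randn(2, 257, 768)
    cond = cond.flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
    print(cond.shape)  # torch.Size([1, 514, 768])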
@@ -1448,6 +1448,44 @@ class ImageInvert:
         s = 1.0 - image
         return (s,)

+class ImageBatch:
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "image1": ("IMAGE",), "image2": ("IMAGE",)}}
+
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "batch"
+
+    CATEGORY = "image"
+
+    def batch(self, image1, image2):
+        if image1.shape[1:] != image2.shape[1:]:
+            image2 = comfy.utils.common_upscale(image2.movedim(-1,1), image1.shape[2], image1.shape[1], "bilinear", "center").movedim(1,-1)
+        s = torch.cat((image1, image2), dim=0)
+        return (s,)
+
+class EmptyImage:
+    def __init__(self, device="cpu"):
+        self.device = device
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "width": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
+                              "height": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 64}),
+                              "color": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFF, "step": 1, "display": "color"}),
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    FUNCTION = "generate"
+
+    CATEGORY = "image"
+
+    def generate(self, width, height, batch_size=1, color=0):
+        r = torch.full([batch_size, height, width, 1], ((color >> 16) & 0xFF) / 0xFF)
+        g = torch.full([batch_size, height, width, 1], ((color >> 8) & 0xFF) / 0xFF)
+        b = torch.full([batch_size, height, width, 1], ((color) & 0xFF) / 0xFF)
+        return (torch.cat((r, g, b), dim=-1), )
+
 class ImagePadForOutpaint:
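Aside: generate() unpacks a packed 0xRRGGBB integer with shifts and masks, then scales each channel to [0, 1]. A worked example:

    color = 0xFF8040
    r = ((color >> 16) & 0xFF) / 0xFF   # 0xFF -> 1.0
    g = ((color >> 8) & 0xFF) / 0xFF    # 0x80 -> ~0.502
    b = (color & 0xFF) / 0xFF           # 0x40 -> ~0.251
    print(r, g, b)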
@@ -1533,7 +1571,9 @@ NODE_CLASS_MAPPINGS = {
     "ImageScale": ImageScale,
     "ImageScaleBy": ImageScaleBy,
     "ImageInvert": ImageInvert,
+    "ImageBatch": ImageBatch,
     "ImagePadForOutpaint": ImagePadForOutpaint,
+    "EmptyImage": EmptyImage,
     "ConditioningAverage ": ConditioningAverage ,
     "ConditioningCombine": ConditioningCombine,
     "ConditioningConcat": ConditioningConcat,
@@ -1627,6 +1667,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "ImageUpscaleWithModel": "Upscale Image (using Model)",
     "ImageInvert": "Invert Image",
     "ImagePadForOutpaint": "Pad Image for Outpainting",
+    "ImageBatch": "Batch Images",
     # _for_testing
     "VAEDecodeTiled": "VAE Decode (Tiled)",
     "VAEEncodeTiled": "VAE Encode (Tiled)",

==== next file ====

@@ -154,3 +154,9 @@ wldhx.yadisk-direct==0.0.6
 wrapt==1.15.0
 yarl==1.9.2
 zipp==3.16.2
+pyyaml
+Pillow
+scipy
+tqdm
+psutil

==== next file ====

@@ -1,4 +1,4 @@
-import { app } from "/scripts/app.js";
+import { app } from "../../scripts/app.js";

 const id = "Comfy.LinkRenderMode";
 const ext = {

==== next file ====

@@ -9766,6 +9766,7 @@ LGraphNode.prototype.executeAction = function(action)
                 switch (w.type) {
                     case "button":
+                        ctx.fillStyle = background_color;
                         if (w.clicked) {
                             ctx.fillStyle = "#AAA";
                             w.clicked = false;

==== next file ====

@@ -284,6 +284,11 @@ export class ComfyApp {
                 }
             }

+            options.push({
+                content: "Bypass",
+                callback: (obj) => { if (this.mode === 4) this.mode = 0; else this.mode = 4; this.graph.change(); }
+            });
+
             // prevent conflict of clipspace content
             if(!ComfyApp.clipspace_return_node) {
                 options.push({

==== next file ====

@@ -433,7 +433,7 @@ export const ComfyWidgets = {
             // Add handler to check if an image is being dragged over our node
             node.onDragOver = function (e) {
                 if (e.dataTransfer && e.dataTransfer.items) {
-                    const image = [...e.dataTransfer.items].find((f) => f.kind === "file" && f.type.startsWith("image/"));
+                    const image = [...e.dataTransfer.items].find((f) => f.kind === "file");
                     return !!image;
                 }