diff --git a/.ci/nightly/update_windows/update.py b/.ci/nightly/update_windows/update.py deleted file mode 100755 index c09f29a80..000000000 --- a/.ci/nightly/update_windows/update.py +++ /dev/null @@ -1,65 +0,0 @@ -import pygit2 -from datetime import datetime -import sys - -def pull(repo, remote_name='origin', branch='master'): - for remote in repo.remotes: - if remote.name == remote_name: - remote.fetch() - remote_master_id = repo.lookup_reference('refs/remotes/origin/%s' % (branch)).target - merge_result, _ = repo.merge_analysis(remote_master_id) - # Up to date, do nothing - if merge_result & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE: - return - # We can just fastforward - elif merge_result & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD: - repo.checkout_tree(repo.get(remote_master_id)) - try: - master_ref = repo.lookup_reference('refs/heads/%s' % (branch)) - master_ref.set_target(remote_master_id) - except KeyError: - repo.create_branch(branch, repo.get(remote_master_id)) - repo.head.set_target(remote_master_id) - elif merge_result & pygit2.GIT_MERGE_ANALYSIS_NORMAL: - repo.merge(remote_master_id) - - if repo.index.conflicts is not None: - for conflict in repo.index.conflicts: - print('Conflicts found in:', conflict[0].path) - raise AssertionError('Conflicts, ahhhhh!!') - - user = repo.default_signature - tree = repo.index.write_tree() - commit = repo.create_commit('HEAD', - user, - user, - 'Merge!', - tree, - [repo.head.target, remote_master_id]) - # We need to do this or git CLI will think we are still merging. - repo.state_cleanup() - else: - raise AssertionError('Unknown merge analysis result') - - -repo = pygit2.Repository(str(sys.argv[1])) -ident = pygit2.Signature('comfyui', 'comfy@ui') -try: - print("stashing current changes") - repo.stash(ident) -except KeyError: - print("nothing to stash") -backup_branch_name = 'backup_branch_{}'.format(datetime.today().strftime('%Y-%m-%d_%H_%M_%S')) -print("creating backup branch: {}".format(backup_branch_name)) -repo.branches.local.create(backup_branch_name, repo.head.peel()) - -print("checking out master branch") -branch = repo.lookup_branch('master') -ref = repo.lookup_reference(branch.name) -repo.checkout(ref) - -print("pulling latest changes") -pull(repo) - -print("Done!") - diff --git a/.ci/nightly/update_windows/update_comfyui.bat b/.ci/nightly/update_windows/update_comfyui.bat deleted file mode 100755 index 60d1e694f..000000000 --- a/.ci/nightly/update_windows/update_comfyui.bat +++ /dev/null @@ -1,2 +0,0 @@ -..\python_embeded\python.exe .\update.py ..\ComfyUI\ -pause diff --git a/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat b/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat index c5e0c6be7..94f5d1023 100755 --- a/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat +++ b/.ci/nightly/update_windows/update_comfyui_and_python_dependencies.bat @@ -1,3 +1,3 @@ ..\python_embeded\python.exe .\update.py ..\ComfyUI\ -..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 -r ../ComfyUI/requirements.txt pygit2 +..\python_embeded\python.exe -s -m pip install --upgrade --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -r ../ComfyUI/requirements.txt pygit2 pause diff --git a/.ci/nightly/windows_base_files/README_VERY_IMPORTANT.txt b/.ci/nightly/windows_base_files/README_VERY_IMPORTANT.txt deleted file mode 100755 index 656b9db43..000000000 --- 
a/.ci/nightly/windows_base_files/README_VERY_IMPORTANT.txt +++ /dev/null @@ -1,27 +0,0 @@ -HOW TO RUN: - -if you have a NVIDIA gpu: - -run_nvidia_gpu.bat - - - -To run it in slow CPU mode: - -run_cpu.bat - - - -IF YOU GET A RED ERROR IN THE UI MAKE SURE YOU HAVE A MODEL/CHECKPOINT IN: ComfyUI\models\checkpoints - -You can download the stable diffusion 1.5 one from: https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt - - - -RECOMMENDED WAY TO UPDATE: -To update the ComfyUI code: update\update_comfyui.bat - - - -To update ComfyUI with the python dependencies: -update\update_comfyui_and_python_dependencies.bat diff --git a/.ci/nightly/windows_base_files/run_cpu.bat b/.ci/nightly/windows_base_files/run_cpu.bat deleted file mode 100755 index c3ba41721..000000000 --- a/.ci/nightly/windows_base_files/run_cpu.bat +++ /dev/null @@ -1,2 +0,0 @@ -.\python_embeded\python.exe -s ComfyUI\main.py --cpu --windows-standalone-build -pause diff --git a/.github/workflows/windows_release_cu118_dependencies_2.yml b/.github/workflows/windows_release_cu118_dependencies_2.yml index a88449527..42adee9e7 100644 --- a/.github/workflows/windows_release_cu118_dependencies_2.yml +++ b/.github/workflows/windows_release_cu118_dependencies_2.yml @@ -17,7 +17,7 @@ jobs: - shell: bash run: | - python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers==0.0.19.dev516 --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir + python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir python -m pip install --no-cache-dir ./temp_wheel_dir/* echo installed basic ls -lah temp_wheel_dir diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml index 291d754e3..b6a18ec0a 100644 --- a/.github/workflows/windows_release_nightly_pytorch.yml +++ b/.github/workflows/windows_release_nightly_pytorch.yml @@ -19,21 +19,21 @@ jobs: fetch-depth: 0 - uses: actions/setup-python@v4 with: - python-version: '3.10.9' + python-version: '3.11.3' - shell: bash run: | cd .. cp -r ComfyUI ComfyUI_copy - curl https://www.python.org/ftp/python/3.10.9/python-3.10.9-embed-amd64.zip -o python_embeded.zip + curl https://www.python.org/ftp/python/3.11.3/python-3.11.3-embed-amd64.zip -o python_embeded.zip unzip python_embeded.zip -d python_embeded cd python_embeded - echo 'import site' >> ./python310._pth + echo 'import site' >> ./python311._pth curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py ./python.exe get-pip.py - python -m pip wheel torch torchvision torchaudio --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu118 -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir + python -m pip wheel torch torchvision torchaudio --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu121 -r ../ComfyUI/requirements.txt pygit2 -w ../temp_wheel_dir ls ../temp_wheel_dir ./python.exe -s -m pip install --pre ../temp_wheel_dir/* - sed -i '1i../ComfyUI' ./python310._pth + sed -i '1i../ComfyUI' ./python311._pth cd .. @@ -46,6 +46,8 @@ jobs: mkdir update cp -r ComfyUI/.ci/update_windows/* ./update/ cp -r ComfyUI/.ci/windows_base_files/* ./ + cp -r ComfyUI/.ci/nightly/update_windows/* ./update/ + cp -r ComfyUI/.ci/nightly/windows_base_files/* ./ cd .. 
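The workflow changes above move the nightly standalone build from the cu118 wheel index and Python 3.10 to the cu121 nightly index and Python 3.11. A quick way to confirm which torch build actually landed in the embedded environment is to query torch itself; this is a hedged verification sketch, not part of the diff:

```python
# Not part of this diff: a quick check that the embedded Python picked up
# a cu121 nightly torch wheel after running the update script above.
import torch

print("torch version:", torch.__version__)         # nightly builds carry a .dev version string
print("built against CUDA:", torch.version.cuda)   # expected to report "12.1" for cu121 wheels
print("CUDA available:", torch.cuda.is_available())
```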
diff --git a/README.md b/README.md index bf16006bf..3b3824714 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ A powerful and modular stable diffusion GUI and backend. This ui will let you design and execute advanced stable diffusion pipelines using a graph/nodes/flowchart based interface. For some workflow examples and see what ComfyUI can do you can check out: ### [ComfyUI Examples](https://comfyanonymous.github.io/ComfyUI_examples/) +### [Installing ComfyUI](#installing) + ## Features - Nodes/graph/flowchart interface to experiment and create complex Stable Diffusion workflows without needing to code anything. - Fully supports SD1.x and SD2.x @@ -17,6 +19,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin - Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models. - Embeddings/Textual inversion - [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/) +- [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/) - Loading full workflows (with seeds) from generated PNG files. - Saving/Loading workflows as Json files. - Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones. diff --git a/comfy/cldm/cldm.py b/comfy/cldm/cldm.py index c60abf80b..cb660ee77 100644 --- a/comfy/cldm/cldm.py +++ b/comfy/cldm/cldm.py @@ -5,17 +5,17 @@ import torch import torch as th import torch.nn as nn -from ldm.modules.diffusionmodules.util import ( +from ..ldm.modules.diffusionmodules.util import ( conv_nd, linear, zero_module, timestep_embedding, ) -from ldm.modules.attention import SpatialTransformer -from ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample, AttentionBlock -from ldm.models.diffusion.ddpm import LatentDiffusion -from ldm.util import log_txt_as_img, exists, instantiate_from_config +from ..ldm.modules.attention import SpatialTransformer +from ..ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample, AttentionBlock +from ..ldm.models.diffusion.ddpm import LatentDiffusion +from ..ldm.util import log_txt_as_img, exists, instantiate_from_config class ControlledUnetModel(UNetModel): diff --git a/comfy/cli_args.py b/comfy/cli_args.py index b24054ce0..764427165 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -10,6 +10,7 @@ parser.add_argument("--output-directory", type=str, default=None, help="Set the parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).") +parser.add_argument("--directml", type=int, nargs="?", metavar="DIRECTML_DEVICE", const=-1, help="Use torch-directml.") attn_group = parser.add_mutually_exclusive_group() attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. 
Ignored when xformers is used.") diff --git a/comfy/extra_samplers/uni_pc.py b/comfy/extra_samplers/uni_pc.py index e96cfc93a..2ff10caf1 100644 --- a/comfy/extra_samplers/uni_pc.py +++ b/comfy/extra_samplers/uni_pc.py @@ -712,7 +712,7 @@ class UniPC: def sample(self, x, timesteps, t_start=None, t_end=None, order=3, skip_type='time_uniform', method='singlestep', lower_order_final=True, denoise_to_zero=False, solver_type='dpm_solver', - atol=0.0078, rtol=0.05, corrector=False, + atol=0.0078, rtol=0.05, corrector=False, callback=None, disable_pbar=False ): t_0 = 1. / self.noise_schedule.total_N if t_end is None else t_end t_T = self.noise_schedule.T if t_start is None else t_start @@ -723,7 +723,7 @@ class UniPC: # timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device) assert timesteps.shape[0] - 1 == steps # with torch.no_grad(): - for step_index in trange(steps): + for step_index in trange(steps, disable=disable_pbar): if self.noise_mask is not None: x = x * self.noise_mask + (1. - self.noise_mask) * (self.masked_image * self.noise_schedule.marginal_alpha(timesteps[step_index]) + self.noise * self.noise_schedule.marginal_std(timesteps[step_index])) if step_index == 0: @@ -766,6 +766,8 @@ class UniPC: if model_x is None: model_x = self.model_fn(x, vec_t) model_prev_list[-1] = model_x + if callback is not None: + callback(step_index, model_prev_list[-1], x, steps) else: raise NotImplementedError() if denoise_to_zero: @@ -833,7 +835,7 @@ def expand_dims(v, dims): -def sample_unipc(model, noise, image, sigmas, sampling_function, max_denoise, extra_args=None, callback=None, disable=None, noise_mask=None, variant='bh1'): +def sample_unipc(model, noise, image, sigmas, sampling_function, max_denoise, extra_args=None, callback=None, disable=False, noise_mask=None, variant='bh1'): to_zero = False if sigmas[-1] == 0: timesteps = torch.nn.functional.interpolate(sigmas[None,None,:-1], size=(len(sigmas),), mode='linear')[0][0] @@ -877,7 +879,7 @@ def sample_unipc(model, noise, image, sigmas, sampling_function, max_denoise, ex order = min(3, len(timesteps) - 1) uni_pc = UniPC(model_fn, ns, predict_x0=True, thresholding=False, noise_mask=noise_mask, masked_image=image, noise=noise, variant=variant) - x = uni_pc.sample(img, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True) + x = uni_pc.sample(img, timesteps=timesteps, skip_type="time_uniform", method="multistep", order=order, lower_order_final=True, callback=callback, disable_pbar=disable) if not to_zero: x /= ns.marginal_alpha(timesteps[-1]) return x diff --git a/comfy/gligen.py b/comfy/gligen.py index 8770383e5..45b674503 100644 --- a/comfy/gligen.py +++ b/comfy/gligen.py @@ -1,6 +1,6 @@ import torch from torch import nn, einsum -from ldm.modules.attention import CrossAttention +from .ldm.modules.attention import CrossAttention from inspect import isfunction diff --git a/comfy/ldm/models/autoencoder.py b/comfy/ldm/models/autoencoder.py index bd698621c..1fb7ed879 100644 --- a/comfy/ldm/models/autoencoder.py +++ b/comfy/ldm/models/autoencoder.py @@ -3,11 +3,11 @@ import torch import torch.nn.functional as F from contextlib import contextmanager -from ldm.modules.diffusionmodules.model import Encoder, Decoder -from ldm.modules.distributions.distributions import DiagonalGaussianDistribution +from comfy.ldm.modules.diffusionmodules.model import Encoder, Decoder +from comfy.ldm.modules.distributions.distributions import DiagonalGaussianDistribution -from ldm.util 
import instantiate_from_config -from ldm.modules.ema import LitEma +from comfy.ldm.util import instantiate_from_config +from comfy.ldm.modules.ema import LitEma # class AutoencoderKL(pl.LightningModule): class AutoencoderKL(torch.nn.Module): diff --git a/comfy/ldm/models/diffusion/ddim.py b/comfy/ldm/models/diffusion/ddim.py index e00ffd3f5..c279f2c18 100644 --- a/comfy/ldm/models/diffusion/ddim.py +++ b/comfy/ldm/models/diffusion/ddim.py @@ -4,7 +4,7 @@ import torch import numpy as np from tqdm import tqdm -from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor +from comfy.ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor class DDIMSampler(object): @@ -81,6 +81,7 @@ class DDIMSampler(object): extra_args=None, to_zero=True, end_step=None, + disable_pbar=False, **kwargs ): self.make_schedule_timesteps(ddim_timesteps=ddim_timesteps, ddim_eta=eta, verbose=verbose) @@ -103,7 +104,8 @@ class DDIMSampler(object): denoise_function=denoise_function, extra_args=extra_args, to_zero=to_zero, - end_step=end_step + end_step=end_step, + disable_pbar=disable_pbar ) return samples, intermediates @@ -185,7 +187,7 @@ class DDIMSampler(object): mask=None, x0=None, img_callback=None, log_every_t=100, temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None, unconditional_guidance_scale=1., unconditional_conditioning=None, dynamic_threshold=None, - ucg_schedule=None, denoise_function=None, extra_args=None, to_zero=True, end_step=None): + ucg_schedule=None, denoise_function=None, extra_args=None, to_zero=True, end_step=None, disable_pbar=False): device = self.model.betas.device b = shape[0] if x_T is None: @@ -204,7 +206,7 @@ class DDIMSampler(object): total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0] # print(f"Running DDIM Sampling with {total_steps} timesteps") - iterator = tqdm(time_range[:end_step], desc='DDIM Sampler', total=end_step) + iterator = tqdm(time_range[:end_step], desc='DDIM Sampler', total=end_step, disable=disable_pbar) for i, step in enumerate(iterator): index = total_steps - i - 1 diff --git a/comfy/ldm/models/diffusion/ddpm.py b/comfy/ldm/models/diffusion/ddpm.py index d3f0eb2b2..0f484a7f1 100644 --- a/comfy/ldm/models/diffusion/ddpm.py +++ b/comfy/ldm/models/diffusion/ddpm.py @@ -19,12 +19,12 @@ from tqdm import tqdm from torchvision.utils import make_grid # from pytorch_lightning.utilities.distributed import rank_zero_only -from ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config -from ldm.modules.ema import LitEma -from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution -from ldm.models.autoencoder import IdentityFirstStage, AutoencoderKL -from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like -from ldm.models.diffusion.ddim import DDIMSampler +from comfy.ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config +from comfy.ldm.modules.ema import LitEma +from comfy.ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution +from ..autoencoder import IdentityFirstStage, AutoencoderKL +from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like +from .ddim import DDIMSampler __conditioning_keys__ = {'concat': 'c_concat', diff --git 
a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 98dbda635..5eabecd65 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -6,7 +6,7 @@ from torch import nn, einsum from einops import rearrange, repeat from typing import Optional, Any -from ldm.modules.diffusionmodules.util import checkpoint +from .diffusionmodules.util import checkpoint from .sub_quadratic_attention import efficient_dot_product_attention from comfy import model_management @@ -21,7 +21,7 @@ if model_management.xformers_enabled(): import os _ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32") -from cli_args import args +from comfy.cli_args import args def exists(val): return val is not None @@ -163,13 +163,17 @@ class CrossAttentionBirchSan(nn.Module): nn.Dropout(dropout) ) - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): h = self.heads query = self.to_q(x) context = default(context, x) key = self.to_k(context) - value = self.to_v(context) + if value is not None: + value = self.to_v(value) + else: + value = self.to_v(context) + del context, x query = query.unflatten(-1, (self.heads, -1)).transpose(1,2).flatten(end_dim=1) @@ -256,13 +260,17 @@ class CrossAttentionDoggettx(nn.Module): nn.Dropout(dropout) ) - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): h = self.heads q_in = self.to_q(x) context = default(context, x) k_in = self.to_k(context) - v_in = self.to_v(context) + if value is not None: + v_in = self.to_v(value) + del value + else: + v_in = self.to_v(context) del context, x q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in)) @@ -350,13 +358,17 @@ class CrossAttention(nn.Module): nn.Dropout(dropout) ) - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): h = self.heads q = self.to_q(x) context = default(context, x) k = self.to_k(context) - v = self.to_v(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) @@ -402,11 +414,15 @@ class MemoryEfficientCrossAttention(nn.Module): self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.attention_op: Optional[Any] = None - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): q = self.to_q(x) context = default(context, x) k = self.to_k(context) - v = self.to_v(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) b, _, _ = q.shape q, k, v = map( @@ -447,19 +463,19 @@ class CrossAttentionPytorch(nn.Module): self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) self.attention_op: Optional[Any] = None - def forward(self, x, context=None, mask=None): + def forward(self, x, context=None, value=None, mask=None): q = self.to_q(x) context = default(context, x) k = self.to_k(context) - v = self.to_v(context) + if value is not None: + v = self.to_v(value) + del value + else: + v = self.to_v(context) b, _, _ = q.shape q, k, v = map( - lambda t: t.unsqueeze(3) - .reshape(b, t.shape[1], self.heads, self.dim_head) - .permute(0, 2, 1, 3) - .reshape(b * self.heads, t.shape[1], self.dim_head) - .contiguous(), + lambda t: t.view(b, -1, self.heads, self.dim_head).transpose(1, 2), (q, k, v), ) @@ -468,10 +484,7 @@ class 
CrossAttentionPytorch(nn.Module): if exists(mask): raise NotImplementedError out = ( - out.unsqueeze(0) - .reshape(b, self.heads, out.shape[1], self.dim_head) - .permute(0, 2, 1, 3) - .reshape(b, out.shape[1], self.heads * self.dim_head) + out.transpose(1, 2).reshape(b, -1, self.heads * self.dim_head) ) return self.to_out(out) @@ -519,11 +532,25 @@ class BasicTransformerBlock(nn.Module): transformer_patches = {} n = self.norm1(x) + if self.disable_self_attn: + context_attn1 = context + else: + context_attn1 = None + value_attn1 = None + + if "attn1_patch" in transformer_patches: + patch = transformer_patches["attn1_patch"] + if context_attn1 is None: + context_attn1 = n + value_attn1 = context_attn1 + for p in patch: + n, context_attn1, value_attn1 = p(current_index, n, context_attn1, value_attn1) + if "tomesd" in transformer_options: m, u = tomesd.get_functions(x, transformer_options["tomesd"]["ratio"], transformer_options["original_shape"]) - n = u(self.attn1(m(n), context=context if self.disable_self_attn else None)) + n = u(self.attn1(m(n), context=context_attn1, value=value_attn1)) else: - n = self.attn1(n, context=context if self.disable_self_attn else None) + n = self.attn1(n, context=context_attn1, value=value_attn1) x += n if "middle_patch" in transformer_patches: @@ -532,7 +559,16 @@ class BasicTransformerBlock(nn.Module): x = p(current_index, x) n = self.norm2(x) - n = self.attn2(n, context=context) + + context_attn2 = context + value_attn2 = None + if "attn2_patch" in transformer_patches: + patch = transformer_patches["attn2_patch"] + value_attn2 = context_attn2 + for p in patch: + n, context_attn2, value_attn2 = p(current_index, n, context_attn2, value_attn2) + + n = self.attn2(n, context=context_attn2, value=value_attn2) x += n x = self.ff(self.norm3(x)) + x diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py index 1599d386e..5e4d2b60f 100644 --- a/comfy/ldm/modules/diffusionmodules/model.py +++ b/comfy/ldm/modules/diffusionmodules/model.py @@ -6,7 +6,7 @@ import numpy as np from einops import rearrange from typing import Optional, Any -from ldm.modules.attention import MemoryEfficientCrossAttention +from ..attention import MemoryEfficientCrossAttention from comfy import model_management if model_management.xformers_enabled_vae(): diff --git a/comfy/ldm/modules/diffusionmodules/openaimodel.py b/comfy/ldm/modules/diffusionmodules/openaimodel.py index 4c69c8567..4352b756d 100644 --- a/comfy/ldm/modules/diffusionmodules/openaimodel.py +++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py @@ -6,7 +6,7 @@ import torch as th import torch.nn as nn import torch.nn.functional as F -from ldm.modules.diffusionmodules.util import ( +from .util import ( checkpoint, conv_nd, linear, @@ -15,8 +15,8 @@ from ldm.modules.diffusionmodules.util import ( normalization, timestep_embedding, ) -from ldm.modules.attention import SpatialTransformer -from ldm.util import exists +from ..attention import SpatialTransformer +from comfy.ldm.util import exists # dummy replace @@ -76,12 +76,14 @@ class TimestepEmbedSequential(nn.Sequential, TimestepBlock): support it as an extra input. 
""" - def forward(self, x, emb, context=None, transformer_options={}): + def forward(self, x, emb, context=None, transformer_options={}, output_shape=None): for layer in self: if isinstance(layer, TimestepBlock): x = layer(x, emb) elif isinstance(layer, SpatialTransformer): x = layer(x, context, transformer_options) + elif isinstance(layer, Upsample): + x = layer(x, output_shape=output_shape) else: x = layer(x) return x @@ -105,14 +107,20 @@ class Upsample(nn.Module): if use_conv: self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=padding) - def forward(self, x): + def forward(self, x, output_shape=None): assert x.shape[1] == self.channels if self.dims == 3: - x = F.interpolate( - x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest" - ) + shape = [x.shape[2], x.shape[3] * 2, x.shape[4] * 2] + if output_shape is not None: + shape[1] = output_shape[3] + shape[2] = output_shape[4] else: - x = F.interpolate(x, scale_factor=2, mode="nearest") + shape = [x.shape[2] * 2, x.shape[3] * 2] + if output_shape is not None: + shape[0] = output_shape[2] + shape[1] = output_shape[3] + + x = F.interpolate(x, size=shape, mode="nearest") if self.use_conv: x = self.conv(x) return x @@ -813,9 +821,14 @@ class UNetModel(nn.Module): ctrl = control['output'].pop() if ctrl is not None: hsp += ctrl + h = th.cat([h, hsp], dim=1) del hsp - h = module(h, emb, context, transformer_options) + if len(hs) > 0: + output_shape = hs[-1].shape + else: + output_shape = None + h = module(h, emb, context, transformer_options, output_shape) h = h.type(x.dtype) if self.predict_codebook_ids: return self.id_predictor(h) diff --git a/comfy/ldm/modules/diffusionmodules/upscaling.py b/comfy/ldm/modules/diffusionmodules/upscaling.py index 038166620..709a7f52e 100644 --- a/comfy/ldm/modules/diffusionmodules/upscaling.py +++ b/comfy/ldm/modules/diffusionmodules/upscaling.py @@ -3,8 +3,8 @@ import torch.nn as nn import numpy as np from functools import partial -from ldm.modules.diffusionmodules.util import extract_into_tensor, make_beta_schedule -from ldm.util import default +from .util import extract_into_tensor, make_beta_schedule +from comfy.ldm.util import default class AbstractLowScaleModel(nn.Module): diff --git a/comfy/ldm/modules/diffusionmodules/util.py b/comfy/ldm/modules/diffusionmodules/util.py index daf35da7b..82ea3f0a6 100644 --- a/comfy/ldm/modules/diffusionmodules/util.py +++ b/comfy/ldm/modules/diffusionmodules/util.py @@ -15,7 +15,7 @@ import torch.nn as nn import numpy as np from einops import repeat -from ldm.util import instantiate_from_config +from comfy.ldm.util import instantiate_from_config def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): diff --git a/comfy/ldm/modules/encoders/noise_aug_modules.py b/comfy/ldm/modules/encoders/noise_aug_modules.py index f99e7920a..b59bf204b 100644 --- a/comfy/ldm/modules/encoders/noise_aug_modules.py +++ b/comfy/ldm/modules/encoders/noise_aug_modules.py @@ -1,5 +1,5 @@ -from ldm.modules.diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation -from ldm.modules.diffusionmodules.openaimodel import Timestep +from ..diffusionmodules.upscaling import ImageConcatWithNoiseAugmentation +from ..diffusionmodules.openaimodel import Timestep import torch class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation): diff --git a/comfy/model_management.py b/comfy/model_management.py index a0d1313d2..3aea7ea8e 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1,6 +1,6 @@ 
import psutil from enum import Enum -from cli_args import args +from comfy.cli_args import args class VRAMState(Enum): CPU = 0 @@ -20,15 +20,30 @@ total_vram_available_mb = -1 accelerate_enabled = False xpu_available = False +directml_enabled = False +if args.directml is not None: + import torch_directml + directml_enabled = True + device_index = args.directml + if device_index < 0: + directml_device = torch_directml.device() + else: + directml_device = torch_directml.device(device_index) + print("Using directml with device:", torch_directml.device_name(device_index)) + # torch_directml.disable_tiled_resources(True) + try: import torch - try: - import intel_extension_for_pytorch as ipex - if torch.xpu.is_available(): - xpu_available = True - total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) - except: - total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) + if directml_enabled: + total_vram = 4097 #TODO + else: + try: + import intel_extension_for_pytorch as ipex + if torch.xpu.is_available(): + xpu_available = True + total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) + except: + total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) if not args.normalvram and not args.cpu: if total_vram <= 4096: @@ -133,6 +148,7 @@ def unload_model(): #never unload models from GPU on high vram if vram_state != VRAMState.HIGH_VRAM: current_loaded_model.model.cpu() + current_loaded_model.model_patches_to("cpu") current_loaded_model.unpatch_model() current_loaded_model = None @@ -156,6 +172,8 @@ def load_model_gpu(model): except Exception as e: model.unpatch_model() raise e + + model.model_patches_to(get_torch_device()) current_loaded_model = model if vram_state == VRAMState.CPU: pass @@ -214,6 +232,10 @@ def unload_if_low_vram(model): def get_torch_device(): global xpu_available + global directml_enabled + if directml_enabled: + global directml_device + return directml_device if vram_state == VRAMState.MPS: return torch.device("mps") if vram_state == VRAMState.CPU: @@ -231,8 +253,14 @@ def get_autocast_device(dev): def xformers_enabled(): + global xpu_available + global directml_enabled if vram_state == VRAMState.CPU: return False + if xpu_available: + return False + if directml_enabled: + return False return XFORMERS_IS_AVAILABLE @@ -248,6 +276,7 @@ def pytorch_attention_enabled(): def get_free_memory(dev=None, torch_free_too=False): global xpu_available + global directml_enabled if dev is None: dev = get_torch_device() @@ -255,7 +284,10 @@ def get_free_memory(dev=None, torch_free_too=False): mem_free_total = psutil.virtual_memory().available mem_free_torch = mem_free_total else: - if xpu_available: + if directml_enabled: + mem_free_total = 1024 * 1024 * 1024 #TODO + mem_free_torch = mem_free_total + elif xpu_available: mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) mem_free_torch = mem_free_total else: @@ -290,9 +322,14 @@ def mps_mode(): def should_use_fp16(): global xpu_available + global directml_enabled + if FORCE_FP32: return False + if directml_enabled: + return False + if cpu_mode() or mps_mode() or xpu_available: return False #TODO ? 
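The comfy/model_management.py hunk above adds torch-directml support behind the new --directml flag: a negative (or defaulted) index selects the default DirectML adapter, an explicit index selects a specific one, and the VRAM/free-memory numbers are stubbed with placeholder values because DirectML does not report them. Below is a condensed, hedged sketch of that device-selection logic with the CLI parsing replaced by a plain parameter; `pick_device` is a hypothetical helper, not a name from the diff.

```python
# Hedged sketch of the selection logic added above; the optional torch-directml
# package is only imported when a DirectML index is actually requested.
import torch

def pick_device(directml_index=None):
    """Return a torch device, preferring DirectML when an index is supplied."""
    if directml_index is not None:
        import torch_directml
        if directml_index < 0:
            return torch_directml.device()             # default adapter, as in the diff
        return torch_directml.device(directml_index)   # explicit adapter index
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")
```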
diff --git a/comfy/sample.py b/comfy/sample.py new file mode 100644 index 000000000..bd38585ac --- /dev/null +++ b/comfy/sample.py @@ -0,0 +1,83 @@ +import torch +import comfy.model_management +import comfy.samplers +import math + +def prepare_noise(latent_image, seed, skip=0): + """ + creates random noise given a latent image and a seed. + optional arg skip can be used to skip and discard x number of noise generations for a given seed + """ + generator = torch.manual_seed(seed) + for _ in range(skip): + noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + return noise + +def prepare_mask(noise_mask, shape, device): + """ensures noise mask is of proper dimensions""" + noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear") + noise_mask = noise_mask.round() + noise_mask = torch.cat([noise_mask] * shape[1], dim=1) + if noise_mask.shape[0] < shape[0]: + noise_mask = noise_mask.repeat(math.ceil(shape[0] / noise_mask.shape[0]), 1, 1, 1)[:shape[0]] + noise_mask = noise_mask.to(device) + return noise_mask + +def broadcast_cond(cond, batch, device): + """broadcasts conditioning to the batch size""" + copy = [] + for p in cond: + t = p[0] + if t.shape[0] < batch: + t = torch.cat([t] * batch) + t = t.to(device) + copy += [[t] + p[1:]] + return copy + +def get_models_from_cond(cond, model_type): + models = [] + for c in cond: + if model_type in c[1]: + models += [c[1][model_type]] + return models + +def load_additional_models(positive, negative): + """loads additional models in positive and negative conditioning""" + control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control") + gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen") + gligen = [x[1] for x in gligen] + models = control_nets + gligen + comfy.model_management.load_controlnet_gpu(models) + return models + +def cleanup_additional_models(models): + """cleanup additional models that were loaded""" + for m in models: + m.cleanup() + +def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None, callback=None, disable_pbar=False): + device = comfy.model_management.get_torch_device() + + if noise_mask is not None: + noise_mask = prepare_mask(noise_mask, noise.shape, device) + + real_model = None + comfy.model_management.load_model_gpu(model) + real_model = model.model + + noise = noise.to(device) + latent_image = latent_image.to(device) + + positive_copy = broadcast_cond(positive, noise.shape[0], device) + negative_copy = broadcast_cond(negative, noise.shape[0], device) + + models = load_additional_models(positive, negative) + + sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) + + samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar) + samples = samples.cpu() 
+ + cleanup_additional_models(models) + return samples diff --git a/comfy/samplers.py b/comfy/samplers.py index 15527224e..dcf93cca2 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -23,21 +23,36 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con adm_cond = cond[1]['adm_encoded'] input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] - mult = torch.ones_like(input_x) * strength + if 'mask' in cond[1]: + # Scale the mask to the size of the input + # The mask should have been resized as we began the sampling process + mask_strength = 1.0 + if "mask_strength" in cond[1]: + mask_strength = cond[1]["mask_strength"] + mask = cond[1]['mask'] + assert(mask.shape[1] == x_in.shape[2]) + assert(mask.shape[2] == x_in.shape[3]) + mask = mask[:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength + mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1) + else: + mask = torch.ones_like(input_x) + mult = mask * strength + + if 'mask' not in cond[1]: + rr = 8 + if area[2] != 0: + for t in range(rr): + mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1)) + if (area[0] + area[2]) < x_in.shape[2]: + for t in range(rr): + mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1)) + if area[3] != 0: + for t in range(rr): + mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1)) + if (area[1] + area[3]) < x_in.shape[3]: + for t in range(rr): + mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1)) - rr = 8 - if area[2] != 0: - for t in range(rr): - mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1)) - if (area[0] + area[2]) < x_in.shape[2]: - for t in range(rr): - mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1)) - if area[3] != 0: - for t in range(rr): - mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1)) - if (area[1] + area[3]) < x_in.shape[3]: - for t in range(rr): - mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1)) conditionning = {} conditionning['c_crossattn'] = cond[0] if cond_concat_in is not None and len(cond_concat_in) > 0: @@ -197,7 +212,15 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con transformer_options = model_options['transformer_options'].copy() if patches is not None: - transformer_options["patches"] = patches + if "patches" in transformer_options: + cur_patches = transformer_options["patches"].copy() + for p in patches: + if p in cur_patches: + cur_patches[p] = cur_patches[p] + patches[p] + else: + cur_patches[p] = patches[p] + else: + transformer_options["patches"] = patches c['transformer_options'] = transformer_options @@ -293,6 +316,71 @@ def blank_inpaint_image_like(latent_image): blank_image[:,3] *= 0.1380 return blank_image +def get_mask_aabb(masks): + if masks.numel() == 0: + return torch.zeros((0, 4), device=masks.device, dtype=torch.int) + + b = masks.shape[0] + + bounding_boxes = torch.zeros((b, 4), device=masks.device, dtype=torch.int) + is_empty = torch.zeros((b), device=masks.device, dtype=torch.bool) + for i in range(b): + mask = masks[i] + if mask.numel() == 0: + continue + if torch.max(mask != 0) == False: + is_empty[i] = True + continue + y, x = torch.where(mask) + bounding_boxes[i, 0] = torch.min(x) + bounding_boxes[i, 1] = torch.min(y) + bounding_boxes[i, 2] = torch.max(x) + bounding_boxes[i, 3] = torch.max(y) + + return bounding_boxes, is_empty + +def resolve_cond_masks(conditions, h, w, device): + # We need to decide on an area outside the sampling loop in order to properly generate opposite areas of equal sizes. 
+ # While we're doing this, we can also resolve the mask device and scaling for performance reasons + for i in range(len(conditions)): + c = conditions[i] + if 'mask' in c[1]: + mask = c[1]['mask'] + mask = mask.to(device=device) + modified = c[1].copy() + if len(mask.shape) == 2: + mask = mask.unsqueeze(0) + if mask.shape[2] != h or mask.shape[3] != w: + mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(h, w), mode='bilinear', align_corners=False).squeeze(1) + + if modified.get("set_area_to_bounds", False): + bounds = torch.max(torch.abs(mask),dim=0).values.unsqueeze(0) + boxes, is_empty = get_mask_aabb(bounds) + if is_empty[0]: + # Use the minimum possible size for efficiency reasons. (Since the mask is all-0, this becomes a noop anyway) + modified['area'] = (8, 8, 0, 0) + else: + box = boxes[0] + H, W, Y, X = (box[3] - box[1] + 1, box[2] - box[0] + 1, box[1], box[0]) + # Make sure the height and width are divisible by 8 + if X % 8 != 0: + newx = X // 8 * 8 + W = W + (X - newx) + X = newx + if Y % 8 != 0: + newy = Y // 8 * 8 + H = H + (Y - newy) + Y = newy + if H % 8 != 0: + H = H + (8 - (H % 8)) + if W % 8 != 0: + W = W + (8 - (W % 8)) + area = (int(H), int(W), int(Y), int(X)) + modified['area'] = area + + modified['mask'] = mask + conditions[i] = [c[0], modified] + def create_cond_with_same_area_if_none(conds, c): if 'area' not in c[1]: return @@ -421,7 +509,7 @@ class KSampler: self.denoise = denoise self.model_options = model_options - def _calculate_sigmas(self, steps): + def calculate_sigmas(self, steps): sigmas = None discard_penultimate_sigma = False @@ -430,13 +518,13 @@ class KSampler: discard_penultimate_sigma = True if self.scheduler == "karras": - sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max, device=self.device) + sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max) elif self.scheduler == "normal": - sigmas = self.model_wrap.get_sigmas(steps).to(self.device) + sigmas = self.model_wrap.get_sigmas(steps) elif self.scheduler == "simple": - sigmas = simple_scheduler(self.model_wrap, steps).to(self.device) + sigmas = simple_scheduler(self.model_wrap, steps) elif self.scheduler == "ddim_uniform": - sigmas = ddim_scheduler(self.model_wrap, steps).to(self.device) + sigmas = ddim_scheduler(self.model_wrap, steps) else: print("error invalid scheduler", self.scheduler) @@ -447,15 +535,15 @@ class KSampler: def set_steps(self, steps, denoise=None): self.steps = steps if denoise is None or denoise > 0.9999: - self.sigmas = self._calculate_sigmas(steps) + self.sigmas = self.calculate_sigmas(steps).to(self.device) else: new_steps = int(steps/denoise) - sigmas = self._calculate_sigmas(new_steps) + sigmas = self.calculate_sigmas(new_steps).to(self.device) self.sigmas = sigmas[-(steps + 1):] - - def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None): - sigmas = self.sigmas + def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None, callback=None, disable_pbar=False): + if sigmas is None: + sigmas = self.sigmas sigma_min = self.sigma_min if last_step is not None and last_step < (len(sigmas) - 1): @@ -475,6 +563,10 @@ class KSampler: positive = positive[:] negative = negative[:] + + resolve_cond_masks(positive, noise.shape[2], noise.shape[3], self.device) + 
resolve_cond_masks(negative, noise.shape[2], noise.shape[3], self.device) + #make sure each cond area has an opposite one with the same area for c in positive: create_cond_with_same_area_if_none(negative, c) @@ -518,9 +610,9 @@ class KSampler: with precision_scope(model_management.get_autocast_device(self.device)): if self.sampler == "uni_pc": - samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask) + samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback, disable=disable_pbar) elif self.sampler == "uni_pc_bh2": - samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, variant='bh2') + samples = uni_pc.sample_unipc(self.model_wrap, noise, latent_image, sigmas, sampling_function=sampling_function, max_denoise=max_denoise, extra_args=extra_args, noise_mask=denoise_mask, callback=callback, variant='bh2', disable=disable_pbar) elif self.sampler == "ddim": timesteps = [] for s in range(sigmas.shape[0]): @@ -528,6 +620,12 @@ class KSampler: noise_mask = None if denoise_mask is not None: noise_mask = 1.0 - denoise_mask + + ddim_callback = None + if callback is not None: + total_steps = len(timesteps) - 1 + ddim_callback = lambda pred_x0, i: callback(i, pred_x0, None, total_steps) + sampler = DDIMSampler(self.model, device=self.device) sampler.make_schedule_timesteps(ddim_timesteps=timesteps, verbose=False) z_enc = sampler.stochastic_encode(latent_image, torch.tensor([len(timesteps) - 1] * noise.shape[0]).to(self.device), noise=noise, max_denoise=max_denoise) @@ -541,11 +639,13 @@ class KSampler: eta=0.0, x_T=z_enc, x0=latent_image, + img_callback=ddim_callback, denoise_function=sampling_function, extra_args=extra_args, mask=noise_mask, to_zero=sigmas[-1]==0, - end_step=sigmas.shape[0] - 1) + end_step=sigmas.shape[0] - 1, + disable_pbar=disable_pbar) else: extra_args["denoise_mask"] = denoise_mask @@ -554,13 +654,18 @@ class KSampler: noise = noise * sigmas[0] + k_callback = None + total_steps = len(sigmas) - 1 + if callback is not None: + k_callback = lambda x: callback(x["i"], x["denoised"], x["x"], total_steps) + if latent_image is not None: noise += latent_image if self.sampler == "dpm_fast": - samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], self.steps, extra_args=extra_args) + samples = k_diffusion_sampling.sample_dpm_fast(self.model_k, noise, sigma_min, sigmas[0], total_steps, extra_args=extra_args, callback=k_callback, disable=disable_pbar) elif self.sampler == "dpm_adaptive": - samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args) + samples = k_diffusion_sampling.sample_dpm_adaptive(self.model_k, noise, sigma_min, sigmas[0], extra_args=extra_args, callback=k_callback, disable=disable_pbar) else: - samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas, extra_args=extra_args) + samples = getattr(k_diffusion_sampling, "sample_{}".format(self.sampler))(self.model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar) return samples.to(torch.float32) diff --git a/comfy/sd.py b/comfy/sd.py index 
211acd70e..3543bdb77 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -2,8 +2,8 @@ import torch import contextlib import copy -import sd1_clip -import sd2_clip +from . import sd1_clip +from . import sd2_clip from comfy import model_management from .ldm.util import instantiate_from_config from .ldm.models.autoencoder import AutoencoderKL @@ -111,6 +111,8 @@ def load_lora(path, to_load): loaded_keys.add(A_name) loaded_keys.add(B_name) + + ######## loha hada_w1_a_name = "{}.hada_w1_a".format(x) hada_w1_b_name = "{}.hada_w1_b".format(x) hada_w2_a_name = "{}.hada_w2_a".format(x) @@ -132,6 +134,54 @@ def load_lora(path, to_load): loaded_keys.add(hada_w2_a_name) loaded_keys.add(hada_w2_b_name) + + ######## lokr + lokr_w1_name = "{}.lokr_w1".format(x) + lokr_w2_name = "{}.lokr_w2".format(x) + lokr_w1_a_name = "{}.lokr_w1_a".format(x) + lokr_w1_b_name = "{}.lokr_w1_b".format(x) + lokr_t2_name = "{}.lokr_t2".format(x) + lokr_w2_a_name = "{}.lokr_w2_a".format(x) + lokr_w2_b_name = "{}.lokr_w2_b".format(x) + + lokr_w1 = None + if lokr_w1_name in lora.keys(): + lokr_w1 = lora[lokr_w1_name] + loaded_keys.add(lokr_w1_name) + + lokr_w2 = None + if lokr_w2_name in lora.keys(): + lokr_w2 = lora[lokr_w2_name] + loaded_keys.add(lokr_w2_name) + + lokr_w1_a = None + if lokr_w1_a_name in lora.keys(): + lokr_w1_a = lora[lokr_w1_a_name] + loaded_keys.add(lokr_w1_a_name) + + lokr_w1_b = None + if lokr_w1_b_name in lora.keys(): + lokr_w1_b = lora[lokr_w1_b_name] + loaded_keys.add(lokr_w1_b_name) + + lokr_w2_a = None + if lokr_w2_a_name in lora.keys(): + lokr_w2_a = lora[lokr_w2_a_name] + loaded_keys.add(lokr_w2_a_name) + + lokr_w2_b = None + if lokr_w2_b_name in lora.keys(): + lokr_w2_b = lora[lokr_w2_b_name] + loaded_keys.add(lokr_w2_b_name) + + lokr_t2 = None + if lokr_t2_name in lora.keys(): + lokr_t2 = lora[lokr_t2_name] + loaded_keys.add(lokr_t2_name) + + if (lokr_w1 is not None) or (lokr_w2 is not None) or (lokr_w1_a is not None) or (lokr_w2_a is not None): + patch_dict[to_load[x]] = (lokr_w1, lokr_w2, alpha, lokr_w1_a, lokr_w1_b, lokr_w2_a, lokr_w2_b, lokr_t2) + for x in lora.keys(): if x not in loaded_keys: print("lora key not loaded", x) @@ -254,6 +304,29 @@ class ModelPatcher: def set_model_sampler_cfg_function(self, sampler_cfg_function): self.model_options["sampler_cfg_function"] = sampler_cfg_function + + def set_model_patch(self, patch, name): + to = self.model_options["transformer_options"] + if "patches" not in to: + to["patches"] = {} + to["patches"][name] = to["patches"].get(name, []) + [patch] + + def set_model_attn1_patch(self, patch): + self.set_model_patch(patch, "attn1_patch") + + def set_model_attn2_patch(self, patch): + self.set_model_patch(patch, "attn2_patch") + + def model_patches_to(self, device): + to = self.model_options["transformer_options"] + if "patches" in to: + patches = to["patches"] + for name in patches: + patch_list = patches[name] + for i in range(len(patch_list)): + if hasattr(patch_list[i], "to"): + patch_list[i] = patch_list[i].to(device) + def model_dtype(self): return self.model.diffusion_model.dtype @@ -292,6 +365,33 @@ class ModelPatcher: final_shape = [mat2.shape[1], mat2.shape[0], v[3].shape[2], v[3].shape[3]] mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1).float(), v[3].transpose(0, 1).flatten(start_dim=1).float()).reshape(final_shape).transpose(0, 1) weight += (alpha * torch.mm(mat1.flatten(start_dim=1).float(), mat2.flatten(start_dim=1).float())).reshape(weight.shape).type(weight.dtype).to(weight.device) + elif len(v) == 8: #lokr + w1 = v[0] + w2 = v[1] + 
w1_a = v[3] + w1_b = v[4] + w2_a = v[5] + w2_b = v[6] + t2 = v[7] + dim = None + + if w1 is None: + dim = w1_b.shape[0] + w1 = torch.mm(w1_a.float(), w1_b.float()) + + if w2 is None: + dim = w2_b.shape[0] + if t2 is None: + w2 = torch.mm(w2_a.float(), w2_b.float()) + else: + w2 = torch.einsum('i j k l, j r, i p -> p r k l', t2.float(), w2_b.float(), w2_a.float()) + + if len(w2.shape) == 4: + w1 = w1.unsqueeze(2).unsqueeze(2) + if v[2] is not None and dim is not None: + alpha *= v[2] / dim + + weight += alpha * torch.kron(w1.float(), w2.float()).reshape(weight.shape).type(weight.dtype).to(weight.device) else: #loha w1a = v[0] w1b = v[1] @@ -346,10 +446,10 @@ class CLIP: else: params = {} - if self.target_clip == "ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder": + if self.target_clip.endswith("FrozenOpenCLIPEmbedder"): clip = sd2_clip.SD2ClipModel tokenizer = sd2_clip.SD2Tokenizer - elif self.target_clip == "ldm.modules.encoders.modules.FrozenCLIPEmbedder": + elif self.target_clip.endswith("FrozenCLIPEmbedder"): clip = sd1_clip.SD1ClipModel tokenizer = sd1_clip.SD1Tokenizer @@ -414,11 +514,16 @@ class VAE: self.device = device def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16): + steps = samples.shape[0] * utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap) + steps += samples.shape[0] * utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap) + steps += samples.shape[0] * utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap) + pbar = utils.ProgressBar(steps) + decode_fn = lambda a: (self.first_stage_model.decode(1. / self.scale_factor * a.to(self.device)) + 1.0) output = torch.clamp(( - (utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = 8) + - utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = 8) + - utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = 8)) + (utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = 8, pbar = pbar) + + utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = 8, pbar = pbar) + + utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = 8, pbar = pbar)) / 3.0) / 2.0, min=0.0, max=1.0) return output @@ -462,9 +567,15 @@ class VAE: model_management.unload_model() self.first_stage_model = self.first_stage_model.to(self.device) pixel_samples = pixel_samples.movedim(-1,1).to(self.device) - samples = utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4) - samples += utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4) - samples += utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. 
* a - 1.).sample() * self.scale_factor, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4) + + steps = pixel_samples.shape[0] * utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap) + steps += pixel_samples.shape[0] * utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap) + steps += pixel_samples.shape[0] * utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap) + pbar = utils.ProgressBar(steps) + + samples = utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar) + samples += utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar) + samples += utils.tiled_scale(pixel_samples, lambda a: self.first_stage_model.encode(2. * a - 1.).sample() * self.scale_factor, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar) samples /= 3.0 self.first_stage_model = self.first_stage_model.cpu() samples = samples.cpu() @@ -785,9 +896,9 @@ def load_clip(ckpt_path, embedding_directory=None): clip_data = utils.load_torch_file(ckpt_path) config = {} if "text_model.encoder.layers.22.mlp.fc1.weight" in clip_data: - config['target'] = 'ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder' + config['target'] = 'comfy.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder' else: - config['target'] = 'ldm.modules.encoders.modules.FrozenCLIPEmbedder' + config['target'] = 'comfy.ldm.modules.encoders.modules.FrozenCLIPEmbedder' clip = CLIP(config=config, embedding_directory=embedding_directory) clip.load_from_state_dict(clip_data) return clip @@ -863,9 +974,9 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o if output_clip: clip_config = {} if "cond_stage_model.model.transformer.resblocks.22.attn.out_proj.weight" in sd_keys: - clip_config['target'] = 'ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder' + clip_config['target'] = 'comfy.ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder' else: - clip_config['target'] = 'ldm.modules.encoders.modules.FrozenCLIPEmbedder' + clip_config['target'] = 'comfy.ldm.modules.encoders.modules.FrozenCLIPEmbedder' clip = CLIP(config=clip_config, embedding_directory=embedding_directory) w.cond_stage_model = clip.cond_stage_model load_state_dict_to = [w] @@ -886,7 +997,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o noise_schedule_config["timesteps"] = sd[noise_aug_key].shape[0] noise_schedule_config["beta_schedule"] = "squaredcos_cap_v2" params["noise_schedule_config"] = noise_schedule_config - noise_aug_config['target'] = "ldm.modules.encoders.noise_aug_modules.CLIPEmbeddingNoiseAugmentation" + noise_aug_config['target'] = "comfy.ldm.modules.encoders.noise_aug_modules.CLIPEmbeddingNoiseAugmentation" if size == 1280: #h params["timestep_dim"] = 1024 elif size == 1024: #l @@ -938,19 +1049,19 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o unet_config["in_channels"] = sd['model.diffusion_model.input_blocks.0.0.weight'].shape[1] unet_config["context_dim"] = sd['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight'].shape[1] - sd_config["unet_config"] = {"target": 
"ldm.modules.diffusionmodules.openaimodel.UNetModel", "params": unet_config} - model_config = {"target": "ldm.models.diffusion.ddpm.LatentDiffusion", "params": sd_config} + sd_config["unet_config"] = {"target": "comfy.ldm.modules.diffusionmodules.openaimodel.UNetModel", "params": unet_config} + model_config = {"target": "comfy.ldm.models.diffusion.ddpm.LatentDiffusion", "params": sd_config} if noise_aug_config is not None: #SD2.x unclip model sd_config["noise_aug_config"] = noise_aug_config sd_config["image_size"] = 96 sd_config["embedding_dropout"] = 0.25 sd_config["conditioning_key"] = 'crossattn-adm' - model_config["target"] = "ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion" + model_config["target"] = "comfy.ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion" elif unet_config["in_channels"] > 4: #inpainting model sd_config["conditioning_key"] = "hybrid" sd_config["finetune_keys"] = None - model_config["target"] = "ldm.models.diffusion.ddpm.LatentInpaintDiffusion" + model_config["target"] = "comfy.ldm.models.diffusion.ddpm.LatentInpaintDiffusion" else: sd_config["conditioning_key"] = "crossattn" diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index 7f1217c3d..b1a392736 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -191,11 +191,20 @@ def safe_load_embed_zip(embed_path): del embed return out +def expand_directory_list(directories): + dirs = set() + for x in directories: + dirs.add(x) + for root, subdir, file in os.walk(x, followlinks=True): + dirs.add(root) + return list(dirs) def load_embed(embedding_name, embedding_directory): if isinstance(embedding_directory, str): embedding_directory = [embedding_directory] + embedding_directory = expand_directory_list(embedding_directory) + valid_file = None for embed_dir in embedding_directory: embed_path = os.path.join(embed_dir, embedding_name) diff --git a/comfy/utils.py b/comfy/utils.py index 0380b91dd..09e05d4ed 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -1,11 +1,15 @@ import torch +import math -def load_torch_file(ckpt): +def load_torch_file(ckpt, safe_load=False): if ckpt.lower().endswith(".safetensors"): import safetensors.torch sd = safetensors.torch.load_file(ckpt, device="cpu") else: - pl_sd = torch.load(ckpt, map_location="cpu") + if safe_load: + pl_sd = torch.load(ckpt, map_location="cpu", weights_only=True) + else: + pl_sd = torch.load(ckpt, map_location="cpu") if "global_step" in pl_sd: print(f"Global Step: {pl_sd['global_step']}") if "state_dict" in pl_sd: @@ -59,8 +63,11 @@ def common_upscale(samples, width, height, upscale_method, crop): s = samples return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method) +def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap): + return math.ceil((height / (tile_y - overlap))) * math.ceil((width / (tile_x - overlap))) + @torch.inference_mode() -def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3): +def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_amount = 4, out_channels = 3, pbar = None): output = torch.empty((samples.shape[0], out_channels, round(samples.shape[2] * upscale_amount), round(samples.shape[3] * upscale_amount)), device="cpu") for b in range(samples.shape[0]): s = samples[b:b+1] @@ -80,6 +87,33 @@ def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_am mask[:,:,:,mask.shape[3]- 1 - t: mask.shape[3]- t] *= ((1.0/feather) * (t + 1)) 
out[:,:,round(y*upscale_amount):round((y+tile_y)*upscale_amount),round(x*upscale_amount):round((x+tile_x)*upscale_amount)] += ps * mask out_div[:,:,round(y*upscale_amount):round((y+tile_y)*upscale_amount),round(x*upscale_amount):round((x+tile_x)*upscale_amount)] += mask + if pbar is not None: + pbar.update(1) output[b:b+1] = out/out_div return output + + +PROGRESS_BAR_HOOK = None +def set_progress_bar_global_hook(function): + global PROGRESS_BAR_HOOK + PROGRESS_BAR_HOOK = function + +class ProgressBar: + def __init__(self, total): + global PROGRESS_BAR_HOOK + self.total = total + self.current = 0 + self.hook = PROGRESS_BAR_HOOK + + def update_absolute(self, value, total=None): + if total is not None: + self.total = total + if value > self.total: + value = self.total + self.current = value + if self.hook is not None: + self.hook(self.current, self.total) + + def update(self, value): + self.update_absolute(self.current + value) diff --git a/comfy_extras/chainner_models/architecture/block.py b/comfy_extras/chainner_models/architecture/block.py index 1abe1ed8f..214642cc4 100644 --- a/comfy_extras/chainner_models/architecture/block.py +++ b/comfy_extras/chainner_models/architecture/block.py @@ -4,7 +4,10 @@ from __future__ import annotations from collections import OrderedDict -from typing import Literal +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal import torch import torch.nn as nn diff --git a/comfy_extras/nodes_hypernetwork.py b/comfy_extras/nodes_hypernetwork.py new file mode 100644 index 000000000..c19b5e4c7 --- /dev/null +++ b/comfy_extras/nodes_hypernetwork.py @@ -0,0 +1,110 @@ +import comfy.utils +import folder_paths +import torch + +def load_hypernetwork_patch(path, strength): + sd = comfy.utils.load_torch_file(path, safe_load=True) + activation_func = sd.get('activation_func', 'linear') + is_layer_norm = sd.get('is_layer_norm', False) + use_dropout = sd.get('use_dropout', False) + activate_output = sd.get('activate_output', False) + last_layer_dropout = sd.get('last_layer_dropout', False) + + valid_activation = { + "linear": torch.nn.Identity, + "relu": torch.nn.ReLU, + "leakyrelu": torch.nn.LeakyReLU, + "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish, + "tanh": torch.nn.Tanh, + "sigmoid": torch.nn.Sigmoid, + "softsign": torch.nn.Softsign, + } + + if activation_func not in valid_activation: + print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout) + return None + + out = {} + + for d in sd: + try: + dim = int(d) + except: + continue + + output = [] + for index in [0, 1]: + attn_weights = sd[dim][index] + keys = attn_weights.keys() + + linears = filter(lambda a: a.endswith(".weight"), keys) + linears = list(map(lambda a: a[:-len(".weight")], linears)) + layers = [] + + for i in range(len(linears)): + lin_name = linears[i] + last_layer = (i == (len(linears) - 1)) + penultimate_layer = (i == (len(linears) - 2)) + + lin_weight = attn_weights['{}.weight'.format(lin_name)] + lin_bias = attn_weights['{}.bias'.format(lin_name)] + layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0]) + layer.load_state_dict({"weight": lin_weight, "bias": lin_bias}) + layers.append(layer) + if activation_func != "linear": + if (not last_layer) or (activate_output): + layers.append(valid_activation[activation_func]()) + if is_layer_norm: + layers.append(torch.nn.LayerNorm(lin_weight.shape[0])) + if use_dropout: + if (not 
last_layer) and (not penultimate_layer or last_layer_dropout): + layers.append(torch.nn.Dropout(p=0.3)) + + output.append(torch.nn.Sequential(*layers)) + out[dim] = torch.nn.ModuleList(output) + + class hypernetwork_patch: + def __init__(self, hypernet, strength): + self.hypernet = hypernet + self.strength = strength + def __call__(self, current_index, q, k, v): + dim = k.shape[-1] + if dim in self.hypernet: + hn = self.hypernet[dim] + k = k + hn[0](k) * self.strength + v = v + hn[1](v) * self.strength + + return q, k, v + + def to(self, device): + for d in self.hypernet.keys(): + self.hypernet[d] = self.hypernet[d].to(device) + return self + + return hypernetwork_patch(out, strength) + +class HypernetworkLoader: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "hypernetwork_name": (folder_paths.get_filename_list("hypernetworks"), ), + "strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "load_hypernetwork" + + CATEGORY = "loaders" + + def load_hypernetwork(self, model, hypernetwork_name, strength): + hypernetwork_path = folder_paths.get_full_path("hypernetworks", hypernetwork_name) + model_hypernetwork = model.clone() + patch = load_hypernetwork_patch(hypernetwork_path, strength) + if patch is not None: + model_hypernetwork.set_model_attn1_patch(patch) + model_hypernetwork.set_model_attn2_patch(patch) + return (model_hypernetwork,) + +NODE_CLASS_MAPPINGS = { + "HypernetworkLoader": HypernetworkLoader +} diff --git a/comfy_extras/nodes_upscale_model.py b/comfy_extras/nodes_upscale_model.py index d8754698c..ab5b0ccfc 100644 --- a/comfy_extras/nodes_upscale_model.py +++ b/comfy_extras/nodes_upscale_model.py @@ -37,7 +37,12 @@ class ImageUpscaleWithModel: device = model_management.get_torch_device() upscale_model.to(device) in_img = image.movedim(-1,-3).to(device) - s = comfy.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=128 + 64, tile_y=128 + 64, overlap = 8, upscale_amount=upscale_model.scale) + + tile = 128 + 64 + overlap = 8 + steps = in_img.shape[0] * comfy.utils.get_tiled_scale_steps(in_img.shape[3], in_img.shape[2], tile_x=tile, tile_y=tile, overlap=overlap) + pbar = comfy.utils.ProgressBar(steps) + s = comfy.utils.tiled_scale(in_img, lambda a: upscale_model(a), tile_x=tile, tile_y=tile, overlap=overlap, upscale_amount=upscale_model.scale, pbar=pbar) upscale_model.cpu() s = torch.clamp(s.movedim(-3,-1), min=0, max=1.0) return (s,) diff --git a/execution.py b/execution.py index 73be6db03..c19c10bc6 100644 --- a/execution.py +++ b/execution.py @@ -40,15 +40,13 @@ def get_input_data(inputs, class_def, unique_id, outputs={}, prompt={}, extra_da input_data_all[x] = unique_id return input_data_all -def recursive_execute(server, prompt, outputs, current_item, extra_data={}): +def recursive_execute(server, prompt, outputs, current_item, extra_data, executed): unique_id = current_item inputs = prompt[unique_id]['inputs'] class_type = prompt[unique_id]['class_type'] class_def = nodes.NODE_CLASS_MAPPINGS[class_type] if unique_id in outputs: - return [] - - executed = [] + return for x in inputs: input_data = inputs[x] @@ -57,7 +55,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}): input_unique_id = input_data[0] output_index = input_data[1] if input_unique_id not in outputs: - executed += recursive_execute(server, prompt, outputs, input_unique_id, extra_data) + recursive_execute(server, prompt, outputs, input_unique_id, extra_data, executed) 
input_data_all = get_input_data(inputs, class_def, unique_id, outputs, prompt, extra_data) if server.client_id is not None: @@ -72,7 +70,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}): server.send_sync("executed", { "node": unique_id, "output": outputs[unique_id]["ui"] }, server.client_id) if "result" in outputs[unique_id]: outputs[unique_id] = outputs[unique_id]["result"] - return executed + [unique_id] + executed.add(unique_id) def recursive_will_execute(prompt, outputs, current_item): unique_id = current_item @@ -99,40 +97,44 @@ def recursive_output_delete_if_changed(prompt, old_prompt, outputs, current_item is_changed_old = '' is_changed = '' + to_delete = False if hasattr(class_def, 'IS_CHANGED'): if unique_id in old_prompt and 'is_changed' in old_prompt[unique_id]: is_changed_old = old_prompt[unique_id]['is_changed'] if 'is_changed' not in prompt[unique_id]: input_data_all = get_input_data(inputs, class_def, unique_id, outputs) if input_data_all is not None: - is_changed = class_def.IS_CHANGED(**input_data_all) - prompt[unique_id]['is_changed'] = is_changed + try: + is_changed = class_def.IS_CHANGED(**input_data_all) + prompt[unique_id]['is_changed'] = is_changed + except: + to_delete = True else: is_changed = prompt[unique_id]['is_changed'] if unique_id not in outputs: return True - to_delete = False - if is_changed != is_changed_old: - to_delete = True - elif unique_id not in old_prompt: - to_delete = True - elif inputs == old_prompt[unique_id]['inputs']: - for x in inputs: - input_data = inputs[x] + if not to_delete: + if is_changed != is_changed_old: + to_delete = True + elif unique_id not in old_prompt: + to_delete = True + elif inputs == old_prompt[unique_id]['inputs']: + for x in inputs: + input_data = inputs[x] - if isinstance(input_data, list): - input_unique_id = input_data[0] - output_index = input_data[1] - if input_unique_id in outputs: - to_delete = recursive_output_delete_if_changed(prompt, old_prompt, outputs, input_unique_id) - else: - to_delete = True - if to_delete: - break - else: - to_delete = True + if isinstance(input_data, list): + input_unique_id = input_data[0] + output_index = input_data[1] + if input_unique_id in outputs: + to_delete = recursive_output_delete_if_changed(prompt, old_prompt, outputs, input_unique_id) + else: + to_delete = True + if to_delete: + break + else: + to_delete = True if to_delete: d = outputs.pop(unique_id) @@ -154,11 +156,20 @@ class PromptExecutor: self.server.client_id = None with torch.inference_mode(): + #delete cached outputs if nodes don't exist for them + to_delete = [] + for o in self.outputs: + if o not in prompt: + to_delete += [o] + for o in to_delete: + d = self.outputs.pop(o) + del d + for x in prompt: recursive_output_delete_if_changed(prompt, self.old_prompt, self.outputs, x) current_outputs = set(self.outputs.keys()) - executed = [] + executed = set() try: to_execute = [] for x in prompt: @@ -181,12 +192,12 @@ class PromptExecutor: except: valid = False if valid: - executed += recursive_execute(self.server, prompt, self.outputs, x, extra_data) + recursive_execute(self.server, prompt, self.outputs, x, extra_data, executed) except Exception as e: print(traceback.format_exc()) to_delete = [] for o in self.outputs: - if o not in current_outputs: + if (o not in current_outputs) and (o not in executed): to_delete += [o] if o in self.old_prompt: d = self.old_prompt.pop(o) @@ -194,11 +205,9 @@ class PromptExecutor: for o in to_delete: d = self.outputs.pop(o) del d - else: - executed = 
set(executed) + finally: for x in executed: self.old_prompt[x] = copy.deepcopy(prompt[x]) - finally: self.server.last_node_id = None if self.server.client_id is not None: self.server.send_sync("executing", { "node": None }, self.server.client_id) @@ -249,9 +258,15 @@ def validate_inputs(prompt, item): if "max" in info[1] and val > info[1]["max"]: return (False, "Value bigger than max. {}, {}".format(class_type, x)) - if isinstance(type_input, list): - if val not in type_input: - return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input)) + if hasattr(obj_class, "VALIDATE_INPUTS"): + input_data_all = get_input_data(inputs, obj_class, unique_id) + ret = obj_class.VALIDATE_INPUTS(**input_data_all) + if ret != True: + return (False, "{}, {}".format(class_type, ret)) + else: + if isinstance(type_input, list): + if val not in type_input: + return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input)) return (True, "") def validate_prompt(prompt): @@ -273,7 +288,8 @@ def validate_prompt(prompt): m = validate_inputs(prompt, o) valid = m[0] reason = m[1] - except: + except Exception as e: + print(traceback.format_exc()) valid = False reason = "Parsing error" diff --git a/extra_model_paths.yaml.example b/extra_model_paths.yaml.example index ac1ffe9d2..fa5418a68 100644 --- a/extra_model_paths.yaml.example +++ b/extra_model_paths.yaml.example @@ -13,6 +13,7 @@ a111: models/ESRGAN models/SwinIR embeddings: embeddings + hypernetworks: models/hypernetworks controlnet: models/ControlNet #other_ui: diff --git a/folder_paths.py b/folder_paths.py index 3c4ad3711..e5b89492c 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -32,6 +32,7 @@ folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_m folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], []) +folder_names_and_paths["hypernetworks"] = ([os.path.join(models_dir, "hypernetworks")], supported_pt_extensions) output_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output") temp_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp") @@ -68,6 +69,46 @@ def get_directory_by_type(type_name): return None +# determine base_dir rely on annotation if name is 'filename.ext [annotation]' format +# otherwise use default_path as base_dir +def annotated_filepath(name): + if name.endswith("[output]"): + base_dir = get_output_directory() + name = name[:-9] + elif name.endswith("[input]"): + base_dir = get_input_directory() + name = name[:-8] + elif name.endswith("[temp]"): + base_dir = get_temp_directory() + name = name[:-7] + else: + return name, None + + return name, base_dir + + +def get_annotated_filepath(name, default_dir=None): + name, base_dir = annotated_filepath(name) + + if base_dir is None: + if default_dir is not None: + base_dir = default_dir + else: + base_dir = get_input_directory() # fallback path + + return os.path.join(base_dir, name) + + +def exists_annotated_filepath(name): + name, base_dir = annotated_filepath(name) + + if base_dir is None: + base_dir = get_input_directory() # fallback path + + filepath = os.path.join(base_dir, name) + return os.path.exists(filepath) + + def add_model_folder_path(folder_name, full_folder_path): global folder_names_and_paths if folder_name in folder_names_and_paths: diff --git a/main.py b/main.py index 02c700ebc..f369b82f3 100644 --- a/main.py +++ b/main.py @@ -5,6 +5,7 @@ import shutil import threading from comfy.cli_args import args +import 
comfy.utils if os.name == "nt": import logging @@ -39,14 +40,9 @@ async def run(server, address='', port=8188, verbose=True, call_on_start=None): await asyncio.gather(server.start(address, port, verbose, call_on_start), server.publish_loop()) def hijack_progress(server): - from tqdm.auto import tqdm - orig_func = getattr(tqdm, "update") - def wrapped_func(*args, **kwargs): - pbar = args[0] - v = orig_func(*args, **kwargs) - server.send_sync("progress", { "value": pbar.n, "max": pbar.total}, server.client_id) - return v - setattr(tqdm, "update", wrapped_func) + def hook(value, total): + server.send_sync("progress", { "value": value, "max": total}, server.client_id) + comfy.utils.set_progress_bar_global_hook(hook) def cleanup_temp(): temp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp") diff --git a/models/hypernetworks/put_hypernetworks_here b/models/hypernetworks/put_hypernetworks_here new file mode 100644 index 000000000..e69de29bb diff --git a/nodes.py b/nodes.py index 48c3ee9c3..c2bc36855 100644 --- a/nodes.py +++ b/nodes.py @@ -5,6 +5,7 @@ import sys import json import hashlib import traceback +import math from PIL import Image from PIL.PngImagePlugin import PngInfo @@ -16,6 +17,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "co import comfy.diffusers_convert import comfy.samplers +import comfy.sample import comfy.sd import comfy.utils @@ -58,14 +60,44 @@ class ConditioningCombine: def combine(self, conditioning_1, conditioning_2): return (conditioning_1 + conditioning_2, ) +class ConditioningAverage : + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning_to": ("CONDITIONING", ), "conditioning_from": ("CONDITIONING", ), + "conditioning_to_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}) + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "addWeighted" + + CATEGORY = "conditioning" + + def addWeighted(self, conditioning_to, conditioning_from, conditioning_to_strength): + out = [] + + if len(conditioning_from) > 1: + print("Warning: ConditioningAverage conditioning_from contains more than 1 cond, only the first one will actually be applied to conditioning_to.") + + cond_from = conditioning_from[0][0] + + for i in range(len(conditioning_to)): + t1 = conditioning_to[i][0] + t0 = cond_from[:,:t1.shape[1]] + if t0.shape[1] < t1.shape[1]: + t0 = torch.cat([t0] + [torch.zeros((1, (t1.shape[1] - t0.shape[1]), t1.shape[2]))], dim=1) + + tw = torch.mul(t1, conditioning_to_strength) + torch.mul(t0, (1.0 - conditioning_to_strength)) + n = [tw, conditioning_to[i][1].copy()] + out.append(n) + return (out, ) + class ConditioningSetArea: @classmethod def INPUT_TYPES(s): return {"required": {"conditioning": ("CONDITIONING", ), - "width": ("INT", {"default": 64, "min": 64, "max": MAX_RESOLUTION, "step": 64}), - "height": ("INT", {"default": 64, "min": 64, "max": MAX_RESOLUTION, "step": 64}), - "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 64}), - "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 64}), + "width": ("INT", {"default": 64, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 64, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), }} RETURN_TYPES = ("CONDITIONING",) @@ -79,11 +111,41 @@ class 
ConditioningSetArea: n = [t[0], t[1].copy()] n[1]['area'] = (height // 8, width // 8, y // 8, x // 8) n[1]['strength'] = strength + n[1]['set_area_to_bounds'] = False n[1]['min_sigma'] = min_sigma n[1]['max_sigma'] = max_sigma c.append(n) return (c, ) +class ConditioningSetMask: + @classmethod + def INPUT_TYPES(s): + return {"required": {"conditioning": ("CONDITIONING", ), + "mask": ("MASK", ), + "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "set_cond_area": (["default", "mask bounds"],), + }} + RETURN_TYPES = ("CONDITIONING",) + FUNCTION = "append" + + CATEGORY = "conditioning" + + def append(self, conditioning, mask, set_cond_area, strength): + c = [] + set_area_to_bounds = False + if set_cond_area != "default": + set_area_to_bounds = True + if len(mask.shape) < 3: + mask = mask.unsqueeze(0) + for t in conditioning: + n = [t[0], t[1].copy()] + _, h, w = mask.shape + n[1]['mask'] = mask + n[1]['set_area_to_bounds'] = set_area_to_bounds + n[1]['mask_strength'] = strength + c.append(n) + return (c, ) + class VAEDecode: def __init__(self, device="cpu"): self.device = device @@ -126,16 +188,21 @@ class VAEEncode: CATEGORY = "latent" - def encode(self, vae, pixels): - x = (pixels.shape[1] // 64) * 64 - y = (pixels.shape[2] // 64) * 64 + @staticmethod + def vae_encode_crop_pixels(pixels): + x = (pixels.shape[1] // 8) * 8 + y = (pixels.shape[2] // 8) * 8 if pixels.shape[1] != x or pixels.shape[2] != y: - pixels = pixels[:,:x,:y,:] + x_offset = (pixels.shape[1] % 8) // 2 + y_offset = (pixels.shape[2] % 8) // 2 + pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :] + return pixels + + def encode(self, vae, pixels): + pixels = self.vae_encode_crop_pixels(pixels) t = vae.encode(pixels[:,:,:,:3]) - return ({"samples":t}, ) - class VAEEncodeTiled: def __init__(self, device="cpu"): self.device = device @@ -149,46 +216,51 @@ class VAEEncodeTiled: CATEGORY = "_for_testing" def encode(self, vae, pixels): - x = (pixels.shape[1] // 64) * 64 - y = (pixels.shape[2] // 64) * 64 - if pixels.shape[1] != x or pixels.shape[2] != y: - pixels = pixels[:,:x,:y,:] + pixels = VAEEncode.vae_encode_crop_pixels(pixels) t = vae.encode_tiled(pixels[:,:,:,:3]) - return ({"samples":t}, ) + class VAEEncodeForInpaint: def __init__(self, device="cpu"): self.device = device @classmethod def INPUT_TYPES(s): - return {"required": { "pixels": ("IMAGE", ), "vae": ("VAE", ), "mask": ("MASK", )}} + return {"required": { "pixels": ("IMAGE", ), "vae": ("VAE", ), "mask": ("MASK", ), "grow_mask_by": ("INT", {"default": 6, "min": 0, "max": 64, "step": 1}),}} RETURN_TYPES = ("LATENT",) FUNCTION = "encode" CATEGORY = "latent/inpaint" - def encode(self, vae, pixels, mask): - x = (pixels.shape[1] // 64) * 64 - y = (pixels.shape[2] // 64) * 64 - mask = torch.nn.functional.interpolate(mask[None,None,], size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")[0][0] + def encode(self, vae, pixels, mask, grow_mask_by=6): + x = (pixels.shape[1] // 8) * 8 + y = (pixels.shape[2] // 8) * 8 + mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear") pixels = pixels.clone() if pixels.shape[1] != x or pixels.shape[2] != y: - pixels = pixels[:,:x,:y,:] - mask = mask[:x,:y] + x_offset = (pixels.shape[1] % 8) // 2 + y_offset = (pixels.shape[2] % 8) // 2 + pixels = pixels[:,x_offset:x + x_offset, y_offset:y + y_offset,:] + mask = mask[:,:,x_offset:x + x_offset, y_offset:y + y_offset] #grow mask by a few pixels to 
keep things seamless in latent space - kernel_tensor = torch.ones((1, 1, 6, 6)) - mask_erosion = torch.clamp(torch.nn.functional.conv2d((mask.round())[None], kernel_tensor, padding=3), 0, 1) - m = (1.0 - mask.round()) + if grow_mask_by == 0: + mask_erosion = mask + else: + kernel_tensor = torch.ones((1, 1, grow_mask_by, grow_mask_by)) + padding = math.ceil((grow_mask_by - 1) / 2) + + mask_erosion = torch.clamp(torch.nn.functional.conv2d(mask.round(), kernel_tensor, padding=padding), 0, 1) + + m = (1.0 - mask.round()).squeeze(1) for i in range(3): pixels[:,:,:,i] -= 0.5 pixels[:,:,:,i] *= m pixels[:,:,:,i] += 0.5 t = vae.encode(pixels) - return ({"samples":t, "noise_mask": (mask_erosion[0][:x,:y].round())}, ) + return ({"samples":t, "noise_mask": (mask_erosion[:,:,:x,:y].round())}, ) class CheckpointLoader: @classmethod @@ -542,8 +614,8 @@ class EmptyLatentImage: @classmethod def INPUT_TYPES(s): - return {"required": { "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 64}), - "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 64}), + return {"required": { "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), "batch_size": ("INT", {"default": 1, "min": 1, "max": 64})}} RETURN_TYPES = ("LATENT",) FUNCTION = "generate" @@ -581,8 +653,8 @@ class LatentUpscale: @classmethod def INPUT_TYPES(s): return {"required": { "samples": ("LATENT",), "upscale_method": (s.upscale_methods,), - "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 64}), - "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 64}), + "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), "crop": (s.crop_methods,)}} RETURN_TYPES = ("LATENT",) FUNCTION = "upscale" @@ -684,8 +756,8 @@ class LatentCrop: @classmethod def INPUT_TYPES(s): return {"required": { "samples": ("LATENT",), - "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 64}), - "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 64}), + "width": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), + "height": ("INT", {"default": 512, "min": 64, "max": MAX_RESOLUTION, "step": 8}), "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), }} @@ -710,16 +782,6 @@ class LatentCrop: new_width = width // 8 to_x = new_width + x to_y = new_height + y - def enforce_image_dim(d, to_d, max_d): - if to_d > max_d: - leftover = (to_d - max_d) % 8 - to_d = max_d - d -= leftover - return (d, to_d) - - #make sure size is always multiple of 64 - x, to_x = enforce_image_dim(x, to_x, samples.shape[3]) - y, to_y = enforce_image_dim(y, to_y, samples.shape[2]) s['samples'] = samples[:,:,y:to_y, x:to_x] return (s,) @@ -739,79 +801,27 @@ class SetLatentNoiseMask: s["noise_mask"] = mask return (s,) - def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False): - latent_image = latent["samples"] - noise_mask = None device = comfy.model_management.get_torch_device() + latent_image = latent["samples"] if disable_noise: noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, 
layout=latent_image.layout, device="cpu") else: - batch_index = 0 - if "batch_index" in latent: - batch_index = latent["batch_index"] - - generator = torch.manual_seed(seed) - for i in range(batch_index): - noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") - noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu") + skip = latent["batch_index"] if "batch_index" in latent else 0 + noise = comfy.sample.prepare_noise(latent_image, seed, skip) + noise_mask = None if "noise_mask" in latent: - noise_mask = latent['noise_mask'] - noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear") - noise_mask = noise_mask.round() - noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1) - noise_mask = torch.cat([noise_mask] * noise.shape[0]) - noise_mask = noise_mask.to(device) + noise_mask = latent["noise_mask"] - real_model = None - comfy.model_management.load_model_gpu(model) - real_model = model.model - - noise = noise.to(device) - latent_image = latent_image.to(device) - - positive_copy = [] - negative_copy = [] - - control_nets = [] - def get_models(cond): - models = [] - for c in cond: - if 'control' in c[1]: - models += [c[1]['control']] - if 'gligen' in c[1]: - models += [c[1]['gligen'][1]] - return models - - for p in positive: - t = p[0] - if t.shape[0] < noise.shape[0]: - t = torch.cat([t] * noise.shape[0]) - t = t.to(device) - positive_copy += [[t] + p[1:]] - for n in negative: - t = n[0] - if t.shape[0] < noise.shape[0]: - t = torch.cat([t] * noise.shape[0]) - t = t.to(device) - negative_copy += [[t] + n[1:]] - - models = get_models(positive) + get_models(negative) - comfy.model_management.load_controlnet_gpu(models) - - if sampler_name in comfy.samplers.KSampler.SAMPLERS: - sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) - else: - #other samplers - pass - - samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask) - samples = samples.cpu() - for m in models: - m.cleanup() + pbar = comfy.utils.ProgressBar(steps) + def callback(step, x0, x, total_steps): + pbar.update_absolute(step + 1, total_steps) + samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, + denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step, + force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback) out = latent.copy() out["samples"] = samples return (out, ) @@ -974,8 +984,7 @@ class LoadImage: RETURN_TYPES = ("IMAGE", "MASK") FUNCTION = "load_image" def load_image(self, image): - input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image) i = Image.open(image_path) image = i.convert("RGB") image = np.array(image).astype(np.float32) / 255.0 @@ -989,20 +998,27 @@ class LoadImage: @classmethod def IS_CHANGED(s, image): - input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image) m = hashlib.sha256() with open(image_path, 'rb') as 
f: m.update(f.read()) return m.digest().hex() + @classmethod + def VALIDATE_INPUTS(s, image): + if not folder_paths.exists_annotated_filepath(image): + return "Invalid image file: {}".format(image) + + return True + class LoadImageMask: + _color_channels = ["alpha", "red", "green", "blue"] @classmethod def INPUT_TYPES(s): input_dir = folder_paths.get_input_directory() return {"required": {"image": (sorted(os.listdir(input_dir)), ), - "channel": (["alpha", "red", "green", "blue"], ),} + "channel": (s._color_channels, ),} } CATEGORY = "mask" @@ -1010,8 +1026,7 @@ class LoadImageMask: RETURN_TYPES = ("MASK",) FUNCTION = "load_image" def load_image(self, image, channel): - input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image) i = Image.open(image_path) if i.getbands() != ("R", "G", "B", "A"): i = i.convert("RGBA") @@ -1028,13 +1043,22 @@ class LoadImageMask: @classmethod def IS_CHANGED(s, image, channel): - input_dir = folder_paths.get_input_directory() - image_path = os.path.join(input_dir, image) + image_path = folder_paths.get_annotated_filepath(image) m = hashlib.sha256() with open(image_path, 'rb') as f: m.update(f.read()) return m.digest().hex() + @classmethod + def VALIDATE_INPUTS(s, image, channel): + if not folder_paths.exists_annotated_filepath(image): + return "Invalid image file: {}".format(image) + + if channel not in s._color_channels: + return "Invalid color channel: {}".format(channel) + + return True + class ImageScale: upscale_methods = ["nearest-exact", "bilinear", "area"] crop_methods = ["disabled", "center"] @@ -1079,10 +1103,10 @@ class ImagePadForOutpaint: return { "required": { "image": ("IMAGE",), - "left": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 64}), - "top": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 64}), - "right": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 64}), - "bottom": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 64}), + "left": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "top": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "right": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "bottom": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), "feathering": ("INT", {"default": 40, "min": 0, "max": MAX_RESOLUTION, "step": 1}), } } @@ -1154,8 +1178,10 @@ NODE_CLASS_MAPPINGS = { "ImageScale": ImageScale, "ImageInvert": ImageInvert, "ImagePadForOutpaint": ImagePadForOutpaint, + "ConditioningAverage ": ConditioningAverage , "ConditioningCombine": ConditioningCombine, "ConditioningSetArea": ConditioningSetArea, + "ConditioningSetMask": ConditioningSetMask, "KSamplerAdvanced": KSamplerAdvanced, "SetLatentNoiseMask": SetLatentNoiseMask, "LatentComposite": LatentComposite, @@ -1204,7 +1230,9 @@ NODE_DISPLAY_NAME_MAPPINGS = { "CLIPTextEncode": "CLIP Text Encode (Prompt)", "CLIPSetLastLayer": "CLIP Set Last Layer", "ConditioningCombine": "Conditioning (Combine)", + "ConditioningAverage ": "Conditioning (Average)", "ConditioningSetArea": "Conditioning (Set Area)", + "ConditioningSetMask": "Conditioning (Set Mask)", "ControlNetApply": "Apply ControlNet", # Latent "VAEEncodeForInpaint": "VAE Encode (for Inpainting)", @@ -1268,6 +1296,7 @@ def load_custom_nodes(): def init_custom_nodes(): load_custom_nodes() + load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
"comfy_extras"), "nodes_hypernetwork.py")) load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_upscale_model.py")) load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_post_processing.py")) load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_mask.py")) diff --git a/notebooks/comfyui_colab.ipynb b/notebooks/comfyui_colab.ipynb index c1982d8be..fecfa6707 100644 --- a/notebooks/comfyui_colab.ipynb +++ b/notebooks/comfyui_colab.ipynb @@ -47,7 +47,7 @@ " !git pull\n", "\n", "!echo -= Install dependencies =-\n", - "!pip install xformers!=0.0.18 -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu118" + "!pip install xformers!=0.0.18 -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu118 --extra-index-url https://download.pytorch.org/whl/cu117" ] }, { diff --git a/server.py b/server.py index b5403670f..1c5c17916 100644 --- a/server.py +++ b/server.py @@ -112,13 +112,20 @@ class PromptServer(): @routes.post("/upload/image") async def upload_image(request): - upload_dir = folder_paths.get_input_directory() + post = await request.post() + image = post.get("image") + + if post.get("type") is None: + upload_dir = folder_paths.get_input_directory() + elif post.get("type") == "input": + upload_dir = folder_paths.get_input_directory() + elif post.get("type") == "temp": + upload_dir = folder_paths.get_temp_directory() + elif post.get("type") == "output": + upload_dir = folder_paths.get_output_directory() if not os.path.exists(upload_dir): os.makedirs(upload_dir) - - post = await request.post() - image = post.get("image") if image and image.file: filename = image.filename diff --git a/web/extensions/core/colorPalette.js b/web/extensions/core/colorPalette.js index 41541a8d8..2f2238a2b 100644 --- a/web/extensions/core/colorPalette.js +++ b/web/extensions/core/colorPalette.js @@ -232,10 +232,27 @@ app.registerExtension({ "name": "My Color Palette", "colors": { "node_slot": { + }, + "litegraph_base": { + }, + "comfy_base": { } } }; + // Copy over missing keys from default color palette + const defaultColorPalette = colorPalettes[defaultColorPaletteId]; + for (const key in defaultColorPalette.colors.litegraph_base) { + if (!colorPalette.colors.litegraph_base[key]) { + colorPalette.colors.litegraph_base[key] = ""; + } + } + for (const key in defaultColorPalette.colors.comfy_base) { + if (!colorPalette.colors.comfy_base[key]) { + colorPalette.colors.comfy_base[key] = ""; + } + } + return completeColorPalette(colorPalette); }; diff --git a/web/extensions/core/slotDefaults.js b/web/extensions/core/slotDefaults.js index 3ec605900..9401678b0 100644 --- a/web/extensions/core/slotDefaults.js +++ b/web/extensions/core/slotDefaults.js @@ -6,6 +6,7 @@ app.registerExtension({ name: "Comfy.SlotDefaults", suggestionsNumber: null, init() { + LiteGraph.search_filter_enabled = true; LiteGraph.middle_click_slot_add_default_node = true; this.suggestionsNumber = app.ui.settings.addSetting({ id: "Comfy.NodeSuggestions.number", @@ -43,6 +44,14 @@ app.registerExtension({ } if (this.slot_types_default_out[type].includes(nodeId)) continue; this.slot_types_default_out[type].push(nodeId); + + // Input types have to be stored as lower case + // Store each node that can handle this input type + const lowerType = type.toLocaleLowerCase(); + if (!(lowerType in LiteGraph.registered_slot_in_types)) { + 
LiteGraph.registered_slot_in_types[lowerType] = { nodes: [] }; + } + LiteGraph.registered_slot_in_types[lowerType].nodes.push(nodeType.comfyClass); } var outputs = nodeData["output"]; @@ -53,6 +62,16 @@ app.registerExtension({ } this.slot_types_default_in[type].push(nodeId); + + // Store each node that can handle this output type + if (!(type in LiteGraph.registered_slot_out_types)) { + LiteGraph.registered_slot_out_types[type] = { nodes: [] }; + } + LiteGraph.registered_slot_out_types[type].nodes.push(nodeType.comfyClass); + + if(!LiteGraph.slot_types_out.includes(type)) { + LiteGraph.slot_types_out.push(type); + } } var maxNum = this.suggestionsNumber.value; this.setDefaults(maxNum); diff --git a/web/lib/litegraph.core.js b/web/lib/litegraph.core.js index 4189a48c0..2bc6af0c3 100644 --- a/web/lib/litegraph.core.js +++ b/web/lib/litegraph.core.js @@ -3628,6 +3628,18 @@ return size; }; + LGraphNode.prototype.inResizeCorner = function(canvasX, canvasY) { + var rows = this.outputs ? this.outputs.length : 1; + var outputs_offset = (this.constructor.slot_start_y || 0) + rows * LiteGraph.NODE_SLOT_HEIGHT; + return isInsideRectangle(canvasX, + canvasY, + this.pos[0] + this.size[0] - 15, + this.pos[1] + Math.max(this.size[1] - 15, outputs_offset), + 20, + 20 + ); + } + /** * returns all the info available about a property of this node. * @@ -5877,14 +5889,7 @@ LGraphNode.prototype.executeAction = function(action) if ( !this.connecting_node && !node.flags.collapsed && !this.live_mode ) { //Search for corner for resize if ( !skip_action && - node.resizable !== false && - isInsideRectangle( e.canvasX, - e.canvasY, - node.pos[0] + node.size[0] - 5, - node.pos[1] + node.size[1] - 5, - 10, - 10 - ) + node.resizable !== false && node.inResizeCorner(e.canvasX, e.canvasY) ) { this.graph.beforeChange(); this.resizing_node = node; @@ -6424,16 +6429,7 @@ LGraphNode.prototype.executeAction = function(action) //Search for corner if (this.canvas) { - if ( - isInsideRectangle( - e.canvasX, - e.canvasY, - node.pos[0] + node.size[0] - 5, - node.pos[1] + node.size[1] - 5, - 5, - 5 - ) - ) { + if (node.inResizeCorner(e.canvasX, e.canvasY)) { this.canvas.style.cursor = "se-resize"; } else { this.canvas.style.cursor = "crosshair"; @@ -9953,11 +9949,11 @@ LGraphNode.prototype.executeAction = function(action) } break; case "slider": - var range = w.options.max - w.options.min; + var old_value = w.value; var nvalue = Math.clamp((x - 15) / (widget_width - 30), 0, 1); if(w.options.read_only) break; w.value = w.options.min + (w.options.max - w.options.min) * nvalue; - if (w.callback) { + if (old_value != w.value) { setTimeout(function() { inner_value_change(w, w.value); }, 20); @@ -10044,7 +10040,7 @@ LGraphNode.prototype.executeAction = function(action) if (event.click_time < 200 && delta == 0) { this.prompt("Value",w.value,function(v) { // check if v is a valid equation or a number - if (/^[0-9+\-*/()\s]+$/.test(v)) { + if (/^[0-9+\-*/()\s]+|\d+\.\d+$/.test(v)) { try {//solve the equation if possible v = eval(v); } catch (e) { } diff --git a/web/scripts/api.js b/web/scripts/api.js index 2b90c2abc..d29faa5ba 100644 --- a/web/scripts/api.js +++ b/web/scripts/api.js @@ -35,7 +35,7 @@ class ComfyApi extends EventTarget { } let opened = false; - let existingSession = sessionStorage["Comfy.SessionId"] || ""; + let existingSession = window.name; if (existingSession) { existingSession = "?clientId=" + existingSession; } @@ -75,7 +75,7 @@ class ComfyApi extends EventTarget { case "status": if (msg.data.sid) { this.clientId = 
msg.data.sid; - sessionStorage["Comfy.SessionId"] = this.clientId; + window.name = this.clientId; } this.dispatchEvent(new CustomEvent("status", { detail: msg.data.status })); break; diff --git a/web/scripts/app.js b/web/scripts/app.js index 6e35f9a49..e2b12a032 100644 --- a/web/scripts/app.js +++ b/web/scripts/app.js @@ -20,6 +20,12 @@ export class ComfyApp { */ #processingQueue = false; + /** + * Content Clipboard + * @type {serialized node object} + */ + static clipspace = null; + constructor() { this.ui = new ComfyUI(this); @@ -130,6 +136,83 @@ export class ComfyApp { ); } } + + options.push( + { + content: "Copy (Clipspace)", + callback: (obj) => { + var widgets = null; + if(this.widgets) { + widgets = this.widgets.map(({ type, name, value }) => ({ type, name, value })); + } + + let img = new Image(); + var imgs = undefined; + if(this.imgs != undefined) { + img.src = this.imgs[0].src; + imgs = [img]; + } + + ComfyApp.clipspace = { + 'widgets': widgets, + 'imgs': imgs, + 'original_imgs': imgs, + 'images': this.images + }; + } + }); + + if(ComfyApp.clipspace != null) { + options.push( + { + content: "Paste (Clipspace)", + callback: () => { + if(ComfyApp.clipspace != null) { + if(ComfyApp.clipspace.widgets != null && this.widgets != null) { + ComfyApp.clipspace.widgets.forEach(({ type, name, value }) => { + const prop = Object.values(this.widgets).find(obj => obj.type === type && obj.name === name); + if (prop) { + prop.callback(value); + } + }); + } + + // image paste + if(ComfyApp.clipspace.imgs != undefined && this.imgs != undefined && this.widgets != null) { + var filename = ""; + if(this.images && ComfyApp.clipspace.images) { + this.images = ComfyApp.clipspace.images; + } + + if(ComfyApp.clipspace.images != undefined) { + const clip_image = ComfyApp.clipspace.images[0]; + if(clip_image.subfolder != '') + filename = `${clip_image.subfolder}/`; + filename += `${clip_image.filename} [${clip_image.type}]`; + } + else if(ComfyApp.clipspace.widgets != undefined) { + const index_in_clip = ComfyApp.clipspace.widgets.findIndex(obj => obj.name === 'image'); + if(index_in_clip >= 0) { + filename = `${ComfyApp.clipspace.widgets[index_in_clip].value}`; + } + } + + const index = this.widgets.findIndex(obj => obj.name === 'image'); + if(index >= 0 && filename != "" && ComfyApp.clipspace.imgs != undefined) { + this.imgs = ComfyApp.clipspace.imgs; + + this.widgets[index].value = filename; + if(this.widgets_values != undefined) { + this.widgets_values[index] = filename; + } + } + } + this.trigger('changed'); + } + } + } + ); + } }; } @@ -180,6 +263,34 @@ export class ComfyApp { */ #addDrawBackgroundHandler(node) { const app = this; + + function getImageTop(node) { + let shiftY; + if (node.imageOffset != null) { + shiftY = node.imageOffset; + } else { + if (node.widgets?.length) { + const w = node.widgets[node.widgets.length - 1]; + shiftY = w.last_y; + if (w.computeSize) { + shiftY += w.computeSize()[1] + 4; + } else { + shiftY += LiteGraph.NODE_WIDGET_HEIGHT + 4; + } + } else { + shiftY = node.computeSize()[1]; + } + } + return shiftY; + } + + node.prototype.setSizeForImage = function () { + const minHeight = getImageTop(this) + 220; + if (this.size[1] < minHeight) { + this.setSize([this.size[0], minHeight]); + } + }; + node.prototype.onDrawBackground = function (ctx) { if (!this.flags.collapsed) { const output = app.nodeOutputs[this.id + ""]; @@ -200,9 +311,7 @@ export class ComfyApp { ).then((imgs) => { if (this.images === output.images) { this.imgs = imgs.filter(Boolean); - if (this.size[1] < 
100) { - this.size[1] = 250; - } + this.setSizeForImage?.(); app.graph.setDirtyCanvas(true); } }); @@ -227,12 +336,7 @@ export class ComfyApp { this.imageIndex = imageIndex = 0; } - let shiftY; - if (this.imageOffset != null) { - shiftY = this.imageOffset; - } else { - shiftY = this.computeSize()[1]; - } + const shiftY = getImageTop(this); let dw = this.size[0]; let dh = this.size[1]; @@ -888,8 +992,10 @@ export class ComfyApp { loadGraphData(graphData) { this.clean(); + let reset_invalid_values = false; if (!graphData) { graphData = structuredClone(defaultGraph); + reset_invalid_values = true; } const missingNodeTypes = []; @@ -975,6 +1081,13 @@ export class ComfyApp { } } } + if (reset_invalid_values) { + if (widget.type == "combo") { + if (!widget.options.values.includes(widget.value) && widget.options.values.length > 0) { + widget.value = widget.options.values[0]; + } + } + } } } diff --git a/web/scripts/widgets.js b/web/scripts/widgets.js index 2acc5f2c0..cd471bc93 100644 --- a/web/scripts/widgets.js +++ b/web/scripts/widgets.js @@ -136,9 +136,11 @@ function addMultilineWidget(node, name, opts, app) { left: `${t.a * margin + t.e}px`, top: `${t.d * (y + widgetHeight - margin - 3) + t.f}px`, width: `${(widgetWidth - margin * 2 - 3) * t.a}px`, + background: (!node.color)?'':node.color, height: `${(this.parent.inputHeight - margin * 2 - 4) * t.d}px`, position: "absolute", - zIndex: 1, + color: (!node.color)?'':'white', + zIndex: app.graph._nodes.indexOf(node), fontSize: `${t.d * 10.0}px`, }); this.inputEl.hidden = !visible; @@ -259,17 +261,13 @@ export const ComfyWidgets = { let uploadWidget; function showImage(name) { - // Position the image somewhere sensible - if (!node.imageOffset) { - node.imageOffset = uploadWidget.last_y ? uploadWidget.last_y + 25 : 75; - } - const img = new Image(); img.onload = () => { node.imgs = [img]; app.graph.setDirtyCanvas(true); }; img.src = `/view?filename=${name}&type=input`; + node.setSizeForImage?.(); } // Add our own callback to the combo widget to render an image when it changes diff --git a/web/style.css b/web/style.css index 2cbf02c0c..df220cc02 100644 --- a/web/style.css +++ b/web/style.css @@ -120,7 +120,7 @@ body { .comfy-menu > button, .comfy-menu-btns button, .comfy-menu .comfy-list button, -.comfy-modal button{ +.comfy-modal button { color: var(--input-text); background-color: var(--comfy-input-bg); border-radius: 8px; @@ -129,6 +129,15 @@ body { margin-top: 2px; } +.comfy-menu > button:hover, +.comfy-menu-btns button:hover, +.comfy-menu .comfy-list button:hover, +.comfy-modal button:hover, +.comfy-settings-btn:hover { + filter: brightness(1.2); + cursor: pointer; +} + .comfy-menu span.drag-handle { width: 10px; height: 20px; @@ -248,8 +257,11 @@ button.comfy-queue-btn { } } +/* Input popup */ + .graphdialog { min-height: 1em; + background-color: var(--comfy-menu-bg); } .graphdialog .name { @@ -273,15 +285,66 @@ button.comfy-queue-btn { border-radius: 12px 0 0 12px; } +/* Context menu */ + .litegraph .litemenu-entry.has_submenu { position: relative; padding-right: 20px; - } +} - .litemenu-entry.has_submenu::after { +.litemenu-entry.has_submenu::after { content: ">"; position: absolute; top: 0; right: 2px; - } - \ No newline at end of file +} + +.litegraph.litecontextmenu, +.litegraph.litecontextmenu.dark { + z-index: 9999 !important; + background-color: var(--comfy-menu-bg) !important; + filter: brightness(95%); +} + +.litegraph.litecontextmenu .litemenu-entry:hover:not(.disabled):not(.separator) { + background-color: var(--comfy-menu-bg) 
!important; + filter: brightness(155%); + color: var(--input-text); +} + +.litegraph.litecontextmenu .litemenu-entry.submenu, +.litegraph.litecontextmenu.dark .litemenu-entry.submenu { + background-color: var(--comfy-menu-bg) !important; + color: var(--input-text); +} + +.litegraph.litecontextmenu input { + background-color: var(--comfy-input-bg) !important; + color: var(--input-text) !important; +} + +/* Search box */ + +.litegraph.litesearchbox { + z-index: 9999 !important; + background-color: var(--comfy-menu-bg) !important; + overflow: hidden; +} + +.litegraph.litesearchbox input, +.litegraph.litesearchbox select { + background-color: var(--comfy-input-bg) !important; + color: var(--input-text); +} + +.litegraph.lite-search-item { + color: var(--input-text); + background-color: var(--comfy-input-bg); + filter: brightness(80%); + padding-left: 0.2em; +} + +.litegraph.lite-search-item.generic_type { + color: var(--input-text); + filter: brightness(50%); +}
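
Notes on the main mechanisms this patch introduces, each followed by a small illustrative sketch in Python (the language of the patched modules). The sketches are standalone restatements for review purposes, not drop-in code.

The progress path: main.py's hijack_progress no longer monkey-patches tqdm; it installs a global hook via comfy.utils.set_progress_bar_global_hook, get_tiled_scale_steps predicts how many tiles a tiled_scale call will process, and every ProgressBar forwards updates through the hook. The sketch below mirrors the new comfy/utils.py code, with a print hook standing in for server.send_sync("progress", ...) and a plain loop standing in for the per-tile updates inside tiled_scale.

import math

def get_tiled_scale_steps(width, height, tile_x, tile_y, overlap):
    # Tiles per pass over a width x height input, as added to comfy/utils.py.
    return math.ceil(height / (tile_y - overlap)) * math.ceil(width / (tile_x - overlap))

PROGRESS_BAR_HOOK = None

def set_progress_bar_global_hook(function):
    # main.py installs the server callback here once at startup.
    global PROGRESS_BAR_HOOK
    PROGRESS_BAR_HOOK = function

class ProgressBar:
    def __init__(self, total):
        self.total = total
        self.current = 0
        self.hook = PROGRESS_BAR_HOOK  # captured at construction time

    def update_absolute(self, value, total=None):
        if total is not None:
            self.total = total
        self.current = min(value, self.total)  # clamp to total, as in the patch
        if self.hook is not None:
            self.hook(self.current, self.total)

    def update(self, value):
        self.update_absolute(self.current + value)

if __name__ == "__main__":
    set_progress_bar_global_hook(lambda value, total: print(f"progress {value}/{total}"))
    steps = get_tiled_scale_steps(512, 512, tile_x=64, tile_y=64, overlap=8)
    pbar = ProgressBar(steps)
    for _ in range(steps):
        pbar.update(1)  # tiled_scale calls this once per finished tile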
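The step=64 constraints on width/height/x/y widgets drop to step=8 throughout nodes.py, and VAE encoding follows suit: the new VAEEncode.vae_encode_crop_pixels (also used by VAEEncodeTiled) centre-crops to a multiple of 8 instead of truncating to a multiple of 64. A runnable restatement, with a random NHWC tensor standing in for a real image batch:

import torch

def vae_encode_crop_pixels(pixels):
    # Centre-crop height and width (NHWC layout) to the nearest multiple of 8.
    x = (pixels.shape[1] // 8) * 8
    y = (pixels.shape[2] // 8) * 8
    if pixels.shape[1] != x or pixels.shape[2] != y:
        x_offset = (pixels.shape[1] % 8) // 2
        y_offset = (pixels.shape[2] % 8) // 2
        pixels = pixels[:, x_offset:x + x_offset, y_offset:y + y_offset, :]
    return pixels

img = torch.rand(1, 517, 389, 3)          # batch, height, width, channels
print(vae_encode_crop_pixels(img).shape)  # torch.Size([1, 512, 384, 3])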
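The "filename [type]" convention now accepted by LoadImage, LoadImageMask and the /upload/image endpoint is resolved by the new folder_paths helpers: a trailing "[output]", "[input]" or "[temp]" annotation picks the base directory, and unannotated names fall back to the input directory. A simplified, self-contained restatement; the DIRS entries are placeholders for get_output_directory() and friends:

import os

DIRS = {"input": "input", "output": "output", "temp": "temp"}  # placeholder paths

def annotated_filepath(name):
    # Split "name.ext [tag]" into (name.ext, base_dir); None means no annotation.
    for tag, base in (("[output]", DIRS["output"]),
                      ("[input]", DIRS["input"]),
                      ("[temp]", DIRS["temp"])):
        if name.endswith(tag):
            return name[:-(len(tag) + 1)], base  # drop " [tag]"
    return name, None

def get_annotated_filepath(name, default_dir=None):
    name, base_dir = annotated_filepath(name)
    if base_dir is None:
        base_dir = default_dir if default_dir is not None else DIRS["input"]  # fallback path
    return os.path.join(base_dir, name)

print(get_annotated_filepath("photo.png"))           # <input dir>/photo.png
print(get_annotated_filepath("photo.png [output]"))  # <output dir>/photo.png
print(get_annotated_filepath("mask.png [temp]"))     # <temp dir>/mask.png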
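HypernetworkLoader clones the model and registers the same patch object on both attn1 and attn2. At attention time the patch keys on the last dimension of k; if the hypernetwork ships an MLP pair for that dimension, their outputs are added to k and v, scaled by the strength widget, while q passes through untouched. The sketch below traces that call path; the single Linear layers are stand-ins for the Sequential stacks the loader actually builds from the checkpoint:

import torch

class HypernetworkPatch:
    def __init__(self, hypernet, strength):
        self.hypernet = hypernet      # {dim: ModuleList([k_mlp, v_mlp])}
        self.strength = strength

    def __call__(self, current_index, q, k, v):
        dim = k.shape[-1]
        if dim in self.hypernet:
            hn = self.hypernet[dim]
            k = k + hn[0](k) * self.strength  # residual on keys
            v = v + hn[1](v) * self.strength  # residual on values
        return q, k, v

dim = 768
toy = {dim: torch.nn.ModuleList([torch.nn.Linear(dim, dim), torch.nn.Linear(dim, dim)])}
patch = HypernetworkPatch(toy, strength=1.0)
q = k = v = torch.randn(1, 77, dim)
q, k, v = patch(0, q, k, v)  # k and v now carry the hypernetwork residuals
print(k.shape, v.shape)      # torch.Size([1, 77, 768]) torch.Size([1, 77, 768])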