From eecd69b53a896343775bcb02a4f8349e7442ffd1 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 29 Apr 2024 20:00:47 -0400
Subject: [PATCH 1/8] Add a SamplerLCMUpscale node.

This sampler is an LCM sampler that upscales the latent during sampling. It
can be used to generate at a higher resolution with an LCM model very
quickly. To try it, use it in a basic 5-step LCM workflow with scale_ratio
set to 1.5 or 2.0; the resulting upscale schedule is sketched below.
---
 comfy_extras/nodes_advanced_samplers.py | 61 +++++++++++++++++++++++++
 nodes.py                                |  1 +
 2 files changed, 62 insertions(+)
 create mode 100644 comfy_extras/nodes_advanced_samplers.py

diff --git a/comfy_extras/nodes_advanced_samplers.py b/comfy_extras/nodes_advanced_samplers.py
new file mode 100644
index 000000000..d973def81
--- /dev/null
+++ b/comfy_extras/nodes_advanced_samplers.py
@@ -0,0 +1,61 @@
+import comfy.samplers
+import comfy.utils
+import torch
+import numpy as np
+from tqdm.auto import trange, tqdm
+import math
+
+
+@torch.no_grad()
+def sample_lcm_upscale(model, x, sigmas, extra_args=None, callback=None, disable=None, total_upscale=2.0, upscale_method="bislerp", upscale_steps=None):
+    extra_args = {} if extra_args is None else extra_args
+
+    if upscale_steps is None:
+        upscale_steps = max(len(sigmas) // 2 + 1, 2)
+    else:
+        upscale_steps += 1
+        upscale_steps = min(upscale_steps, len(sigmas) + 1)
+
+    upscales = np.linspace(1.0, total_upscale, upscale_steps)[1:]
+
+    orig_shape = x.size()
+    s_in = x.new_ones([x.shape[0]])
+    for i in trange(len(sigmas) - 1, disable=disable):
+        denoised = model(x, sigmas[i] * s_in, **extra_args)
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
+
+        x = denoised
+        if i < len(upscales):
+            x = comfy.utils.common_upscale(x, round(orig_shape[-1] * upscales[i]), round(orig_shape[-2] * upscales[i]), upscale_method, "disabled")
+
+        if sigmas[i + 1] > 0:
+            x += sigmas[i + 1] * torch.randn_like(x)
+    return x
+
+
+class SamplerLCMUpscale:
+    upscale_methods = ["bislerp", "nearest-exact", "bilinear", "area", "bicubic"]
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required":
+                    {"scale_ratio": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 20.0, "step": 0.01}),
+                     "scale_steps": ("INT", {"default": -1, "min": -1, "max": 1000, "step": 1}),
+                     "upscale_method": (s.upscale_methods,),
+                     }
+                }
+    RETURN_TYPES = ("SAMPLER",)
+    CATEGORY = "sampling/custom_sampling/samplers"
+
+    FUNCTION = "get_sampler"
+
+    def get_sampler(self, scale_ratio, scale_steps, upscale_method):
+        if scale_steps < 0:
+            scale_steps = None
+        sampler = comfy.samplers.KSAMPLER(sample_lcm_upscale, extra_options={"total_upscale": scale_ratio, "upscale_steps": scale_steps, "upscale_method": upscale_method})
+        return (sampler, )
+
+NODE_CLASS_MAPPINGS = {
+    "SamplerLCMUpscale": SamplerLCMUpscale,
+}

diff --git a/nodes.py b/nodes.py
index 1651a71cd..acad256f8 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1945,6 +1945,7 @@ def init_custom_nodes():
         "nodes_pag.py",
         "nodes_align_your_steps.py",
         "nodes_attention_multiply.py",
+        "nodes_advanced_samplers.py",
     ]
 
     import_failed = []

From bb8b48a260256bd7461f45de0397d15df822e5a5 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Tue, 30 Apr 2024 20:11:34 -0400
Subject: [PATCH 2/8] Update Readme.
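For reference, the upscale schedule that the previous patch derives from
scale_ratio can be previewed in isolation. A minimal sketch (numpy only; the
5-step / scale_ratio 2.0 numbers are an assumed example that mirrors the
linspace logic in sample_lcm_upscale, not part of the codebase):

```python
import numpy as np

# 5 LCM steps -> 6 sigmas, so upscale_steps defaults to max(6 // 2 + 1, 2) == 4
sigmas_len = 6
total_upscale = 2.0
upscale_steps = max(sigmas_len // 2 + 1, 2)

# drop the leading 1.0 so only actual upscale factors remain
upscales = np.linspace(1.0, total_upscale, upscale_steps)[1:]
print(upscales)  # approximately [1.333 1.667 2.0], applied on the first 3 steps
```

Because the loop replaces the latent with the denoised prediction before
upscaling and re-noising, the remaining steps refine the enlarged latent.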
---
 README.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8ca7e4376..d392cd802 100644
--- a/README.md
+++ b/README.md
@@ -197,9 +197,7 @@ To use a textual inversion concepts/embeddings in a text prompt put them in the
 
 ## How to increase generation speed?
 
-Make sure you use the regular loaders/Load Checkpoint node to load checkpoints. It will auto pick the right settings depending on your GPU.
-
-You can set this command line setting to disable the upcasting to fp32 in some cross attention operations which will increase your speed. Note that this will very likely give you black images on SD2.x models. If you use xformers or pytorch attention this option does not do anything.
+On non-Nvidia hardware you can set this command line option to disable the upcasting to fp32 in some cross attention operations, which will increase your speed. Note that this will very likely give you black images on SD2.x models. If you use xformers or pytorch attention this option does not do anything.
 
 ```--dont-upcast-attention```

From bacce529fbedeafc826731f4281f325f4eae3f85 Mon Sep 17 00:00:00 2001
From: Garrett Sutula
Date: Tue, 30 Apr 2024 20:17:02 -0400
Subject: [PATCH 3/8] Add TLS Support (#3312)

* Add TLS Support
* Add to readme
* Add guidance for Windows users on generating certificates
* Fix typo
---
 README.md         |  8 ++++++++
 comfy/cli_args.py |  2 ++
 main.py           |  4 ++--
 server.py         | 15 ++++++++++++---
 4 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index d392cd802..eb07540cc 100644
--- a/README.md
+++ b/README.md
@@ -207,6 +207,14 @@ Use ```--preview-method auto``` to enable previews.
 
 The default installation includes a fast latent preview method that's low-resolution. To enable higher-quality previews with [TAESD](https://github.com/madebyollin/taesd), download the [taesd_decoder.pth](https://github.com/madebyollin/taesd/raw/main/taesd_decoder.pth) (for SD1.x and SD2.x) and [taesdxl_decoder.pth](https://github.com/madebyollin/taesd/raw/main/taesdxl_decoder.pth) (for SDXL) models and place them in the `models/vae_approx` folder. Once they're installed, restart ComfyUI to enable high-quality previews.
 
+## How to use TLS/SSL?
+Generate a self-signed certificate (not appropriate for shared/production use) and key by running the command: `openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -sha256 -days 3650 -nodes -subj "/C=XX/ST=StateName/L=CityName/O=CompanyName/OU=CompanySectionName/CN=CommonNameOrHostname"`
+
+Use `--tls-keyfile key.pem --tls-certfile cert.pem` to enable TLS/SSL; the app will then be accessible at `https://...` instead of `http://...`.
+
+> Note: Windows users can use [alexisrolland/docker-openssl](https://github.com/alexisrolland/docker-openssl) or one of the [3rd party binary distributions](https://wiki.openssl.org/index.php/Binaries) to run the command example above.

+If you use a container, note that the volume mount `-v` can be a relative path, so `... -v ".\:/openssl-certs" ...` would create the key & cert files in the current directory of your command prompt or PowerShell terminal.
+
 
 ## Support and dev channel
 
 [Matrix space: #comfyui_space:matrix.org](https://app.element.io/#/room/%23comfyui_space%3Amatrix.org) (it's like discord but open source).

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 353bb51e7..569c79380 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -35,6 +35,8 @@ parser = argparse.ArgumentParser()
 
 parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)")
 parser.add_argument("--port", type=int, default=8188, help="Set the listen port.")
+parser.add_argument("--tls-keyfile", type=str, help="Path to TLS (SSL) key file. Enables TLS and makes the app accessible at https://...; requires --tls-certfile to function.")
+parser.add_argument("--tls-certfile", type=str, help="Path to TLS (SSL) certificate file. Enables TLS and makes the app accessible at https://...; requires --tls-keyfile to function.")
 parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
 parser.add_argument("--max-upload-size", type=float, default=100, help="Set the maximum upload size in MB.")

diff --git a/main.py b/main.py
index b3a3ebea8..a374f2b12 100644
--- a/main.py
+++ b/main.py
@@ -243,11 +243,11 @@ if __name__ == "__main__":
     call_on_start = None
     if args.auto_launch:
-        def startup_server(address, port):
+        def startup_server(scheme, address, port):
             import webbrowser
             if os.name == 'nt' and address == '0.0.0.0':
                 address = '127.0.0.1'
-            webbrowser.open(f"http://{address}:{port}")
+            webbrowser.open(f"{scheme}://{address}:{port}")
         call_on_start = startup_server
 
     try:

diff --git a/server.py b/server.py
index 5642bd5e2..bab3b0600 100644
--- a/server.py
+++ b/server.py
@@ -11,6 +11,7 @@ import urllib
 import json
 import glob
 import struct
+import ssl
 from PIL import Image, ImageOps
 from PIL.PngImagePlugin import PngInfo
 from io import BytesIO
@@ -623,14 +624,22 @@ class PromptServer():
     async def start(self, address, port, verbose=True, call_on_start=None):
         runner = web.AppRunner(self.app, access_log=None)
         await runner.setup()
-        site = web.TCPSite(runner, address, port)
+        ssl_ctx = None
+        scheme = "http"
+        if args.tls_keyfile and args.tls_certfile:
+            ssl_ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_TLS_SERVER, verify_mode=ssl.CERT_NONE)
+            ssl_ctx.load_cert_chain(certfile=args.tls_certfile,
+                                    keyfile=args.tls_keyfile)
+            scheme = "https"
+
+        site = web.TCPSite(runner, address, port, ssl_context=ssl_ctx)
         await site.start()
 
         if verbose:
             logging.info("Starting server\n")
-            logging.info("To see the GUI go to: http://{}:{}".format(address, port))
+            logging.info("To see the GUI go to: {}://{}:{}".format(scheme, address, port))
         if call_on_start is not None:
-            call_on_start(address, port)
+            call_on_start(scheme, address, port)
 
     def add_on_prompt_handler(self, handler):
         self.on_prompt_handlers.append(handler)

From 2aed53c4ac78d842a2e984d23343334a29ed8562 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Tue, 30 Apr 2024 21:23:40 -0400
Subject: [PATCH 4/8] Work around xformers bug.
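The change below replaces an enumerated version list with a prefix test. A
minimal standalone sketch of the new gate's behavior (the helper name is
illustrative only, not part of the codebase):

```python
def is_broken_xformers(x_vers: str) -> bool:
    # Any 0.0.2x release except 0.0.20 is treated as broken
    # (q with batch size larger than 65535 gives a CUDA error).
    return x_vers.startswith("0.0.2") and not x_vers.startswith("0.0.20")

assert is_broken_xformers("0.0.21")
assert is_broken_xformers("0.0.26")
assert not is_broken_xformers("0.0.20")
assert not is_broken_xformers("0.0.16")
```

Unlike the old `startswith` chain covering only 0.0.21-0.0.23, the prefix
test also catches newer 0.0.2x releases until a fixed version line ships.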
--- comfy/ldm/modules/attention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index f116efee3..d51a2fae1 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -297,8 +297,8 @@ def attention_split(q, k, v, heads, mask=None): BROKEN_XFORMERS = False try: x_vers = xformers.__version__ - #I think 0.0.23 is also broken (q with bs bigger than 65535 gives CUDA error) - BROKEN_XFORMERS = x_vers.startswith("0.0.21") or x_vers.startswith("0.0.22") or x_vers.startswith("0.0.23") + # XFormers bug confirmed on all versions from 0.0.21 to 0.0.26 (q with bs bigger than 65535 gives CUDA error) + BROKEN_XFORMERS = x_vers.startswith("0.0.2") and not x_vers.startswith("0.0.20") except: pass From 94d5a128010f5956d0e6d1ea905b60890bd3bad7 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 1 May 2024 16:57:10 -0400 Subject: [PATCH 5/8] Don't load the model in SDTurboScheduler --- comfy_extras/nodes_custom_sampler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index 7afdbf4bf..f3dff000e 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -107,8 +107,7 @@ class SDTurboScheduler: def get_sigmas(self, model, steps, denoise): start_step = 10 - int(10 * denoise) timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps] - comfy.model_management.load_models_gpu([model]) - sigmas = model.model.model_sampling.sigma(timesteps) + sigmas = model.get_model_object("model_sampling").sigma(timesteps) sigmas = torch.cat([sigmas, sigmas.new_zeros([1])]) return (sigmas, ) From f81a6fade86bfaec91a115284745d9f95b74e265 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 1 May 2024 17:05:30 -0400 Subject: [PATCH 6/8] Fix some edge cases with samplers and arrays with a single sigma. 
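The guards added below make degenerate schedules a no-op. A minimal sketch of
the edge case they protect against (assuming a one-element sigma tensor, for
example when denoise rounds down to zero steps):

```python
import torch

sigmas = torch.tensor([0.0])  # only the terminal sigma: no step to take

# The SDE samplers derive their noise range before the step loop; with a
# single zero sigma the boolean mask selects nothing, and min() raises a
# RuntimeError on the empty tensor:
try:
    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
except RuntimeError:
    print("cannot take min() of an empty selection")

# The new `if len(sigmas) <= 1: return x` check exits before that line is
# reached, returning the input latent unchanged.
```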
--- comfy/k_diffusion/sampling.py | 17 +++++++++++++++++ comfy/samplers.py | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 7af016829..f9b281894 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -527,6 +527,9 @@ def sample_dpmpp_2s_ancestral(model, x, sigmas, extra_args=None, callback=None, @torch.no_grad() def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): """DPM-Solver++ (stochastic).""" + if len(sigmas) <= 1: + return x + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() seed = extra_args.get("seed", None) noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler @@ -595,6 +598,8 @@ def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=No @torch.no_grad() def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): """DPM-Solver++(2M) SDE.""" + if len(sigmas) <= 1: + return x if solver_type not in {'heun', 'midpoint'}: raise ValueError('solver_type must be \'heun\' or \'midpoint\'') @@ -642,6 +647,9 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): """DPM-Solver++(3M) SDE.""" + if len(sigmas) <= 1: + return x + seed = extra_args.get("seed", None) sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler @@ -690,18 +698,27 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl @torch.no_grad() def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + if len(sigmas) <= 1: + return x + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler) @torch.no_grad() def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): + if len(sigmas) <= 1: + return x + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type) @torch.no_grad() def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): + if len(sigmas) <= 1: + return x + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_sde(model, x, sigmas, 
extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r)

diff --git a/comfy/samplers.py b/comfy/samplers.py
index b12b0fd1b..29962a916 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -539,6 +539,9 @@ class KSAMPLER(Sampler):
 def ksampler(sampler_name, extra_options={}, inpaint_options={}):
     if sampler_name == "dpm_fast":
         def dpm_fast_function(model, noise, sigmas, extra_args, callback, disable):
+            if len(sigmas) <= 1:
+                return noise
+
             sigma_min = sigmas[-1]
             if sigma_min == 0:
                 sigma_min = sigmas[-2]
@@ -547,6 +550,9 @@ def ksampler(sampler_name, extra_options={}, inpaint_options={}):
         sampler_function = dpm_fast_function
     elif sampler_name == "dpm_adaptive":
         def dpm_adaptive_function(model, noise, sigmas, extra_args, callback, disable, **extra_options):
+            if len(sigmas) <= 1:
+                return noise
+
             sigma_min = sigmas[-1]
             if sigma_min == 0:
                 sigma_min = sigmas[-2]

From a56d02efc7fc111f00ba9be9d01f2945d41552d6 Mon Sep 17 00:00:00 2001
From: Simon Lui <502929+simonlui@users.noreply.github.com>
Date: Thu, 2 May 2024 00:26:50 -0700
Subject: [PATCH 7/8] Change torch.xpu to ipex.optimize, fix xpu device
 initialization, and remove the workaround for a text node issue from older
 IPEX. (#3388)
---
 comfy/model_management.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 537df41ed..913b6844f 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -83,7 +83,7 @@ def get_torch_device():
             return torch.device("cpu")
         else:
             if is_intel_xpu():
-                return torch.device("xpu")
+                return torch.device("xpu", torch.xpu.current_device())
             else:
                 return torch.device(torch.cuda.current_device())
@@ -304,7 +304,7 @@ class LoadedModel:
                 raise e
 
         if is_intel_xpu() and not args.disable_ipex_optimize:
-            self.real_model = torch.xpu.optimize(self.real_model.eval(), inplace=True, auto_kernel_selection=True, graph_mode=True)
+            self.real_model = ipex.optimize(self.real_model.eval(), graph_mode=True, concat_linear=True)
 
         self.weights_loaded = True
         return self.real_model
@@ -552,8 +552,6 @@ def text_encoder_device():
     if args.gpu_only:
         return get_torch_device()
     elif vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.NORMAL_VRAM:
-        if is_intel_xpu():
-            return torch.device("cpu")
         if should_use_fp16(prioritize_performance=False):
             return get_torch_device()
         else:

From 89d0e9abeb31e44cccef46537cd10d8812130ef3 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Thu, 2 May 2024 03:34:19 -0400
Subject: [PATCH 8/8] Fix int widget rounding.
---
 web/scripts/widgets.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/scripts/widgets.js b/web/scripts/widgets.js
index 00c91914d..8f1603253 100644
--- a/web/scripts/widgets.js
+++ b/web/scripts/widgets.js
@@ -229,7 +229,7 @@ function createIntWidget(node, inputName, inputData, app, isSeedInput) {
 		val,
 		function (v) {
 			const s = this.options.step / 10;
-			this.value = Math.round(v / s) * s;
+			this.value = Math.round((v - this.options.min) / s) * s + this.options.min;
 		},
 		config
 	),
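To see what the widget fix above changes: values must snap to the step grid
anchored at `options.min`, not to plain multiples of the step. A small Python
stand-in for the two JS formulas (the min=1, s=2 numbers are an assumed
example; `s` corresponds to `options.step / 10` in the widget code):

```python
def old_snap(v, s, vmin):
    return round(v / s) * s  # snaps to multiples of s, ignoring min

def new_snap(v, s, vmin):
    return round((v - vmin) / s) * s + vmin  # snaps to min + k*s

# e.g. a widget with min=1 and effective step s=2 only allows 1, 3, 5, ...
print(old_snap(2.2, 2, 1))  # 2 -> off the widget's valid grid
print(new_snap(2.2, 2, 1))  # 3 -> min + 1*s, on the grid
```

Both formulas agree whenever `options.min` is itself a multiple of `s`; the
fix only matters for ranges whose minimum sits off the step grid.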