From 37713e3b0acfc576f4eafc0b47582374ab5987dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Wed, 5 Apr 2023 21:22:14 +0800 Subject: [PATCH 01/24] Add basic XPU device support closed #387 --- comfy/model_management.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 052dfb775..f0b8be55e 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -5,6 +5,7 @@ LOW_VRAM = 2 NORMAL_VRAM = 3 HIGH_VRAM = 4 MPS = 5 +XPU = 6 accelerate_enabled = False vram_state = NORMAL_VRAM @@ -85,10 +86,17 @@ try: except: pass +try: + import intel_extension_for_pytorch + if torch.xpu.is_available(): + vram_state = XPU +except: + pass + if forced_cpu: vram_state = CPU -print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state]) +print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS", "XPU"][vram_state]) current_loaded_model = None @@ -141,6 +149,9 @@ def load_model_gpu(model): mps_device = torch.device("mps") real_model.to(mps_device) pass + elif vram_state == XPU: + real_model.to("xpu") + pass elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM: model_accelerated = False real_model.cuda() @@ -189,6 +200,8 @@ def unload_if_low_vram(model): def get_torch_device(): if vram_state == MPS: return torch.device("mps") + if vram_state == XPU: + return torch.device("xpu") if vram_state == CPU: return torch.device("cpu") else: @@ -228,6 +241,9 @@ def get_free_memory(dev=None, torch_free_too=False): if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'): mem_free_total = psutil.virtual_memory().available mem_free_torch = mem_free_total + elif hasattr(dev, 'type') and (dev.type == 'xpu'): + mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) + mem_free_torch = mem_free_total else: stats = torch.cuda.memory_stats(dev) mem_active = stats['active_bytes.all.current'] @@ -258,8 +274,12 @@ def mps_mode(): global vram_state return vram_state == MPS +def xpu_mode(): + global vram_state + return vram_state == XPU + def should_use_fp16(): - if cpu_mode() or mps_mode(): + if cpu_mode() or mps_mode() or xpu_mode(): return False #TODO ? 
if torch.cuda.is_bf16_supported(): From 1ced2bdd2da9a13caf72d7bff36d7f645f443fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Wed, 5 Apr 2023 21:25:37 +0800 Subject: [PATCH 02/24] Specify safetensors version to avoid upstream errors https://github.com/huggingface/safetensors/issues/142 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3b4040a29..0527b31df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ torchsde einops open-clip-torch transformers>=4.25.1 -safetensors +safetensors>=0.3.0 pytorch_lightning aiohttp accelerate From dd29966f8a2973529ea50de2ef3d0e7c72b5114e Mon Sep 17 00:00:00 2001 From: EllangoK Date: Wed, 5 Apr 2023 20:32:59 -0400 Subject: [PATCH 03/24] changes main.py to use argparse --- main.py | 118 ++++++++++++++++++++++---------------------------------- 1 file changed, 47 insertions(+), 71 deletions(-) diff --git a/main.py b/main.py index a3549b86f..20c8a49e8 100644 --- a/main.py +++ b/main.py @@ -1,57 +1,54 @@ -import os -import sys -import shutil - -import threading +import argparse import asyncio +import os +import shutil +import sys +import threading if os.name == "nt": import logging logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) if __name__ == "__main__": - if '--help' in sys.argv: - print() - print("Valid Command line Arguments:") - print("\t--listen [ip]\t\t\tListen on ip or 0.0.0.0 if none given so the UI can be accessed from other computers.") - print("\t--port 8188\t\t\tSet the listen port.") - print() - print("\t--extra-model-paths-config file.yaml\tload an extra_model_paths.yaml file.") - print("\t--output-directory path/to/output\tSet the ComfyUI output directory.") - print() - print() - print("\t--dont-upcast-attention\t\tDisable upcasting of attention \n\t\t\t\t\tcan boost speed but increase the chances of black images.\n") - print("\t--use-split-cross-attention\tUse the split cross attention optimization instead of the sub-quadratic one.\n\t\t\t\t\tIgnored when xformers is used.") - print("\t--use-pytorch-cross-attention\tUse the new pytorch 2.0 cross attention function.") - print("\t--disable-xformers\t\tdisables xformers") - print("\t--cuda-device 1\t\tSet the id of the cuda device this instance will use.") - print() - print("\t--highvram\t\t\tBy default models will be unloaded to CPU memory after being used.\n\t\t\t\t\tThis option keeps them in GPU memory.\n") - print("\t--normalvram\t\t\tUsed to force normal vram use if lowvram gets automatically enabled.") - print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.") - print("\t--novram\t\t\tWhen lowvram isn't enough.") - print() - print("\t--cpu\t\t\tTo use the CPU for everything (slow).") - exit() + parser = argparse.ArgumentParser(description="Script Arguments") - if '--dont-upcast-attention' in sys.argv: + parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 0.0.0.0 if none given so the UI can be accessed from other computers.") + parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") + parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") + parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") + parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable 
upcasting of attention. Can boost speed but increase the chances of black images.") + parser.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.") + parser.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") + parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") + parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") + parser.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.") + parser.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") + parser.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") + parser.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") + parser.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") + parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") + parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") + parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build.") + + args = parser.parse_args() + + if args.dont_upcast_attention: print("disabling upcasting of attention") os.environ['ATTN_PRECISION'] = "fp16" - try: - index = sys.argv.index('--cuda-device') - device = sys.argv[index + 1] - os.environ['CUDA_VISIBLE_DEVICES'] = device - print("Set cuda device to:", device) - except: - pass + if args.cuda_device is not None: + os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device) + print("Set cuda device to:", args.cuda_device) + -from nodes import init_custom_nodes -import execution -import server -import folder_paths import yaml +import execution +import folder_paths +import server +from nodes import init_custom_nodes + + def prompt_worker(q, server): e = execution.PromptExecutor(server) while True: @@ -110,51 +107,30 @@ if __name__ == "__main__": hijack_progress(server) threading.Thread(target=prompt_worker, daemon=True, args=(q,server,)).start() - try: - address = '0.0.0.0' - p_index = sys.argv.index('--listen') - try: - ip = sys.argv[p_index + 1] - if ip[:2] != '--': - address = ip - except: - pass - except: - address = '127.0.0.1' - dont_print = False - if '--dont-print-server' in sys.argv: - dont_print = True + address = args.listen + + dont_print = args.dont_print_server extra_model_paths_config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "extra_model_paths.yaml") if os.path.isfile(extra_model_paths_config_path): load_extra_path_config(extra_model_paths_config_path) - if '--extra-model-paths-config' in sys.argv: - indices = [(i + 1) for i in range(len(sys.argv) - 1) if sys.argv[i] == '--extra-model-paths-config'] - for i in indices: - load_extra_path_config(sys.argv[i]) + if args.extra_model_paths_config: + load_extra_path_config(args.extra_model_paths_config) - try: - output_dir = sys.argv[sys.argv.index('--output-directory') + 1] - output_dir = os.path.abspath(output_dir) + if args.output_directory: + output_dir = os.path.abspath(args.output_directory) print("setting output directory to:", output_dir) 
folder_paths.set_output_directory(output_dir) - except: - pass - port = 8188 - try: - p_index = sys.argv.index('--port') - port = int(sys.argv[p_index + 1]) - except: - pass + port = args.port - if '--quick-test-for-ci' in sys.argv: + if args.quick_test_for_ci: exit(0) call_on_start = None - if "--windows-standalone-build" in sys.argv: + if args.windows_standalone_build: def startup_server(address, port): import webbrowser webbrowser.open("http://{}:{}".format(address, port)) From e5e587b1c0c5dc728d65b3e84592445cdb5e6e9b Mon Sep 17 00:00:00 2001 From: EllangoK Date: Wed, 5 Apr 2023 23:41:23 -0400 Subject: [PATCH 04/24] seperates out arg parser and imports args --- comfy/cli_args.py | 29 +++++++++ comfy/ldm/modules/attention.py | 5 +- comfy/model_management.py | 111 ++++++++++++++++----------------- main.py | 27 +------- 4 files changed, 88 insertions(+), 84 deletions(-) create mode 100644 comfy/cli_args.py diff --git a/comfy/cli_args.py b/comfy/cli_args.py new file mode 100644 index 000000000..6a56e315c --- /dev/null +++ b/comfy/cli_args.py @@ -0,0 +1,29 @@ +import argparse + +parser = argparse.ArgumentParser() + +parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 127.0.0.1 if none given so the UI can be accessed from other computers.") +parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") +parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") +parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") +parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") + +attn_group = parser.add_mutually_exclusive_group() +attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.") +attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") + +parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") +parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") + +vram_group = parser.add_mutually_exclusive_group() +vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. 
This option keeps them in GPU memory.") +vram_group.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") +vram_group.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") +vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") +vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") + +parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") +parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") +parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build.") + +args = parser.parse_args() diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 07553627c..92b3eca7c 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -21,6 +21,8 @@ if model_management.xformers_enabled(): import os _ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32") +from cli_args import args + def exists(val): return val is not None @@ -474,7 +476,6 @@ class CrossAttentionPytorch(nn.Module): return self.to_out(out) -import sys if model_management.xformers_enabled(): print("Using xformers cross attention") CrossAttention = MemoryEfficientCrossAttention @@ -482,7 +483,7 @@ elif model_management.pytorch_attention_enabled(): print("Using pytorch cross attention") CrossAttention = CrossAttentionPytorch else: - if "--use-split-cross-attention" in sys.argv: + if args.use_split_cross_attention: print("Using split optimization for cross attention") CrossAttention = CrossAttentionDoggettx else: diff --git a/comfy/model_management.py b/comfy/model_management.py index 052dfb775..7dda073dc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1,36 +1,35 @@ +import psutil +from enum import Enum +from cli_args import args -CPU = 0 -NO_VRAM = 1 -LOW_VRAM = 2 -NORMAL_VRAM = 3 -HIGH_VRAM = 4 -MPS = 5 +class VRAMState(Enum): + CPU = 0 + NO_VRAM = 1 + LOW_VRAM = 2 + NORMAL_VRAM = 3 + HIGH_VRAM = 4 + MPS = 5 -accelerate_enabled = False -vram_state = NORMAL_VRAM +# Determine VRAM State +vram_state = VRAMState.NORMAL_VRAM +set_vram_to = VRAMState.NORMAL_VRAM total_vram = 0 total_vram_available_mb = -1 -import sys -import psutil - -forced_cpu = "--cpu" in sys.argv - -set_vram_to = NORMAL_VRAM +accelerate_enabled = False try: import torch total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) - forced_normal_vram = "--normalvram" in sys.argv - if not forced_normal_vram and not forced_cpu: + if not args.normalvram and not args.cpu: if total_vram <= 4096: print("Trying to enable lowvram mode because your GPU seems to have 4GB or less. If you don't want this use: --normalvram") - set_vram_to = LOW_VRAM + set_vram_to = VRAMState.LOW_VRAM elif total_vram > total_ram * 1.1 and total_vram > 14336: print("Enabling highvram mode because your GPU has more vram than your computer has ram. 
If you don't want this use: --normalvram") - vram_state = HIGH_VRAM + vram_state = VRAMState.HIGH_VRAM except: pass @@ -39,34 +38,32 @@ try: except: OOM_EXCEPTION = Exception -if "--disable-xformers" in sys.argv: - XFORMERS_IS_AVAILBLE = False +if args.disable_xformers: + XFORMERS_IS_AVAILABLE = False else: try: import xformers import xformers.ops - XFORMERS_IS_AVAILBLE = True + XFORMERS_IS_AVAILABLE = True except: - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False -ENABLE_PYTORCH_ATTENTION = False -if "--use-pytorch-cross-attention" in sys.argv: +ENABLE_PYTORCH_ATTENTION = args.use_pytorch_cross_attention +if ENABLE_PYTORCH_ATTENTION: torch.backends.cuda.enable_math_sdp(True) torch.backends.cuda.enable_flash_sdp(True) torch.backends.cuda.enable_mem_efficient_sdp(True) - ENABLE_PYTORCH_ATTENTION = True - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False + +if args.lowvram: + set_vram_to = VRAMState.LOW_VRAM +elif args.novram: + set_vram_to = VRAMState.NO_VRAM +elif args.highvram: + vram_state = VRAMState.HIGH_VRAM -if "--lowvram" in sys.argv: - set_vram_to = LOW_VRAM -if "--novram" in sys.argv: - set_vram_to = NO_VRAM -if "--highvram" in sys.argv: - vram_state = HIGH_VRAM - - -if set_vram_to == LOW_VRAM or set_vram_to == NO_VRAM: +if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM): try: import accelerate accelerate_enabled = True @@ -81,14 +78,14 @@ if set_vram_to == LOW_VRAM or set_vram_to == NO_VRAM: try: if torch.backends.mps.is_available(): - vram_state = MPS + vram_state = VRAMState.MPS except: pass -if forced_cpu: - vram_state = CPU +if args.cpu: + vram_state = VRAMState.CPU -print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state]) +print(f"Set vram state to: {vram_state.name}") current_loaded_model = None @@ -109,12 +106,12 @@ def unload_model(): model_accelerated = False #never unload models from GPU on high vram - if vram_state != HIGH_VRAM: + if vram_state != VRAMState.HIGH_VRAM: current_loaded_model.model.cpu() current_loaded_model.unpatch_model() current_loaded_model = None - if vram_state != HIGH_VRAM: + if vram_state != VRAMState.HIGH_VRAM: if len(current_gpu_controlnets) > 0: for n in current_gpu_controlnets: n.cpu() @@ -135,19 +132,19 @@ def load_model_gpu(model): model.unpatch_model() raise e current_loaded_model = model - if vram_state == CPU: + if vram_state == VRAMState.CPU: pass - elif vram_state == MPS: + elif vram_state == VRAMState.MPS: mps_device = torch.device("mps") real_model.to(mps_device) pass - elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM: + elif vram_state == VRAMState.NORMAL_VRAM or vram_state == VRAMState.HIGH_VRAM: model_accelerated = False real_model.cuda() else: - if vram_state == NO_VRAM: + if vram_state == VRAMState.NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) - elif vram_state == LOW_VRAM: + elif vram_state == VRAMState.LOW_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"}) accelerate.dispatch_model(real_model, device_map=device_map, main_device="cuda") @@ -157,10 +154,10 @@ def load_model_gpu(model): def load_controlnet_gpu(models): global current_gpu_controlnets global vram_state - if vram_state == CPU: + if vram_state == VRAMState.CPU: return - if vram_state == LOW_VRAM or vram_state == NO_VRAM: + if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: #don't load controlnets like this if low 
vram because they will be loaded right before running and unloaded right after return @@ -176,20 +173,20 @@ def load_controlnet_gpu(models): def load_if_low_vram(model): global vram_state - if vram_state == LOW_VRAM or vram_state == NO_VRAM: + if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: return model.cuda() return model def unload_if_low_vram(model): global vram_state - if vram_state == LOW_VRAM or vram_state == NO_VRAM: + if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: return model.cpu() return model def get_torch_device(): - if vram_state == MPS: + if vram_state == VRAMState.MPS: return torch.device("mps") - if vram_state == CPU: + if vram_state == VRAMState.CPU: return torch.device("cpu") else: return torch.cuda.current_device() @@ -201,9 +198,9 @@ def get_autocast_device(dev): def xformers_enabled(): - if vram_state == CPU: + if vram_state == VRAMState.CPU: return False - return XFORMERS_IS_AVAILBLE + return XFORMERS_IS_AVAILABLE def xformers_enabled_vae(): @@ -243,7 +240,7 @@ def get_free_memory(dev=None, torch_free_too=False): def maximum_batch_area(): global vram_state - if vram_state == NO_VRAM: + if vram_state == VRAMState.NO_VRAM: return 0 memory_free = get_free_memory() / (1024 * 1024) @@ -252,11 +249,11 @@ def maximum_batch_area(): def cpu_mode(): global vram_state - return vram_state == CPU + return vram_state == VRAMState.CPU def mps_mode(): global vram_state - return vram_state == MPS + return vram_state == VRAMState.MPS def should_use_fp16(): if cpu_mode() or mps_mode(): diff --git a/main.py b/main.py index 20c8a49e8..51a48fc6d 100644 --- a/main.py +++ b/main.py @@ -1,37 +1,14 @@ -import argparse import asyncio import os import shutil -import sys import threading +from comfy.cli_args import args if os.name == "nt": import logging logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Script Arguments") - - parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 0.0.0.0 if none given so the UI can be accessed from other computers.") - parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") - parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") - parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") - parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") - parser.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.") - parser.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") - parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") - parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") - parser.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. 
This option keeps them in GPU memory.") - parser.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") - parser.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") - parser.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") - parser.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") - parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") - parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") - parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build.") - - args = parser.parse_args() - if args.dont_upcast_attention: print("disabling upcasting of attention") os.environ['ATTN_PRECISION'] = "fp16" @@ -121,7 +98,7 @@ if __name__ == "__main__": if args.output_directory: output_dir = os.path.abspath(args.output_directory) - print("setting output directory to:", output_dir) + print(f"Setting output directory to: {output_dir}") folder_paths.set_output_directory(output_dir) port = args.port From 84b9c0ac2ff49b5b18b8e7804f8fe42a379a0787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Thu, 6 Apr 2023 12:27:22 +0800 Subject: [PATCH 05/24] Import intel_extension_for_pytorch as ipex --- comfy/model_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index f0b8be55e..379cc18d7 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -87,7 +87,7 @@ except: pass try: - import intel_extension_for_pytorch + import intel_extension_for_pytorch as ipex if torch.xpu.is_available(): vram_state = XPU except: From 7cb924f68469cd2481b2313f8e5fc02587279bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Thu, 6 Apr 2023 14:24:47 +0800 Subject: [PATCH 06/24] Use separate variables instead of `vram_state` --- comfy/model_management.py | 70 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 379cc18d7..a84167746 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -5,9 +5,9 @@ LOW_VRAM = 2 NORMAL_VRAM = 3 HIGH_VRAM = 4 MPS = 5 -XPU = 6 accelerate_enabled = False +xpu_available = False vram_state = NORMAL_VRAM total_vram = 0 @@ -22,7 +22,12 @@ set_vram_to = NORMAL_VRAM try: import torch - total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) + import intel_extension_for_pytorch as ipex + if torch.xpu.is_available(): + xpu_available = True + total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) + else: + total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) forced_normal_vram = "--normalvram" in sys.argv if not forced_normal_vram and not forced_cpu: @@ -86,17 +91,10 @@ try: except: pass -try: - import intel_extension_for_pytorch as ipex - if torch.xpu.is_available(): - vram_state = XPU -except: - pass - if forced_cpu: vram_state = CPU -print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS", "XPU"][vram_state]) +print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", 
"NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state]) current_loaded_model = None @@ -133,6 +131,7 @@ def load_model_gpu(model): global current_loaded_model global vram_state global model_accelerated + global xpu_available if model is current_loaded_model: return @@ -149,19 +148,19 @@ def load_model_gpu(model): mps_device = torch.device("mps") real_model.to(mps_device) pass - elif vram_state == XPU: - real_model.to("xpu") - pass elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM: model_accelerated = False - real_model.cuda() + if xpu_available: + real_model.to("xpu") + else: + real_model.cuda() else: if vram_state == NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) elif vram_state == LOW_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"}) - accelerate.dispatch_model(real_model, device_map=device_map, main_device="cuda") + accelerate.dispatch_model(real_model, device_map=device_map, main_device="xpu" if xpu_available else "cuda") model_accelerated = True return current_loaded_model @@ -187,8 +186,12 @@ def load_controlnet_gpu(models): def load_if_low_vram(model): global vram_state + global xpu_available if vram_state == LOW_VRAM or vram_state == NO_VRAM: - return model.cuda() + if xpu_available: + return model.to("xpu") + else: + return model.cuda() return model def unload_if_low_vram(model): @@ -198,14 +201,16 @@ def unload_if_low_vram(model): return model def get_torch_device(): + global xpu_available if vram_state == MPS: return torch.device("mps") - if vram_state == XPU: - return torch.device("xpu") if vram_state == CPU: return torch.device("cpu") else: - return torch.cuda.current_device() + if xpu_available: + return torch.device("xpu") + else: + return torch.cuda.current_device() def get_autocast_device(dev): if hasattr(dev, 'type'): @@ -235,22 +240,24 @@ def pytorch_attention_enabled(): return ENABLE_PYTORCH_ATTENTION def get_free_memory(dev=None, torch_free_too=False): + global xpu_available if dev is None: dev = get_torch_device() if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'): mem_free_total = psutil.virtual_memory().available mem_free_torch = mem_free_total - elif hasattr(dev, 'type') and (dev.type == 'xpu'): - mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) - mem_free_torch = mem_free_total else: - stats = torch.cuda.memory_stats(dev) - mem_active = stats['active_bytes.all.current'] - mem_reserved = stats['reserved_bytes.all.current'] - mem_free_cuda, _ = torch.cuda.mem_get_info(dev) - mem_free_torch = mem_reserved - mem_active - mem_free_total = mem_free_cuda + mem_free_torch + if xpu_available: + mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) + mem_free_torch = mem_free_total + else: + stats = torch.cuda.memory_stats(dev) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(dev) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch if torch_free_too: return (mem_free_total, mem_free_torch) @@ -274,12 +281,9 @@ def mps_mode(): global vram_state return vram_state == MPS -def xpu_mode(): - global vram_state - return vram_state == XPU - def should_use_fp16(): - if cpu_mode() or mps_mode() or xpu_mode(): + global xpu_available + if cpu_mode() or mps_mode() or xpu_available: 
return False #TODO ? if torch.cuda.is_bf16_supported(): From 60127a83040b3b243457980d04f3bb25c4491978 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Wed, 5 Apr 2023 23:57:31 -0700 Subject: [PATCH 07/24] diffusers loader --- comfy/diffusers_convert.py | 364 +++++++++++++++++++++ models/diffusers/put_diffusers_models_here | 0 nodes.py | 19 +- 3 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 comfy/diffusers_convert.py create mode 100644 models/diffusers/put_diffusers_models_here diff --git a/comfy/diffusers_convert.py b/comfy/diffusers_convert.py new file mode 100644 index 000000000..a31c1c11b --- /dev/null +++ b/comfy/diffusers_convert.py @@ -0,0 +1,364 @@ +import json +import os +import yaml + +# because of local import nonsense +import sys +sys.path.append(os.path.dirname(os.path.realpath(__file__))) + +import folder_paths +from comfy.ldm.util import instantiate_from_config +from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE +import os.path as osp +import re +import torch +from safetensors.torch import load_file, save_file + +# conversion code from https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py + +# =================# +# UNet Conversion # +# =================# + +unet_conversion_map = [ + # (stable-diffusion, HF Diffusers) + ("time_embed.0.weight", "time_embedding.linear_1.weight"), + ("time_embed.0.bias", "time_embedding.linear_1.bias"), + ("time_embed.2.weight", "time_embedding.linear_2.weight"), + ("time_embed.2.bias", "time_embedding.linear_2.bias"), + ("input_blocks.0.0.weight", "conv_in.weight"), + ("input_blocks.0.0.bias", "conv_in.bias"), + ("out.0.weight", "conv_norm_out.weight"), + ("out.0.bias", "conv_norm_out.bias"), + ("out.2.weight", "conv_out.weight"), + ("out.2.bias", "conv_out.bias"), +] + +unet_conversion_map_resnet = [ + # (stable-diffusion, HF Diffusers) + ("in_layers.0", "norm1"), + ("in_layers.2", "conv1"), + ("out_layers.0", "norm2"), + ("out_layers.3", "conv2"), + ("emb_layers.1", "time_emb_proj"), + ("skip_connection", "conv_shortcut"), +] + +unet_conversion_map_layer = [] +# hardcoded number of downblocks and resnets/attentions... +# would need smarter logic for other networks. +for i in range(4): + # loop over downblocks/upblocks + + for j in range(2): + # loop over resnets/attentions for downblocks + hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}." + sd_down_res_prefix = f"input_blocks.{3 * i + j + 1}.0." + unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix)) + + if i < 3: + # no attention layers in down_blocks.3 + hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}." + sd_down_atn_prefix = f"input_blocks.{3 * i + j + 1}.1." + unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix)) + + for j in range(3): + # loop over resnets/attentions for upblocks + hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}." + sd_up_res_prefix = f"output_blocks.{3 * i + j}.0." + unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix)) + + if i > 0: + # no attention layers in up_blocks.0 + hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}." + sd_up_atn_prefix = f"output_blocks.{3 * i + j}.1." + unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix)) + + if i < 3: + # no downsample in down_blocks.3 + hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv." + sd_downsample_prefix = f"input_blocks.{3 * (i + 1)}.0.op." 
+ unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix)) + + # no upsample in up_blocks.3 + hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0." + sd_upsample_prefix = f"output_blocks.{3 * i + 2}.{1 if i == 0 else 2}." + unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix)) + +hf_mid_atn_prefix = "mid_block.attentions.0." +sd_mid_atn_prefix = "middle_block.1." +unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix)) + +for j in range(2): + hf_mid_res_prefix = f"mid_block.resnets.{j}." + sd_mid_res_prefix = f"middle_block.{2 * j}." + unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix)) + + +def convert_unet_state_dict(unet_state_dict): + # buyer beware: this is a *brittle* function, + # and correct output requires that all of these pieces interact in + # the exact order in which I have arranged them. + mapping = {k: k for k in unet_state_dict.keys()} + for sd_name, hf_name in unet_conversion_map: + mapping[hf_name] = sd_name + for k, v in mapping.items(): + if "resnets" in k: + for sd_part, hf_part in unet_conversion_map_resnet: + v = v.replace(hf_part, sd_part) + mapping[k] = v + for k, v in mapping.items(): + for sd_part, hf_part in unet_conversion_map_layer: + v = v.replace(hf_part, sd_part) + mapping[k] = v + new_state_dict = {v: unet_state_dict[k] for k, v in mapping.items()} + return new_state_dict + + +# ================# +# VAE Conversion # +# ================# + +vae_conversion_map = [ + # (stable-diffusion, HF Diffusers) + ("nin_shortcut", "conv_shortcut"), + ("norm_out", "conv_norm_out"), + ("mid.attn_1.", "mid_block.attentions.0."), +] + +for i in range(4): + # down_blocks have two resnets + for j in range(2): + hf_down_prefix = f"encoder.down_blocks.{i}.resnets.{j}." + sd_down_prefix = f"encoder.down.{i}.block.{j}." + vae_conversion_map.append((sd_down_prefix, hf_down_prefix)) + + if i < 3: + hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0." + sd_downsample_prefix = f"down.{i}.downsample." + vae_conversion_map.append((sd_downsample_prefix, hf_downsample_prefix)) + + hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0." + sd_upsample_prefix = f"up.{3 - i}.upsample." + vae_conversion_map.append((sd_upsample_prefix, hf_upsample_prefix)) + + # up_blocks have three resnets + # also, up blocks in hf are numbered in reverse from sd + for j in range(3): + hf_up_prefix = f"decoder.up_blocks.{i}.resnets.{j}." + sd_up_prefix = f"decoder.up.{3 - i}.block.{j}." + vae_conversion_map.append((sd_up_prefix, hf_up_prefix)) + +# this part accounts for mid blocks in both the encoder and the decoder +for i in range(2): + hf_mid_res_prefix = f"mid_block.resnets.{i}." + sd_mid_res_prefix = f"mid.block_{i + 1}." 
+ vae_conversion_map.append((sd_mid_res_prefix, hf_mid_res_prefix)) + +vae_conversion_map_attn = [ + # (stable-diffusion, HF Diffusers) + ("norm.", "group_norm."), + ("q.", "query."), + ("k.", "key."), + ("v.", "value."), + ("proj_out.", "proj_attn."), +] + + +def reshape_weight_for_sd(w): + # convert HF linear weights to SD conv2d weights + return w.reshape(*w.shape, 1, 1) + + +def convert_vae_state_dict(vae_state_dict): + mapping = {k: k for k in vae_state_dict.keys()} + for k, v in mapping.items(): + for sd_part, hf_part in vae_conversion_map: + v = v.replace(hf_part, sd_part) + mapping[k] = v + for k, v in mapping.items(): + if "attentions" in k: + for sd_part, hf_part in vae_conversion_map_attn: + v = v.replace(hf_part, sd_part) + mapping[k] = v + new_state_dict = {v: vae_state_dict[k] for k, v in mapping.items()} + weights_to_convert = ["q", "k", "v", "proj_out"] + for k, v in new_state_dict.items(): + for weight_name in weights_to_convert: + if f"mid.attn_1.{weight_name}.weight" in k: + print(f"Reshaping {k} for SD format") + new_state_dict[k] = reshape_weight_for_sd(v) + return new_state_dict + + +# =========================# +# Text Encoder Conversion # +# =========================# + + +textenc_conversion_lst = [ + # (stable-diffusion, HF Diffusers) + ("resblocks.", "text_model.encoder.layers."), + ("ln_1", "layer_norm1"), + ("ln_2", "layer_norm2"), + (".c_fc.", ".fc1."), + (".c_proj.", ".fc2."), + (".attn", ".self_attn"), + ("ln_final.", "transformer.text_model.final_layer_norm."), + ("token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"), + ("positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"), +] +protected = {re.escape(x[1]): x[0] for x in textenc_conversion_lst} +textenc_pattern = re.compile("|".join(protected.keys())) + +# Ordering is from https://github.com/pytorch/pytorch/blob/master/test/cpp/api/modules.cpp +code2idx = {"q": 0, "k": 1, "v": 2} + + +def convert_text_enc_state_dict_v20(text_enc_dict): + new_state_dict = {} + capture_qkv_weight = {} + capture_qkv_bias = {} + for k, v in text_enc_dict.items(): + if ( + k.endswith(".self_attn.q_proj.weight") + or k.endswith(".self_attn.k_proj.weight") + or k.endswith(".self_attn.v_proj.weight") + ): + k_pre = k[: -len(".q_proj.weight")] + k_code = k[-len("q_proj.weight")] + if k_pre not in capture_qkv_weight: + capture_qkv_weight[k_pre] = [None, None, None] + capture_qkv_weight[k_pre][code2idx[k_code]] = v + continue + + if ( + k.endswith(".self_attn.q_proj.bias") + or k.endswith(".self_attn.k_proj.bias") + or k.endswith(".self_attn.v_proj.bias") + ): + k_pre = k[: -len(".q_proj.bias")] + k_code = k[-len("q_proj.bias")] + if k_pre not in capture_qkv_bias: + capture_qkv_bias[k_pre] = [None, None, None] + capture_qkv_bias[k_pre][code2idx[k_code]] = v + continue + + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k) + new_state_dict[relabelled_key] = v + + for k_pre, tensors in capture_qkv_weight.items(): + if None in tensors: + raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing") + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre) + new_state_dict[relabelled_key + ".in_proj_weight"] = torch.cat(tensors) + + for k_pre, tensors in capture_qkv_bias.items(): + if None in tensors: + raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing") + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], 
k_pre) + new_state_dict[relabelled_key + ".in_proj_bias"] = torch.cat(tensors) + + return new_state_dict + + +def convert_text_enc_state_dict(text_enc_dict): + return text_enc_dict + + +def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, embedding_directory=None): + diffusers_unet_conf = json.load(open(osp.join(model_path, "unet/config.json"))) + diffusers_scheduler_conf = json.load(open(osp.join(model_path, "scheduler/scheduler_config.json"))) + + # magic + v2 = diffusers_unet_conf["sample_size"] == 96 + v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction' + + if v2: + if v_pred: + config_path = folder_paths.get_full_path("configs", 'v2-inference-v.yaml') + else: + config_path = folder_paths.get_full_path("configs", 'v2-inference.yaml') + else: + config_path = folder_paths.get_full_path("configs", 'v1-inference.yaml') + + with open(config_path, 'r') as stream: + config = yaml.safe_load(stream) + + model_config_params = config['model']['params'] + clip_config = model_config_params['cond_stage_config'] + scale_factor = model_config_params['scale_factor'] + vae_config = model_config_params['first_stage_config'] + vae_config['scale_factor'] = scale_factor + + unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors") + vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.safetensors") + text_enc_path = osp.join(model_path, "text_encoder", "model.safetensors") + + # Load models from safetensors if it exists, if it doesn't pytorch + if osp.exists(unet_path): + unet_state_dict = load_file(unet_path, device="cpu") + else: + unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.bin") + unet_state_dict = torch.load(unet_path, map_location="cpu") + + if osp.exists(vae_path): + vae_state_dict = load_file(vae_path, device="cpu") + else: + vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.bin") + vae_state_dict = torch.load(vae_path, map_location="cpu") + + if osp.exists(text_enc_path): + text_enc_dict = load_file(text_enc_path, device="cpu") + else: + text_enc_path = osp.join(model_path, "text_encoder", "pytorch_model.bin") + text_enc_dict = torch.load(text_enc_path, map_location="cpu") + + # Convert the UNet model + unet_state_dict = convert_unet_state_dict(unet_state_dict) + unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + + # Convert the VAE model + vae_state_dict = convert_vae_state_dict(vae_state_dict) + vae_state_dict = {"first_stage_model." + k: v for k, v in vae_state_dict.items()} + + # Easiest way to identify v2.0 model seems to be that the text encoder (OpenCLIP) is deeper + is_v20_model = "text_model.encoder.layers.22.layer_norm2.bias" in text_enc_dict + + if is_v20_model: + # Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm + text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = convert_text_enc_state_dict_v20(text_enc_dict) + text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()} + else: + text_enc_dict = convert_text_enc_state_dict(text_enc_dict) + text_enc_dict = {"cond_stage_model.transformer." 
+ k: v for k, v in text_enc_dict.items()} + + # Put together new checkpoint + sd = {**unet_state_dict, **vae_state_dict, **text_enc_dict} + + clip = None + vae = None + + class WeightsLoader(torch.nn.Module): + pass + + w = WeightsLoader() + load_state_dict_to = [] + if output_vae: + vae = VAE(scale_factor=scale_factor, config=vae_config) + w.first_stage_model = vae.first_stage_model + load_state_dict_to = [w] + + if output_clip: + clip = CLIP(config=clip_config, embedding_directory=embedding_directory) + w.cond_stage_model = clip.cond_stage_model + load_state_dict_to = [w] + + model = instantiate_from_config(config["model"]) + model = load_model_weights(model, sd, verbose=False, load_state_dict_to=load_state_dict_to) + + if fp16: + model = model.half() + + return ModelPatcher(model), clip, vae diff --git a/models/diffusers/put_diffusers_models_here b/models/diffusers/put_diffusers_models_here new file mode 100644 index 000000000..e69de29bb diff --git a/nodes.py b/nodes.py index 187d54a11..776bc3819 100644 --- a/nodes.py +++ b/nodes.py @@ -4,13 +4,14 @@ import os import sys import json import hashlib -import copy import traceback from PIL import Image from PIL.PngImagePlugin import PngInfo import numpy as np +from comfy.diffusers_convert import load_diffusers + sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) @@ -219,6 +220,21 @@ class CheckpointLoaderSimple: out = comfy.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings")) return out +class DiffusersLoader: + @classmethod + def INPUT_TYPES(cls): + return {"required": {"model_path": (os.listdir(os.path.join(folder_paths.models_dir, 'diffusers'), ),), + }} + RETURN_TYPES = ("MODEL", "CLIP", "VAE") + FUNCTION = "load_checkpoint" + + CATEGORY = "loaders" + + def load_checkpoint(self, model_path, output_vae=True, output_clip=True): + model_path = os.path.join(folder_paths.models_dir, 'diffusers', model_path) + return load_diffusers(model_path, fp16=True, output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) + + class unCLIPCheckpointLoader: @classmethod def INPUT_TYPES(s): @@ -1076,6 +1092,7 @@ NODE_CLASS_MAPPINGS = { "TomePatchModel": TomePatchModel, "unCLIPCheckpointLoader": unCLIPCheckpointLoader, "CheckpointLoader": CheckpointLoader, + "DiffusersLoader": DiffusersLoader, } def load_custom_node(module_path): From c418d988ba59b3114770a0fa111d301f04880fca Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Wed, 5 Apr 2023 23:59:03 -0700 Subject: [PATCH 08/24] update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f7d24c45..90931141d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin - Many optimizations: Only re-executes the parts of the workflow that changes between executions. - Command line option: ```--lowvram``` to make it work on GPUs with less than 3GB vram (enabled automatically on GPUs with low vram) - Works even if you don't have a GPU with: ```--cpu``` (slow) -- Can load both ckpt and safetensors models/checkpoints. Standalone VAEs and CLIP models. +- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models. 
- Embeddings/Textual inversion - [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/) - Loading full workflows (with seeds) from generated PNG files. From 3d16077e3806b0817b1d43dc14f61e5dee5495c8 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 00:24:52 -0700 Subject: [PATCH 09/24] empty list if diffusers directory doesn't exist --- nodes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nodes.py b/nodes.py index 776bc3819..1af62887d 100644 --- a/nodes.py +++ b/nodes.py @@ -223,8 +223,11 @@ class CheckpointLoaderSimple: class DiffusersLoader: @classmethod def INPUT_TYPES(cls): - return {"required": {"model_path": (os.listdir(os.path.join(folder_paths.models_dir, 'diffusers'), ),), - }} + paths = [] + search_path = os.path.join(folder_paths.models_dir, 'diffusers') + if os.path.exists(search_path): + paths = next(os.walk(search_path))[1] + return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" From 42fd67b5cb0de9bd1228af7a93dec08b2f1486c3 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 00:28:06 -0700 Subject: [PATCH 10/24] use precision determined by model management --- nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodes.py b/nodes.py index 1af62887d..8271da04c 100644 --- a/nodes.py +++ b/nodes.py @@ -235,7 +235,7 @@ class DiffusersLoader: def load_checkpoint(self, model_path, output_vae=True, output_clip=True): model_path = os.path.join(folder_paths.models_dir, 'diffusers', model_path) - return load_diffusers(model_path, fp16=True, output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) + return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) class unCLIPCheckpointLoader: From 3e2608e12b312fd5d2396d4146d992cd4f8b9ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Thu, 6 Apr 2023 15:44:05 +0800 Subject: [PATCH 11/24] Fix auto lowvram detection on CUDA --- comfy/model_management.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index a84167746..b0123b5fc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -22,11 +22,12 @@ set_vram_to = NORMAL_VRAM try: import torch - import intel_extension_for_pytorch as ipex - if torch.xpu.is_available(): - xpu_available = True - total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) - else: + try: + import intel_extension_for_pytorch as ipex + if torch.xpu.is_available(): + xpu_available = True + total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) + except: total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) forced_normal_vram = "--normalvram" in sys.argv From 01c1fc669fb8cd41f627dad871257acbaaf24b47 Mon Sep 17 00:00:00 2001 From: EllangoK Date: Thu, 6 Apr 2023 13:19:00 -0400 Subject: [PATCH 12/24] set listen flag to listen on all if specifed --- comfy/cli_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 6a56e315c..a27dc7a7f 100644 --- a/comfy/cli_args.py +++ 
b/comfy/cli_args.py @@ -2,7 +2,7 @@ import argparse parser = argparse.ArgumentParser() -parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 127.0.0.1 if none given so the UI can be accessed from other computers.") +parser.add_argument("--listen", nargs="?", const="0.0.0.0", default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") From 7d62d89f9325348179fc9b0db146ff50fa7c808c Mon Sep 17 00:00:00 2001 From: EllangoK Date: Wed, 5 Apr 2023 13:08:08 -0400 Subject: [PATCH 13/24] add cors middleware --- server.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 840d9a4e7..005bf9b2c 100644 --- a/server.py +++ b/server.py @@ -27,6 +27,19 @@ async def cache_control(request: web.Request, handler): response.headers.setdefault('Cache-Control', 'no-cache') return response +@web.middleware +async def cors_middleware(request: web.Request, handler): + if request.method == "OPTIONS": + # Pre-flight request. Reply successfully: + response = web.Response() + else: + response = await handler(request) + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'POST, GET, DELETE, PUT, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + response.headers['Access-Control-Allow-Credentials'] = 'true' + return response + class PromptServer(): def __init__(self, loop): PromptServer.instance = self @@ -37,7 +50,7 @@ class PromptServer(): self.loop = loop self.messages = asyncio.Queue() self.number = 0 - self.app = web.Application(client_max_size=20971520, middlewares=[cache_control]) + self.app = web.Application(client_max_size=20971520, middlewares=[cache_control, cors_middleware]) self.sockets = dict() self.web_root = os.path.join(os.path.dirname( os.path.realpath(__file__)), "web") From 48efae16084b423166f9a1930b989489169d22cf Mon Sep 17 00:00:00 2001 From: EllangoK Date: Thu, 6 Apr 2023 15:06:22 -0400 Subject: [PATCH 14/24] makes cors a cli parameter --- comfy/cli_args.py | 3 ++- server.py | 36 +++++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index a27dc7a7f..5133e0ae5 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -4,8 +4,10 @@ parser = argparse.ArgumentParser() parser.add_argument("--listen", nargs="?", const="0.0.0.0", default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. 
(listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") +parser.add_argument("--cors", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") +parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") attn_group = parser.add_mutually_exclusive_group() @@ -13,7 +15,6 @@ attn_group.add_argument("--use-split-cross-attention", action="store_true", help attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") -parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") vram_group = parser.add_mutually_exclusive_group() vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.") diff --git a/server.py b/server.py index 005bf9b2c..a9c0b4599 100644 --- a/server.py +++ b/server.py @@ -18,6 +18,7 @@ except ImportError: sys.exit() import mimetypes +from comfy.cli_args import args @web.middleware @@ -27,18 +28,22 @@ async def cache_control(request: web.Request, handler): response.headers.setdefault('Cache-Control', 'no-cache') return response -@web.middleware -async def cors_middleware(request: web.Request, handler): - if request.method == "OPTIONS": - # Pre-flight request. Reply successfully: - response = web.Response() - else: - response = await handler(request) - response.headers['Access-Control-Allow-Origin'] = '*' - response.headers['Access-Control-Allow-Methods'] = 'POST, GET, DELETE, PUT, OPTIONS' - response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' - response.headers['Access-Control-Allow-Credentials'] = 'true' - return response +def create_cors_middleware(allowed_origin: str): + @web.middleware + async def cors_middleware(request: web.Request, handler): + if request.method == "OPTIONS": + # Pre-flight request. 
Reply successfully: + response = web.Response() + else: + response = await handler(request) + + response.headers['Access-Control-Allow-Origin'] = allowed_origin + response.headers['Access-Control-Allow-Methods'] = 'POST, GET, DELETE, PUT, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + response.headers['Access-Control-Allow-Credentials'] = 'true' + return response + + return cors_middleware class PromptServer(): def __init__(self, loop): @@ -50,7 +55,12 @@ class PromptServer(): self.loop = loop self.messages = asyncio.Queue() self.number = 0 - self.app = web.Application(client_max_size=20971520, middlewares=[cache_control, cors_middleware]) + + middlewares = [cache_control] + if args.cors: + middlewares.append(create_cors_middleware(args.cors)) + + self.app = web.Application(client_max_size=20971520, middlewares=middlewares) self.sockets = dict() self.web_root = os.path.join(os.path.dirname( os.path.realpath(__file__)), "web") From f84f2508cc45a014cc27e023e9623db0450d237e Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Apr 2023 15:24:55 -0400 Subject: [PATCH 15/24] Rename the cors parameter to something more verbose. --- comfy/cli_args.py | 2 +- server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 5133e0ae5..f2960ae31 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -4,7 +4,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--listen", nargs="?", const="0.0.0.0", default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") -parser.add_argument("--cors", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") +parser.add_argument("--enable-cors-header", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") diff --git a/server.py b/server.py index a9c0b4599..95cdeb051 100644 --- a/server.py +++ b/server.py @@ -57,8 +57,8 @@ class PromptServer(): self.number = 0 middlewares = [cache_control] - if args.cors: - middlewares.append(create_cors_middleware(args.cors)) + if args.enable_cors_header: + middlewares.append(create_cors_middleware(args.enable_cors_header)) self.app = web.Application(client_max_size=20971520, middlewares=middlewares) self.sockets = dict() From 28fff5d1dbba8b4a546e31c69240133f35b2235f Mon Sep 17 00:00:00 2001 From: EllangoK Date: Thu, 6 Apr 2023 19:06:39 -0400 Subject: [PATCH 16/24] fixes lack of support for multi configs also adds some metavars to argarse --- comfy/cli_args.py | 8 ++++---- main.py | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index f2960ae31..b6898cea9 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -2,12 +2,12 @@ import argparse parser = argparse.ArgumentParser() -parser.add_argument("--listen", nargs="?", const="0.0.0.0", 
default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") +parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") -parser.add_argument("--enable-cors-header", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") -parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") +parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") +parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") -parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") +parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") attn_group = parser.add_mutually_exclusive_group() diff --git a/main.py b/main.py index 51a48fc6d..9c0a3d8a1 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,9 @@ import asyncio +import itertools import os import shutil import threading + from comfy.cli_args import args if os.name == "nt": @@ -94,7 +96,8 @@ if __name__ == "__main__": load_extra_path_config(extra_model_paths_config_path) if args.extra_model_paths_config: - load_extra_path_config(args.extra_model_paths_config) + for config_path in itertools.chain(*args.extra_model_paths_config): + load_extra_path_config(config_path) if args.output_directory: output_dir = os.path.abspath(args.output_directory) From 60b4c31ab3c2ec16575c26d9d08ecabc8643b381 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Apr 2023 22:22:59 -0400 Subject: [PATCH 17/24] Add webp images to upload accept list. --- web/scripts/widgets.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/scripts/widgets.js b/web/scripts/widgets.js index 5f5043cd0..d1a9c6c6e 100644 --- a/web/scripts/widgets.js +++ b/web/scripts/widgets.js @@ -306,7 +306,7 @@ export const ComfyWidgets = { const fileInput = document.createElement("input"); Object.assign(fileInput, { type: "file", - accept: "image/jpeg,image/png", + accept: "image/jpeg,image/png,image/webp", style: "display: none", onchange: async () => { if (fileInput.files.length) { From bceccca0e59862c3410b5d99b47fe1e01ba914af Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Apr 2023 23:52:34 -0400 Subject: [PATCH 18/24] Small refactor. 
--- comfy/model_management.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 92c59efe7..504da2190 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -129,7 +129,6 @@ def load_model_gpu(model): global current_loaded_model global vram_state global model_accelerated - global xpu_available if model is current_loaded_model: return @@ -148,17 +147,14 @@ def load_model_gpu(model): pass elif vram_state == VRAMState.NORMAL_VRAM or vram_state == VRAMState.HIGH_VRAM: model_accelerated = False - if xpu_available: - real_model.to("xpu") - else: - real_model.cuda() + real_model.to(get_torch_device()) else: if vram_state == VRAMState.NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) elif vram_state == VRAMState.LOW_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"}) - accelerate.dispatch_model(real_model, device_map=device_map, main_device="xpu" if xpu_available else "cuda") + accelerate.dispatch_model(real_model, device_map=device_map, main_device=get_torch_device()) model_accelerated = True return current_loaded_model @@ -184,12 +180,8 @@ def load_controlnet_gpu(models): def load_if_low_vram(model): global vram_state - global xpu_available if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: - if xpu_available: - return model.to("xpu") - else: - return model.cuda() + return model.to(get_torch_device()) return model def unload_if_low_vram(model): From 64557d67810c81f72bd6a7544bd8930488868319 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Apr 2023 00:27:54 -0400 Subject: [PATCH 19/24] Add a --force-fp32 argument to force fp32 for debugging. --- comfy/cli_args.py | 1 + comfy/model_management.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index b6898cea9..739891f71 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -9,6 +9,7 @@ parser.add_argument("--extra-model-paths-config", type=str, default=None, metava parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") +parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).") attn_group = parser.add_mutually_exclusive_group() attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. 
Ignored when xformers is used.") diff --git a/comfy/model_management.py b/comfy/model_management.py index 504da2190..2407140fd 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -69,6 +69,11 @@ elif args.novram: elif args.highvram: vram_state = VRAMState.HIGH_VRAM +FORCE_FP32 = False +if args.force_fp32: + print("Forcing FP32, if this improves things please report it.") + FORCE_FP32 = True + if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM): try: @@ -273,6 +278,9 @@ def mps_mode(): def should_use_fp16(): global xpu_available + if FORCE_FP32: + return False + if cpu_mode() or mps_mode() or xpu_available: return False #TODO ? From 72a8973bd56b7cc179eb603ccd61385fdca5766d Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 21:45:08 -0700 Subject: [PATCH 20/24] allow configurable path for diffusers models --- folder_paths.py | 1 + nodes.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/folder_paths.py b/folder_paths.py index f13e4895f..ab3359347 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -23,6 +23,7 @@ folder_names_and_paths["clip"] = ([os.path.join(models_dir, "clip")], supported_ folder_names_and_paths["clip_vision"] = ([os.path.join(models_dir, "clip_vision")], supported_pt_extensions) folder_names_and_paths["style_models"] = ([os.path.join(models_dir, "style_models")], supported_pt_extensions) folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")], supported_pt_extensions) +folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"]) folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions) folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions) diff --git a/nodes.py b/nodes.py index 8271da04c..934b458f2 100644 --- a/nodes.py +++ b/nodes.py @@ -224,7 +224,7 @@ class DiffusersLoader: @classmethod def INPUT_TYPES(cls): paths = [] - search_path = os.path.join(folder_paths.models_dir, 'diffusers') + search_path = folder_paths.get_folder_paths("diffusers")[0] if os.path.exists(search_path): paths = next(os.walk(search_path))[1] return {"required": {"model_path": (paths,), }} From f51b7a92c72b5fe7a12d642a545e59f1f6150fb4 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 21:48:58 -0700 Subject: [PATCH 21/24] search all diffusers paths (oops) --- nodes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nodes.py b/nodes.py index 934b458f2..a4366f834 100644 --- a/nodes.py +++ b/nodes.py @@ -224,9 +224,10 @@ class DiffusersLoader: @classmethod def INPUT_TYPES(cls): paths = [] - search_path = folder_paths.get_folder_paths("diffusers")[0] - if os.path.exists(search_path): - paths = next(os.walk(search_path))[1] + search_paths = folder_paths.get_folder_paths("diffusers") + for search_path in search_paths: + if os.path.exists(search_path): + paths = next(os.walk(search_path))[1] return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" From 7734d65f22a8f30f73cb72e81586b2d015229060 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 22:02:26 -0700 Subject: [PATCH 22/24] fix loading alt folders --- nodes.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nodes.py b/nodes.py index a4366f834..274ae2f1f 100644 --- a/nodes.py +++ b/nodes.py @@ -224,10 +224,9 @@ class DiffusersLoader: 
@classmethod def INPUT_TYPES(cls): paths = [] - search_paths = folder_paths.get_folder_paths("diffusers") - for search_path in search_paths: + for search_path in folder_paths.get_folder_paths("diffusers"): if os.path.exists(search_path): - paths = next(os.walk(search_path))[1] + paths += next(os.walk(search_path))[1] return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" @@ -235,7 +234,13 @@ class DiffusersLoader: CATEGORY = "loaders" def load_checkpoint(self, model_path, output_vae=True, output_clip=True): - model_path = os.path.join(folder_paths.models_dir, 'diffusers', model_path) + for search_path in folder_paths.get_folder_paths("diffusers"): + if os.path.exists(search_path): + paths = next(os.walk(search_path))[1] + if model_path in paths: + model_path = os.path.join(search_path, model_path) + break + search_paths = folder_paths.get_folder_paths("diffusers") return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) From 58ed0f2da438aaf253f9880578d694ad917819f8 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Apr 2023 01:28:15 -0400 Subject: [PATCH 23/24] Fix loading SD1.5 diffusers checkpoint. --- comfy/diffusers_convert.py | 4 +++- nodes.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/comfy/diffusers_convert.py b/comfy/diffusers_convert.py index a31c1c11b..950137f2c 100644 --- a/comfy/diffusers_convert.py +++ b/comfy/diffusers_convert.py @@ -272,7 +272,8 @@ def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, emb # magic v2 = diffusers_unet_conf["sample_size"] == 96 - v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction' + if 'prediction_type' in diffusers_scheduler_conf: + v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction' if v2: if v_pred: @@ -290,6 +291,7 @@ def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, emb scale_factor = model_config_params['scale_factor'] vae_config = model_config_params['first_stage_config'] vae_config['scale_factor'] = scale_factor + model_config_params["unet_config"]["params"]["use_fp16"] = fp16 unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors") vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.safetensors") diff --git a/nodes.py b/nodes.py index 274ae2f1f..025e4fcb4 100644 --- a/nodes.py +++ b/nodes.py @@ -231,7 +231,7 @@ class DiffusersLoader: RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" - CATEGORY = "loaders" + CATEGORY = "advanced/loaders" def load_checkpoint(self, model_path, output_vae=True, output_clip=True): for search_path in folder_paths.get_folder_paths("diffusers"): @@ -240,7 +240,7 @@ class DiffusersLoader: if model_path in paths: model_path = os.path.join(search_path, model_path) break - search_paths = folder_paths.get_folder_paths("diffusers") + return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) From 44fea050649347ca4b4e7317a83d11c3b4b87f87 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Apr 2023 02:29:56 -0400 Subject: [PATCH 24/24] Cleanup. 
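This cleanup drops the sys.path.append() workaround from comfy/diffusers_convert.py and has nodes.py import the module through its package path, so load_diffusers is always referenced the same way. A short usage sketch under those assumptions — the model folder name is hypothetical and the three-way unpacking is inferred from the node's RETURN_TYPES:

    import comfy.diffusers_convert
    import comfy.model_management
    import folder_paths

    # Hypothetical diffusers checkpoint folder under models/diffusers/.
    model_path = "models/diffusers/stable-diffusion-v1-5"

    model, clip, vae = comfy.diffusers_convert.load_diffusers(
        model_path,
        fp16=comfy.model_management.should_use_fp16(),
        output_vae=True,
        output_clip=True,
        embedding_directory=folder_paths.get_folder_paths("embeddings"))

The keyword arguments mirror the call in DiffusersLoader.load_checkpoint(); only the import style changes here, from a bare load_diffusers name to the qualified comfy.diffusers_convert.load_diffusers.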
--- comfy/diffusers_convert.py | 4 ---- nodes.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/comfy/diffusers_convert.py b/comfy/diffusers_convert.py index 950137f2c..ceca80305 100644 --- a/comfy/diffusers_convert.py +++ b/comfy/diffusers_convert.py @@ -2,10 +2,6 @@ import json import os import yaml -# because of local import nonsense -import sys -sys.path.append(os.path.dirname(os.path.realpath(__file__))) - import folder_paths from comfy.ldm.util import instantiate_from_config from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE diff --git a/nodes.py b/nodes.py index 025e4fcb4..5c3b3a4ee 100644 --- a/nodes.py +++ b/nodes.py @@ -10,11 +10,11 @@ from PIL import Image from PIL.PngImagePlugin import PngInfo import numpy as np -from comfy.diffusers_convert import load_diffusers sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) +import comfy.diffusers_convert import comfy.samplers import comfy.sd import comfy.utils @@ -241,7 +241,7 @@ class DiffusersLoader: model_path = os.path.join(search_path, model_path) break - return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) + return comfy.diffusers_convert.load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) class unCLIPCheckpointLoader: