From 37713e3b0acfc576f4eafc0b47582374ab5987dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Wed, 5 Apr 2023 21:22:14 +0800 Subject: [PATCH 01/24] Add basic XPU device support closed #387 --- comfy/model_management.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 052dfb775..f0b8be55e 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -5,6 +5,7 @@ LOW_VRAM = 2 NORMAL_VRAM = 3 HIGH_VRAM = 4 MPS = 5 +XPU = 6 accelerate_enabled = False vram_state = NORMAL_VRAM @@ -85,10 +86,17 @@ try: except: pass +try: + import intel_extension_for_pytorch + if torch.xpu.is_available(): + vram_state = XPU +except: + pass + if forced_cpu: vram_state = CPU -print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state]) +print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS", "XPU"][vram_state]) current_loaded_model = None @@ -141,6 +149,9 @@ def load_model_gpu(model): mps_device = torch.device("mps") real_model.to(mps_device) pass + elif vram_state == XPU: + real_model.to("xpu") + pass elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM: model_accelerated = False real_model.cuda() @@ -189,6 +200,8 @@ def unload_if_low_vram(model): def get_torch_device(): if vram_state == MPS: return torch.device("mps") + if vram_state == XPU: + return torch.device("xpu") if vram_state == CPU: return torch.device("cpu") else: @@ -228,6 +241,9 @@ def get_free_memory(dev=None, torch_free_too=False): if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'): mem_free_total = psutil.virtual_memory().available mem_free_torch = mem_free_total + elif hasattr(dev, 'type') and (dev.type == 'xpu'): + mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) + mem_free_torch = mem_free_total else: stats = torch.cuda.memory_stats(dev) mem_active = stats['active_bytes.all.current'] @@ -258,8 +274,12 @@ def mps_mode(): global vram_state return vram_state == MPS +def xpu_mode(): + global vram_state + return vram_state == XPU + def should_use_fp16(): - if cpu_mode() or mps_mode(): + if cpu_mode() or mps_mode() or xpu_mode(): return False #TODO ? 
if torch.cuda.is_bf16_supported(): From 1ced2bdd2da9a13caf72d7bff36d7f645f443fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Wed, 5 Apr 2023 21:25:37 +0800 Subject: [PATCH 02/24] Specify safetensors version to avoid upstream errors https://github.com/huggingface/safetensors/issues/142 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3b4040a29..0527b31df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ torchsde einops open-clip-torch transformers>=4.25.1 -safetensors +safetensors>=0.3.0 pytorch_lightning aiohttp accelerate From dd29966f8a2973529ea50de2ef3d0e7c72b5114e Mon Sep 17 00:00:00 2001 From: EllangoK Date: Wed, 5 Apr 2023 20:32:59 -0400 Subject: [PATCH 03/24] changes main.py to use argparse --- main.py | 118 ++++++++++++++++++++++---------------------------------- 1 file changed, 47 insertions(+), 71 deletions(-) diff --git a/main.py b/main.py index a3549b86f..20c8a49e8 100644 --- a/main.py +++ b/main.py @@ -1,57 +1,54 @@ -import os -import sys -import shutil - -import threading +import argparse import asyncio +import os +import shutil +import sys +import threading if os.name == "nt": import logging logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) if __name__ == "__main__": - if '--help' in sys.argv: - print() - print("Valid Command line Arguments:") - print("\t--listen [ip]\t\t\tListen on ip or 0.0.0.0 if none given so the UI can be accessed from other computers.") - print("\t--port 8188\t\t\tSet the listen port.") - print() - print("\t--extra-model-paths-config file.yaml\tload an extra_model_paths.yaml file.") - print("\t--output-directory path/to/output\tSet the ComfyUI output directory.") - print() - print() - print("\t--dont-upcast-attention\t\tDisable upcasting of attention \n\t\t\t\t\tcan boost speed but increase the chances of black images.\n") - print("\t--use-split-cross-attention\tUse the split cross attention optimization instead of the sub-quadratic one.\n\t\t\t\t\tIgnored when xformers is used.") - print("\t--use-pytorch-cross-attention\tUse the new pytorch 2.0 cross attention function.") - print("\t--disable-xformers\t\tdisables xformers") - print("\t--cuda-device 1\t\tSet the id of the cuda device this instance will use.") - print() - print("\t--highvram\t\t\tBy default models will be unloaded to CPU memory after being used.\n\t\t\t\t\tThis option keeps them in GPU memory.\n") - print("\t--normalvram\t\t\tUsed to force normal vram use if lowvram gets automatically enabled.") - print("\t--lowvram\t\t\tSplit the unet in parts to use less vram.") - print("\t--novram\t\t\tWhen lowvram isn't enough.") - print() - print("\t--cpu\t\t\tTo use the CPU for everything (slow).") - exit() + parser = argparse.ArgumentParser(description="Script Arguments") - if '--dont-upcast-attention' in sys.argv: + parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 0.0.0.0 if none given so the UI can be accessed from other computers.") + parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") + parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") + parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") + parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable 
upcasting of attention. Can boost speed but increase the chances of black images.") + parser.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.") + parser.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") + parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") + parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") + parser.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.") + parser.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") + parser.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") + parser.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") + parser.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") + parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") + parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") + parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build.") + + args = parser.parse_args() + + if args.dont_upcast_attention: print("disabling upcasting of attention") os.environ['ATTN_PRECISION'] = "fp16" - try: - index = sys.argv.index('--cuda-device') - device = sys.argv[index + 1] - os.environ['CUDA_VISIBLE_DEVICES'] = device - print("Set cuda device to:", device) - except: - pass + if args.cuda_device is not None: + os.environ['CUDA_VISIBLE_DEVICES'] = str(args.cuda_device) + print("Set cuda device to:", args.cuda_device) + -from nodes import init_custom_nodes -import execution -import server -import folder_paths import yaml +import execution +import folder_paths +import server +from nodes import init_custom_nodes + + def prompt_worker(q, server): e = execution.PromptExecutor(server) while True: @@ -110,51 +107,30 @@ if __name__ == "__main__": hijack_progress(server) threading.Thread(target=prompt_worker, daemon=True, args=(q,server,)).start() - try: - address = '0.0.0.0' - p_index = sys.argv.index('--listen') - try: - ip = sys.argv[p_index + 1] - if ip[:2] != '--': - address = ip - except: - pass - except: - address = '127.0.0.1' - dont_print = False - if '--dont-print-server' in sys.argv: - dont_print = True + address = args.listen + + dont_print = args.dont_print_server extra_model_paths_config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "extra_model_paths.yaml") if os.path.isfile(extra_model_paths_config_path): load_extra_path_config(extra_model_paths_config_path) - if '--extra-model-paths-config' in sys.argv: - indices = [(i + 1) for i in range(len(sys.argv) - 1) if sys.argv[i] == '--extra-model-paths-config'] - for i in indices: - load_extra_path_config(sys.argv[i]) + if args.extra_model_paths_config: + load_extra_path_config(args.extra_model_paths_config) - try: - output_dir = sys.argv[sys.argv.index('--output-directory') + 1] - output_dir = os.path.abspath(output_dir) + if args.output_directory: + output_dir = os.path.abspath(args.output_directory) print("setting output directory to:", output_dir) 
folder_paths.set_output_directory(output_dir) - except: - pass - port = 8188 - try: - p_index = sys.argv.index('--port') - port = int(sys.argv[p_index + 1]) - except: - pass + port = args.port - if '--quick-test-for-ci' in sys.argv: + if args.quick_test_for_ci: exit(0) call_on_start = None - if "--windows-standalone-build" in sys.argv: + if args.windows_standalone_build: def startup_server(address, port): import webbrowser webbrowser.open("http://{}:{}".format(address, port)) From e5e587b1c0c5dc728d65b3e84592445cdb5e6e9b Mon Sep 17 00:00:00 2001 From: EllangoK Date: Wed, 5 Apr 2023 23:41:23 -0400 Subject: [PATCH 04/24] seperates out arg parser and imports args --- comfy/cli_args.py | 29 +++++++++ comfy/ldm/modules/attention.py | 5 +- comfy/model_management.py | 111 ++++++++++++++++----------------- main.py | 27 +------- 4 files changed, 88 insertions(+), 84 deletions(-) create mode 100644 comfy/cli_args.py diff --git a/comfy/cli_args.py b/comfy/cli_args.py new file mode 100644 index 000000000..6a56e315c --- /dev/null +++ b/comfy/cli_args.py @@ -0,0 +1,29 @@ +import argparse + +parser = argparse.ArgumentParser() + +parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 127.0.0.1 if none given so the UI can be accessed from other computers.") +parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") +parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") +parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") +parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") + +attn_group = parser.add_mutually_exclusive_group() +attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.") +attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") + +parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") +parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") + +vram_group = parser.add_mutually_exclusive_group() +vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. 
This option keeps them in GPU memory.") +vram_group.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") +vram_group.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") +vram_group.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") +vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") + +parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") +parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") +parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build.") + +args = parser.parse_args() diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 07553627c..92b3eca7c 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -21,6 +21,8 @@ if model_management.xformers_enabled(): import os _ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32") +from cli_args import args + def exists(val): return val is not None @@ -474,7 +476,6 @@ class CrossAttentionPytorch(nn.Module): return self.to_out(out) -import sys if model_management.xformers_enabled(): print("Using xformers cross attention") CrossAttention = MemoryEfficientCrossAttention @@ -482,7 +483,7 @@ elif model_management.pytorch_attention_enabled(): print("Using pytorch cross attention") CrossAttention = CrossAttentionPytorch else: - if "--use-split-cross-attention" in sys.argv: + if args.use_split_cross_attention: print("Using split optimization for cross attention") CrossAttention = CrossAttentionDoggettx else: diff --git a/comfy/model_management.py b/comfy/model_management.py index 052dfb775..7dda073dc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -1,36 +1,35 @@ +import psutil +from enum import Enum +from cli_args import args -CPU = 0 -NO_VRAM = 1 -LOW_VRAM = 2 -NORMAL_VRAM = 3 -HIGH_VRAM = 4 -MPS = 5 +class VRAMState(Enum): + CPU = 0 + NO_VRAM = 1 + LOW_VRAM = 2 + NORMAL_VRAM = 3 + HIGH_VRAM = 4 + MPS = 5 -accelerate_enabled = False -vram_state = NORMAL_VRAM +# Determine VRAM State +vram_state = VRAMState.NORMAL_VRAM +set_vram_to = VRAMState.NORMAL_VRAM total_vram = 0 total_vram_available_mb = -1 -import sys -import psutil - -forced_cpu = "--cpu" in sys.argv - -set_vram_to = NORMAL_VRAM +accelerate_enabled = False try: import torch total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) - forced_normal_vram = "--normalvram" in sys.argv - if not forced_normal_vram and not forced_cpu: + if not args.normalvram and not args.cpu: if total_vram <= 4096: print("Trying to enable lowvram mode because your GPU seems to have 4GB or less. If you don't want this use: --normalvram") - set_vram_to = LOW_VRAM + set_vram_to = VRAMState.LOW_VRAM elif total_vram > total_ram * 1.1 and total_vram > 14336: print("Enabling highvram mode because your GPU has more vram than your computer has ram. 
If you don't want this use: --normalvram") - vram_state = HIGH_VRAM + vram_state = VRAMState.HIGH_VRAM except: pass @@ -39,34 +38,32 @@ try: except: OOM_EXCEPTION = Exception -if "--disable-xformers" in sys.argv: - XFORMERS_IS_AVAILBLE = False +if args.disable_xformers: + XFORMERS_IS_AVAILABLE = False else: try: import xformers import xformers.ops - XFORMERS_IS_AVAILBLE = True + XFORMERS_IS_AVAILABLE = True except: - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False -ENABLE_PYTORCH_ATTENTION = False -if "--use-pytorch-cross-attention" in sys.argv: +ENABLE_PYTORCH_ATTENTION = args.use_pytorch_cross_attention +if ENABLE_PYTORCH_ATTENTION: torch.backends.cuda.enable_math_sdp(True) torch.backends.cuda.enable_flash_sdp(True) torch.backends.cuda.enable_mem_efficient_sdp(True) - ENABLE_PYTORCH_ATTENTION = True - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False + +if args.lowvram: + set_vram_to = VRAMState.LOW_VRAM +elif args.novram: + set_vram_to = VRAMState.NO_VRAM +elif args.highvram: + vram_state = VRAMState.HIGH_VRAM -if "--lowvram" in sys.argv: - set_vram_to = LOW_VRAM -if "--novram" in sys.argv: - set_vram_to = NO_VRAM -if "--highvram" in sys.argv: - vram_state = HIGH_VRAM - - -if set_vram_to == LOW_VRAM or set_vram_to == NO_VRAM: +if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM): try: import accelerate accelerate_enabled = True @@ -81,14 +78,14 @@ if set_vram_to == LOW_VRAM or set_vram_to == NO_VRAM: try: if torch.backends.mps.is_available(): - vram_state = MPS + vram_state = VRAMState.MPS except: pass -if forced_cpu: - vram_state = CPU +if args.cpu: + vram_state = VRAMState.CPU -print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state]) +print(f"Set vram state to: {vram_state.name}") current_loaded_model = None @@ -109,12 +106,12 @@ def unload_model(): model_accelerated = False #never unload models from GPU on high vram - if vram_state != HIGH_VRAM: + if vram_state != VRAMState.HIGH_VRAM: current_loaded_model.model.cpu() current_loaded_model.unpatch_model() current_loaded_model = None - if vram_state != HIGH_VRAM: + if vram_state != VRAMState.HIGH_VRAM: if len(current_gpu_controlnets) > 0: for n in current_gpu_controlnets: n.cpu() @@ -135,19 +132,19 @@ def load_model_gpu(model): model.unpatch_model() raise e current_loaded_model = model - if vram_state == CPU: + if vram_state == VRAMState.CPU: pass - elif vram_state == MPS: + elif vram_state == VRAMState.MPS: mps_device = torch.device("mps") real_model.to(mps_device) pass - elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM: + elif vram_state == VRAMState.NORMAL_VRAM or vram_state == VRAMState.HIGH_VRAM: model_accelerated = False real_model.cuda() else: - if vram_state == NO_VRAM: + if vram_state == VRAMState.NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) - elif vram_state == LOW_VRAM: + elif vram_state == VRAMState.LOW_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"}) accelerate.dispatch_model(real_model, device_map=device_map, main_device="cuda") @@ -157,10 +154,10 @@ def load_model_gpu(model): def load_controlnet_gpu(models): global current_gpu_controlnets global vram_state - if vram_state == CPU: + if vram_state == VRAMState.CPU: return - if vram_state == LOW_VRAM or vram_state == NO_VRAM: + if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: #don't load controlnets like this if low 
vram because they will be loaded right before running and unloaded right after return @@ -176,20 +173,20 @@ def load_controlnet_gpu(models): def load_if_low_vram(model): global vram_state - if vram_state == LOW_VRAM or vram_state == NO_VRAM: + if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: return model.cuda() return model def unload_if_low_vram(model): global vram_state - if vram_state == LOW_VRAM or vram_state == NO_VRAM: + if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: return model.cpu() return model def get_torch_device(): - if vram_state == MPS: + if vram_state == VRAMState.MPS: return torch.device("mps") - if vram_state == CPU: + if vram_state == VRAMState.CPU: return torch.device("cpu") else: return torch.cuda.current_device() @@ -201,9 +198,9 @@ def get_autocast_device(dev): def xformers_enabled(): - if vram_state == CPU: + if vram_state == VRAMState.CPU: return False - return XFORMERS_IS_AVAILBLE + return XFORMERS_IS_AVAILABLE def xformers_enabled_vae(): @@ -243,7 +240,7 @@ def get_free_memory(dev=None, torch_free_too=False): def maximum_batch_area(): global vram_state - if vram_state == NO_VRAM: + if vram_state == VRAMState.NO_VRAM: return 0 memory_free = get_free_memory() / (1024 * 1024) @@ -252,11 +249,11 @@ def maximum_batch_area(): def cpu_mode(): global vram_state - return vram_state == CPU + return vram_state == VRAMState.CPU def mps_mode(): global vram_state - return vram_state == MPS + return vram_state == VRAMState.MPS def should_use_fp16(): if cpu_mode() or mps_mode(): diff --git a/main.py b/main.py index 20c8a49e8..51a48fc6d 100644 --- a/main.py +++ b/main.py @@ -1,37 +1,14 @@ -import argparse import asyncio import os import shutil -import sys import threading +from comfy.cli_args import args if os.name == "nt": import logging logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Script Arguments") - - parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 0.0.0.0 if none given so the UI can be accessed from other computers.") - parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") - parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") - parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") - parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") - parser.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. Ignored when xformers is used.") - parser.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") - parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") - parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") - parser.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. 
This option keeps them in GPU memory.") - parser.add_argument("--normalvram", action="store_true", help="Used to force normal vram use if lowvram gets automatically enabled.") - parser.add_argument("--lowvram", action="store_true", help="Split the unet in parts to use less vram.") - parser.add_argument("--novram", action="store_true", help="When lowvram isn't enough.") - parser.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") - parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") - parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.") - parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build.") - - args = parser.parse_args() - if args.dont_upcast_attention: print("disabling upcasting of attention") os.environ['ATTN_PRECISION'] = "fp16" @@ -121,7 +98,7 @@ if __name__ == "__main__": if args.output_directory: output_dir = os.path.abspath(args.output_directory) - print("setting output directory to:", output_dir) + print(f"Setting output directory to: {output_dir}") folder_paths.set_output_directory(output_dir) port = args.port From 84b9c0ac2ff49b5b18b8e7804f8fe42a379a0787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Thu, 6 Apr 2023 12:27:22 +0800 Subject: [PATCH 05/24] Import intel_extension_for_pytorch as ipex --- comfy/model_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index f0b8be55e..379cc18d7 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -87,7 +87,7 @@ except: pass try: - import intel_extension_for_pytorch + import intel_extension_for_pytorch as ipex if torch.xpu.is_available(): vram_state = XPU except: From 7cb924f68469cd2481b2313f8e5fc02587279bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Thu, 6 Apr 2023 14:24:47 +0800 Subject: [PATCH 06/24] Use separate variables instead of `vram_state` --- comfy/model_management.py | 70 +++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 379cc18d7..a84167746 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -5,9 +5,9 @@ LOW_VRAM = 2 NORMAL_VRAM = 3 HIGH_VRAM = 4 MPS = 5 -XPU = 6 accelerate_enabled = False +xpu_available = False vram_state = NORMAL_VRAM total_vram = 0 @@ -22,7 +22,12 @@ set_vram_to = NORMAL_VRAM try: import torch - total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) + import intel_extension_for_pytorch as ipex + if torch.xpu.is_available(): + xpu_available = True + total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) + else: + total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) forced_normal_vram = "--normalvram" in sys.argv if not forced_normal_vram and not forced_cpu: @@ -86,17 +91,10 @@ try: except: pass -try: - import intel_extension_for_pytorch as ipex - if torch.xpu.is_available(): - vram_state = XPU -except: - pass - if forced_cpu: vram_state = CPU -print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", "NORMAL VRAM", "HIGH VRAM", "MPS", "XPU"][vram_state]) +print("Set vram state to:", ["CPU", "NO VRAM", "LOW VRAM", 
"NORMAL VRAM", "HIGH VRAM", "MPS"][vram_state]) current_loaded_model = None @@ -133,6 +131,7 @@ def load_model_gpu(model): global current_loaded_model global vram_state global model_accelerated + global xpu_available if model is current_loaded_model: return @@ -149,19 +148,19 @@ def load_model_gpu(model): mps_device = torch.device("mps") real_model.to(mps_device) pass - elif vram_state == XPU: - real_model.to("xpu") - pass elif vram_state == NORMAL_VRAM or vram_state == HIGH_VRAM: model_accelerated = False - real_model.cuda() + if xpu_available: + real_model.to("xpu") + else: + real_model.cuda() else: if vram_state == NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) elif vram_state == LOW_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"}) - accelerate.dispatch_model(real_model, device_map=device_map, main_device="cuda") + accelerate.dispatch_model(real_model, device_map=device_map, main_device="xpu" if xpu_available else "cuda") model_accelerated = True return current_loaded_model @@ -187,8 +186,12 @@ def load_controlnet_gpu(models): def load_if_low_vram(model): global vram_state + global xpu_available if vram_state == LOW_VRAM or vram_state == NO_VRAM: - return model.cuda() + if xpu_available: + return model.to("xpu") + else: + return model.cuda() return model def unload_if_low_vram(model): @@ -198,14 +201,16 @@ def unload_if_low_vram(model): return model def get_torch_device(): + global xpu_available if vram_state == MPS: return torch.device("mps") - if vram_state == XPU: - return torch.device("xpu") if vram_state == CPU: return torch.device("cpu") else: - return torch.cuda.current_device() + if xpu_available: + return torch.device("xpu") + else: + return torch.cuda.current_device() def get_autocast_device(dev): if hasattr(dev, 'type'): @@ -235,22 +240,24 @@ def pytorch_attention_enabled(): return ENABLE_PYTORCH_ATTENTION def get_free_memory(dev=None, torch_free_too=False): + global xpu_available if dev is None: dev = get_torch_device() if hasattr(dev, 'type') and (dev.type == 'cpu' or dev.type == 'mps'): mem_free_total = psutil.virtual_memory().available mem_free_torch = mem_free_total - elif hasattr(dev, 'type') and (dev.type == 'xpu'): - mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) - mem_free_torch = mem_free_total else: - stats = torch.cuda.memory_stats(dev) - mem_active = stats['active_bytes.all.current'] - mem_reserved = stats['reserved_bytes.all.current'] - mem_free_cuda, _ = torch.cuda.mem_get_info(dev) - mem_free_torch = mem_reserved - mem_active - mem_free_total = mem_free_cuda + mem_free_torch + if xpu_available: + mem_free_total = torch.xpu.get_device_properties(dev).total_memory - torch.xpu.memory_allocated(dev) + mem_free_torch = mem_free_total + else: + stats = torch.cuda.memory_stats(dev) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(dev) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch if torch_free_too: return (mem_free_total, mem_free_torch) @@ -274,12 +281,9 @@ def mps_mode(): global vram_state return vram_state == MPS -def xpu_mode(): - global vram_state - return vram_state == XPU - def should_use_fp16(): - if cpu_mode() or mps_mode() or xpu_mode(): + global xpu_available + if cpu_mode() or mps_mode() or xpu_available: 
return False #TODO ? if torch.cuda.is_bf16_supported(): From 60127a83040b3b243457980d04f3bb25c4491978 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Wed, 5 Apr 2023 23:57:31 -0700 Subject: [PATCH 07/24] diffusers loader --- comfy/diffusers_convert.py | 364 +++++++++++++++++++++ models/diffusers/put_diffusers_models_here | 0 nodes.py | 19 +- 3 files changed, 382 insertions(+), 1 deletion(-) create mode 100644 comfy/diffusers_convert.py create mode 100644 models/diffusers/put_diffusers_models_here diff --git a/comfy/diffusers_convert.py b/comfy/diffusers_convert.py new file mode 100644 index 000000000..a31c1c11b --- /dev/null +++ b/comfy/diffusers_convert.py @@ -0,0 +1,364 @@ +import json +import os +import yaml + +# because of local import nonsense +import sys +sys.path.append(os.path.dirname(os.path.realpath(__file__))) + +import folder_paths +from comfy.ldm.util import instantiate_from_config +from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE +import os.path as osp +import re +import torch +from safetensors.torch import load_file, save_file + +# conversion code from https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py + +# =================# +# UNet Conversion # +# =================# + +unet_conversion_map = [ + # (stable-diffusion, HF Diffusers) + ("time_embed.0.weight", "time_embedding.linear_1.weight"), + ("time_embed.0.bias", "time_embedding.linear_1.bias"), + ("time_embed.2.weight", "time_embedding.linear_2.weight"), + ("time_embed.2.bias", "time_embedding.linear_2.bias"), + ("input_blocks.0.0.weight", "conv_in.weight"), + ("input_blocks.0.0.bias", "conv_in.bias"), + ("out.0.weight", "conv_norm_out.weight"), + ("out.0.bias", "conv_norm_out.bias"), + ("out.2.weight", "conv_out.weight"), + ("out.2.bias", "conv_out.bias"), +] + +unet_conversion_map_resnet = [ + # (stable-diffusion, HF Diffusers) + ("in_layers.0", "norm1"), + ("in_layers.2", "conv1"), + ("out_layers.0", "norm2"), + ("out_layers.3", "conv2"), + ("emb_layers.1", "time_emb_proj"), + ("skip_connection", "conv_shortcut"), +] + +unet_conversion_map_layer = [] +# hardcoded number of downblocks and resnets/attentions... +# would need smarter logic for other networks. +for i in range(4): + # loop over downblocks/upblocks + + for j in range(2): + # loop over resnets/attentions for downblocks + hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}." + sd_down_res_prefix = f"input_blocks.{3 * i + j + 1}.0." + unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix)) + + if i < 3: + # no attention layers in down_blocks.3 + hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}." + sd_down_atn_prefix = f"input_blocks.{3 * i + j + 1}.1." + unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix)) + + for j in range(3): + # loop over resnets/attentions for upblocks + hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}." + sd_up_res_prefix = f"output_blocks.{3 * i + j}.0." + unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix)) + + if i > 0: + # no attention layers in up_blocks.0 + hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}." + sd_up_atn_prefix = f"output_blocks.{3 * i + j}.1." + unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix)) + + if i < 3: + # no downsample in down_blocks.3 + hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv." + sd_downsample_prefix = f"input_blocks.{3 * (i + 1)}.0.op." 
+ unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix)) + + # no upsample in up_blocks.3 + hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0." + sd_upsample_prefix = f"output_blocks.{3 * i + 2}.{1 if i == 0 else 2}." + unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix)) + +hf_mid_atn_prefix = "mid_block.attentions.0." +sd_mid_atn_prefix = "middle_block.1." +unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix)) + +for j in range(2): + hf_mid_res_prefix = f"mid_block.resnets.{j}." + sd_mid_res_prefix = f"middle_block.{2 * j}." + unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix)) + + +def convert_unet_state_dict(unet_state_dict): + # buyer beware: this is a *brittle* function, + # and correct output requires that all of these pieces interact in + # the exact order in which I have arranged them. + mapping = {k: k for k in unet_state_dict.keys()} + for sd_name, hf_name in unet_conversion_map: + mapping[hf_name] = sd_name + for k, v in mapping.items(): + if "resnets" in k: + for sd_part, hf_part in unet_conversion_map_resnet: + v = v.replace(hf_part, sd_part) + mapping[k] = v + for k, v in mapping.items(): + for sd_part, hf_part in unet_conversion_map_layer: + v = v.replace(hf_part, sd_part) + mapping[k] = v + new_state_dict = {v: unet_state_dict[k] for k, v in mapping.items()} + return new_state_dict + + +# ================# +# VAE Conversion # +# ================# + +vae_conversion_map = [ + # (stable-diffusion, HF Diffusers) + ("nin_shortcut", "conv_shortcut"), + ("norm_out", "conv_norm_out"), + ("mid.attn_1.", "mid_block.attentions.0."), +] + +for i in range(4): + # down_blocks have two resnets + for j in range(2): + hf_down_prefix = f"encoder.down_blocks.{i}.resnets.{j}." + sd_down_prefix = f"encoder.down.{i}.block.{j}." + vae_conversion_map.append((sd_down_prefix, hf_down_prefix)) + + if i < 3: + hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0." + sd_downsample_prefix = f"down.{i}.downsample." + vae_conversion_map.append((sd_downsample_prefix, hf_downsample_prefix)) + + hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0." + sd_upsample_prefix = f"up.{3 - i}.upsample." + vae_conversion_map.append((sd_upsample_prefix, hf_upsample_prefix)) + + # up_blocks have three resnets + # also, up blocks in hf are numbered in reverse from sd + for j in range(3): + hf_up_prefix = f"decoder.up_blocks.{i}.resnets.{j}." + sd_up_prefix = f"decoder.up.{3 - i}.block.{j}." + vae_conversion_map.append((sd_up_prefix, hf_up_prefix)) + +# this part accounts for mid blocks in both the encoder and the decoder +for i in range(2): + hf_mid_res_prefix = f"mid_block.resnets.{i}." + sd_mid_res_prefix = f"mid.block_{i + 1}." 
+ vae_conversion_map.append((sd_mid_res_prefix, hf_mid_res_prefix)) + +vae_conversion_map_attn = [ + # (stable-diffusion, HF Diffusers) + ("norm.", "group_norm."), + ("q.", "query."), + ("k.", "key."), + ("v.", "value."), + ("proj_out.", "proj_attn."), +] + + +def reshape_weight_for_sd(w): + # convert HF linear weights to SD conv2d weights + return w.reshape(*w.shape, 1, 1) + + +def convert_vae_state_dict(vae_state_dict): + mapping = {k: k for k in vae_state_dict.keys()} + for k, v in mapping.items(): + for sd_part, hf_part in vae_conversion_map: + v = v.replace(hf_part, sd_part) + mapping[k] = v + for k, v in mapping.items(): + if "attentions" in k: + for sd_part, hf_part in vae_conversion_map_attn: + v = v.replace(hf_part, sd_part) + mapping[k] = v + new_state_dict = {v: vae_state_dict[k] for k, v in mapping.items()} + weights_to_convert = ["q", "k", "v", "proj_out"] + for k, v in new_state_dict.items(): + for weight_name in weights_to_convert: + if f"mid.attn_1.{weight_name}.weight" in k: + print(f"Reshaping {k} for SD format") + new_state_dict[k] = reshape_weight_for_sd(v) + return new_state_dict + + +# =========================# +# Text Encoder Conversion # +# =========================# + + +textenc_conversion_lst = [ + # (stable-diffusion, HF Diffusers) + ("resblocks.", "text_model.encoder.layers."), + ("ln_1", "layer_norm1"), + ("ln_2", "layer_norm2"), + (".c_fc.", ".fc1."), + (".c_proj.", ".fc2."), + (".attn", ".self_attn"), + ("ln_final.", "transformer.text_model.final_layer_norm."), + ("token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"), + ("positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"), +] +protected = {re.escape(x[1]): x[0] for x in textenc_conversion_lst} +textenc_pattern = re.compile("|".join(protected.keys())) + +# Ordering is from https://github.com/pytorch/pytorch/blob/master/test/cpp/api/modules.cpp +code2idx = {"q": 0, "k": 1, "v": 2} + + +def convert_text_enc_state_dict_v20(text_enc_dict): + new_state_dict = {} + capture_qkv_weight = {} + capture_qkv_bias = {} + for k, v in text_enc_dict.items(): + if ( + k.endswith(".self_attn.q_proj.weight") + or k.endswith(".self_attn.k_proj.weight") + or k.endswith(".self_attn.v_proj.weight") + ): + k_pre = k[: -len(".q_proj.weight")] + k_code = k[-len("q_proj.weight")] + if k_pre not in capture_qkv_weight: + capture_qkv_weight[k_pre] = [None, None, None] + capture_qkv_weight[k_pre][code2idx[k_code]] = v + continue + + if ( + k.endswith(".self_attn.q_proj.bias") + or k.endswith(".self_attn.k_proj.bias") + or k.endswith(".self_attn.v_proj.bias") + ): + k_pre = k[: -len(".q_proj.bias")] + k_code = k[-len("q_proj.bias")] + if k_pre not in capture_qkv_bias: + capture_qkv_bias[k_pre] = [None, None, None] + capture_qkv_bias[k_pre][code2idx[k_code]] = v + continue + + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k) + new_state_dict[relabelled_key] = v + + for k_pre, tensors in capture_qkv_weight.items(): + if None in tensors: + raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing") + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], k_pre) + new_state_dict[relabelled_key + ".in_proj_weight"] = torch.cat(tensors) + + for k_pre, tensors in capture_qkv_bias.items(): + if None in tensors: + raise Exception("CORRUPTED MODEL: one of the q-k-v values for the text encoder was missing") + relabelled_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], 
k_pre) + new_state_dict[relabelled_key + ".in_proj_bias"] = torch.cat(tensors) + + return new_state_dict + + +def convert_text_enc_state_dict(text_enc_dict): + return text_enc_dict + + +def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, embedding_directory=None): + diffusers_unet_conf = json.load(open(osp.join(model_path, "unet/config.json"))) + diffusers_scheduler_conf = json.load(open(osp.join(model_path, "scheduler/scheduler_config.json"))) + + # magic + v2 = diffusers_unet_conf["sample_size"] == 96 + v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction' + + if v2: + if v_pred: + config_path = folder_paths.get_full_path("configs", 'v2-inference-v.yaml') + else: + config_path = folder_paths.get_full_path("configs", 'v2-inference.yaml') + else: + config_path = folder_paths.get_full_path("configs", 'v1-inference.yaml') + + with open(config_path, 'r') as stream: + config = yaml.safe_load(stream) + + model_config_params = config['model']['params'] + clip_config = model_config_params['cond_stage_config'] + scale_factor = model_config_params['scale_factor'] + vae_config = model_config_params['first_stage_config'] + vae_config['scale_factor'] = scale_factor + + unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors") + vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.safetensors") + text_enc_path = osp.join(model_path, "text_encoder", "model.safetensors") + + # Load models from safetensors if it exists, if it doesn't pytorch + if osp.exists(unet_path): + unet_state_dict = load_file(unet_path, device="cpu") + else: + unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.bin") + unet_state_dict = torch.load(unet_path, map_location="cpu") + + if osp.exists(vae_path): + vae_state_dict = load_file(vae_path, device="cpu") + else: + vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.bin") + vae_state_dict = torch.load(vae_path, map_location="cpu") + + if osp.exists(text_enc_path): + text_enc_dict = load_file(text_enc_path, device="cpu") + else: + text_enc_path = osp.join(model_path, "text_encoder", "pytorch_model.bin") + text_enc_dict = torch.load(text_enc_path, map_location="cpu") + + # Convert the UNet model + unet_state_dict = convert_unet_state_dict(unet_state_dict) + unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()} + + # Convert the VAE model + vae_state_dict = convert_vae_state_dict(vae_state_dict) + vae_state_dict = {"first_stage_model." + k: v for k, v in vae_state_dict.items()} + + # Easiest way to identify v2.0 model seems to be that the text encoder (OpenCLIP) is deeper + is_v20_model = "text_model.encoder.layers.22.layer_norm2.bias" in text_enc_dict + + if is_v20_model: + # Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm + text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()} + text_enc_dict = convert_text_enc_state_dict_v20(text_enc_dict) + text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()} + else: + text_enc_dict = convert_text_enc_state_dict(text_enc_dict) + text_enc_dict = {"cond_stage_model.transformer." 
+ k: v for k, v in text_enc_dict.items()} + + # Put together new checkpoint + sd = {**unet_state_dict, **vae_state_dict, **text_enc_dict} + + clip = None + vae = None + + class WeightsLoader(torch.nn.Module): + pass + + w = WeightsLoader() + load_state_dict_to = [] + if output_vae: + vae = VAE(scale_factor=scale_factor, config=vae_config) + w.first_stage_model = vae.first_stage_model + load_state_dict_to = [w] + + if output_clip: + clip = CLIP(config=clip_config, embedding_directory=embedding_directory) + w.cond_stage_model = clip.cond_stage_model + load_state_dict_to = [w] + + model = instantiate_from_config(config["model"]) + model = load_model_weights(model, sd, verbose=False, load_state_dict_to=load_state_dict_to) + + if fp16: + model = model.half() + + return ModelPatcher(model), clip, vae diff --git a/models/diffusers/put_diffusers_models_here b/models/diffusers/put_diffusers_models_here new file mode 100644 index 000000000..e69de29bb diff --git a/nodes.py b/nodes.py index 187d54a11..776bc3819 100644 --- a/nodes.py +++ b/nodes.py @@ -4,13 +4,14 @@ import os import sys import json import hashlib -import copy import traceback from PIL import Image from PIL.PngImagePlugin import PngInfo import numpy as np +from comfy.diffusers_convert import load_diffusers + sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) @@ -219,6 +220,21 @@ class CheckpointLoaderSimple: out = comfy.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings")) return out +class DiffusersLoader: + @classmethod + def INPUT_TYPES(cls): + return {"required": {"model_path": (os.listdir(os.path.join(folder_paths.models_dir, 'diffusers'), ),), + }} + RETURN_TYPES = ("MODEL", "CLIP", "VAE") + FUNCTION = "load_checkpoint" + + CATEGORY = "loaders" + + def load_checkpoint(self, model_path, output_vae=True, output_clip=True): + model_path = os.path.join(folder_paths.models_dir, 'diffusers', model_path) + return load_diffusers(model_path, fp16=True, output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) + + class unCLIPCheckpointLoader: @classmethod def INPUT_TYPES(s): @@ -1076,6 +1092,7 @@ NODE_CLASS_MAPPINGS = { "TomePatchModel": TomePatchModel, "unCLIPCheckpointLoader": unCLIPCheckpointLoader, "CheckpointLoader": CheckpointLoader, + "DiffusersLoader": DiffusersLoader, } def load_custom_node(module_path): From c418d988ba59b3114770a0fa111d301f04880fca Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Wed, 5 Apr 2023 23:59:03 -0700 Subject: [PATCH 08/24] update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f7d24c45..90931141d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin - Many optimizations: Only re-executes the parts of the workflow that changes between executions. - Command line option: ```--lowvram``` to make it work on GPUs with less than 3GB vram (enabled automatically on GPUs with low vram) - Works even if you don't have a GPU with: ```--cpu``` (slow) -- Can load both ckpt and safetensors models/checkpoints. Standalone VAEs and CLIP models. +- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models. 
- Embeddings/Textual inversion - [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/) - Loading full workflows (with seeds) from generated PNG files. From 3d16077e3806b0817b1d43dc14f61e5dee5495c8 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 00:24:52 -0700 Subject: [PATCH 09/24] empty list if diffusers directory doesn't exist --- nodes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nodes.py b/nodes.py index 776bc3819..1af62887d 100644 --- a/nodes.py +++ b/nodes.py @@ -223,8 +223,11 @@ class CheckpointLoaderSimple: class DiffusersLoader: @classmethod def INPUT_TYPES(cls): - return {"required": {"model_path": (os.listdir(os.path.join(folder_paths.models_dir, 'diffusers'), ),), - }} + paths = [] + search_path = os.path.join(folder_paths.models_dir, 'diffusers') + if os.path.exists(search_path): + paths = next(os.walk(search_path))[1] + return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" From 42fd67b5cb0de9bd1228af7a93dec08b2f1486c3 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 00:28:06 -0700 Subject: [PATCH 10/24] use precision determined by model management --- nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodes.py b/nodes.py index 1af62887d..8271da04c 100644 --- a/nodes.py +++ b/nodes.py @@ -235,7 +235,7 @@ class DiffusersLoader: def load_checkpoint(self, model_path, output_vae=True, output_clip=True): model_path = os.path.join(folder_paths.models_dir, 'diffusers', model_path) - return load_diffusers(model_path, fp16=True, output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) + return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) class unCLIPCheckpointLoader: From 3e2608e12b312fd5d2396d4146d992cd4f8b9ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=97=8D+85CD?= <50108258+kwaa@users.noreply.github.com> Date: Thu, 6 Apr 2023 15:44:05 +0800 Subject: [PATCH 11/24] Fix auto lowvram detection on CUDA --- comfy/model_management.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index a84167746..b0123b5fc 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -22,11 +22,12 @@ set_vram_to = NORMAL_VRAM try: import torch - import intel_extension_for_pytorch as ipex - if torch.xpu.is_available(): - xpu_available = True - total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) - else: + try: + import intel_extension_for_pytorch as ipex + if torch.xpu.is_available(): + xpu_available = True + total_vram = torch.xpu.get_device_properties(torch.xpu.current_device()).total_memory / (1024 * 1024) + except: total_vram = torch.cuda.mem_get_info(torch.cuda.current_device())[1] / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) forced_normal_vram = "--normalvram" in sys.argv From 01c1fc669fb8cd41f627dad871257acbaaf24b47 Mon Sep 17 00:00:00 2001 From: EllangoK Date: Thu, 6 Apr 2023 13:19:00 -0400 Subject: [PATCH 12/24] set listen flag to listen on all if specifed --- comfy/cli_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 6a56e315c..a27dc7a7f 100644 --- a/comfy/cli_args.py +++ 
b/comfy/cli_args.py @@ -2,7 +2,7 @@ import argparse parser = argparse.ArgumentParser() -parser.add_argument("--listen", type=str, default="127.0.0.1", help="Listen on IP or 127.0.0.1 if none given so the UI can be accessed from other computers.") +parser.add_argument("--listen", nargs="?", const="0.0.0.0", default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") From 7d62d89f9325348179fc9b0db146ff50fa7c808c Mon Sep 17 00:00:00 2001 From: EllangoK Date: Wed, 5 Apr 2023 13:08:08 -0400 Subject: [PATCH 13/24] add cors middleware --- server.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 840d9a4e7..005bf9b2c 100644 --- a/server.py +++ b/server.py @@ -27,6 +27,19 @@ async def cache_control(request: web.Request, handler): response.headers.setdefault('Cache-Control', 'no-cache') return response +@web.middleware +async def cors_middleware(request: web.Request, handler): + if request.method == "OPTIONS": + # Pre-flight request. Reply successfully: + response = web.Response() + else: + response = await handler(request) + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'POST, GET, DELETE, PUT, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + response.headers['Access-Control-Allow-Credentials'] = 'true' + return response + class PromptServer(): def __init__(self, loop): PromptServer.instance = self @@ -37,7 +50,7 @@ class PromptServer(): self.loop = loop self.messages = asyncio.Queue() self.number = 0 - self.app = web.Application(client_max_size=20971520, middlewares=[cache_control]) + self.app = web.Application(client_max_size=20971520, middlewares=[cache_control, cors_middleware]) self.sockets = dict() self.web_root = os.path.join(os.path.dirname( os.path.realpath(__file__)), "web") From 48efae16084b423166f9a1930b989489169d22cf Mon Sep 17 00:00:00 2001 From: EllangoK Date: Thu, 6 Apr 2023 15:06:22 -0400 Subject: [PATCH 14/24] makes cors a cli parameter --- comfy/cli_args.py | 3 ++- server.py | 36 +++++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index a27dc7a7f..5133e0ae5 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -4,8 +4,10 @@ parser = argparse.ArgumentParser() parser.add_argument("--listen", nargs="?", const="0.0.0.0", default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. 
(listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") +parser.add_argument("--cors", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") +parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") attn_group = parser.add_mutually_exclusive_group() @@ -13,7 +15,6 @@ attn_group.add_argument("--use-split-cross-attention", action="store_true", help attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") -parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") vram_group = parser.add_mutually_exclusive_group() vram_group.add_argument("--highvram", action="store_true", help="By default models will be unloaded to CPU memory after being used. This option keeps them in GPU memory.") diff --git a/server.py b/server.py index 005bf9b2c..a9c0b4599 100644 --- a/server.py +++ b/server.py @@ -18,6 +18,7 @@ except ImportError: sys.exit() import mimetypes +from comfy.cli_args import args @web.middleware @@ -27,18 +28,22 @@ async def cache_control(request: web.Request, handler): response.headers.setdefault('Cache-Control', 'no-cache') return response -@web.middleware -async def cors_middleware(request: web.Request, handler): - if request.method == "OPTIONS": - # Pre-flight request. Reply successfully: - response = web.Response() - else: - response = await handler(request) - response.headers['Access-Control-Allow-Origin'] = '*' - response.headers['Access-Control-Allow-Methods'] = 'POST, GET, DELETE, PUT, OPTIONS' - response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' - response.headers['Access-Control-Allow-Credentials'] = 'true' - return response +def create_cors_middleware(allowed_origin: str): + @web.middleware + async def cors_middleware(request: web.Request, handler): + if request.method == "OPTIONS": + # Pre-flight request. 
Reply successfully: + response = web.Response() + else: + response = await handler(request) + + response.headers['Access-Control-Allow-Origin'] = allowed_origin + response.headers['Access-Control-Allow-Methods'] = 'POST, GET, DELETE, PUT, OPTIONS' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type, Authorization' + response.headers['Access-Control-Allow-Credentials'] = 'true' + return response + + return cors_middleware class PromptServer(): def __init__(self, loop): @@ -50,7 +55,12 @@ class PromptServer(): self.loop = loop self.messages = asyncio.Queue() self.number = 0 - self.app = web.Application(client_max_size=20971520, middlewares=[cache_control, cors_middleware]) + + middlewares = [cache_control] + if args.cors: + middlewares.append(create_cors_middleware(args.cors)) + + self.app = web.Application(client_max_size=20971520, middlewares=middlewares) self.sockets = dict() self.web_root = os.path.join(os.path.dirname( os.path.realpath(__file__)), "web") From f84f2508cc45a014cc27e023e9623db0450d237e Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Apr 2023 15:24:55 -0400 Subject: [PATCH 15/24] Rename the cors parameter to something more verbose. --- comfy/cli_args.py | 2 +- server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 5133e0ae5..f2960ae31 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -4,7 +4,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--listen", nargs="?", const="0.0.0.0", default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") -parser.add_argument("--cors", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") +parser.add_argument("--enable-cors-header", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") diff --git a/server.py b/server.py index a9c0b4599..95cdeb051 100644 --- a/server.py +++ b/server.py @@ -57,8 +57,8 @@ class PromptServer(): self.number = 0 middlewares = [cache_control] - if args.cors: - middlewares.append(create_cors_middleware(args.cors)) + if args.enable_cors_header: + middlewares.append(create_cors_middleware(args.enable_cors_header)) self.app = web.Application(client_max_size=20971520, middlewares=middlewares) self.sockets = dict() From 28fff5d1dbba8b4a546e31c69240133f35b2235f Mon Sep 17 00:00:00 2001 From: EllangoK Date: Thu, 6 Apr 2023 19:06:39 -0400 Subject: [PATCH 16/24] fixes lack of support for multi configs also adds some metavars to argarse --- comfy/cli_args.py | 8 ++++---- main.py | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index f2960ae31..b6898cea9 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -2,12 +2,12 @@ import argparse parser = argparse.ArgumentParser() -parser.add_argument("--listen", nargs="?", const="0.0.0.0", 
default="127.0.0.1", type=str, help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") +parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nargs="?", const="0.0.0.0", help="Specify the IP address to listen on (default: 127.0.0.1). If --listen is provided without an argument, it defaults to 0.0.0.0. (listens on all)") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") -parser.add_argument("--enable-cors-header", default=None, nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") -parser.add_argument("--extra-model-paths-config", type=str, default=None, help="Load an extra_model_paths.yaml file.") +parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") +parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") -parser.add_argument("--cuda-device", type=int, default=None, help="Set the id of the cuda device this instance will use.") +parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") attn_group = parser.add_mutually_exclusive_group() diff --git a/main.py b/main.py index 51a48fc6d..9c0a3d8a1 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,9 @@ import asyncio +import itertools import os import shutil import threading + from comfy.cli_args import args if os.name == "nt": @@ -94,7 +96,8 @@ if __name__ == "__main__": load_extra_path_config(extra_model_paths_config_path) if args.extra_model_paths_config: - load_extra_path_config(args.extra_model_paths_config) + for config_path in itertools.chain(*args.extra_model_paths_config): + load_extra_path_config(config_path) if args.output_directory: output_dir = os.path.abspath(args.output_directory) From 60b4c31ab3c2ec16575c26d9d08ecabc8643b381 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Apr 2023 22:22:59 -0400 Subject: [PATCH 17/24] Add webp images to upload accept list. --- web/scripts/widgets.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/scripts/widgets.js b/web/scripts/widgets.js index 5f5043cd0..d1a9c6c6e 100644 --- a/web/scripts/widgets.js +++ b/web/scripts/widgets.js @@ -306,7 +306,7 @@ export const ComfyWidgets = { const fileInput = document.createElement("input"); Object.assign(fileInput, { type: "file", - accept: "image/jpeg,image/png", + accept: "image/jpeg,image/png,image/webp", style: "display: none", onchange: async () => { if (fileInput.files.length) { From bceccca0e59862c3410b5d99b47fe1e01ba914af Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Apr 2023 23:52:34 -0400 Subject: [PATCH 18/24] Small refactor. 
--- comfy/model_management.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 92c59efe7..504da2190 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -129,7 +129,6 @@ def load_model_gpu(model): global current_loaded_model global vram_state global model_accelerated - global xpu_available if model is current_loaded_model: return @@ -148,17 +147,14 @@ def load_model_gpu(model): pass elif vram_state == VRAMState.NORMAL_VRAM or vram_state == VRAMState.HIGH_VRAM: model_accelerated = False - if xpu_available: - real_model.to("xpu") - else: - real_model.cuda() + real_model.to(get_torch_device()) else: if vram_state == VRAMState.NO_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) elif vram_state == VRAMState.LOW_VRAM: device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(total_vram_available_mb), "cpu": "16GiB"}) - accelerate.dispatch_model(real_model, device_map=device_map, main_device="xpu" if xpu_available else "cuda") + accelerate.dispatch_model(real_model, device_map=device_map, main_device=get_torch_device()) model_accelerated = True return current_loaded_model @@ -184,12 +180,8 @@ def load_controlnet_gpu(models): def load_if_low_vram(model): global vram_state - global xpu_available if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: - if xpu_available: - return model.to("xpu") - else: - return model.cuda() + return model.to(get_torch_device()) return model def unload_if_low_vram(model): From 64557d67810c81f72bd6a7544bd8930488868319 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Apr 2023 00:27:54 -0400 Subject: [PATCH 19/24] Add a --force-fp32 argument to force fp32 for debugging. --- comfy/cli_args.py | 1 + comfy/model_management.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index b6898cea9..739891f71 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -9,6 +9,7 @@ parser.add_argument("--extra-model-paths-config", type=str, default=None, metava parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--dont-upcast-attention", action="store_true", help="Disable upcasting of attention. Can boost speed but increase the chances of black images.") +parser.add_argument("--force-fp32", action="store_true", help="Force fp32 (If this makes your GPU work better please report it).") attn_group = parser.add_mutually_exclusive_group() attn_group.add_argument("--use-split-cross-attention", action="store_true", help="Use the split cross attention optimization instead of the sub-quadratic one. 
Ignored when xformers is used.") diff --git a/comfy/model_management.py b/comfy/model_management.py index 504da2190..2407140fd 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -69,6 +69,11 @@ elif args.novram: elif args.highvram: vram_state = VRAMState.HIGH_VRAM +FORCE_FP32 = False +if args.force_fp32: + print("Forcing FP32, if this improves things please report it.") + FORCE_FP32 = True + if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM): try: @@ -273,6 +278,9 @@ def mps_mode(): def should_use_fp16(): global xpu_available + if FORCE_FP32: + return False + if cpu_mode() or mps_mode() or xpu_available: return False #TODO ? From 72a8973bd56b7cc179eb603ccd61385fdca5766d Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 21:45:08 -0700 Subject: [PATCH 20/24] allow configurable path for diffusers models --- folder_paths.py | 1 + nodes.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/folder_paths.py b/folder_paths.py index f13e4895f..ab3359347 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -23,6 +23,7 @@ folder_names_and_paths["clip"] = ([os.path.join(models_dir, "clip")], supported_ folder_names_and_paths["clip_vision"] = ([os.path.join(models_dir, "clip_vision")], supported_pt_extensions) folder_names_and_paths["style_models"] = ([os.path.join(models_dir, "style_models")], supported_pt_extensions) folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")], supported_pt_extensions) +folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"]) folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions) folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions) diff --git a/nodes.py b/nodes.py index 8271da04c..934b458f2 100644 --- a/nodes.py +++ b/nodes.py @@ -224,7 +224,7 @@ class DiffusersLoader: @classmethod def INPUT_TYPES(cls): paths = [] - search_path = os.path.join(folder_paths.models_dir, 'diffusers') + search_path = folder_paths.get_folder_paths("diffusers")[0] if os.path.exists(search_path): paths = next(os.walk(search_path))[1] return {"required": {"model_path": (paths,), }} From f51b7a92c72b5fe7a12d642a545e59f1f6150fb4 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 21:48:58 -0700 Subject: [PATCH 21/24] search all diffusers paths (oops) --- nodes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nodes.py b/nodes.py index 934b458f2..a4366f834 100644 --- a/nodes.py +++ b/nodes.py @@ -224,9 +224,10 @@ class DiffusersLoader: @classmethod def INPUT_TYPES(cls): paths = [] - search_path = folder_paths.get_folder_paths("diffusers")[0] - if os.path.exists(search_path): - paths = next(os.walk(search_path))[1] + search_paths = folder_paths.get_folder_paths("diffusers") + for search_path in search_paths: + if os.path.exists(search_path): + paths = next(os.walk(search_path))[1] return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" From 7734d65f22a8f30f73cb72e81586b2d015229060 Mon Sep 17 00:00:00 2001 From: sALTaccount Date: Thu, 6 Apr 2023 22:02:26 -0700 Subject: [PATCH 22/24] fix loading alt folders --- nodes.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nodes.py b/nodes.py index a4366f834..274ae2f1f 100644 --- a/nodes.py +++ b/nodes.py @@ -224,10 +224,9 @@ class DiffusersLoader: 
@classmethod def INPUT_TYPES(cls): paths = [] - search_paths = folder_paths.get_folder_paths("diffusers") - for search_path in search_paths: + for search_path in folder_paths.get_folder_paths("diffusers"): if os.path.exists(search_path): - paths = next(os.walk(search_path))[1] + paths += next(os.walk(search_path))[1] return {"required": {"model_path": (paths,), }} RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" @@ -235,7 +234,13 @@ class DiffusersLoader: CATEGORY = "loaders" def load_checkpoint(self, model_path, output_vae=True, output_clip=True): - model_path = os.path.join(folder_paths.models_dir, 'diffusers', model_path) + for search_path in folder_paths.get_folder_paths("diffusers"): + if os.path.exists(search_path): + paths = next(os.walk(search_path))[1] + if model_path in paths: + model_path = os.path.join(search_path, model_path) + break + search_paths = folder_paths.get_folder_paths("diffusers") return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) From 58ed0f2da438aaf253f9880578d694ad917819f8 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Apr 2023 01:28:15 -0400 Subject: [PATCH 23/24] Fix loading SD1.5 diffusers checkpoint. --- comfy/diffusers_convert.py | 4 +++- nodes.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/comfy/diffusers_convert.py b/comfy/diffusers_convert.py index a31c1c11b..950137f2c 100644 --- a/comfy/diffusers_convert.py +++ b/comfy/diffusers_convert.py @@ -272,7 +272,8 @@ def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, emb # magic v2 = diffusers_unet_conf["sample_size"] == 96 - v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction' + if 'prediction_type' in diffusers_scheduler_conf: + v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction' if v2: if v_pred: @@ -290,6 +291,7 @@ def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, emb scale_factor = model_config_params['scale_factor'] vae_config = model_config_params['first_stage_config'] vae_config['scale_factor'] = scale_factor + model_config_params["unet_config"]["params"]["use_fp16"] = fp16 unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors") vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.safetensors") diff --git a/nodes.py b/nodes.py index 274ae2f1f..025e4fcb4 100644 --- a/nodes.py +++ b/nodes.py @@ -231,7 +231,7 @@ class DiffusersLoader: RETURN_TYPES = ("MODEL", "CLIP", "VAE") FUNCTION = "load_checkpoint" - CATEGORY = "loaders" + CATEGORY = "advanced/loaders" def load_checkpoint(self, model_path, output_vae=True, output_clip=True): for search_path in folder_paths.get_folder_paths("diffusers"): @@ -240,7 +240,7 @@ class DiffusersLoader: if model_path in paths: model_path = os.path.join(search_path, model_path) break - search_paths = folder_paths.get_folder_paths("diffusers") + return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) From 44fea050649347ca4b4e7317a83d11c3b4b87f87 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Apr 2023 02:29:56 -0400 Subject: [PATCH 24/24] Cleanup. 
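This cleanup drops the sys.path.append() workaround from comfy/diffusers_convert.py and has nodes.py import the module through its package path, so load_diffusers is always referenced the same way. A short usage sketch under those assumptions — the model folder name is hypothetical and the three-way unpacking is inferred from the node's RETURN_TYPES:

    import comfy.diffusers_convert
    import comfy.model_management
    import folder_paths

    # Hypothetical diffusers checkpoint folder under models/diffusers/.
    model_path = "models/diffusers/stable-diffusion-v1-5"

    model, clip, vae = comfy.diffusers_convert.load_diffusers(
        model_path,
        fp16=comfy.model_management.should_use_fp16(),
        output_vae=True,
        output_clip=True,
        embedding_directory=folder_paths.get_folder_paths("embeddings"))

The keyword arguments mirror the call in DiffusersLoader.load_checkpoint(); only the import style changes here, from a bare load_diffusers name to the qualified comfy.diffusers_convert.load_diffusers.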
--- comfy/diffusers_convert.py | 4 ---- nodes.py | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/comfy/diffusers_convert.py b/comfy/diffusers_convert.py index 950137f2c..ceca80305 100644 --- a/comfy/diffusers_convert.py +++ b/comfy/diffusers_convert.py @@ -2,10 +2,6 @@ import json import os import yaml -# because of local import nonsense -import sys -sys.path.append(os.path.dirname(os.path.realpath(__file__))) - import folder_paths from comfy.ldm.util import instantiate_from_config from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE diff --git a/nodes.py b/nodes.py index 025e4fcb4..5c3b3a4ee 100644 --- a/nodes.py +++ b/nodes.py @@ -10,11 +10,11 @@ from PIL import Image from PIL.PngImagePlugin import PngInfo import numpy as np -from comfy.diffusers_convert import load_diffusers sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy")) +import comfy.diffusers_convert import comfy.samplers import comfy.sd import comfy.utils @@ -241,7 +241,7 @@ class DiffusersLoader: model_path = os.path.join(search_path, model_path) break - return load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) + return comfy.diffusers_convert.load_diffusers(model_path, fp16=model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings")) class unCLIPCheckpointLoader: