From 285ea7b79053fc7e3dad3c5f4a38b75a1cca6699 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 7 Aug 2023 08:29:50 -0400 Subject: [PATCH 01/37] Add "display" to custom node example. --- custom_nodes/example_node.py.example | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/custom_nodes/example_node.py.example b/custom_nodes/example_node.py.example index 175202aeb..e37808b03 100644 --- a/custom_nodes/example_node.py.example +++ b/custom_nodes/example_node.py.example @@ -51,9 +51,10 @@ class Example: "default": 0, "min": 0, #Minimum value "max": 4096, #Maximum value - "step": 64 #Slider's step + "step": 64, #Slider's step + "display": "number" # Cosmetic only: display as "number" or "slider" }), - "float_field": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "float_field": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "display": "number"}), "print_to_screen": (["enable", "disable"],), "string_field": ("STRING", { "multiline": False, #True if you want the field to look like the one on the ClipTextEncode node From 5e2b4893da62dec1c9ee4d0167f6b62e3b11fbef Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 7 Aug 2023 19:29:36 -0400 Subject: [PATCH 02/37] Fix path issue. --- web/extensions/core/linkRenderMode.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/extensions/core/linkRenderMode.js b/web/extensions/core/linkRenderMode.js index 8b8d4e01f..1e9091ec1 100644 --- a/web/extensions/core/linkRenderMode.js +++ b/web/extensions/core/linkRenderMode.js @@ -1,4 +1,4 @@ -import { app } from "/scripts/app.js"; +import { app } from "../../scripts/app.js"; const id = "Comfy.LinkRenderMode"; const ext = { From a5599ed42cef7a8aa77fa9409ab2ce95492bf914 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 8 Aug 2023 10:45:35 -0400 Subject: [PATCH 03/37] Add missing direct dep that gets pulled in by another. --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8ee7b83d1..14524485a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ pyyaml Pillow scipy tqdm +psutil From 5ac96897e9782805cd5e8fe85bd98ad03eae2b6f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 9 Aug 2023 11:31:27 -0400 Subject: [PATCH 04/37] Images can now be uploaded by dragging from another window in chromium. 
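Note: while an item is being dragged in from another browser window,
Chromium does not appear to populate DataTransferItem.type, so the dragover
check below now only tests item.kind === "file" instead of also requiring
an image/* MIME type.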
--- web/scripts/widgets.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/scripts/widgets.js b/web/scripts/widgets.js index d4a15ba84..adf5f26fa 100644 --- a/web/scripts/widgets.js +++ b/web/scripts/widgets.js @@ -433,7 +433,7 @@ export const ComfyWidgets = { // Add handler to check if an image is being dragged over our node node.onDragOver = function (e) { if (e.dataTransfer && e.dataTransfer.items) { - const image = [...e.dataTransfer.items].find((f) => f.kind === "file" && f.type.startsWith("image/")); + const image = [...e.dataTransfer.items].find((f) => f.kind === "file"); return !!image; } From cf10c5592c2ef91d5f0218a6f0e7a536c02c5d96 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 9 Aug 2023 20:32:30 -0400 Subject: [PATCH 05/37] Disable calculating uncond when CFG is 1.0 --- comfy/samplers.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index 044d518a5..de4f36da2 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -189,12 +189,13 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con continue to_run += [(p, COND)] - for x in uncond: - p = get_area_and_mult(x, x_in, cond_concat_in, timestep) - if p is None: - continue + if uncond is not None: + for x in uncond: + p = get_area_and_mult(x, x_in, cond_concat_in, timestep) + if p is None: + continue - to_run += [(p, UNCOND)] + to_run += [(p, UNCOND)] while len(to_run) > 0: first = to_run[0] @@ -282,6 +283,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con max_total_area = model_management.maximum_batch_area() + if math.isclose(cond_scale, 1.0): + uncond = None + cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options) if "sampler_cfg_function" in model_options: args = {"cond": cond, "uncond": uncond, "cond_scale": cond_scale, "timestep": timestep} From f7e6a5ed077a6e32a32c6fe65fe86519cc2f0252 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 10 Aug 2023 12:29:56 -0400 Subject: [PATCH 06/37] Fix litegraph button being black on light theme. --- web/lib/litegraph.core.js | 1 + 1 file changed, 1 insertion(+) diff --git a/web/lib/litegraph.core.js b/web/lib/litegraph.core.js index 2682ff309..356c71ac2 100644 --- a/web/lib/litegraph.core.js +++ b/web/lib/litegraph.core.js @@ -9766,6 +9766,7 @@ LGraphNode.prototype.executeAction = function(action) switch (w.type) { case "button": + ctx.fillStyle = background_color; if (w.clicked) { ctx.fillStyle = "#AAA"; w.clicked = false; From c20583286fca8a55324ee7d60cebd3b511b16c91 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 10 Aug 2023 19:44:46 -0400 Subject: [PATCH 07/37] Support diffuser text encoder loras. 
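For reference, a sketch of the naming convention this adds support for
(key names here are illustrative; the real mapping is built by
model_lora_keys_clip() and consumed by load_lora() in the diff below):

    # internal ComfyUI weight being patched
    k = "clip_l.transformer.text_model.encoder.layers.0.self_attn.q_proj.weight"
    # diffusers-style LoRA key derived from it
    x = "text_encoder.text_model.encoder.layers.0.self_attn.q_proj"
    # tensor names expected in the LoRA file for that key
    A_name = x + ".lora_linear_layer.up.weight"
    B_name = x + ".lora_linear_layer.down.weight"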
--- comfy/sd.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/comfy/sd.py b/comfy/sd.py index 2996a938b..b19130ada 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -72,6 +72,7 @@ def load_lora(lora, to_load): regular_lora = "{}.lora_up.weight".format(x) diffusers_lora = "{}_lora.up.weight".format(x) + transformers_lora = "{}.lora_linear_layer.up.weight".format(x) A_name = None if regular_lora in lora.keys(): @@ -82,6 +83,10 @@ def load_lora(lora, to_load): A_name = diffusers_lora B_name = "{}_lora.down.weight".format(x) mid_name = None + elif transformers_lora in lora.keys(): + A_name = transformers_lora + B_name ="{}.lora_linear_layer.down.weight".format(x) + mid_name = None if A_name is not None: mid = None @@ -181,20 +186,29 @@ def model_lora_keys_clip(model, key_map={}): key_map[lora_key] = k lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) key_map[lora_key] = k + lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) if k in sdk: lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base key_map[lora_key] = k clip_l_present = True + lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c) if k in sdk: if clip_l_present: lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base + key_map[lora_key] = k + lora_key = "text_encoder_2.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k else: lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner - key_map[lora_key] = k + key_map[lora_key] = k + lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora + key_map[lora_key] = k return key_map From 00877b036344af84731791d4256ecd75cdb2e911 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 11 Aug 2023 02:33:26 -0400 Subject: [PATCH 08/37] Don't ignore extra paths that don't exist. --- folder_paths.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/folder_paths.py b/folder_paths.py index eb7d39b88..d3b76d0dc 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -111,6 +111,8 @@ def add_model_folder_path(folder_name, full_folder_path): global folder_names_and_paths if folder_name in folder_names_and_paths: folder_names_and_paths[folder_name][0].append(full_folder_path) + else: + folder_names_and_paths[folder_name] = ([full_folder_path], set()) def get_folder_paths(folder_name): return folder_names_and_paths[folder_name][0][:] From 2bc12d3d22efb5c63ae3a7fc342bb2dd16b31735 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 11 Aug 2023 05:00:25 -0400 Subject: [PATCH 09/37] Add --temp-directory argument to set temp directory. --- comfy/cli_args.py | 1 + folder_paths.py | 4 ++++ main.py | 6 +++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 81bbc4796..ec7d34a55 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -38,6 +38,7 @@ parser.add_argument("--port", type=int, default=8188, help="Set the listen port. 
parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.") parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") +parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).") parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.") parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.") parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") diff --git a/folder_paths.py b/folder_paths.py index d3b76d0dc..e321690dd 100644 --- a/folder_paths.py +++ b/folder_paths.py @@ -43,6 +43,10 @@ def set_output_directory(output_dir): global output_directory output_directory = output_dir +def set_temp_directory(temp_dir): + global temp_directory + temp_directory = temp_dir + def get_output_directory(): global output_directory return output_directory diff --git a/main.py b/main.py index 07ebbd701..1571376bd 100644 --- a/main.py +++ b/main.py @@ -100,7 +100,7 @@ def hijack_progress(server): def cleanup_temp(): - temp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp") + temp_dir = folder_paths.get_temp_directory() if os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) @@ -127,6 +127,10 @@ def load_extra_path_config(yaml_path): if __name__ == "__main__": + if args.temp_directory: + temp_dir = os.path.join(os.path.abspath(args.temp_directory), "temp") + print(f"Setting temp directory to: {temp_dir}") + folder_paths.set_temp_directory(temp_dir) cleanup_temp() loop = asyncio.new_event_loop() From c8a23ce9e81ce976b0e8637593d33e0790d3d95a Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 11 Aug 2023 13:04:21 -0400 Subject: [PATCH 10/37] Support for yet another lora type based on diffusers. 
--- comfy/sd.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index b19130ada..bff9ee141 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -223,13 +223,16 @@ def model_lora_keys_unet(model, key_map={}): diffusers_keys = utils.unet_to_diffusers(model.model_config.unet_config) for k in diffusers_keys: if k.endswith(".weight"): + unet_key = "diffusion_model.{}".format(diffusers_keys[k]) key_lora = k[:-len(".weight")].replace(".", "_") - key_map["lora_unet_{}".format(key_lora)] = "diffusion_model.{}".format(diffusers_keys[k]) + key_map["lora_unet_{}".format(key_lora)] = unet_key - diffusers_lora_key = "unet.{}".format(k[:-len(".weight")].replace(".to_", ".processor.to_")) - if diffusers_lora_key.endswith(".to_out.0"): - diffusers_lora_key = diffusers_lora_key[:-2] - key_map[diffusers_lora_key] = "diffusion_model.{}".format(diffusers_keys[k]) + diffusers_lora_prefix = ["", "unet."] + for p in diffusers_lora_prefix: + diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_")) + if diffusers_lora_key.endswith(".to_out.0"): + diffusers_lora_key = diffusers_lora_key[:-2] + key_map[diffusers_lora_key] = unet_key return key_map def set_attr(obj, attr, value): From 8c730dc4a7225b2b103497ea1c2f67e48db145ab Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 12 Aug 2023 01:01:49 -0400 Subject: [PATCH 11/37] Add an ImageCompositeMasked node. --- comfy_extras/nodes_mask.py | 79 +++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index 15377af14..b80c8b9a2 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -2,6 +2,35 @@ import torch from nodes import MAX_RESOLUTION +def composite(destination, source, x, y, mask = None, multiplier = 8): + x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier)) + y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier)) + + left, top = (x // multiplier, y // multiplier) + right, bottom = (left + source.shape[3], top + source.shape[2],) + + + if mask is None: + mask = torch.ones_like(source) + else: + mask = mask.clone() + mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear") + mask = mask.repeat((source.shape[0], source.shape[1], 1, 1)) + + # calculate the bounds of the source that will be overlapping the destination + # this prevents the source trying to overwrite latent pixels that are out of bounds + # of the destination + visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),) + + mask = mask[:, :, :visible_height, :visible_width] + inverse_mask = torch.ones_like(mask) - mask + + source_portion = mask * source[:, :, :visible_height, :visible_width] + destination_portion = inverse_mask * destination[:, :, top:bottom, left:right] + + destination[:, :, top:bottom, left:right] = source_portion + destination_portion + return destination + class LatentCompositeMasked: @classmethod def INPUT_TYPES(s): @@ -25,36 +54,31 @@ class LatentCompositeMasked: output = destination.copy() destination = destination["samples"].clone() source = source["samples"] + output["samples"] = composite(destination, source, x, y, mask, 8) + return (output,) - x = max(-source.shape[3] * 8, min(x, destination.shape[3] * 8)) - y = max(-source.shape[2] * 8, min(y, destination.shape[2] * 8)) +class 
ImageCompositeMasked: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "destination": ("IMAGE",), + "source": ("IMAGE",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + }, + "optional": { + "mask": ("MASK",), + } + } + RETURN_TYPES = ("IMAGE",) + FUNCTION = "composite" - left, top = (x // 8, y // 8) - right, bottom = (left + source.shape[3], top + source.shape[2],) - - - if mask is None: - mask = torch.ones_like(source) - else: - mask = mask.clone() - mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear") - mask = mask.repeat((source.shape[0], source.shape[1], 1, 1)) - - # calculate the bounds of the source that will be overlapping the destination - # this prevents the source trying to overwrite latent pixels that are out of bounds - # of the destination - visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),) - - mask = mask[:, :, :visible_height, :visible_width] - inverse_mask = torch.ones_like(mask) - mask - - source_portion = mask * source[:, :, :visible_height, :visible_width] - destination_portion = inverse_mask * destination[:, :, top:bottom, left:right] - - destination[:, :, top:bottom, left:right] = source_portion + destination_portion - - output["samples"] = destination + CATEGORY = "image" + def composite(self, destination, source, x, y, mask = None): + destination = destination.clone().movedim(-1, 1) + output = composite(destination, source.movedim(-1, 1), x, y, mask, 1).movedim(1, -1) return (output,) class MaskToImage: @@ -253,6 +277,7 @@ class FeatherMask: NODE_CLASS_MAPPINGS = { "LatentCompositeMasked": LatentCompositeMasked, + "ImageCompositeMasked": ImageCompositeMasked, "MaskToImage": MaskToImage, "ImageToMask": ImageToMask, "SolidMask": SolidMask, From 585a062910ad3a92b523ad71dec90723e9b782fe Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 13 Aug 2023 01:39:48 -0400 Subject: [PATCH 12/37] Print unet config when model isn't detected. --- comfy/model_detection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 691d4c6c4..49ee9ea70 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -113,6 +113,7 @@ def model_config_from_unet_config(unet_config): if model_config.matches(unet_config): return model_config(unet_config) + print("no match", unet_config) return None def model_config_from_unet(state_dict, unet_key_prefix, use_fp16): From 861fd58819534e72c548c26c9050dc193342a505 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 13 Aug 2023 12:37:53 -0400 Subject: [PATCH 13/37] Add a warning if a card that doesn't support cuda malloc has it enabled. 
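The check is a heuristic substring match of the detected torch device name
against the blacklist of cards known not to support cudaMallocAsync. If the
warning fires and you do hit a "CUDA error", launch with the suggested flag:

    python main.py --disable-cuda-malloc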
--- cuda_malloc.py | 12 ++++++------ main.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/cuda_malloc.py b/cuda_malloc.py index d033529cc..9527f30ee 100644 --- a/cuda_malloc.py +++ b/cuda_malloc.py @@ -36,13 +36,13 @@ def get_gpu_names(): else: return set() -def cuda_malloc_supported(): - blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M", - "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620", - "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000", - "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000", - "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M"} +blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M", + "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620", + "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000", + "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000", + "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M"} +def cuda_malloc_supported(): try: names = get_gpu_names() except: diff --git a/main.py b/main.py index 1571376bd..a4038db4b 100644 --- a/main.py +++ b/main.py @@ -72,6 +72,17 @@ from server import BinaryEventTypes from nodes import init_custom_nodes import comfy.model_management +def cuda_malloc_warning(): + device = comfy.model_management.get_torch_device() + device_name = comfy.model_management.get_torch_device_name(device) + cuda_malloc_warning = False + if "cudaMallocAsync" in device_name: + for b in cuda_malloc.blacklist: + if b in device_name: + cuda_malloc_warning = True + if cuda_malloc_warning: + print("\nWARNING: this card most likely does not support cuda-malloc, if you get \"CUDA error\" please run ComfyUI with: --disable-cuda-malloc\n") + def prompt_worker(q, server): e = execution.PromptExecutor(server) while True: @@ -147,6 +158,9 @@ if __name__ == "__main__": load_extra_path_config(config_path) init_custom_nodes() + + cuda_malloc_warning() + server.add_routes() hijack_progress(server) From 192ca0676c5c118fc1bf736f13c73aa742a7a9cb Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 13 Aug 2023 16:08:11 -0400 Subject: [PATCH 14/37] Add some more cards to the cuda malloc blacklist. 
--- cuda_malloc.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cuda_malloc.py b/cuda_malloc.py index 9527f30ee..144cdacd3 100644 --- a/cuda_malloc.py +++ b/cuda_malloc.py @@ -40,7 +40,9 @@ blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeFor "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620", "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000", "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000", - "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M"} + "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M", + "GeForce GTX 1650", "GeForce GTX 1630" + } def cuda_malloc_supported(): try: From 3cfad03a682667beaa240e2eb1b3a718dc419261 Mon Sep 17 00:00:00 2001 From: FizzleDorf <1fizzledorf@gmail.com> Date: Sun, 13 Aug 2023 22:29:04 -0400 Subject: [PATCH 15/37] dpmpp 3m + dpmpp 3m sde added --- comfy/k_diffusion/sampling.py | 74 +++++++++++++++++++++++++++++++++++ comfy/samplers.py | 2 +- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index dd234435f..27ca7cc25 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -650,4 +650,78 @@ def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disab noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r) +@torch.no_grad() +def sample_dpmpp_3m(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + """DPM-Solver++(3M) without SDE-specific parts.""" + + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + + # Update x using the DPM-Solver++(3M) update rule + t, s = -sigmas[i].log(), -sigmas[i + 1].log() + h = s - t + h_eta = h * (eta + 1) + + x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised + + if eta: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise + + return x + +@torch.no_grad() +def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + """DPM-Solver++(3M) SDE.""" + + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler + extra_args = {} if extra_args is None else extra_args + s_in = x.new_ones([x.shape[0]]) + + denoised_1, denoised_2 = None, None + h_1, h_2 = None, None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) 
+ if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + if sigmas[i + 1] == 0: + # Denoising step + x = denoised + else: + t, s = -sigmas[i].log(), -sigmas[i + 1].log() + h = s - t + h_eta = h * (eta + 1) + + x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised + + if h_2 is not None: + r0 = h_1 / h + r1 = h_2 / h + d1_0 = (denoised - denoised_1) / r0 + d1_1 = (denoised_1 - denoised_2) / r1 + d1 = d1_0 + (d1_0 - d1_1) * r0 / (r0 + r1) + d2 = (d1_0 - d1_1) / (r0 + r1) + phi_2 = h_eta.neg().expm1() / h_eta + 1 + phi_3 = phi_2 / h_eta - 0.5 + x = x + phi_2 * d1 - phi_3 * d2 + elif h_1 is not None: + r = h_1 / h + d = (denoised - denoised_1) / r + phi_2 = h_eta.neg().expm1() / h_eta + 1 + x = x + phi_2 * d + + if eta: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise + + denoised_1, denoised_2 = denoised, denoised_1 + h_1, h_2 = h, h_1 + return x diff --git a/comfy/samplers.py b/comfy/samplers.py index de4f36da2..dc7c3a272 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -528,7 +528,7 @@ class KSampler: SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m","dpmpp_3m_sde", "ddim", "uni_pc", "uni_pc_bh2"] def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}): self.model = model From 58c7da3665726a75adc4d91217364d93e7f44945 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 00:28:50 -0400 Subject: [PATCH 16/37] Gpu variant of dpmpp_3m_sde. Note: use 3m with exponential or karras. 
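The _gpu variant added below differs only in constructing the
BrownianTreeNoiseSampler with cpu=False so the noise is drawn on the
sampling device; the same seed will therefore generally not reproduce the
cpu variant's output. The exponential/karras note refers to the scheduler
setting used alongside these samplers.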
--- comfy/k_diffusion/sampling.py | 39 +++++++++++++++++++++-------------- comfy/samplers.py | 2 +- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 27ca7cc25..020e65ada 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -631,25 +631,13 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl elif solver_type == 'midpoint': x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised) - x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise + if eta: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise old_denoised = denoised h_last = h return x -@torch.no_grad() -def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): - sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() - noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler - return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type) - - -@torch.no_grad() -def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): - sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() - noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler - return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r) - @torch.no_grad() def sample_dpmpp_3m(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): """DPM-Solver++(3M) without SDE-specific parts.""" @@ -663,7 +651,7 @@ def sample_dpmpp_3m(model, x, sigmas, extra_args=None, callback=None, disable=No denoised = model(x, sigmas[i] * s_in, **extra_args) if callback is not None: callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - + # Update x using the DPM-Solver++(3M) update rule t, s = -sigmas[i].log(), -sigmas[i + 1].log() h = s - t @@ -680,8 +668,9 @@ def sample_dpmpp_3m(model, x, sigmas, extra_args=None, callback=None, disable=No def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): """DPM-Solver++(3M) SDE.""" + seed = extra_args.get("seed", None) sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() - noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) @@ -725,3 +714,21 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl h_1, h_2 = h, h_1 return x +@torch.no_grad() +def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): + sigma_min, sigma_max = sigmas[sigmas > 0].min(), 
sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler + return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler) + +@torch.no_grad() +def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler + return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type) + +@torch.no_grad() +def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler + return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r) + diff --git a/comfy/samplers.py b/comfy/samplers.py index dc7c3a272..1bccc3070 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -528,7 +528,7 @@ class KSampler: SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m","dpmpp_3m_sde", "ddim", "uni_pc", "uni_pc_bh2"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}): self.model = model From e244b2df83ab928a4d08c8a9a6dfc4eae494a907 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 00:13:35 -0400 Subject: [PATCH 17/37] Add sgm_uniform scheduler that acts like the default one in sgm. 
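A quick worked sketch of the spacing, assuming the usual
num_timesteps = 1000 and steps = 4:

    torch.linspace(999, 0, 5)[:-1].type(torch.int)  # -> [999, 749, 499, 249]

Each timestep is then converted to a sigma and a final 0.0 is appended, so
the schedule is uniform in timestep space and starts at the last trained
timestep, which matches sgm's default behaviour.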
--- comfy/samplers.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index 1bccc3070..2973f4cff 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -347,6 +347,17 @@ def ddim_scheduler(model, steps): sigs += [0.0] return torch.FloatTensor(sigs) +def sgm_scheduler(model, steps): + sigs = [] + timesteps = torch.linspace(model.inner_model.inner_model.num_timesteps - 1, 0, steps + 1)[:-1].type(torch.int) + for x in range(len(timesteps)): + ts = timesteps[x] + if ts > 999: + ts = 999 + sigs.append(model.t_to_sigma(torch.tensor(ts))) + sigs += [0.0] + return torch.FloatTensor(sigs) + def blank_inpaint_image_like(latent_image): blank_image = torch.ones_like(latent_image) # these are the values for "zero" in pixel space translated to latent space @@ -525,7 +536,7 @@ def encode_adm(model, conds, batch_size, width, height, device, prompt_type): class KSampler: - SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"] + SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] @@ -570,6 +581,8 @@ class KSampler: sigmas = simple_scheduler(self.model_wrap, steps) elif self.scheduler == "ddim_uniform": sigmas = ddim_scheduler(self.model_wrap, steps) + elif self.scheduler == "sgm_uniform": + sigmas = sgm_scheduler(self.model_wrap, steps) else: print("error invalid scheduler", self.scheduler) From 0cb6dac943a2550f6e5c5cf4f99a64b40c89ac80 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 00:48:45 -0400 Subject: [PATCH 18/37] Remove 3m from PR #1213 because of some small issues. 
--- comfy/k_diffusion/sampling.py | 26 -------------------------- comfy/samplers.py | 2 +- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 020e65ada..beaa623f3 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -638,32 +638,6 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl h_last = h return x -@torch.no_grad() -def sample_dpmpp_3m(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): - """DPM-Solver++(3M) without SDE-specific parts.""" - - sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() - noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler - extra_args = {} if extra_args is None else extra_args - s_in = x.new_ones([x.shape[0]]) - - for i in trange(len(sigmas) - 1, disable=disable): - denoised = model(x, sigmas[i] * s_in, **extra_args) - if callback is not None: - callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) - - # Update x using the DPM-Solver++(3M) update rule - t, s = -sigmas[i].log(), -sigmas[i + 1].log() - h = s - t - h_eta = h * (eta + 1) - - x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised - - if eta: - x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise - - return x - @torch.no_grad() def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): """DPM-Solver++(3M) SDE.""" diff --git a/comfy/samplers.py b/comfy/samplers.py index 2973f4cff..28cd46667 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -539,7 +539,7 @@ class KSampler: SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"] SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"] def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}): self.model = model From 9cc12c833d60665b72ed68e8e74feda0670945f3 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 16:54:05 -0400 Subject: [PATCH 19/37] CLIPVisionEncode can now encode multiple images. --- comfy/clip_vision.py | 4 ++-- comfy/model_base.py | 18 +++++++++--------- nodes.py | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index e2bc3209d..8d04faf71 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -24,8 +24,8 @@ class ClipVisionModel(): return self.model.load_state_dict(sd, strict=False) def encode_image(self, image): - img = torch.clip((255. * image[0]), 0, 255).round().int() - inputs = self.processor(images=[img], return_tensors="pt") + img = torch.clip((255. 
* image), 0, 255).round().int() + inputs = self.processor(images=img, return_tensors="pt") outputs = self.model(**inputs) return outputs diff --git a/comfy/model_base.py b/comfy/model_base.py index bf6983fc2..c3c807a68 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -120,15 +120,15 @@ class SD21UNCLIP(BaseModel): weights = [] noise_aug = [] for unclip_cond in unclip_conditioning: - adm_cond = unclip_cond["clip_vision_output"].image_embeds - weight = unclip_cond["strength"] - noise_augment = unclip_cond["noise_augmentation"] - noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment) - c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device)) - adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight - weights.append(weight) - noise_aug.append(noise_augment) - adm_inputs.append(adm_out) + for adm_cond in unclip_cond["clip_vision_output"].image_embeds: + weight = unclip_cond["strength"] + noise_augment = unclip_cond["noise_augmentation"] + noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device)) + adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight + weights.append(weight) + noise_aug.append(noise_augment) + adm_inputs.append(adm_out) if len(noise_aug) > 1: adm_out = torch.stack(adm_inputs).sum(0) diff --git a/nodes.py b/nodes.py index 92baffe30..5f7ea95c0 100644 --- a/nodes.py +++ b/nodes.py @@ -771,7 +771,7 @@ class StyleModelApply: CATEGORY = "conditioning/style_model" def apply_stylemodel(self, clip_vision_output, style_model, conditioning): - cond = style_model.get_cond(clip_vision_output) + cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0) c = [] for t in conditioning: n = [torch.cat((t[0], cond), dim=1), t[1].copy()] From 06681ee035821104f1593c0efe43f5b7e1de7814 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 16:54:30 -0400 Subject: [PATCH 20/37] Add codeowners file. --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..7c7c3e19e --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @comfyanonymous From d4380f3aa339a984f08bac3b872d446c552fbc7a Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 18:13:11 -0400 Subject: [PATCH 21/37] Add option to use different xformers version in the github workflow. 
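This lets a dependencies build pin or swap the xformers package without
editing the workflow file, e.g. dispatching via the GitHub CLI (the version
string below is only an example):

    gh workflow run "Windows Release cu118 dependencies 2" -f xformers="xformers==0.0.20"

Leaving the input at its default ("xformers") keeps the old behaviour of
installing the latest release.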
--- .../workflows/windows_release_cu118_dependencies_2.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/windows_release_cu118_dependencies_2.yml b/.github/workflows/windows_release_cu118_dependencies_2.yml index 42adee9e7..a7760b21e 100644 --- a/.github/workflows/windows_release_cu118_dependencies_2.yml +++ b/.github/workflows/windows_release_cu118_dependencies_2.yml @@ -2,6 +2,13 @@ name: "Windows Release cu118 dependencies 2" on: workflow_dispatch: + inputs: + xformers: + description: 'xformers version' + required: true + type: string + default: "xformers" + # push: # branches: # - master @@ -17,7 +24,7 @@ jobs: - shell: bash run: | - python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir + python -m pip wheel --no-cache-dir torch torchvision torchaudio ${{ inputs.xformers }} --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir python -m pip install --no-cache-dir ./temp_wheel_dir/* echo installed basic ls -lah temp_wheel_dir From e7d88855f45967f03e9c3d9d753d1adced062d6e Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 20:23:38 -0400 Subject: [PATCH 22/37] Add node to batch images together. --- nodes.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/nodes.py b/nodes.py index 5f7ea95c0..5b144c2fc 100644 --- a/nodes.py +++ b/nodes.py @@ -1448,6 +1448,22 @@ class ImageInvert: s = 1.0 - image return (s,) +class ImageBatch: + + @classmethod + def INPUT_TYPES(s): + return {"required": { "image1": ("IMAGE",), "image2": ("IMAGE",)}} + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "batch" + + CATEGORY = "image" + + def batch(self, image1, image2): + if image1.shape[1:] != image2.shape[1:]: + image2 = comfy.utils.common_upscale(image2.movedim(-1,1), image1.shape[2], image1.shape[1], "bilinear", "center").movedim(1,-1) + s = torch.cat((image1, image2), dim=0) + return (s,) class ImagePadForOutpaint: @@ -1533,6 +1549,7 @@ NODE_CLASS_MAPPINGS = { "ImageScale": ImageScale, "ImageScaleBy": ImageScaleBy, "ImageInvert": ImageInvert, + "ImageBatch": ImageBatch, "ImagePadForOutpaint": ImagePadForOutpaint, "ConditioningAverage ": ConditioningAverage , "ConditioningCombine": ConditioningCombine, @@ -1627,6 +1644,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "ImageUpscaleWithModel": "Upscale Image (using Model)", "ImageInvert": "Invert Image", "ImagePadForOutpaint": "Pad Image for Outpainting", + "ImageBatch": "Batch Images", # _for_testing "VAEDecodeTiled": "VAE Decode (Tiled)", "VAEEncodeTiled": "VAE Encode (Tiled)", From 94fceb8700316f3ebfa68dafa3c32ee19a96d987 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 21:08:45 -0400 Subject: [PATCH 23/37] Make Blur node use the image device for processing. 
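gaussian_kernel() previously always allocated its kernel on the CPU, so
blurring an image tensor that lives on the GPU would hit a device mismatch
in the conv2d call; the kernel is now created directly on image.device.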
--- comfy_extras/nodes_post_processing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index 3be141dfe..a138b292e 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -59,8 +59,8 @@ class Blend: def g(self, x): return torch.where(x <= 0.25, ((16 * x - 12) * x + 4) * x, torch.sqrt(x)) -def gaussian_kernel(kernel_size: int, sigma: float): - x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size), torch.linspace(-1, 1, kernel_size), indexing="ij") +def gaussian_kernel(kernel_size: int, sigma: float, device=None): + x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size, device=device), torch.linspace(-1, 1, kernel_size, device=device), indexing="ij") d = torch.sqrt(x * x + y * y) g = torch.exp(-(d * d) / (2.0 * sigma * sigma)) return g / g.sum() @@ -101,7 +101,7 @@ class Blur: batch_size, height, width, channels = image.shape kernel_size = blur_radius * 2 + 1 - kernel = gaussian_kernel(kernel_size, sigma).repeat(channels, 1, 1).unsqueeze(1) + kernel = gaussian_kernel(kernel_size, sigma, device=image.device).repeat(channels, 1, 1).unsqueeze(1) image = image.permute(0, 3, 1, 2) # Torch wants (B, C, H, W) we use (B, H, W, C) padded_image = F.pad(image, (blur_radius,blur_radius,blur_radius,blur_radius), 'reflect') From a2ce9655ca56b092a1457b3b77d236194aabb9d6 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 14 Aug 2023 23:41:52 -0400 Subject: [PATCH 24/37] Refactor unclip code. --- comfy/model_base.py | 51 +++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/comfy/model_base.py b/comfy/model_base.py index c3c807a68..ad661ec7d 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -105,6 +105,29 @@ class BaseModel(torch.nn.Module): return {**unet_state_dict, **vae_state_dict, **clip_state_dict} +def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0): + adm_inputs = [] + weights = [] + noise_aug = [] + for unclip_cond in unclip_conditioning: + for adm_cond in unclip_cond["clip_vision_output"].image_embeds: + weight = unclip_cond["strength"] + noise_augment = unclip_cond["noise_augmentation"] + noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device)) + adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight + weights.append(weight) + noise_aug.append(noise_augment) + adm_inputs.append(adm_out) + + if len(noise_aug) > 1: + adm_out = torch.stack(adm_inputs).sum(0) + noise_augment = noise_augment_merge + noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment) + c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device)) + adm_out = torch.cat((c_adm, noise_level_emb), 1) + + return adm_out class SD21UNCLIP(BaseModel): def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None): @@ -114,33 +137,11 @@ class SD21UNCLIP(BaseModel): def encode_adm(self, **kwargs): unclip_conditioning = kwargs.get("unclip_conditioning", None) device = kwargs["device"] - - if unclip_conditioning is not None: - adm_inputs = [] - weights = [] - noise_aug = [] - for unclip_cond in unclip_conditioning: - for adm_cond in unclip_cond["clip_vision_output"].image_embeds: - weight = unclip_cond["strength"] - 
noise_augment = unclip_cond["noise_augmentation"] - noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment) - c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device)) - adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight - weights.append(weight) - noise_aug.append(noise_augment) - adm_inputs.append(adm_out) - - if len(noise_aug) > 1: - adm_out = torch.stack(adm_inputs).sum(0) - #TODO: add a way to control this - noise_augment = 0.05 - noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment) - c_adm, noise_level_emb = self.noise_augmentor(adm_out[:, :self.noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device)) - adm_out = torch.cat((c_adm, noise_level_emb), 1) + if unclip_conditioning is None: + return torch.zeros((1, self.adm_channels)) else: - adm_out = torch.zeros((1, self.adm_channels)) + return unclip_adm(unclip_conditioning, device, self.noise_augmentor, kwargs.get("unclip_noise_augment_merge", 0.05)) - return adm_out class SDInpaint(BaseModel): def __init__(self, model_config, model_type=ModelType.EPS, device=None): From 7567c4ac8fde9990f3be575049d31b1a5c389c3e Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 15 Aug 2023 13:28:34 -0400 Subject: [PATCH 25/37] Add bypass to readme and add a Bypass menu option to the nodes. --- README.md | 1 + web/scripts/app.js | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index b055325ed..baa8cf8b6 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Workflow examples can be found on the [Examples page](https://comfyanonymous.git | Ctrl + O | Load workflow | | Ctrl + A | Select all nodes | | Ctrl + M | Mute/unmute selected nodes | +| Ctrl + B | Bypass selected nodes (acts like the node was removed from the graph and the wires reconnected through) | | Delete/Backspace | Delete selected nodes | | Ctrl + Delete/Backspace | Delete the current graph | | Space | Move the canvas around when held and moving the cursor | diff --git a/web/scripts/app.js b/web/scripts/app.js index 40156abc3..1c95c765c 100644 --- a/web/scripts/app.js +++ b/web/scripts/app.js @@ -284,6 +284,11 @@ export class ComfyApp { } } + options.push({ + content: "Bypass", + callback: (obj) => { if (this.mode === 4) this.mode = 0; else this.mode = 4; this.graph.change(); } + }); + // prevent conflict of clipspace content if(!ComfyApp.clipspace_return_node) { options.push({ From 6dc02c7baca04c0fadd4e30a055ced67fe0d58d1 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 15 Aug 2023 17:51:52 -0400 Subject: [PATCH 26/37] Add a "resize_source" option to Image and Latent CompositeMasked. 
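When resize_source is enabled the source is first bilinearly interpolated
to the destination's spatial size before being placed at (x, y); when
disabled (the default) the existing crop-to-visible-region behaviour is
kept, so current workflows are unaffected.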
--- comfy_extras/nodes_mask.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index b80c8b9a2..87ba36fc6 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -2,14 +2,16 @@ import torch from nodes import MAX_RESOLUTION -def composite(destination, source, x, y, mask = None, multiplier = 8): +def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False): + if resize_source: + source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear") + x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier)) y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier)) left, top = (x // multiplier, y // multiplier) right, bottom = (left + source.shape[3], top + source.shape[2],) - if mask is None: mask = torch.ones_like(source) else: @@ -40,6 +42,7 @@ class LatentCompositeMasked: "source": ("LATENT",), "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}), + "resize_source": ("BOOLEAN", {"default": False}), }, "optional": { "mask": ("MASK",), @@ -50,11 +53,11 @@ class LatentCompositeMasked: CATEGORY = "latent" - def composite(self, destination, source, x, y, mask = None): + def composite(self, destination, source, x, y, resize_source, mask = None): output = destination.copy() destination = destination["samples"].clone() source = source["samples"] - output["samples"] = composite(destination, source, x, y, mask, 8) + output["samples"] = composite(destination, source, x, y, mask, 8, resize_source) return (output,) class ImageCompositeMasked: @@ -66,6 +69,7 @@ class ImageCompositeMasked: "source": ("IMAGE",), "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "resize_source": ("BOOLEAN", {"default": False}), }, "optional": { "mask": ("MASK",), @@ -76,9 +80,9 @@ class ImageCompositeMasked: CATEGORY = "image" - def composite(self, destination, source, x, y, mask = None): + def composite(self, destination, source, x, y, resize_source, mask = None): destination = destination.clone().movedim(-1, 1) - output = composite(destination, source.movedim(-1, 1), x, y, mask, 1).movedim(1, -1) + output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1) return (output,) class MaskToImage: From 27b87c25a11927850dba00cb89c7974ab72e6f6d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 15 Aug 2023 17:53:10 -0400 Subject: [PATCH 27/37] Add an EmptyImage node. TODO: implement color picker in the frontend. 
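The color input packs RGB into a single 0xRRGGBB integer which the node
splits into per-channel 0..1 floats. A quick sketch of the decode, with an
example value:

    color = 0x3366CC
    r = ((color >> 16) & 0xFF) / 0xFF  # 0x33 -> 0.2
    g = ((color >> 8) & 0xFF) / 0xFF   # 0x66 -> 0.4
    b = (color & 0xFF) / 0xFF          # 0xCC -> 0.8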
--- nodes.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/nodes.py b/nodes.py index 5b144c2fc..e46aed825 100644 --- a/nodes.py +++ b/nodes.py @@ -1465,6 +1465,28 @@ class ImageBatch: s = torch.cat((image1, image2), dim=0) return (s,) +class EmptyImage: + def __init__(self, device="cpu"): + self.device = device + + @classmethod + def INPUT_TYPES(s): + return {"required": { "width": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "height": ("INT", {"default": 512, "min": 1, "max": MAX_RESOLUTION, "step": 1}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 64}), + "color": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFF, "step": 1, "display": "color"}), + }} + RETURN_TYPES = ("IMAGE",) + FUNCTION = "generate" + + CATEGORY = "image" + + def generate(self, width, height, batch_size=1, color=0): + r = torch.full([batch_size, height, width, 1], ((color >> 16) & 0xFF) / 0xFF) + g = torch.full([batch_size, height, width, 1], ((color >> 8) & 0xFF) / 0xFF) + b = torch.full([batch_size, height, width, 1], ((color) & 0xFF) / 0xFF) + return (torch.cat((r, g, b), dim=-1), ) + class ImagePadForOutpaint: @classmethod @@ -1551,6 +1573,7 @@ NODE_CLASS_MAPPINGS = { "ImageInvert": ImageInvert, "ImageBatch": ImageBatch, "ImagePadForOutpaint": ImagePadForOutpaint, + "EmptyImage": EmptyImage, "ConditioningAverage ": ConditioningAverage , "ConditioningCombine": ConditioningCombine, "ConditioningConcat": ConditioningConcat, From 18e86a4010099a39eee8c901e22696575b330f86 Mon Sep 17 00:00:00 2001 From: Corey <17171838+coreyryanhanson@users.noreply.github.com> Date: Wed, 16 Aug 2023 10:57:14 -0400 Subject: [PATCH 28/37] add a node to allow growing of masks through dilation --- comfy_extras/nodes_mask.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index 87ba36fc6..5adb468ac 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -1,3 +1,5 @@ +import numpy as np +from scipy.ndimage import grey_dilation import torch from nodes import MAX_RESOLUTION @@ -276,6 +278,35 @@ class FeatherMask: output[-y, :] *= feather_rate return (output,) + +class GrowMask: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "mask": ("MASK",), + "expand": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "tapered_corners": ("BOOLEAN", {"default": True}), + }, + } + + CATEGORY = "mask" + + RETURN_TYPES = ("MASK",) + + FUNCTION = "expand_mask" + + def expand_mask(self, mask, expand, tapered_corners): + c = 0 if tapered_corners else 1 + kernel = np.array([[c, 1, c], + [1, 1, 1], + [c, 1, c]]) + output = mask.numpy().copy() + while expand > 0: + output = grey_dilation(output, footprint=kernel) + expand -= 1 + output = torch.from_numpy(output) + return (output,) @@ -289,6 +320,7 @@ NODE_CLASS_MAPPINGS = { "CropMask": CropMask, "MaskComposite": MaskComposite, "FeatherMask": FeatherMask, + "GrowMask": GrowMask, } NODE_DISPLAY_NAME_MAPPINGS = { From ae270f79bcb0da0efe09094eddc488a17a900913 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 16 Aug 2023 11:05:11 -0400 Subject: [PATCH 29/37] Fix potential issue with batch size and clip vision. 
--- comfy/clip_vision.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index 8d04faf71..c408a732d 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -25,6 +25,8 @@ class ClipVisionModel(): def encode_image(self, image): img = torch.clip((255. * image), 0, 255).round().int() + if len(img.shape) == 3: + img = [img] inputs = self.processor(images=img, return_tensors="pt") outputs = self.model(**inputs) return outputs From 58f0c616ed168b728c93d095f2e11041c6de0582 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 16 Aug 2023 11:36:22 -0400 Subject: [PATCH 30/37] Fix clip vision issue with old transformers versions. --- comfy/clip_vision.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index c408a732d..2c8603bbe 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -25,8 +25,7 @@ class ClipVisionModel(): def encode_image(self, image): img = torch.clip((255. * image), 0, 255).round().int() - if len(img.shape) == 3: - img = [img] + img = list(map(lambda a: a, img)) inputs = self.processor(images=img, return_tensors="pt") outputs = self.model(**inputs) return outputs From 53f326a3d8cfcab008d00a7603de3c90fe7f6288 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 16 Aug 2023 12:22:46 -0400 Subject: [PATCH 31/37] Support diffusers mini controlnets. --- comfy/model_detection.py | 40 +++++++++++++++++++++++++++++++--------- comfy/sd.py | 5 ++++- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 49ee9ea70..d18e019f3 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -121,9 +121,20 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_fp16): return model_config_from_unet_config(unet_config) -def model_config_from_diffusers_unet(state_dict, use_fp16): +def unet_config_from_diffusers_unet(state_dict, use_fp16): match = {} - match["context_dim"] = state_dict["down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight"].shape[1] + attention_resolutions = [] + + attn_res = 1 + for i in range(5): + k = "down_blocks.{}.attentions.1.transformer_blocks.0.attn2.to_k.weight".format(i) + if k in state_dict: + match["context_dim"] = state_dict[k].shape[1] + attention_resolutions.append(attn_res) + attn_res *= 2 + + match["attention_resolutions"] = attention_resolutions + match["model_channels"] = state_dict["conv_in.weight"].shape[0] match["in_channels"] = state_dict["conv_in.weight"].shape[1] match["adm_in_channels"] = None @@ -135,22 +146,22 @@ def model_config_from_diffusers_unet(state_dict, use_fp16): SDXL = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2, 'attention_resolutions': [2, 4], 'transformer_depth': [0, 2, 10], 'channel_mult': [1, 2, 4], - 'transformer_depth_middle': 10, 'use_linear_in_transformer': True, 'context_dim': 2048} + 'transformer_depth_middle': 10, 'use_linear_in_transformer': True, 'context_dim': 2048, "num_head_channels": 64} SDXL_refiner = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'num_classes': 'sequential', 'adm_in_channels': 2560, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 384, 'num_res_blocks': 2, 'attention_resolutions': [2, 4], 
'transformer_depth': [0, 4, 4, 0], 'channel_mult': [1, 2, 4, 4], - 'transformer_depth_middle': 4, 'use_linear_in_transformer': True, 'context_dim': 1280} + 'transformer_depth_middle': 4, 'use_linear_in_transformer': True, 'context_dim': 1280, "num_head_channels": 64} SD21 = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': None, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2, 'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4], - 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024} + 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, "num_head_channels": 64} SD21_uncliph = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'num_classes': 'sequential', 'adm_in_channels': 2048, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2, 'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4], - 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024} + 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, "num_head_channels": 64} SD21_unclipl = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'num_classes': 'sequential', 'adm_in_channels': 1536, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, @@ -160,9 +171,14 @@ def model_config_from_diffusers_unet(state_dict, use_fp16): SD15 = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': None, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2, 'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4], - 'transformer_depth_middle': 1, 'use_linear_in_transformer': False, 'context_dim': 768} + 'transformer_depth_middle': 1, 'use_linear_in_transformer': False, 'context_dim': 768, "num_heads": 8} - supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl] + SDXL_mini_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': 2, 'attention_resolutions': [4], 'transformer_depth': [0, 0, 1], 'channel_mult': [1, 2, 4], + 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 2048, "num_head_channels": 64} + + supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mini_cnet] for unet_config in supported_models: matches = True @@ -171,5 +187,11 @@ def model_config_from_diffusers_unet(state_dict, use_fp16): matches = False break if matches: - return model_config_from_unet_config(unet_config) + return unet_config + return None + +def model_config_from_diffusers_unet(state_dict, use_fp16): + unet_config = unet_config_from_diffusers_unet(state_dict, use_fp16) + if unet_config is not None: + return model_config_from_unet_config(unet_config) return None diff --git a/comfy/sd.py b/comfy/sd.py index bff9ee141..06b640968 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -835,7 +835,7 @@ def load_controlnet(ckpt_path, model=None): controlnet_config = 
None if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format use_fp16 = model_management.should_use_fp16() - controlnet_config = model_detection.model_config_from_diffusers_unet(controlnet_data, use_fp16).unet_config + controlnet_config = model_detection.unet_config_from_diffusers_unet(controlnet_data, use_fp16) diffusers_keys = utils.unet_to_diffusers(controlnet_config) diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight" diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias" @@ -874,6 +874,9 @@ def load_controlnet(ckpt_path, model=None): if k in controlnet_data: new_sd[diffusers_keys[k]] = controlnet_data.pop(k) + leftover_keys = controlnet_data.keys() + if len(leftover_keys) > 0: + print("leftover keys:", leftover_keys) controlnet_data = new_sd pth_key = 'control_model.zero_convs.0.0.weight' From 2c97c30256a00e71e93f11130012a5da0b726c17 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 16 Aug 2023 12:45:13 -0400 Subject: [PATCH 32/37] Support small diffusers controlnet so both types are now supported. --- comfy/model_detection.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index d18e019f3..0edc4f180 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -173,12 +173,18 @@ def unet_config_from_diffusers_unet(state_dict, use_fp16): 'attention_resolutions': [1, 2, 4], 'transformer_depth': [1, 1, 1, 0], 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': False, 'context_dim': 768, "num_heads": 8} - SDXL_mini_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + SDXL_mid_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': 2, 'attention_resolutions': [4], 'transformer_depth': [0, 0, 1], 'channel_mult': [1, 2, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 2048, "num_head_channels": 64} - supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mini_cnet] + SDXL_small_cnet = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, + 'num_classes': 'sequential', 'adm_in_channels': 2816, 'use_fp16': use_fp16, 'in_channels': 4, 'model_channels': 320, + 'num_res_blocks': 2, 'attention_resolutions': [], 'transformer_depth': [0, 0, 0], 'channel_mult': [1, 2, 4], + 'transformer_depth_middle': 0, 'use_linear_in_transformer': True, "num_head_channels": 64, 'context_dim': 1} + + + supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mid_cnet, SDXL_small_cnet] for unet_config in supported_models: matches = True From 89a0767abf019817a73ad9c7a693a2efcff75b12 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 17 Aug 2023 01:06:34 -0400 Subject: [PATCH 33/37] Smarter memory management. Try to keep models on the vram when possible. Better lowvram mode for controlnets. 
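
The single current_loaded_model global becomes a list of loaded models
kept in most-recently-used order, and memory is only reclaimed from the
tail of that list when a new load actually needs it. A simplified
sketch of the idea (ModelCache, free_mem, load and unload are
hypothetical names, not the real comfy.model_management API):

    class ModelCache:
        def __init__(self):
            self.loaded = []  # most recently used first

        def load(self, model, free_mem, required):
            if model in self.loaded:
                # Already resident: just refresh its MRU position.
                self.loaded.remove(model)
                self.loaded.insert(0, model)
                return
            # Evict least-recently-used models until there is room.
            while self.loaded and free_mem() < required:
                self.loaded.pop().unload()
            model.load()
            self.loaded.insert(0, model)

In the patch itself the same idea is spread across LoadedModel,
free_memory() and load_models_gpu(), with extra headroom reserved for
inference via minimum_inference_memory().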
--- comfy/gligen.py | 35 +---- comfy/model_management.py | 280 ++++++++++++++++++++++---------------- comfy/sample.py | 19 ++- comfy/samplers.py | 4 +- comfy/sd.py | 59 +++++--- execution.py | 1 + 6 files changed, 230 insertions(+), 168 deletions(-) diff --git a/comfy/gligen.py b/comfy/gligen.py index 90558785b..8d182839e 100644 --- a/comfy/gligen.py +++ b/comfy/gligen.py @@ -244,30 +244,15 @@ class Gligen(nn.Module): self.position_net = position_net self.key_dim = key_dim self.max_objs = 30 - self.lowvram = False + self.current_device = torch.device("cpu") def _set_position(self, boxes, masks, positive_embeddings): - if self.lowvram == True: - self.position_net.to(boxes.device) - objs = self.position_net(boxes, masks, positive_embeddings) - - if self.lowvram == True: - self.position_net.cpu() - def func_lowvram(x, extra_options): - key = extra_options["transformer_index"] - module = self.module_list[key] - module.to(x.device) - r = module(x, objs) - module.cpu() - return r - return func_lowvram - else: - def func(x, extra_options): - key = extra_options["transformer_index"] - module = self.module_list[key] - return module(x, objs) - return func + def func(x, extra_options): + key = extra_options["transformer_index"] + module = self.module_list[key] + return module(x, objs) + return func def set_position(self, latent_image_shape, position_params, device): batch, c, h, w = latent_image_shape @@ -312,14 +297,6 @@ class Gligen(nn.Module): masks.to(device), conds.to(device)) - def set_lowvram(self, value=True): - self.lowvram = value - - def cleanup(self): - self.lowvram = False - - def get_models(self): - return [self] def load_gligen(sd): sd_k = sd.keys() diff --git a/comfy/model_management.py b/comfy/model_management.py index 4dd15b41c..3736b57a9 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -2,6 +2,7 @@ import psutil from enum import Enum from comfy.cli_args import args import torch +import sys class VRAMState(Enum): DISABLED = 0 #No vram present: no need to move models to vram @@ -221,132 +222,161 @@ except: print("Could not pick default device.") -current_loaded_model = None -current_gpu_controlnets = [] +current_loaded_models = [] -model_accelerated = False +class LoadedModel: + def __init__(self, model): + self.model = model + self.model_accelerated = False + self.device = model.load_device + def model_memory(self): + return self.model.model_size() -def unload_model(): - global current_loaded_model - global model_accelerated - global current_gpu_controlnets - global vram_state + def model_memory_required(self, device): + if device == self.model.current_device: + return 0 + else: + return self.model_memory() - if current_loaded_model is not None: - if model_accelerated: - accelerate.hooks.remove_hook_from_submodules(current_loaded_model.model) - model_accelerated = False + def model_load(self, lowvram_model_memory=0): + patch_model_to = None + if lowvram_model_memory == 0: + patch_model_to = self.device - current_loaded_model.unpatch_model() - current_loaded_model.model.to(current_loaded_model.offload_device) - current_loaded_model.model_patches_to(current_loaded_model.offload_device) - current_loaded_model = None - if vram_state != VRAMState.HIGH_VRAM: - soft_empty_cache() + self.model.model_patches_to(self.device) + self.model.model_patches_to(self.model.model_dtype()) - if vram_state != VRAMState.HIGH_VRAM: - if len(current_gpu_controlnets) > 0: - for n in current_gpu_controlnets: - n.cpu() - current_gpu_controlnets = [] + try: + self.real_model = 
self.model.patch_model(device_to=patch_model_to) #TODO: do something with loras and offloading to CPU + except Exception as e: + self.model.unpatch_model(self.model.offload_device) + self.model_unload() + raise e + + if lowvram_model_memory > 0: + print("loading in lowvram mode", lowvram_model_memory/(1024 * 1024)) + device_map = accelerate.infer_auto_device_map(self.real_model, max_memory={0: "{}MiB".format(lowvram_model_memory // (1024 * 1024)), "cpu": "16GiB"}) + accelerate.dispatch_model(self.real_model, device_map=device_map, main_device=self.device) + self.model_accelerated = True + + return self.real_model + + def model_unload(self): + if self.model_accelerated: + accelerate.hooks.remove_hook_from_submodules(self.real_model) + self.model_accelerated = False + + self.model.unpatch_model(self.model.offload_device) + self.model.model_patches_to(self.model.offload_device) + + def __eq__(self, other): + return self.model is other.model def minimum_inference_memory(): - return (768 * 1024 * 1024) + return (1024 * 1024 * 1024) + +def unload_model_clones(model): + to_unload = [] + for i in range(len(current_loaded_models)): + if model.is_clone(current_loaded_models[i].model): + to_unload = [i] + to_unload + + for i in to_unload: + print("unload clone", i) + current_loaded_models.pop(i).model_unload() + +def free_memory(memory_required, device, keep_loaded=[]): + unloaded_model = False + for i in range(len(current_loaded_models) -1, -1, -1): + current_free_mem = get_free_memory(device) + if current_free_mem > memory_required: + break + shift_model = current_loaded_models[i] + if shift_model.device == device: + if shift_model not in keep_loaded: + current_loaded_models.pop(i).model_unload() + unloaded_model = True + + if unloaded_model: + soft_empty_cache() + + +def load_models_gpu(models, memory_required=0): + global vram_state + + inference_memory = minimum_inference_memory() + extra_mem = max(inference_memory, memory_required) + + models_to_load = [] + models_already_loaded = [] + for x in models: + loaded_model = LoadedModel(x) + + if loaded_model in current_loaded_models: + index = current_loaded_models.index(loaded_model) + current_loaded_models.insert(0, current_loaded_models.pop(index)) + models_already_loaded.append(loaded_model) + else: + models_to_load.append(loaded_model) + + if len(models_to_load) == 0: + devs = set(map(lambda a: a.device, models_already_loaded)) + for d in devs: + if d != torch.device("cpu"): + free_memory(extra_mem, d, models_already_loaded) + return + + print("loading new") + + total_memory_required = {} + for loaded_model in models_to_load: + unload_model_clones(loaded_model.model) + total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device) + + for device in total_memory_required: + if device != torch.device("cpu"): + free_memory(total_memory_required[device] * 1.3 + extra_mem, device, models_already_loaded) + + for loaded_model in models_to_load: + model = loaded_model.model + torch_dev = model.load_device + if is_device_cpu(torch_dev): + vram_set_state = VRAMState.DISABLED + else: + vram_set_state = vram_state + lowvram_model_memory = 0 + if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM): + model_size = loaded_model.model_memory_required(torch_dev) + current_free_mem = get_free_memory(torch_dev) + lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 )) + if 
model_size > (current_free_mem - inference_memory): #only switch to lowvram if really necessary + vram_set_state = VRAMState.LOW_VRAM + else: + lowvram_model_memory = 0 + + if vram_set_state == VRAMState.NO_VRAM: + lowvram_model_memory = 256 * 1024 * 1024 + + cur_loaded_model = loaded_model.model_load(lowvram_model_memory) + current_loaded_models.insert(0, loaded_model) + return + def load_model_gpu(model): - global current_loaded_model - global vram_state - global model_accelerated + return load_models_gpu([model]) - if model is current_loaded_model: - return - unload_model() +def cleanup_models(): + to_delete = [] + for i in range(len(current_loaded_models)): + print(sys.getrefcount(current_loaded_models[i].model)) + if sys.getrefcount(current_loaded_models[i].model) <= 2: + to_delete = [i] + to_delete - torch_dev = model.load_device - model.model_patches_to(torch_dev) - model.model_patches_to(model.model_dtype()) - current_loaded_model = model - - if is_device_cpu(torch_dev): - vram_set_state = VRAMState.DISABLED - else: - vram_set_state = vram_state - - if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM): - model_size = model.model_size() - current_free_mem = get_free_memory(torch_dev) - lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 )) - if model_size > (current_free_mem - minimum_inference_memory()): #only switch to lowvram if really necessary - vram_set_state = VRAMState.LOW_VRAM - - real_model = model.model - patch_model_to = None - if vram_set_state == VRAMState.DISABLED: - pass - elif vram_set_state == VRAMState.NORMAL_VRAM or vram_set_state == VRAMState.HIGH_VRAM or vram_set_state == VRAMState.SHARED: - model_accelerated = False - patch_model_to = torch_dev - - try: - real_model = model.patch_model(device_to=patch_model_to) - except Exception as e: - model.unpatch_model() - unload_model() - raise e - - if patch_model_to is not None: - real_model.to(torch_dev) - - if vram_set_state == VRAMState.NO_VRAM: - device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "256MiB", "cpu": "16GiB"}) - accelerate.dispatch_model(real_model, device_map=device_map, main_device=torch_dev) - model_accelerated = True - elif vram_set_state == VRAMState.LOW_VRAM: - device_map = accelerate.infer_auto_device_map(real_model, max_memory={0: "{}MiB".format(lowvram_model_memory // (1024 * 1024)), "cpu": "16GiB"}) - accelerate.dispatch_model(real_model, device_map=device_map, main_device=torch_dev) - model_accelerated = True - - return current_loaded_model - -def load_controlnet_gpu(control_models): - global current_gpu_controlnets - global vram_state - if vram_state == VRAMState.DISABLED: - return - - if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: - for m in control_models: - if hasattr(m, 'set_lowvram'): - m.set_lowvram(True) - #don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after - return - - models = [] - for m in control_models: - models += m.get_models() - - for m in current_gpu_controlnets: - if m not in models: - m.cpu() - - device = get_torch_device() - current_gpu_controlnets = [] - for m in models: - current_gpu_controlnets.append(m.to(device)) - - -def load_if_low_vram(model): - global vram_state - if vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: - return model.to(get_torch_device()) - return model - -def unload_if_low_vram(model): - global vram_state - if 
vram_state == VRAMState.LOW_VRAM or vram_state == VRAMState.NO_VRAM: - return model.cpu() - return model + for i in to_delete: + x = current_loaded_models.pop(i) + x.model_unload() + del x def unet_offload_device(): if vram_state == VRAMState.HIGH_VRAM: @@ -354,6 +384,21 @@ def unet_offload_device(): else: return torch.device("cpu") +def unet_inital_load_device(parameters, dtype): + torch_dev = get_torch_device() + if vram_state == VRAMState.HIGH_VRAM: + return torch_dev + + cpu_dev = torch.device("cpu") + model_size = dtype.itemsize * parameters + + mem_dev = get_free_memory(torch_dev) + mem_cpu = get_free_memory(cpu_dev) + if mem_dev > mem_cpu and model_size < mem_dev: + return torch_dev + else: + return cpu_dev + def text_encoder_offload_device(): if args.gpu_only: return get_torch_device() @@ -456,6 +501,13 @@ def get_free_memory(dev=None, torch_free_too=False): else: return mem_free_total +def batch_area_memory(area): + if xformers_enabled() or pytorch_attention_flash_attention(): + #TODO: these formulas are copied from maximum_batch_area below + return (area / 20) * (1024 * 1024) + else: + return (((area * 0.6) / 0.9) + 1024) * (1024 * 1024) + def maximum_batch_area(): global vram_state if vram_state == VRAMState.NO_VRAM: diff --git a/comfy/sample.py b/comfy/sample.py index 48530f132..1dfca4204 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -51,19 +51,24 @@ def get_models_from_cond(cond, model_type): models += [c[1][model_type]] return models -def load_additional_models(positive, negative, dtype): +def get_additional_models(positive, negative): """loads additional models in positive and negative conditioning""" control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control") + + control_models = [] + for m in control_nets: + control_models += m.get_models() + gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen") - gligen = [x[1].to(dtype) for x in gligen] - models = control_nets + gligen - comfy.model_management.load_controlnet_gpu(models) + gligen = [x[1] for x in gligen] + models = control_models + gligen return models def cleanup_additional_models(models): """cleanup additional models that were loaded""" for m in models: - m.cleanup() + if hasattr(m, 'cleanup'): + m.cleanup() def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None, callback=None, disable_pbar=False, seed=None): device = comfy.model_management.get_torch_device() @@ -72,7 +77,8 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative noise_mask = prepare_mask(noise_mask, noise.shape, device) real_model = None - comfy.model_management.load_model_gpu(model) + models = get_additional_models(positive, negative) + comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise.shape[2] * noise.shape[3])) real_model = model.model noise = noise.to(device) @@ -81,7 +87,6 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative positive_copy = broadcast_cond(positive, noise.shape[0], device) negative_copy = broadcast_cond(negative, noise.shape[0], device) - models = load_additional_models(positive, negative, model.model_dtype()) sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, 
model_options=model.model_options) diff --git a/comfy/samplers.py b/comfy/samplers.py index 28cd46667..ee37913e6 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -88,9 +88,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con gligen_type = gligen[0] gligen_model = gligen[1] if gligen_type == "position": - gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device) + gligen_patch = gligen_model.model.set_position(input_x.shape, gligen[2], input_x.device) else: - gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device) + gligen_patch = gligen_model.model.set_empty(input_x.shape, input_x.device) patches['middle_patch'] = [gligen_patch] diff --git a/comfy/sd.py b/comfy/sd.py index 06b640968..8d8c8ee3f 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -244,7 +244,7 @@ def set_attr(obj, attr, value): del prev class ModelPatcher: - def __init__(self, model, load_device, offload_device, size=0): + def __init__(self, model, load_device, offload_device, size=0, current_device=None): self.size = size self.model = model self.patches = {} @@ -253,6 +253,10 @@ class ModelPatcher: self.model_size() self.load_device = load_device self.offload_device = offload_device + if current_device is None: + self.current_device = self.offload_device + else: + self.current_device = current_device def model_size(self): if self.size > 0: @@ -267,7 +271,7 @@ class ModelPatcher: return size def clone(self): - n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size) + n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size, self.current_device) n.patches = {} for k in self.patches: n.patches[k] = self.patches[k][:] @@ -276,6 +280,11 @@ class ModelPatcher: n.model_keys = self.model_keys return n + def is_clone(self, other): + if hasattr(other, 'model') and self.model is other.model: + return True + return False + def set_model_sampler_cfg_function(self, sampler_cfg_function): if len(inspect.signature(sampler_cfg_function).parameters) == 3: self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way @@ -390,6 +399,11 @@ class ModelPatcher: out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype) set_attr(self.model, key, out_weight) del temp_weight + + if device_to is not None: + self.model.to(device_to) + self.current_device = device_to + return self.model def calculate_weight(self, patches, weight, key): @@ -482,7 +496,7 @@ class ModelPatcher: return weight - def unpatch_model(self): + def unpatch_model(self, device_to=None): keys = list(self.backup.keys()) for k in keys: @@ -490,6 +504,11 @@ class ModelPatcher: self.backup = {} + if device_to is not None: + self.model.to(device_to) + self.current_device = device_to + + def load_lora_for_models(model, clip, lora, strength_model, strength_clip): key_map = model_lora_keys_unet(model.model) key_map = model_lora_keys_clip(clip.cond_stage_model, key_map) @@ -630,11 +649,12 @@ class VAE: return samples def decode(self, samples_in): - model_management.unload_model() self.first_stage_model = self.first_stage_model.to(self.device) try: + memory_used = (2562 * samples_in.shape[2] * samples_in.shape[3] * 64) * 1.4 + model_management.free_memory(memory_used, self.device) free_memory = model_management.get_free_memory(self.device) - batch_number = int((free_memory * 0.7) / (2562 * samples_in.shape[2] * samples_in.shape[3] * 64)) + batch_number = int(free_memory / 
memory_used) batch_number = max(1, batch_number) pixel_samples = torch.empty((samples_in.shape[0], 3, round(samples_in.shape[2] * 8), round(samples_in.shape[3] * 8)), device="cpu") @@ -650,19 +670,19 @@ class VAE: return pixel_samples def decode_tiled(self, samples, tile_x=64, tile_y=64, overlap = 16): - model_management.unload_model() self.first_stage_model = self.first_stage_model.to(self.device) output = self.decode_tiled_(samples, tile_x, tile_y, overlap) self.first_stage_model = self.first_stage_model.to(self.offload_device) return output.movedim(1,-1) def encode(self, pixel_samples): - model_management.unload_model() self.first_stage_model = self.first_stage_model.to(self.device) pixel_samples = pixel_samples.movedim(-1,1) try: + memory_used = (2078 * pixel_samples.shape[2] * pixel_samples.shape[3]) * 1.4 #NOTE: this constant along with the one in the decode above are estimated from the mem usage for the VAE and could change. + model_management.free_memory(memory_used, self.device) free_memory = model_management.get_free_memory(self.device) - batch_number = int((free_memory * 0.7) / (2078 * pixel_samples.shape[2] * pixel_samples.shape[3])) #NOTE: this constant along with the one in the decode above are estimated from the mem usage for the VAE and could change. + batch_number = int(free_memory / memory_used) batch_number = max(1, batch_number) samples = torch.empty((pixel_samples.shape[0], 4, round(pixel_samples.shape[2] // 8), round(pixel_samples.shape[3] // 8)), device="cpu") for x in range(0, pixel_samples.shape[0], batch_number): @@ -677,7 +697,6 @@ class VAE: return samples def encode_tiled(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64): - model_management.unload_model() self.first_stage_model = self.first_stage_model.to(self.device) pixel_samples = pixel_samples.movedim(-1,1) samples = self.encode_tiled_(pixel_samples, tile_x=tile_x, tile_y=tile_y, overlap=overlap) @@ -757,6 +776,7 @@ class ControlNet(ControlBase): def __init__(self, control_model, global_average_pooling=False, device=None): super().__init__(device) self.control_model = control_model + self.control_model_wrapped = ModelPatcher(self.control_model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device()) self.global_average_pooling = global_average_pooling def get_control(self, x_noisy, t, cond, batched_number): @@ -786,11 +806,9 @@ class ControlNet(ControlBase): precision_scope = contextlib.nullcontext with precision_scope(model_management.get_autocast_device(self.device)): - self.control_model = model_management.load_if_low_vram(self.control_model) context = torch.cat(cond['c_crossattn'], 1) y = cond.get('c_adm', None) control = self.control_model(x=x_noisy, hint=self.cond_hint, timesteps=t, context=context, y=y) - self.control_model = model_management.unload_if_low_vram(self.control_model) out = {'middle':[], 'output': []} autocast_enabled = torch.is_autocast_enabled() @@ -825,7 +843,7 @@ class ControlNet(ControlBase): def get_models(self): out = super().get_models() - out.append(self.control_model) + out.append(self.control_model_wrapped) return out @@ -1004,7 +1022,6 @@ class T2IAdapter(ControlBase): self.copy_to(c) return c - def load_t2i_adapter(t2i_data): keys = t2i_data.keys() if 'adapter' in keys: @@ -1090,7 +1107,7 @@ def load_gligen(ckpt_path): model = gligen.load_gligen(data) if model_management.should_use_fp16(): model = model.half() - return model + return ModelPatcher(model, load_device=model_management.get_torch_device(), 
offload_device=model_management.unet_offload_device()) def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None): #TODO: this function is a mess and should be removed eventually @@ -1202,8 +1219,13 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o if output_clipvision: clipvision = clip_vision.load_clipvision_from_sd(sd, model_config.clip_vision_prefix, True) + dtype = torch.float32 + if fp16: + dtype = torch.float16 + + inital_load_device = model_management.unet_inital_load_device(parameters, dtype) offload_device = model_management.unet_offload_device() - model = model_config.get_model(sd, "model.diffusion_model.", device=offload_device) + model = model_config.get_model(sd, "model.diffusion_model.", device=inital_load_device) model.load_model_weights(sd, "model.diffusion_model.") if output_vae: @@ -1224,7 +1246,12 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o if len(left_over) > 0: print("left over keys:", left_over) - return (ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae, clipvision) + model_patcher = ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device(), current_device=inital_load_device) + if inital_load_device != torch.device("cpu"): + print("loaded straight to GPU") + model_management.load_model_gpu(model_patcher) + + return (model_patcher, clip, vae, clipvision) def load_unet(unet_path): #load unet in diffusers format diff --git a/execution.py b/execution.py index a1a7c75c8..e10fdbb60 100644 --- a/execution.py +++ b/execution.py @@ -354,6 +354,7 @@ class PromptExecutor: d = self.outputs_ui.pop(x) del d + comfy.model_management.cleanup_models() if self.server.client_id is not None: self.server.send_sync("execution_cached", { "nodes": list(current_outputs) , "prompt_id": prompt_id}, self.server.client_id) executed = set() From 2be27427116c58b027c0d5e1a45be435910f7c77 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 17 Aug 2023 01:58:54 -0400 Subject: [PATCH 34/37] Fix issue with regular torch version. --- comfy/model_management.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 3736b57a9..4c2441f4e 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -390,7 +390,11 @@ def unet_inital_load_device(parameters, dtype): return torch_dev cpu_dev = torch.device("cpu") - model_size = dtype.itemsize * parameters + dtype_size = 4 + if dtype == torch.float16 or dtype == torch.bfloat16: + dtype_size = 2 + + model_size = dtype_size * parameters mem_dev = get_free_memory(torch_dev) mem_cpu = get_free_memory(cpu_dev) From 3aee33b54e53e032755edf253ba39005bae6e20e Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 17 Aug 2023 03:12:37 -0400 Subject: [PATCH 35/37] Add --disable-smart-memory for those that want the old behaviour. 
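
With smart memory enabled, free_memory() only unloads models when the
device is actually short on memory; this flag makes it treat free
memory as zero, so anything not explicitly kept is offloaded after each
run, matching the old behaviour. Assuming the usual main.py entry
point, it is passed like any other CLI option:

    python main.py --disable-smart-memory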
---
 comfy/cli_args.py         | 3 +++
 comfy/model_management.py | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index ec7d34a55..374dd2f7d 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -82,6 +82,9 @@ vram_group.add_argument("--novram", action="store_true", help="When lowvram isn'
 vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
 
+parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to aggressively offload to regular ram instead of keeping models in vram when it can.")
+
+
 parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
 parser.add_argument("--quick-test-for-ci", action="store_true", help="Quick test for CI.")
 parser.add_argument("--windows-standalone-build", action="store_true", help="Windows standalone build: Enable convenient things that most people using the standalone windows build will probably enjoy (like auto opening the page on startup).")
 
diff --git a/comfy/model_management.py b/comfy/model_management.py
index 4c2441f4e..5c5d5ab74 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -202,6 +202,10 @@ if cpu_state == CPUState.MPS:
 
 print(f"Set vram state to: {vram_state.name}")
 
+DISABLE_SMART_MEMORY = args.disable_smart_memory
+
+if DISABLE_SMART_MEMORY:
+    print("Disabling smart memory management")
 
 def get_torch_device_name(device):
     if hasattr(device, 'type'):
@@ -289,7 +293,10 @@ def unload_model_clones(model):
 def free_memory(memory_required, device, keep_loaded=[]):
     unloaded_model = False
     for i in range(len(current_loaded_models) -1, -1, -1):
-        current_free_mem = get_free_memory(device)
+        if DISABLE_SMART_MEMORY:
+            current_free_mem = 0
+        else:
+            current_free_mem = get_free_memory(device)
         if current_free_mem > memory_required:
             break
         shift_model = current_loaded_models[i]

From c28db1f315ccaef45cf710be06e71694db573d2e Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Thu, 17 Aug 2023 10:58:59 -0400
Subject: [PATCH 36/37] Fix potential issues with patching models when saving
 checkpoints.

---
 comfy/sd.py | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/comfy/sd.py b/comfy/sd.py
index 8d8c8ee3f..461c234db 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -574,7 +574,7 @@ class CLIP:
         else:
             self.cond_stage_model.reset_clip_layer()
 
-        model_management.load_model_gpu(self.patcher)
+        self.load_model()
         cond, pooled = self.cond_stage_model.encode_token_weights(tokens)
         if return_pooled:
             return cond, pooled
@@ -590,11 +590,9 @@ class CLIP:
     def get_sd(self):
         return self.cond_stage_model.state_dict()
 
-    def patch_model(self):
-        self.patcher.patch_model()
-
-    def unpatch_model(self):
-        self.patcher.unpatch_model()
+    def load_model(self):
+        model_management.load_model_gpu(self.patcher)
+        return self.patcher
 
     def get_key_patches(self):
         return self.patcher.get_key_patches()
@@ -922,8 +920,8 @@ def load_controlnet(ckpt_path, model=None):
     if pth:
         if 'difference' in controlnet_data:
             if model is not None:
-                m = model.patch_model()
-                model_sd = m.state_dict()
+                model_management.load_models_gpu([model])
+                model_sd = model.model_state_dict()
                 for x in controlnet_data:
                     c_m = "control_model."
if x.startswith(c_m): @@ -931,7 +929,6 @@ def load_controlnet(ckpt_path, model=None): if sd_key in model_sd: cd = controlnet_data[x] cd += model_sd[sd_key].type(cd.dtype).to(cd.device) - model.unpatch_model() else: print("WARNING: Loaded a diff controlnet without a model. It will very likely not work.") @@ -1279,14 +1276,6 @@ def load_unet(unet_path): #load unet in diffusers format return ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device) def save_checkpoint(output_path, model, clip, vae, metadata=None): - try: - model.patch_model() - clip.patch_model() - sd = model.model.state_dict_for_saving(clip.get_sd(), vae.get_sd()) - utils.save_torch_file(sd, output_path, metadata=metadata) - model.unpatch_model() - clip.unpatch_model() - except Exception as e: - model.unpatch_model() - clip.unpatch_model() - raise e + model_management.load_models_gpu([model, clip.load_model()]) + sd = model.model.state_dict_for_saving(clip.get_sd(), vae.get_sd()) + utils.save_torch_file(sd, output_path, metadata=metadata) From ac0758a1a4a276da7e6f787e8a98d1b9722b927d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 17 Aug 2023 13:38:51 -0400 Subject: [PATCH 37/37] Fix bug with lowvram and controlnet advanced node. --- comfy/sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/sample.py b/comfy/sample.py index 1dfca4204..50e596749 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -53,7 +53,7 @@ def get_models_from_cond(cond, model_type): def get_additional_models(positive, negative): """loads additional models in positive and negative conditioning""" - control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control") + control_nets = set(get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control")) control_models = [] for m in control_nets: