From 7e941f9f247f9b013a33c2e7d117466108414e99 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Wed, 30 Aug 2023 12:55:07 -0400
Subject: [PATCH 1/8] Clean up DiffusersLoader node.

---
 comfy/diffusers_load.py | 101 ++++++++++------------------------------
 nodes.py                |   2 +-
 2 files changed, 26 insertions(+), 77 deletions(-)

diff --git a/comfy/diffusers_load.py b/comfy/diffusers_load.py
index 11d94c340..a52e0102b 100644
--- a/comfy/diffusers_load.py
+++ b/comfy/diffusers_load.py
@@ -1,87 +1,36 @@
 import json
 import os
-import yaml
 
-import folder_paths
-from comfy.sd import load_checkpoint
-import os.path as osp
-import re
-import torch
-from safetensors.torch import load_file, save_file
-from . import diffusers_convert
+import comfy.sd
 
+def first_file(path, filenames):
+    for f in filenames:
+        p = os.path.join(path, f)
+        if os.path.exists(p):
+            return p
+    return None
 
-def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, embedding_directory=None):
-    diffusers_unet_conf = json.load(open(osp.join(model_path, "unet/config.json")))
-    diffusers_scheduler_conf = json.load(open(osp.join(model_path, "scheduler/scheduler_config.json")))
+def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_directory=None):
+    diffusion_model_names = ["diffusion_pytorch_model.fp16.safetensors", "diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.fp16.bin", "diffusion_pytorch_model.bin"]
+    unet_path = first_file(os.path.join(model_path, "unet"), diffusion_model_names)
+    vae_path = first_file(os.path.join(model_path, "vae"), diffusion_model_names)
 
-    # magic
-    v2 = diffusers_unet_conf["sample_size"] == 96
-    if 'prediction_type' in diffusers_scheduler_conf:
-        v_pred = diffusers_scheduler_conf['prediction_type'] == 'v_prediction'
+    text_encoder_model_names = ["model.fp16.safetensors", "model.safetensors", "pytorch_model.fp16.bin", "pytorch_model.bin"]
+    text_encoder1_path = first_file(os.path.join(model_path, "text_encoder"), text_encoder_model_names)
+    text_encoder2_path = first_file(os.path.join(model_path, "text_encoder_2"), text_encoder_model_names)
 
-    if v2:
-        if v_pred:
-            config_path = folder_paths.get_full_path("configs", 'v2-inference-v.yaml')
-        else:
-            config_path = folder_paths.get_full_path("configs", 'v2-inference.yaml')
-    else:
-        config_path = folder_paths.get_full_path("configs", 'v1-inference.yaml')
+    text_encoder_paths = [text_encoder1_path]
+    if text_encoder2_path is not None:
+        text_encoder_paths.append(text_encoder2_path)
 
-    with open(config_path, 'r') as stream:
-        config = yaml.safe_load(stream)
+    unet = comfy.sd.load_unet(unet_path)
 
-    model_config_params = config['model']['params']
-    clip_config = model_config_params['cond_stage_config']
-    scale_factor = model_config_params['scale_factor']
-    vae_config = model_config_params['first_stage_config']
-    vae_config['scale_factor'] = scale_factor
-    model_config_params["unet_config"]["params"]["use_fp16"] = fp16
+    clip = None
+    if output_clip:
+        clip = comfy.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory)
 
-    unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.safetensors")
-    vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.safetensors")
-    text_enc_path = osp.join(model_path, "text_encoder", "model.safetensors")
+    vae = None
+    if output_vae:
+        vae = comfy.sd.VAE(ckpt_path=vae_path)
 
-    # Load models from safetensors if it exists, if it doesn't pytorch
-    if osp.exists(unet_path):
-        unet_state_dict = load_file(unet_path, device="cpu")
-    else:
-        unet_path = osp.join(model_path, "unet", "diffusion_pytorch_model.bin")
-        unet_state_dict = torch.load(unet_path, map_location="cpu")
-
-    if osp.exists(vae_path):
-        vae_state_dict = load_file(vae_path, device="cpu")
-    else:
-        vae_path = osp.join(model_path, "vae", "diffusion_pytorch_model.bin")
-        vae_state_dict = torch.load(vae_path, map_location="cpu")
-
-    if osp.exists(text_enc_path):
-        text_enc_dict = load_file(text_enc_path, device="cpu")
-    else:
-        text_enc_path = osp.join(model_path, "text_encoder", "pytorch_model.bin")
-        text_enc_dict = torch.load(text_enc_path, map_location="cpu")
-
-    # Convert the UNet model
-    unet_state_dict = diffusers_convert.convert_unet_state_dict(unet_state_dict)
-    unet_state_dict = {"model.diffusion_model." + k: v for k, v in unet_state_dict.items()}
-
-    # Convert the VAE model
-    vae_state_dict = diffusers_convert.convert_vae_state_dict(vae_state_dict)
-    vae_state_dict = {"first_stage_model." + k: v for k, v in vae_state_dict.items()}
-
-    # Easiest way to identify v2.0 model seems to be that the text encoder (OpenCLIP) is deeper
-    is_v20_model = "text_model.encoder.layers.22.layer_norm2.bias" in text_enc_dict
-
-    if is_v20_model:
-        # Need to add the tag 'transformer' in advance so we can knock it out from the final layer-norm
-        text_enc_dict = {"transformer." + k: v for k, v in text_enc_dict.items()}
-        text_enc_dict = diffusers_convert.convert_text_enc_state_dict_v20(text_enc_dict)
-        text_enc_dict = {"cond_stage_model.model." + k: v for k, v in text_enc_dict.items()}
-    else:
-        text_enc_dict = diffusers_convert.convert_text_enc_state_dict(text_enc_dict)
-        text_enc_dict = {"cond_stage_model.transformer." + k: v for k, v in text_enc_dict.items()}
-
-    # Put together new checkpoint
-    sd = {**unet_state_dict, **vae_state_dict, **text_enc_dict}
-
-    return load_checkpoint(embedding_directory=embedding_directory, state_dict=sd, config=config)
+    return (unet, clip, vae)
diff --git a/nodes.py b/nodes.py
index 3e4d5240b..5e755f149 100644
--- a/nodes.py
+++ b/nodes.py
@@ -475,7 +475,7 @@ class DiffusersLoader:
                     model_path = path
                     break
 
-        return comfy.diffusers_load.load_diffusers(model_path, fp16=comfy.model_management.should_use_fp16(), output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings"))
+        return comfy.diffusers_load.load_diffusers(model_path, output_vae=output_vae, output_clip=output_clip, embedding_directory=folder_paths.get_folder_paths("embeddings"))
 
 
 class unCLIPCheckpointLoader:

From 2cd3980199ea1769ea3007009c516683b472337b Mon Sep 17 00:00:00 2001
From: Ridan Vandenbergh <ridanvandenbergh@gmail.com>
Date: Wed, 30 Aug 2023 20:46:53 +0200
Subject: [PATCH 2/8] Remove forced lowercase on embeddings endpoint

---
 server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server.py b/server.py
index 0d7d28a0b..57d5a65df 100644
--- a/server.py
+++ b/server.py
@@ -127,7 +127,7 @@ class PromptServer():
         @routes.get("/embeddings")
         def get_embeddings(self):
             embeddings = folder_paths.get_filename_list("embeddings")
-            return web.json_response(list(map(lambda a: os.path.splitext(a)[0].lower(), embeddings)))
+            return web.json_response(list(map(lambda a: os.path.splitext(a)[0], embeddings)))
 
         @routes.get("/extensions")
         async def get_extensions(request):

From 5f101f4da14e0b4a360ca1d0c380fab174d301bf Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Thu, 31 Aug 2023 02:25:21 -0400
Subject: [PATCH 3/8] Update litegraph with upstream: middle mouse dragging.

---
 web/lib/litegraph.core.js | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/web/lib/litegraph.core.js b/web/lib/litegraph.core.js
index 356c71ac2..4bb2f0d99 100644
--- a/web/lib/litegraph.core.js
+++ b/web/lib/litegraph.core.js
@@ -6233,11 +6233,17 @@ LGraphNode.prototype.executeAction = function(action)
 																					,posAdd:[!mClikSlot_isOut?-30:30, -alphaPosY*130] //-alphaPosY*30]
 																					,posSizeFix:[!mClikSlot_isOut?-1:0, 0] //-alphaPosY*2*/
 																				});
-								
+							skip_action = true;
 						}
 					}
 				}
 			}
+
+			if (!skip_action && this.allow_dragcanvas) {
+            	//console.log("pointerevents: dragging_canvas start from middle button");
+            	this.dragging_canvas = true;
+            }
+
         	
         } else if (e.which == 3 || this.pointer_is_double) {
 			

From 1c012d69afa8bd92a007a3e468e2a1f874365d39 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Thu, 31 Aug 2023 13:25:00 -0400
Subject: [PATCH 4/8] It doesn't make sense for c_crossattn and c_concat to be
 lists.

---
 comfy/model_base.py | 4 ++--
 comfy/samplers.py   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index acd4169a8..677a23de7 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -50,10 +50,10 @@ class BaseModel(torch.nn.Module):
 
     def apply_model(self, x, t, c_concat=None, c_crossattn=None, c_adm=None, control=None, transformer_options={}):
         if c_concat is not None:
-            xc = torch.cat([x] + c_concat, dim=1)
+            xc = torch.cat([x] + [c_concat], dim=1)
         else:
             xc = x
-        context = torch.cat(c_crossattn, 1)
+        context = c_crossattn
         dtype = self.get_dtype()
         xc = xc.to(dtype)
         t = t.to(dtype)
diff --git a/comfy/samplers.py b/comfy/samplers.py
index 134336de6..103ac33ff 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -165,9 +165,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
                 c_crossattn_out.append(c)
 
             if len(c_crossattn_out) > 0:
-                out['c_crossattn'] = [torch.cat(c_crossattn_out)]
+                out['c_crossattn'] = torch.cat(c_crossattn_out)
             if len(c_concat) > 0:
-                out['c_concat'] = [torch.cat(c_concat)]
+                out['c_concat'] = torch.cat(c_concat)
             if len(c_adm) > 0:
                 out['c_adm'] = torch.cat(c_adm)
             return out

From 57beace324b49f4b6b45291e3940b99c84387e89 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Thu, 31 Aug 2023 14:26:16 -0400
Subject: [PATCH 5/8] Fix VAEDecodeTiled minimum tile_size (192 -> 320).

---
 nodes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nodes.py b/nodes.py
index 5e755f149..38d947d65 100644
--- a/nodes.py
+++ b/nodes.py
@@ -245,7 +245,7 @@ class VAEDecodeTiled:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {"samples": ("LATENT", ), "vae": ("VAE", ),
-                             "tile_size": ("INT", {"default": 512, "min": 192, "max": 4096, "step": 64})
+                             "tile_size": ("INT", {"default": 512, "min": 320, "max": 4096, "step": 64})
                             }}
     RETURN_TYPES = ("IMAGE",)
     FUNCTION = "decode"

From cfe1c54de88e7525ec7e4189a8a3294dfc3cd4c0 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Thu, 31 Aug 2023 15:16:58 -0400
Subject: [PATCH 6/8] Fix controlnet issue: c_crossattn is no longer a list.

---
 comfy/controlnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/controlnet.py b/comfy/controlnet.py
index 83e1be058..f62dd9c88 100644
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -155,7 +155,7 @@ class ControlNet(ControlBase):
             self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
 
 
-        context = torch.cat(cond['c_crossattn'], 1)
+        context = cond['c_crossattn']
         y = cond.get('c_adm', None)
         if y is not None:
             y = y.to(self.control_model.dtype)

From 5c363a9d86827d194e3a8e5dd6085a67f65c7ee6 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Fri, 1 Sep 2023 02:01:08 -0400
Subject: [PATCH 7/8] Fix controlnet bug: return None instead of {}.

---
 comfy/controlnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comfy/controlnet.py b/comfy/controlnet.py
index f62dd9c88..490be6bbc 100644
--- a/comfy/controlnet.py
+++ b/comfy/controlnet.py
@@ -143,7 +143,7 @@ class ControlNet(ControlBase):
                 if control_prev is not None:
                     return control_prev
                 else:
-                    return {}
+                    return None
 
         output_dtype = x_noisy.dtype
         if self.cond_hint is None or x_noisy.shape[2] * 8 != self.cond_hint.shape[2] or x_noisy.shape[3] * 8 != self.cond_hint.shape[3]:

From 0e3b64117218c50a554b492269f5f35779839695 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Fri, 1 Sep 2023 02:12:03 -0400
Subject: [PATCH 8/8] Remove xformers related print.

---
 comfy/ldm/modules/attention.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index 973619bf2..9fdfbd217 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -402,8 +402,6 @@ class MemoryEfficientCrossAttention(nn.Module):
     # https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223
     def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0, dtype=None, device=None, operations=comfy.ops):
         super().__init__()
-        print(f"Setting up {self.__class__.__name__}. Query dim is {query_dim}, context_dim is {context_dim} and using "
-              f"{heads} heads.")
         inner_dim = dim_head * heads
         context_dim = default(context_dim, query_dim)