From f067ad15d139d6e07e44801759f7ccdd9985c636 Mon Sep 17 00:00:00 2001 From: Silver <65376327+silveroxides@users.noreply.github.com> Date: Wed, 4 Sep 2024 01:16:38 +0200 Subject: [PATCH 1/5] Make live preview size a configurable launch argument (#4649) * Make live preview size a configurable launch argument * Remove import from testing phase * Update cli_args.py --- comfy/cli_args.py | 2 ++ latent_preview.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 3a0a26d07..65b879907 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -92,6 +92,8 @@ class LatentPreviewMethod(enum.Enum): parser.add_argument("--preview-method", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, help="Default preview method for sampler nodes.", action=EnumAction) +parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.") + cache_group = parser.add_mutually_exclusive_group() cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.") cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.") diff --git a/latent_preview.py b/latent_preview.py index ae6c106e4..e14c72ce4 100644 --- a/latent_preview.py +++ b/latent_preview.py @@ -9,7 +9,7 @@ import folder_paths import comfy.utils import logging -MAX_PREVIEW_RESOLUTION = 512 +MAX_PREVIEW_RESOLUTION = args.preview_size def preview_to_image(latent_image): latents_ubyte = (((latent_image + 1.0) / 2.0).clamp(0, 1) # change scale from -1..1 to 0..1 From f04229b84d3d127af5168188b86b16c9e64f3725 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Wed, 4 Sep 2024 13:35:15 -0500 Subject: [PATCH 2/5] Add emb_patch support to UNetModel forward (#4779) --- comfy/ldm/modules/diffusionmodules/openaimodel.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/comfy/ldm/modules/diffusionmodules/openaimodel.py b/comfy/ldm/modules/diffusionmodules/openaimodel.py index 6535e899c..2902073d5 100644 --- a/comfy/ldm/modules/diffusionmodules/openaimodel.py +++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py @@ -842,6 +842,11 @@ class UNetModel(nn.Module): t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype) emb = self.time_embed(t_emb) + if "emb_patch" in transformer_patches: + patch = transformer_patches["emb_patch"] + for p in patch: + emb = p(emb, self.model_channels, transformer_options) + if self.num_classes is not None: assert y.shape[0] == x.shape[0] emb = emb + self.label_emb(y) From 22d1241a503461c9ca4f3ad48ddec5ce6e5ee491 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 4 Sep 2024 16:38:38 -0400 Subject: [PATCH 3/5] Add an experimental LoraSave node to extract model loras. The model_diff input should be connected to the output of a ModelMergeSubtract node. 
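For intuition, the node below factors each weight difference with a rank-truncated SVD into the lora_up / lora_down pair it saves; the extract_lora function in the diff additionally clamps outliers to a quantile and reshapes conv weights. A minimal standalone sketch of that factorization follows, with illustrative shapes and a helper name that is not part of this patch:

    import torch

    def low_rank_pair(diff: torch.Tensor, rank: int):
        # Illustrative helper, not part of the patch: factor a 2D weight
        # difference into (out_dim, rank) and (rank, in_dim) matrices whose
        # product approximates diff.
        U, S, Vh = torch.linalg.svd(diff.float())
        up = U[:, :rank] @ torch.diag(S[:rank])
        down = Vh[:rank, :]
        return up, down

    diff = torch.randn(320, 768)            # e.g. finetuned weight minus base weight
    up, down = low_rank_pair(diff, rank=8)
    print((up @ down).shape)                # torch.Size([320, 768]), a rank-8 approximation of diff
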
--- comfy_extras/nodes_lora_extract.py | 91 ++++++++++++++++++++++++++++++ main.py | 1 + nodes.py | 1 + 3 files changed, 93 insertions(+) create mode 100644 comfy_extras/nodes_lora_extract.py diff --git a/comfy_extras/nodes_lora_extract.py b/comfy_extras/nodes_lora_extract.py new file mode 100644 index 000000000..dcb46f0e0 --- /dev/null +++ b/comfy_extras/nodes_lora_extract.py @@ -0,0 +1,91 @@ +import torch +import comfy.model_management +import comfy.utils +import folder_paths +import os +import logging + +CLAMP_QUANTILE = 0.99 + +def extract_lora(diff, rank): + conv2d = (len(diff.shape) == 4) + kernel_size = None if not conv2d else diff.size()[2:4] + conv2d_3x3 = conv2d and kernel_size != (1, 1) + out_dim, in_dim = diff.size()[0:2] + rank = min(rank, in_dim, out_dim) + + if conv2d: + if conv2d_3x3: + diff = diff.flatten(start_dim=1) + else: + diff = diff.squeeze() + + + U, S, Vh = torch.linalg.svd(diff.float()) + U = U[:, :rank] + S = S[:rank] + U = U @ torch.diag(S) + Vh = Vh[:rank, :] + + dist = torch.cat([U.flatten(), Vh.flatten()]) + hi_val = torch.quantile(dist, CLAMP_QUANTILE) + low_val = -hi_val + + U = U.clamp(low_val, hi_val) + Vh = Vh.clamp(low_val, hi_val) + if conv2d: + U = U.reshape(out_dim, rank, 1, 1) + Vh = Vh.reshape(rank, in_dim, kernel_size[0], kernel_size[1]) + return (U, Vh) + +class LoraSave: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": {"filename_prefix": ("STRING", {"default": "loras/ComfyUI_extracted_lora"}), + "rank": ("INT", {"default": 8, "min": 1, "max": 1024, "step": 1}), + }, + "optional": {"model_diff": ("MODEL",),}, + } + RETURN_TYPES = () + FUNCTION = "save" + OUTPUT_NODE = True + + CATEGORY = "_for_testing" + + def save(self, filename_prefix, rank, model_diff=None): + if model_diff is None: + return {} + + full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) + + output_sd = {} + prefix_key = "diffusion_model." 
+ stored = set() + + comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True) + sd = model_diff.model_state_dict(filter_prefix=prefix_key) + + for k in sd: + if k.endswith(".weight"): + weight_diff = sd[k] + if weight_diff.ndim < 2: + continue + try: + out = extract_lora(weight_diff, rank) + output_sd["{}.lora_up.weight".format(k[:-7])] = out[0].contiguous().half().cpu() + output_sd["{}.lora_down.weight".format(k[:-7])] = out[1].contiguous().half().cpu() + except: + logging.warning("Could not generate lora weights for key {}, is the weight difference a zero?".format(k)) + + output_checkpoint = f"{filename}_{counter:05}_.safetensors" + output_checkpoint = os.path.join(full_output_folder, output_checkpoint) + + comfy.utils.save_torch_file(output_sd, output_checkpoint, metadata=None) + return {} + +NODE_CLASS_MAPPINGS = { + "LoraSave": LoraSave +} diff --git a/main.py b/main.py index 3db28e1fd..d791a169c 100644 --- a/main.py +++ b/main.py @@ -247,6 +247,7 @@ if __name__ == "__main__": folder_paths.add_model_folder_path("clip", os.path.join(folder_paths.get_output_directory(), "clip")) folder_paths.add_model_folder_path("vae", os.path.join(folder_paths.get_output_directory(), "vae")) folder_paths.add_model_folder_path("diffusion_models", os.path.join(folder_paths.get_output_directory(), "diffusion_models")) + folder_paths.add_model_folder_path("loras", os.path.join(folder_paths.get_output_directory(), "loras")) if args.input_directory: input_dir = os.path.abspath(args.input_directory) diff --git a/nodes.py b/nodes.py index 707d86b63..bbe73282a 100644 --- a/nodes.py +++ b/nodes.py @@ -2101,6 +2101,7 @@ def init_builtin_extra_nodes(): "nodes_controlnet.py", "nodes_hunyuan.py", "nodes_flux.py", + "nodes_lora_extract.py", ] import_failed = [] From c7427375ee27f3a57d9d945d3217436ef43fb53f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 4 Sep 2024 19:47:32 -0400 Subject: [PATCH 4/5] Prioritize freeing partially offloaded models first. --- comfy/model_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 11683a905..77c05510e 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -426,7 +426,7 @@ def free_memory(memory_required, device, keep_loaded=[]): shift_model = current_loaded_models[i] if shift_model.device == device: if shift_model not in keep_loaded: - can_unload.append((sys.getrefcount(shift_model.model), shift_model.model_memory(), i)) + can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i)) shift_model.currently_used = False for x in sorted(can_unload): From 5cbaa9e07c97296b536f240688f5a19300ecf30d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 5 Sep 2024 00:04:52 -0400 Subject: [PATCH 5/5] Mistoline flux controlnet support. 
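For orientation, the loader changes below tell the flux controlnet variants apart purely by marker keys in the checkpoint state dict, and Mistoline checkpoints additionally get their block prefix renamed (convert_mistoline) before loading. A simplified sketch of that key-based dispatch, using an illustrative helper name that is not part of the patch:

    def detect_controlnet_flavour(sd: dict) -> str:
        # Illustrative only: condensed from the checks in load_controlnet below.
        if "controlnet_blocks.0.weight" in sd:
            if "double_blocks.0.img_attn.norm.key_norm.scale" in sd:
                return "flux xlabs"
            if "pos_embed_input.proj.weight" in sd:
                return "sd3 diffusers"
            if "controlnet_x_embedder.weight" in sd:
                return "flux instantx"
        elif "controlnet_blocks.0.linear.weight" in sd:
            # Loaded via load_controlnet_flux_xlabs_mistoline(..., mistoline=True)
            # after convert_mistoline renames single_controlnet_blocks.
            return "flux mistoline"
        return "other"
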
--- comfy/controlnet.py | 16 ++++-- comfy/ldm/flux/controlnet.py | 99 +++++++++++++++++++++++++++--------- 2 files changed, 85 insertions(+), 30 deletions(-) diff --git a/comfy/controlnet.py b/comfy/controlnet.py index d2d2cefaa..c0f9b6511 100644 --- a/comfy/controlnet.py +++ b/comfy/controlnet.py @@ -430,9 +430,9 @@ def load_controlnet_hunyuandit(controlnet_data): control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds, strength_type=StrengthType.CONSTANT) return control -def load_controlnet_flux_xlabs(sd): +def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False): model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd) - control_model = comfy.ldm.flux.controlnet.ControlNetFlux(operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config) + control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config) control_model = controlnet_load_state_dict(control_model, sd) extra_conds = ['y', 'guidance'] control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds) @@ -457,6 +457,10 @@ def load_controlnet_flux_instantx(sd): control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds) return control +def convert_mistoline(sd): + return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."}) + + def load_controlnet(ckpt_path, model=None): controlnet_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True) if 'after_proj_list.18.bias' in controlnet_data.keys(): #Hunyuan DiT @@ -518,13 +522,15 @@ def load_controlnet(ckpt_path, model=None): if len(leftover_keys) > 0: logging.warning("leftover keys: {}".format(leftover_keys)) controlnet_data = new_sd - elif "controlnet_blocks.0.weight" in controlnet_data: #SD3 diffusers format + elif "controlnet_blocks.0.weight" in controlnet_data: if "double_blocks.0.img_attn.norm.key_norm.scale" in controlnet_data: - return load_controlnet_flux_xlabs(controlnet_data) + return load_controlnet_flux_xlabs_mistoline(controlnet_data) elif "pos_embed_input.proj.weight" in controlnet_data: - return load_controlnet_mmdit(controlnet_data) + return load_controlnet_mmdit(controlnet_data) #SD3 diffusers controlnet elif "controlnet_x_embedder.weight" in controlnet_data: return load_controlnet_flux_instantx(controlnet_data) + elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux + return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True) pth_key = 'control_model.zero_convs.0.0.weight' pth = False diff --git a/comfy/ldm/flux/controlnet.py b/comfy/ldm/flux/controlnet.py index 2598e7172..d8b776129 100644 --- a/comfy/ldm/flux/controlnet.py +++ b/comfy/ldm/flux/controlnet.py @@ -1,4 +1,5 @@ #Original code can be found on: https://github.com/XLabs-AI/x-flux/blob/main/src/flux/controlnet.py +#modified to support different types of flux controlnets import torch import math @@ -12,22 +13,65 @@ from .layers import (DoubleStreamBlock, EmbedND, LastLayer, from .model import Flux import comfy.ldm.common_dit +class MistolineCondDownsamplBlock(nn.Module): + def __init__(self, dtype=None, device=None, 
operations=None): + super().__init__() + self.encoder = nn.Sequential( + operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device) + ) + + def forward(self, x): + return self.encoder(x) + +class MistolineControlnetBlock(nn.Module): + def __init__(self, hidden_size, dtype=None, device=None, operations=None): + super().__init__() + self.linear = operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device) + self.act = nn.SiLU() + + def forward(self, x): + return self.act(self.linear(x)) + class ControlNetFlux(Flux): - def __init__(self, latent_input=False, num_union_modes=0, image_model=None, dtype=None, device=None, operations=None, **kwargs): + def __init__(self, latent_input=False, num_union_modes=0, mistoline=False, image_model=None, dtype=None, device=None, operations=None, **kwargs): super().__init__(final_layer=False, dtype=dtype, device=device, operations=operations, **kwargs) self.main_model_double = 19 self.main_model_single = 38 + + self.mistoline = mistoline # add ControlNet blocks + if self.mistoline: + control_block = lambda : MistolineControlnetBlock(self.hidden_size, dtype=dtype, device=device, operations=operations) + else: + control_block = lambda : operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device) + self.controlnet_blocks = nn.ModuleList([]) for _ in range(self.params.depth): - controlnet_block = operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device) - self.controlnet_blocks.append(controlnet_block) + self.controlnet_blocks.append(control_block()) self.controlnet_single_blocks = nn.ModuleList([]) for _ in range(self.params.depth_single_blocks): - self.controlnet_single_blocks.append(operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)) + self.controlnet_single_blocks.append(control_block()) self.num_union_modes = num_union_modes self.controlnet_mode_embedder = None @@ -38,23 +82,26 @@ class ControlNetFlux(Flux): self.latent_input = latent_input self.pos_embed_input = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) if not self.latent_input: - self.input_hint_block = nn.Sequential( - operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), - 
nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device) - ) + if self.mistoline: + self.input_cond_block = MistolineCondDownsamplBlock(dtype=dtype, device=device, operations=operations) + else: + self.input_hint_block = nn.Sequential( + operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device) + ) def forward_orig( self, @@ -73,9 +120,6 @@ class ControlNetFlux(Flux): # running on sequences img img = self.img_in(img) - if not self.latent_input: - controlnet_cond = self.input_hint_block(controlnet_cond) - controlnet_cond = rearrange(controlnet_cond, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) controlnet_cond = self.pos_embed_input(controlnet_cond) img = img + controlnet_cond @@ -131,9 +175,14 @@ class ControlNetFlux(Flux): patch_size = 2 if self.latent_input: hint = comfy.ldm.common_dit.pad_to_patch_size(hint, (patch_size, patch_size)) - hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) + elif self.mistoline: + hint = hint * 2.0 - 1.0 + hint = self.input_cond_block(hint) else: hint = hint * 2.0 - 1.0 + hint = self.input_hint_block(hint) + + hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) bs, c, h, w = x.shape x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
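
For reference, after the per-format preprocessing above (padding for latent inputs, or the Mistoline / hint conv encoders applied to the image scaled to [-1, 1]), the conditioning is patchified once by the shared rearrange into 2x2 patch tokens matching the model's sequence layout. A tiny standalone example of that rearrange with illustrative shapes (a 512x512 hint becomes 16x64x64 after the 8x-downsampling encoder):

    import torch
    from einops import rearrange

    hint = torch.randn(1, 16, 64, 64)   # (b, c, h, w) after the cond encoder, shapes illustrative
    tokens = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
    print(tokens.shape)                 # torch.Size([1, 1024, 64]): 32*32 tokens of 16*2*2 values each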