From f067ad15d139d6e07e44801759f7ccdd9985c636 Mon Sep 17 00:00:00 2001 From: Silver <65376327+silveroxides@users.noreply.github.com> Date: Wed, 4 Sep 2024 01:16:38 +0200 Subject: [PATCH 1/5] Make live preview size a configurable launch argument (#4649) * Make live preview size a configurable launch argument * Remove import from testing phase * Update cli_args.py --- comfy/cli_args.py | 2 ++ latent_preview.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 3a0a26d07..65b879907 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -92,6 +92,8 @@ class LatentPreviewMethod(enum.Enum): parser.add_argument("--preview-method", type=LatentPreviewMethod, default=LatentPreviewMethod.NoPreviews, help="Default preview method for sampler nodes.", action=EnumAction) +parser.add_argument("--preview-size", type=int, default=512, help="Sets the maximum preview size for sampler nodes.") + cache_group = parser.add_mutually_exclusive_group() cache_group.add_argument("--cache-classic", action="store_true", help="Use the old style (aggressive) caching.") cache_group.add_argument("--cache-lru", type=int, default=0, help="Use LRU caching with a maximum of N node results cached. May use more RAM/VRAM.") diff --git a/latent_preview.py b/latent_preview.py index ae6c106e4..e14c72ce4 100644 --- a/latent_preview.py +++ b/latent_preview.py @@ -9,7 +9,7 @@ import folder_paths import comfy.utils import logging -MAX_PREVIEW_RESOLUTION = 512 +MAX_PREVIEW_RESOLUTION = args.preview_size def preview_to_image(latent_image): latents_ubyte = (((latent_image + 1.0) / 2.0).clamp(0, 1) # change scale from -1..1 to 0..1 From f04229b84d3d127af5168188b86b16c9e64f3725 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Wed, 4 Sep 2024 13:35:15 -0500 Subject: [PATCH 2/5] Add emb_patch support to UNetModel forward (#4779) --- comfy/ldm/modules/diffusionmodules/openaimodel.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/comfy/ldm/modules/diffusionmodules/openaimodel.py b/comfy/ldm/modules/diffusionmodules/openaimodel.py index 6535e899c..2902073d5 100644 --- a/comfy/ldm/modules/diffusionmodules/openaimodel.py +++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py @@ -842,6 +842,11 @@ class UNetModel(nn.Module): t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False).to(x.dtype) emb = self.time_embed(t_emb) + if "emb_patch" in transformer_patches: + patch = transformer_patches["emb_patch"] + for p in patch: + emb = p(emb, self.model_channels, transformer_options) + if self.num_classes is not None: assert y.shape[0] == x.shape[0] emb = emb + self.label_emb(y) From 22d1241a503461c9ca4f3ad48ddec5ce6e5ee491 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 4 Sep 2024 16:38:38 -0400 Subject: [PATCH 3/5] Add an experimental LoraSave node to extract model loras. The model_diff input should be connected to the output of a ModelMergeSubtract node. 
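For intuition, the node below factors each weight difference with a rank-truncated SVD into the lora_up / lora_down pair it saves; the extract_lora function in the diff additionally clamps outliers to a quantile and reshapes conv weights. A minimal standalone sketch of that factorization follows, with illustrative shapes and a helper name that is not part of this patch:

    import torch

    def low_rank_pair(diff: torch.Tensor, rank: int):
        # Illustrative helper, not part of the patch: factor a 2D weight
        # difference into (out_dim, rank) and (rank, in_dim) matrices whose
        # product approximates diff.
        U, S, Vh = torch.linalg.svd(diff.float())
        up = U[:, :rank] @ torch.diag(S[:rank])
        down = Vh[:rank, :]
        return up, down

    diff = torch.randn(320, 768)            # e.g. finetuned weight minus base weight
    up, down = low_rank_pair(diff, rank=8)
    print((up @ down).shape)                # torch.Size([320, 768]), a rank-8 approximation of diff
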
--- comfy_extras/nodes_lora_extract.py | 91 ++++++++++++++++++++++++++++++ main.py | 1 + nodes.py | 1 + 3 files changed, 93 insertions(+) create mode 100644 comfy_extras/nodes_lora_extract.py diff --git a/comfy_extras/nodes_lora_extract.py b/comfy_extras/nodes_lora_extract.py new file mode 100644 index 000000000..dcb46f0e0 --- /dev/null +++ b/comfy_extras/nodes_lora_extract.py @@ -0,0 +1,91 @@ +import torch +import comfy.model_management +import comfy.utils +import folder_paths +import os +import logging + +CLAMP_QUANTILE = 0.99 + +def extract_lora(diff, rank): + conv2d = (len(diff.shape) == 4) + kernel_size = None if not conv2d else diff.size()[2:4] + conv2d_3x3 = conv2d and kernel_size != (1, 1) + out_dim, in_dim = diff.size()[0:2] + rank = min(rank, in_dim, out_dim) + + if conv2d: + if conv2d_3x3: + diff = diff.flatten(start_dim=1) + else: + diff = diff.squeeze() + + + U, S, Vh = torch.linalg.svd(diff.float()) + U = U[:, :rank] + S = S[:rank] + U = U @ torch.diag(S) + Vh = Vh[:rank, :] + + dist = torch.cat([U.flatten(), Vh.flatten()]) + hi_val = torch.quantile(dist, CLAMP_QUANTILE) + low_val = -hi_val + + U = U.clamp(low_val, hi_val) + Vh = Vh.clamp(low_val, hi_val) + if conv2d: + U = U.reshape(out_dim, rank, 1, 1) + Vh = Vh.reshape(rank, in_dim, kernel_size[0], kernel_size[1]) + return (U, Vh) + +class LoraSave: + def __init__(self): + self.output_dir = folder_paths.get_output_directory() + + @classmethod + def INPUT_TYPES(s): + return {"required": {"filename_prefix": ("STRING", {"default": "loras/ComfyUI_extracted_lora"}), + "rank": ("INT", {"default": 8, "min": 1, "max": 1024, "step": 1}), + }, + "optional": {"model_diff": ("MODEL",),}, + } + RETURN_TYPES = () + FUNCTION = "save" + OUTPUT_NODE = True + + CATEGORY = "_for_testing" + + def save(self, filename_prefix, rank, model_diff=None): + if model_diff is None: + return {} + + full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, self.output_dir) + + output_sd = {} + prefix_key = "diffusion_model." 
+ stored = set() + + comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True) + sd = model_diff.model_state_dict(filter_prefix=prefix_key) + + for k in sd: + if k.endswith(".weight"): + weight_diff = sd[k] + if weight_diff.ndim < 2: + continue + try: + out = extract_lora(weight_diff, rank) + output_sd["{}.lora_up.weight".format(k[:-7])] = out[0].contiguous().half().cpu() + output_sd["{}.lora_down.weight".format(k[:-7])] = out[1].contiguous().half().cpu() + except: + logging.warning("Could not generate lora weights for key {}, is the weight difference a zero?".format(k)) + + output_checkpoint = f"{filename}_{counter:05}_.safetensors" + output_checkpoint = os.path.join(full_output_folder, output_checkpoint) + + comfy.utils.save_torch_file(output_sd, output_checkpoint, metadata=None) + return {} + +NODE_CLASS_MAPPINGS = { + "LoraSave": LoraSave +} diff --git a/main.py b/main.py index 3db28e1fd..d791a169c 100644 --- a/main.py +++ b/main.py @@ -247,6 +247,7 @@ if __name__ == "__main__": folder_paths.add_model_folder_path("clip", os.path.join(folder_paths.get_output_directory(), "clip")) folder_paths.add_model_folder_path("vae", os.path.join(folder_paths.get_output_directory(), "vae")) folder_paths.add_model_folder_path("diffusion_models", os.path.join(folder_paths.get_output_directory(), "diffusion_models")) + folder_paths.add_model_folder_path("loras", os.path.join(folder_paths.get_output_directory(), "loras")) if args.input_directory: input_dir = os.path.abspath(args.input_directory) diff --git a/nodes.py b/nodes.py index 707d86b63..bbe73282a 100644 --- a/nodes.py +++ b/nodes.py @@ -2101,6 +2101,7 @@ def init_builtin_extra_nodes(): "nodes_controlnet.py", "nodes_hunyuan.py", "nodes_flux.py", + "nodes_lora_extract.py", ] import_failed = [] From c7427375ee27f3a57d9d945d3217436ef43fb53f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 4 Sep 2024 19:47:32 -0400 Subject: [PATCH 4/5] Prioritize freeing partially offloaded models first. --- comfy/model_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 11683a905..77c05510e 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -426,7 +426,7 @@ def free_memory(memory_required, device, keep_loaded=[]): shift_model = current_loaded_models[i] if shift_model.device == device: if shift_model not in keep_loaded: - can_unload.append((sys.getrefcount(shift_model.model), shift_model.model_memory(), i)) + can_unload.append((-shift_model.model_offloaded_memory(), sys.getrefcount(shift_model.model), shift_model.model_memory(), i)) shift_model.currently_used = False for x in sorted(can_unload): From 5cbaa9e07c97296b536f240688f5a19300ecf30d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 5 Sep 2024 00:04:52 -0400 Subject: [PATCH 5/5] Mistoline flux controlnet support. 
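For orientation, the loader changes below tell the flux controlnet variants apart purely by marker keys in the checkpoint state dict, and Mistoline checkpoints additionally get their block prefix renamed (convert_mistoline) before loading. A simplified sketch of that key-based dispatch, using an illustrative helper name that is not part of the patch:

    def detect_controlnet_flavour(sd: dict) -> str:
        # Illustrative only: condensed from the checks in load_controlnet below.
        if "controlnet_blocks.0.weight" in sd:
            if "double_blocks.0.img_attn.norm.key_norm.scale" in sd:
                return "flux xlabs"
            if "pos_embed_input.proj.weight" in sd:
                return "sd3 diffusers"
            if "controlnet_x_embedder.weight" in sd:
                return "flux instantx"
        elif "controlnet_blocks.0.linear.weight" in sd:
            # Loaded via load_controlnet_flux_xlabs_mistoline(..., mistoline=True)
            # after convert_mistoline renames single_controlnet_blocks.
            return "flux mistoline"
        return "other"
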
--- comfy/controlnet.py | 16 ++++-- comfy/ldm/flux/controlnet.py | 99 +++++++++++++++++++++++++++--------- 2 files changed, 85 insertions(+), 30 deletions(-) diff --git a/comfy/controlnet.py b/comfy/controlnet.py index d2d2cefaa..c0f9b6511 100644 --- a/comfy/controlnet.py +++ b/comfy/controlnet.py @@ -430,9 +430,9 @@ def load_controlnet_hunyuandit(controlnet_data): control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds, strength_type=StrengthType.CONSTANT) return control -def load_controlnet_flux_xlabs(sd): +def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False): model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd) - control_model = comfy.ldm.flux.controlnet.ControlNetFlux(operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config) + control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config) control_model = controlnet_load_state_dict(control_model, sd) extra_conds = ['y', 'guidance'] control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds) @@ -457,6 +457,10 @@ def load_controlnet_flux_instantx(sd): control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds) return control +def convert_mistoline(sd): + return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."}) + + def load_controlnet(ckpt_path, model=None): controlnet_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True) if 'after_proj_list.18.bias' in controlnet_data.keys(): #Hunyuan DiT @@ -518,13 +522,15 @@ def load_controlnet(ckpt_path, model=None): if len(leftover_keys) > 0: logging.warning("leftover keys: {}".format(leftover_keys)) controlnet_data = new_sd - elif "controlnet_blocks.0.weight" in controlnet_data: #SD3 diffusers format + elif "controlnet_blocks.0.weight" in controlnet_data: if "double_blocks.0.img_attn.norm.key_norm.scale" in controlnet_data: - return load_controlnet_flux_xlabs(controlnet_data) + return load_controlnet_flux_xlabs_mistoline(controlnet_data) elif "pos_embed_input.proj.weight" in controlnet_data: - return load_controlnet_mmdit(controlnet_data) + return load_controlnet_mmdit(controlnet_data) #SD3 diffusers controlnet elif "controlnet_x_embedder.weight" in controlnet_data: return load_controlnet_flux_instantx(controlnet_data) + elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux + return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True) pth_key = 'control_model.zero_convs.0.0.weight' pth = False diff --git a/comfy/ldm/flux/controlnet.py b/comfy/ldm/flux/controlnet.py index 2598e7172..d8b776129 100644 --- a/comfy/ldm/flux/controlnet.py +++ b/comfy/ldm/flux/controlnet.py @@ -1,4 +1,5 @@ #Original code can be found on: https://github.com/XLabs-AI/x-flux/blob/main/src/flux/controlnet.py +#modified to support different types of flux controlnets import torch import math @@ -12,22 +13,65 @@ from .layers import (DoubleStreamBlock, EmbedND, LastLayer, from .model import Flux import comfy.ldm.common_dit +class MistolineCondDownsamplBlock(nn.Module): + def __init__(self, dtype=None, device=None, 
operations=None): + super().__init__() + self.encoder = nn.Sequential( + operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device) + ) + + def forward(self, x): + return self.encoder(x) + +class MistolineControlnetBlock(nn.Module): + def __init__(self, hidden_size, dtype=None, device=None, operations=None): + super().__init__() + self.linear = operations.Linear(hidden_size, hidden_size, dtype=dtype, device=device) + self.act = nn.SiLU() + + def forward(self, x): + return self.act(self.linear(x)) + class ControlNetFlux(Flux): - def __init__(self, latent_input=False, num_union_modes=0, image_model=None, dtype=None, device=None, operations=None, **kwargs): + def __init__(self, latent_input=False, num_union_modes=0, mistoline=False, image_model=None, dtype=None, device=None, operations=None, **kwargs): super().__init__(final_layer=False, dtype=dtype, device=device, operations=operations, **kwargs) self.main_model_double = 19 self.main_model_single = 38 + + self.mistoline = mistoline # add ControlNet blocks + if self.mistoline: + control_block = lambda : MistolineControlnetBlock(self.hidden_size, dtype=dtype, device=device, operations=operations) + else: + control_block = lambda : operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device) + self.controlnet_blocks = nn.ModuleList([]) for _ in range(self.params.depth): - controlnet_block = operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device) - self.controlnet_blocks.append(controlnet_block) + self.controlnet_blocks.append(control_block()) self.controlnet_single_blocks = nn.ModuleList([]) for _ in range(self.params.depth_single_blocks): - self.controlnet_single_blocks.append(operations.Linear(self.hidden_size, self.hidden_size, dtype=dtype, device=device)) + self.controlnet_single_blocks.append(control_block()) self.num_union_modes = num_union_modes self.controlnet_mode_embedder = None @@ -38,23 +82,26 @@ class ControlNetFlux(Flux): self.latent_input = latent_input self.pos_embed_input = operations.Linear(self.in_channels, self.hidden_size, bias=True, dtype=dtype, device=device) if not self.latent_input: - self.input_hint_block = nn.Sequential( - operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), - nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), - 
nn.SiLU(), - operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device) - ) + if self.mistoline: + self.input_cond_block = MistolineCondDownsamplBlock(dtype=dtype, device=device, operations=operations) + else: + self.input_hint_block = nn.Sequential( + operations.Conv2d(3, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, stride=2, dtype=dtype, device=device), + nn.SiLU(), + operations.Conv2d(16, 16, 3, padding=1, dtype=dtype, device=device) + ) def forward_orig( self, @@ -73,9 +120,6 @@ class ControlNetFlux(Flux): # running on sequences img img = self.img_in(img) - if not self.latent_input: - controlnet_cond = self.input_hint_block(controlnet_cond) - controlnet_cond = rearrange(controlnet_cond, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2) controlnet_cond = self.pos_embed_input(controlnet_cond) img = img + controlnet_cond @@ -131,9 +175,14 @@ class ControlNetFlux(Flux): patch_size = 2 if self.latent_input: hint = comfy.ldm.common_dit.pad_to_patch_size(hint, (patch_size, patch_size)) - hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) + elif self.mistoline: + hint = hint * 2.0 - 1.0 + hint = self.input_cond_block(hint) else: hint = hint * 2.0 - 1.0 + hint = self.input_hint_block(hint) + + hint = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=patch_size, pw=patch_size) bs, c, h, w = x.shape x = comfy.ldm.common_dit.pad_to_patch_size(x, (patch_size, patch_size))
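
For reference, after the per-format preprocessing above (padding for latent inputs, or the Mistoline / hint conv encoders applied to the image scaled to [-1, 1]), the conditioning is patchified once by the shared rearrange into 2x2 patch tokens matching the model's sequence layout. A tiny standalone example of that rearrange with illustrative shapes (a 512x512 hint becomes 16x64x64 after the 8x-downsampling encoder):

    import torch
    from einops import rearrange

    hint = torch.randn(1, 16, 64, 64)   # (b, c, h, w) after the cond encoder, shapes illustrative
    tokens = rearrange(hint, "b c (h ph) (w pw) -> b (h w) (c ph pw)", ph=2, pw=2)
    print(tokens.shape)                 # torch.Size([1, 1024, 64]): 32*32 tokens of 16*2*2 values each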