Merge remote-tracking branch 'upstream/master' into addBatchIndex

commit 2759982b37
Author: flyingshutter
Date: 2023-06-11 18:05:31 +02:00

16 changed files with 421 additions and 249 deletions

.gitignore

@@ -9,3 +9,8 @@ custom_nodes/
!custom_nodes/example_node.py.example
extra_model_paths.yaml
/.vs
.idea/
venv/
web/extensions/*
!web/extensions/logging.js.example
!web/extensions/core/


@@ -38,28 +38,28 @@ Workflow examples can be found on the [Examples page](https://comfyanonymous.git
## Shortcuts
| Keybind | Explanation |
| - | - |
| Ctrl + Enter | Queue up current graph for generation |
| Ctrl + Shift + Enter | Queue up current graph as first for generation |
| Ctrl + S | Save workflow |
| Ctrl + O | Load workflow |
| Ctrl + A | Select all nodes |
| Ctrl + M | Mute/unmute selected nodes |
| Delete/Backspace | Delete selected nodes |
| Ctrl + Delete/Backspace | Delete the current graph |
| Space | Move the canvas around when held and moving the cursor |
| Ctrl/Shift + Click | Add clicked node to selection |
| Ctrl + C/Ctrl + V | Copy and paste selected nodes (without maintaining connections to outputs of unselected nodes) |
| Ctrl + C/Ctrl + Shift + V| Copy and paste selected nodes (maintaining connections from outputs of unselected nodes to inputs of pasted nodes) |
| Shift + Drag | Move multiple selected nodes at the same time |
| Ctrl + D | Load default graph |
| Q | Toggle visibility of the queue |
| H | Toggle visibility of history |
| R | Refresh graph |
| Double-Click LMB | Open node quick search palette |
| Keybind | Explanation |
|---------------------------|--------------------------------------------------------------------------------------------------------------------|
| Ctrl + Enter | Queue up current graph for generation |
| Ctrl + Shift + Enter | Queue up current graph as first for generation |
| Ctrl + S | Save workflow |
| Ctrl + O | Load workflow |
| Ctrl + A | Select all nodes |
| Ctrl + M | Mute/unmute selected nodes |
| Delete/Backspace | Delete selected nodes |
| Ctrl + Delete/Backspace | Delete the current graph |
| Space | Move the canvas around when held and moving the cursor |
| Ctrl/Shift + Click | Add clicked node to selection |
| Ctrl + C/Ctrl + V | Copy and paste selected nodes (without maintaining connections to outputs of unselected nodes) |
| Ctrl + C/Ctrl + Shift + V | Copy and paste selected nodes (maintaining connections from outputs of unselected nodes to inputs of pasted nodes) |
| Shift + Drag | Move multiple selected nodes at the same time |
| Ctrl + D | Load default graph |
| Q | Toggle visibility of the queue |
| H | Toggle visibility of history |
| R | Refresh graph |
| Double-Click LMB | Open node quick search palette |
Ctrl can also be replaced with Cmd instead for MacOS users
Ctrl can also be replaced with Cmd instead for macOS users
# Installing
@@ -119,13 +119,26 @@ After this you should have everything installed and can proceed to running Comfy
### Others:
[Intel Arc](https://github.com/comfyanonymous/ComfyUI/discussions/476)
#### [Intel Arc](https://github.com/comfyanonymous/ComfyUI/discussions/476)
Mac/MPS: There is basic support in the code but until someone makes some install instruction you are on your own.
#### Apple Mac silicon
You can install ComfyUI on Apple silicon Macs (M1 or M2) with any recent macOS version.
1. Install pytorch. For instructions, read the [Accelerated PyTorch training on Mac](https://developer.apple.com/metal/pytorch/) Apple Developer guide.
1. Follow the [ComfyUI manual installation](#manual-install-windows-linux) instructions for Windows and Linux.
1. Install the ComfyUI [dependencies](#dependencies). If you have another Stable Diffusion UI [you might be able to reuse the dependencies](#i-already-have-another-ui-for-stable-diffusion-installed-do-i-really-have-to-install-all-of-these-dependencies).
1. Launch ComfyUI by running `python main.py`.
> **Note**: Remember to add your models, VAE, LoRAs etc. to the corresponding Comfy folders, as discussed in [ComfyUI manual installation](#manual-install-windows-linux).
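Before launching, a minimal check that the installed PyTorch build actually exposes the MPS backend (assuming a recent PyTorch, 1.13 or newer) looks like this:

```python
# Minimal MPS sanity check for the Apple-silicon install (assumes PyTorch >= 1.13).
import torch

print("MPS built:    ", torch.backends.mps.is_built())
print("MPS available:", torch.backends.mps.is_available())
# Both should print True before running `python main.py`.
```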
#### DirectML (AMD Cards on Windows)
```pip install torch-directml``` Then you can launch ComfyUI with: ```python main.py --directml```
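A minimal way to confirm the DirectML backend is usable from Python (assuming `pip install torch-directml` completed without errors) is:

```python
# Quick DirectML smoke test (assumes torch-directml installed cleanly).
import torch
import torch_directml

dml = torch_directml.device()        # first available DirectML adapter
x = torch.ones(3, device=dml) * 2.0  # run a trivial op on that adapter
print(dml, x.to("cpu"))
```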
### I already have another UI for Stable Diffusion installed do I really have to install all of these dependencies?
You don't. If you have another UI installed and working with it's own python venv you can use that venv to run ComfyUI. You can open up your favorite terminal and activate it:
You don't. If you have another UI installed and working with its own python venv you can use that venv to run ComfyUI. You can open up your favorite terminal and activate it:
```source path_to_other_sd_gui/venv/bin/activate```
@@ -135,7 +148,7 @@ With Powershell: ```"path_to_other_sd_gui\venv\Scripts\Activate.ps1"```
With cmd.exe: ```"path_to_other_sd_gui\venv\Scripts\activate.bat"```
And then you can use that terminal to run Comfyui without installing any dependencies. Note that the venv folder might be called something else depending on the SD UI.
And then you can use that terminal to run ComfyUI without installing any dependencies. Note that the venv folder might be called something else depending on the SD UI.
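With that venv activated, a short probe run with its interpreter shows whether the shared environment already provides ComfyUI's core dependencies (the package list below is an assumption; `requirements.txt` is authoritative):

```python
# Illustrative dependency probe for a borrowed venv; package names are an
# assumption, check requirements.txt for the authoritative list.
import importlib.util
import sys

print("interpreter prefix:", sys.prefix)
for pkg in ("torch", "torchvision", "safetensors", "aiohttp", "yaml"):
    status = "present" if importlib.util.find_spec(pkg) else "missing"
    print(f"{pkg}: {status}")
```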
# Running
@@ -159,6 +172,8 @@ You can use () to change emphasis of a word or phrase like: (good code:1.2) or (
You can use {day|night} for wildcard/dynamic prompts. With this syntax "{wild|card|test}" will be randomly replaced by either "wild", "card" or "test" by the frontend every time you queue the prompt. To use {} characters in your actual prompt escape them like: \\{ or \\}.
Dynamic prompts also support C-style comments, like `// comment` or `/* comment */`.
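The actual resolution happens in the frontend (see the `Comfy.DynamicPrompts` extension changed later in this diff); a rough Python sketch of the same comment-stripping and wildcard behaviour, for illustration only, is:

```python
# Rough illustration of the frontend's dynamic-prompt handling; the real logic
# is the JavaScript extension modified later in this commit.
import random
import re

def strip_comments(prompt: str) -> str:
    # Drop /* block */ and // line comments (C-style comment support).
    return re.sub(r"/\*[\s\S]*?\*/|//.*", "", prompt)

def resolve_wildcards(prompt: str) -> str:
    # Replace every un-escaped {a|b|c} group with one randomly chosen option.
    return re.sub(r"(?<!\\)\{([^{}]+)\}",
                  lambda m: random.choice(m.group(1).split("|")),
                  prompt)

print(resolve_wildcards(strip_comments("a {day|night} scene  // draft note")))
```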
To use a textual inversion concepts/embeddings in a text prompt put them in the models/embeddings directory and use them in the CLIPTextEncode node like this (you can omit the .pt extension):
```embedding:embedding_filename.pt```


@@ -4,7 +4,7 @@ import yaml
import folder_paths
from comfy.ldm.util import instantiate_from_config
from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE
from comfy.sd import ModelPatcher, load_model_weights, CLIP, VAE, load_checkpoint
import os.path as osp
import re
import torch
@@ -84,28 +84,4 @@ def load_diffusers(model_path, fp16=True, output_vae=True, output_clip=True, emb
# Put together new checkpoint
sd = {**unet_state_dict, **vae_state_dict, **text_enc_dict}
clip = None
vae = None
class WeightsLoader(torch.nn.Module):
pass
w = WeightsLoader()
load_state_dict_to = []
if output_vae:
vae = VAE(scale_factor=scale_factor, config=vae_config)
w.first_stage_model = vae.first_stage_model
load_state_dict_to = [w]
if output_clip:
clip = CLIP(config=clip_config, embedding_directory=embedding_directory)
w.cond_stage_model = clip.cond_stage_model
load_state_dict_to = [w]
model = instantiate_from_config(config["model"])
model = load_model_weights(model, sd, verbose=False, load_state_dict_to=load_state_dict_to)
if fp16:
model = model.half()
return ModelPatcher(model), clip, vae
return load_checkpoint(embedding_directory=embedding_directory, state_dict=sd, config=config)

comfy/model_base.py (new file)

@@ -0,0 +1,97 @@
import torch
from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel
from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule
import numpy as np
class BaseModel(torch.nn.Module):
def __init__(self, unet_config, v_prediction=False):
super().__init__()
self.register_schedule(given_betas=None, beta_schedule="linear", timesteps=1000, linear_start=0.00085, linear_end=0.012, cosine_s=8e-3)
self.diffusion_model = UNetModel(**unet_config)
self.v_prediction = v_prediction
if self.v_prediction:
self.parameterization = "v"
else:
self.parameterization = "eps"
if "adm_in_channels" in unet_config:
self.adm_channels = unet_config["adm_in_channels"]
else:
self.adm_channels = 0
print("v_prediction", v_prediction)
print("adm", self.adm_channels)
def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
if given_betas is not None:
betas = given_betas
else:
betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
alphas = 1. - betas
alphas_cumprod = np.cumprod(alphas, axis=0)
alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
timesteps, = betas.shape
self.num_timesteps = int(timesteps)
self.linear_start = linear_start
self.linear_end = linear_end
self.register_buffer('betas', torch.tensor(betas, dtype=torch.float32))
self.register_buffer('alphas_cumprod', torch.tensor(alphas_cumprod, dtype=torch.float32))
self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32))
def apply_model(self, x, t, c_concat=None, c_crossattn=None, c_adm=None, control=None, transformer_options={}):
if c_concat is not None:
xc = torch.cat([x] + c_concat, dim=1)
else:
xc = x
context = torch.cat(c_crossattn, 1)
return self.diffusion_model(xc, t, context=context, y=c_adm, control=control, transformer_options=transformer_options)
def get_dtype(self):
return self.diffusion_model.dtype
def is_adm(self):
return self.adm_channels > 0
class SD21UNCLIP(BaseModel):
def __init__(self, unet_config, noise_aug_config, v_prediction=True):
super().__init__(unet_config, v_prediction)
self.noise_augmentor = CLIPEmbeddingNoiseAugmentation(**noise_aug_config)
def encode_adm(self, **kwargs):
unclip_conditioning = kwargs.get("unclip_conditioning", None)
device = kwargs["device"]
if unclip_conditioning is not None:
adm_inputs = []
weights = []
noise_aug = []
for unclip_cond in unclip_conditioning:
adm_cond = unclip_cond["clip_vision_output"].image_embeds
weight = unclip_cond["strength"]
noise_augment = unclip_cond["noise_augmentation"]
noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
weights.append(weight)
noise_aug.append(noise_augment)
adm_inputs.append(adm_out)
if len(noise_aug) > 1:
adm_out = torch.stack(adm_inputs).sum(0)
#TODO: add a way to control this
noise_augment = 0.05
noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
c_adm, noise_level_emb = self.noise_augmentor(adm_out[:, :self.noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
adm_out = torch.cat((c_adm, noise_level_emb), 1)
else:
adm_out = torch.zeros((1, self.adm_channels))
return adm_out
class SDInpaint(BaseModel):
def __init__(self, unet_config, v_prediction=False):
super().__init__(unet_config, v_prediction)
self.concat_keys = ("mask", "masked_image")
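For reference, the schedule registered in `BaseModel.__init__` reduces to the standard DDPM bookkeeping; a standalone sketch with the same defaults, assuming `make_beta_schedule`'s usual LDM definition of "linear" (linear in sqrt(beta), then squared), is:

```python
# Standalone sketch of register_schedule with the defaults used above
# (1000 steps, linear_start=0.00085, linear_end=0.012); assumes the LDM-style
# "linear" schedule, i.e. linspace over sqrt(beta) squared.
import numpy as np

timesteps, linear_start, linear_end = 1000, 0.00085, 0.012
betas = np.linspace(linear_start ** 0.5, linear_end ** 0.5, timesteps) ** 2
alphas_cumprod = np.cumprod(1.0 - betas, axis=0)
alphas_cumprod_prev = np.append(1.0, alphas_cumprod[:-1])

print(betas[0], betas[-1])     # 0.00085 ... 0.012
print(alphas_cumprod[-1])      # cumulative signal retention at t = T
```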


@@ -248,7 +248,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
c['transformer_options'] = transformer_options
output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
output = model_function(input_x, timestep_, **c).chunk(batch_chunks)
del input_x
model_management.throw_exception_if_processing_interrupted()
@@ -460,36 +460,18 @@ def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
uncond[temp[1]] = [o[0], n]
def encode_adm(noise_augmentor, conds, batch_size, device):
def encode_adm(model, conds, batch_size, device):
for t in range(len(conds)):
x = conds[t]
adm_out = None
if 'adm' in x[1]:
adm_inputs = []
weights = []
noise_aug = []
adm_in = x[1]["adm"]
for adm_c in adm_in:
adm_cond = adm_c[0].image_embeds
weight = adm_c[1]
noise_augment = adm_c[2]
noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
c_adm, noise_level_emb = noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
weights.append(weight)
noise_aug.append(noise_augment)
adm_inputs.append(adm_out)
if len(noise_aug) > 1:
adm_out = torch.stack(adm_inputs).sum(0)
#TODO: add a way to control this
noise_augment = 0.05
noise_level = round((noise_augmentor.max_noise_level - 1) * noise_augment)
c_adm, noise_level_emb = noise_augmentor(adm_out[:, :noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
adm_out = torch.cat((c_adm, noise_level_emb), 1)
adm_out = x[1]["adm"]
else:
adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device)
x[1] = x[1].copy()
x[1]["adm_encoded"] = torch.cat([adm_out] * batch_size)
params = x[1].copy()
adm_out = model.encode_adm(device=device, **params)
if adm_out is not None:
x[1] = x[1].copy()
x[1]["adm_encoded"] = torch.cat([adm_out] * batch_size).to(device)
return conds
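The sampler now relies only on `is_adm()` and `encode_adm()` instead of poking at `noise_augmentor` directly. A hypothetical stand-in object makes that contract explicit (the class below is illustrative, not part of the codebase):

```python
# Hypothetical stand-in showing the interface the refactored sampler expects.
import torch

class FakeAdmModel:
    adm_channels = 1536  # illustrative width

    def is_adm(self):
        return self.adm_channels > 0

    def encode_adm(self, device="cpu", **conditioning):
        # Real models (e.g. SD21UNCLIP) build this from unclip_conditioning;
        # returning None would make encode_adm() above skip the cond untouched.
        return torch.zeros((1, self.adm_channels), device=device)

model = FakeAdmModel()
if model.is_adm():
    adm = model.encode_adm(device="cpu", unclip_conditioning=None)
    print(adm.shape)  # the sampler tiles this to the batch and stores "adm_encoded"
```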
@@ -591,14 +573,14 @@ class KSampler:
apply_empty_x_to_equal_area(positive, negative, 'control', lambda cond_cnets, x: cond_cnets[x])
apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])
if self.model.model.diffusion_model.dtype == torch.float16:
if self.model.get_dtype() == torch.float16:
precision_scope = torch.autocast
else:
precision_scope = contextlib.nullcontext
if hasattr(self.model, 'noise_augmentor'): #unclip
positive = encode_adm(self.model.noise_augmentor, positive, noise.shape[0], self.device)
negative = encode_adm(self.model.noise_augmentor, negative, noise.shape[0], self.device)
if self.model.is_adm():
positive = encode_adm(self.model, positive, noise.shape[0], self.device)
negative = encode_adm(self.model, negative, noise.shape[0], self.device)
extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": self.model_options}


@@ -15,8 +15,15 @@ from . import utils
from . import clip_vision
from . import gligen
from . import diffusers_convert
from . import model_base
def load_model_weights(model, sd, verbose=False, load_state_dict_to=[]):
replace_prefix = {"model.diffusion_model.": "diffusion_model."}
for rp in replace_prefix:
replace = list(map(lambda a: (a, "{}{}".format(replace_prefix[rp], a[len(rp):])), filter(lambda a: a.startswith(rp), sd.keys())))
for x in replace:
sd[x[1]] = sd.pop(x[0])
m, u = model.load_state_dict(sd, strict=False)
k = list(sd.keys())
@@ -31,17 +38,6 @@ def load_model_weights(model, sd, verbose=False, load_state_dict_to=[]):
if ids.dtype == torch.float32:
sd['cond_stage_model.transformer.text_model.embeddings.position_ids'] = ids.round()
keys_to_replace = {
"cond_stage_model.model.positional_embedding": "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight",
"cond_stage_model.model.token_embedding.weight": "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight",
"cond_stage_model.model.ln_final.weight": "cond_stage_model.transformer.text_model.final_layer_norm.weight",
"cond_stage_model.model.ln_final.bias": "cond_stage_model.transformer.text_model.final_layer_norm.bias",
}
for x in keys_to_replace:
if x in sd:
sd[keys_to_replace[x]] = sd.pop(x)
sd = utils.transformers_convert(sd, "cond_stage_model.model", "cond_stage_model.transformer.text_model", 24)
for x in load_state_dict_to:
@@ -193,7 +189,7 @@ def model_lora_keys(model, key_map={}):
counter = 0
for b in range(12):
tk = "model.diffusion_model.input_blocks.{}.1".format(b)
tk = "diffusion_model.input_blocks.{}.1".format(b)
up_counter = 0
for c in LORA_UNET_MAP_ATTENTIONS:
k = "{}.{}.weight".format(tk, c)
@@ -204,13 +200,13 @@ def model_lora_keys(model, key_map={}):
if up_counter >= 4:
counter += 1
for c in LORA_UNET_MAP_ATTENTIONS:
k = "model.diffusion_model.middle_block.1.{}.weight".format(c)
k = "diffusion_model.middle_block.1.{}.weight".format(c)
if k in sdk:
lora_key = "lora_unet_mid_block_attentions_0_{}".format(LORA_UNET_MAP_ATTENTIONS[c])
key_map[lora_key] = k
counter = 3
for b in range(12):
tk = "model.diffusion_model.output_blocks.{}.1".format(b)
tk = "diffusion_model.output_blocks.{}.1".format(b)
up_counter = 0
for c in LORA_UNET_MAP_ATTENTIONS:
k = "{}.{}.weight".format(tk, c)
@@ -234,7 +230,7 @@ def model_lora_keys(model, key_map={}):
ds_counter = 0
counter = 0
for b in range(12):
tk = "model.diffusion_model.input_blocks.{}.0".format(b)
tk = "diffusion_model.input_blocks.{}.0".format(b)
key_in = False
for c in LORA_UNET_MAP_RESNET:
k = "{}.{}.weight".format(tk, c)
@@ -253,7 +249,7 @@ def model_lora_keys(model, key_map={}):
counter = 0
for b in range(3):
tk = "model.diffusion_model.middle_block.{}".format(b)
tk = "diffusion_model.middle_block.{}".format(b)
key_in = False
for c in LORA_UNET_MAP_RESNET:
k = "{}.{}.weight".format(tk, c)
@@ -267,7 +263,7 @@ def model_lora_keys(model, key_map={}):
counter = 0
us_counter = 0
for b in range(12):
tk = "model.diffusion_model.output_blocks.{}.0".format(b)
tk = "diffusion_model.output_blocks.{}.0".format(b)
key_in = False
for c in LORA_UNET_MAP_RESNET:
k = "{}.{}.weight".format(tk, c)
@@ -343,7 +339,7 @@ class ModelPatcher:
patch_list[i] = patch_list[i].to(device)
def model_dtype(self):
return self.model.diffusion_model.dtype
return self.model.get_dtype()
def add_patches(self, patches, strength=1.0):
p = {}
@@ -775,7 +771,7 @@ def load_controlnet(ckpt_path, model=None):
for x in controlnet_data:
c_m = "control_model."
if x.startswith(c_m):
sd_key = "model.diffusion_model.{}".format(x[len(c_m):])
sd_key = "diffusion_model.{}".format(x[len(c_m):])
if sd_key in model_sd:
cd = controlnet_data[x]
cd += model_sd[sd_key].type(cd.dtype).to(cd.device)
@@ -942,9 +938,10 @@ def load_gligen(ckpt_path):
model = model.half()
return model
def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=None):
with open(config_path, 'r') as stream:
config = yaml.safe_load(stream)
def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None):
if config is None:
with open(config_path, 'r') as stream:
config = yaml.safe_load(stream)
model_config_params = config['model']['params']
clip_config = model_config_params['cond_stage_config']
scale_factor = model_config_params['scale_factor']
@@ -953,8 +950,19 @@ def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, e
fp16 = False
if "unet_config" in model_config_params:
if "params" in model_config_params["unet_config"]:
if "use_fp16" in model_config_params["unet_config"]["params"]:
fp16 = model_config_params["unet_config"]["params"]["use_fp16"]
unet_config = model_config_params["unet_config"]["params"]
if "use_fp16" in unet_config:
fp16 = unet_config["use_fp16"]
noise_aug_config = None
if "noise_aug_config" in model_config_params:
noise_aug_config = model_config_params["noise_aug_config"]
v_prediction = False
if "parameterization" in model_config_params:
if model_config_params["parameterization"] == "v":
v_prediction = True
clip = None
vae = None
@@ -974,9 +982,16 @@ def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, e
w.cond_stage_model = clip.cond_stage_model
load_state_dict_to = [w]
model = instantiate_from_config(config["model"])
sd = utils.load_torch_file(ckpt_path)
model = load_model_weights(model, sd, verbose=False, load_state_dict_to=load_state_dict_to)
if config['model']["target"].endswith("LatentInpaintDiffusion"):
model = model_base.SDInpaint(unet_config, v_prediction=v_prediction)
elif config['model']["target"].endswith("ImageEmbeddingConditionedLatentDiffusion"):
model = model_base.SD21UNCLIP(unet_config, noise_aug_config["params"], v_prediction=v_prediction)
else:
model = model_base.BaseModel(unet_config, v_prediction=v_prediction)
if state_dict is None:
state_dict = utils.load_torch_file(ckpt_path)
model = load_model_weights(model, state_dict, verbose=False, load_state_dict_to=load_state_dict_to)
if fp16:
model = model.half()
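With `state_dict` and `config` now accepted directly, callers such as the diffusers loader above can delegate to this function; a typical file-based call (the paths are placeholders, not files introduced by this commit) still looks like:

```python
# Typical call into the updated loader; paths are placeholders.
from comfy import sd

model_patcher, clip, vae = sd.load_checkpoint(
    config_path="models/configs/v1-inference.yaml",
    ckpt_path="models/checkpoints/sd-v1-5.ckpt",
    output_vae=True,
    output_clip=True,
    embedding_directory="models/embeddings",
)
```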
@@ -1073,47 +1088,59 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
"legacy": False
}
if len(sd['model.diffusion_model.input_blocks.1.1.proj_in.weight'].shape) == 2:
if len(sd['model.diffusion_model.input_blocks.4.1.proj_in.weight'].shape) == 2:
unet_config['use_linear_in_transformer'] = True
unet_config["use_fp16"] = fp16
unet_config["model_channels"] = sd['model.diffusion_model.input_blocks.0.0.weight'].shape[0]
unet_config["in_channels"] = sd['model.diffusion_model.input_blocks.0.0.weight'].shape[1]
unet_config["context_dim"] = sd['model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight'].shape[1]
unet_config["context_dim"] = sd['model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight'].shape[1]
sd_config["unet_config"] = {"target": "comfy.ldm.modules.diffusionmodules.openaimodel.UNetModel", "params": unet_config}
model_config = {"target": "comfy.ldm.models.diffusion.ddpm.LatentDiffusion", "params": sd_config}
unclip_model = False
inpaint_model = False
if noise_aug_config is not None: #SD2.x unclip model
sd_config["noise_aug_config"] = noise_aug_config
sd_config["image_size"] = 96
sd_config["embedding_dropout"] = 0.25
sd_config["conditioning_key"] = 'crossattn-adm'
unclip_model = True
model_config["target"] = "comfy.ldm.models.diffusion.ddpm.ImageEmbeddingConditionedLatentDiffusion"
elif unet_config["in_channels"] > 4: #inpainting model
sd_config["conditioning_key"] = "hybrid"
sd_config["finetune_keys"] = None
model_config["target"] = "comfy.ldm.models.diffusion.ddpm.LatentInpaintDiffusion"
inpaint_model = True
else:
sd_config["conditioning_key"] = "crossattn"
if unet_config["context_dim"] == 1024:
unet_config["num_head_channels"] = 64 #SD2.x
else:
if unet_config["context_dim"] == 768:
unet_config["num_heads"] = 8 #SD1.x
else:
unet_config["num_head_channels"] = 64 #SD2.x
unclip = 'model.diffusion_model.label_emb.0.0.weight'
if unclip in sd_keys:
unet_config["num_classes"] = "sequential"
unet_config["adm_in_channels"] = sd[unclip].shape[1]
v_prediction = False
if unet_config["context_dim"] == 1024 and unet_config["in_channels"] == 4: #only SD2.x non inpainting models are v prediction
k = "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias"
out = sd[k]
if torch.std(out, unbiased=False) > 0.09: # not sure how well this will actually work. I guess we will find out.
v_prediction = True
sd_config["parameterization"] = 'v'
model = instantiate_from_config(model_config)
if inpaint_model:
model = model_base.SDInpaint(unet_config, v_prediction=v_prediction)
elif unclip_model:
model = model_base.SD21UNCLIP(unet_config, noise_aug_config["params"], v_prediction=v_prediction)
else:
model = model_base.BaseModel(unet_config, v_prediction=v_prediction)
model = load_model_weights(model, sd, verbose=False, load_state_dict_to=load_state_dict_to)
if fp16:


@@ -82,6 +82,8 @@ class SD1ClipModel(torch.nn.Module, ClipTokenWeightEncoder):
next_new_token += 1
else:
print("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored", y.shape[0], current_embeds.weight.shape[1])
while len(tokens_temp) < len(x):
tokens_temp += [self.empty_tokens[0][-1]]
out_tokens += [tokens_temp]
if len(embedding_weights) > 0:


@@ -24,6 +24,18 @@ def load_torch_file(ckpt, safe_load=False):
return sd
def transformers_convert(sd, prefix_from, prefix_to, number):
keys_to_replace = {
"{}.positional_embedding": "{}.embeddings.position_embedding.weight",
"{}.token_embedding.weight": "{}.embeddings.token_embedding.weight",
"{}.ln_final.weight": "{}.final_layer_norm.weight",
"{}.ln_final.bias": "{}.final_layer_norm.bias",
}
for k in keys_to_replace:
x = k.format(prefix_from)
if x in sd:
sd[keys_to_replace[k].format(prefix_to)] = sd.pop(x)
resblock_to_replace = {
"ln_1": "layer_norm1",
"ln_2": "layer_norm2",

main.py

@@ -37,21 +37,25 @@ def prompt_worker(q, server):
e.execute(item[2], item[1], item[3], item[4])
q.task_done(item_id, e.outputs_ui)
async def run(server, address='', port=8188, verbose=True, call_on_start=None):
await asyncio.gather(server.start(address, port, verbose, call_on_start), server.publish_loop())
def hijack_progress(server):
def hook(value, total, preview_image_bytes):
server.send_sync("progress", { "value": value, "max": total}, server.client_id)
server.send_sync("progress", {"value": value, "max": total}, server.client_id)
if preview_image_bytes is not None:
server.send_sync(BinaryEventTypes.PREVIEW_IMAGE, preview_image_bytes, server.client_id)
comfy.utils.set_progress_bar_global_hook(hook)
def cleanup_temp():
temp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp")
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir, ignore_errors=True)
def load_extra_path_config(yaml_path):
with open(yaml_path, 'r') as stream:
config = yaml.safe_load(stream)
@@ -72,6 +76,7 @@ def load_extra_path_config(yaml_path):
print("Adding extra search path", x, full_path)
folder_paths.add_model_folder_path(x, full_path)
if __name__ == "__main__":
cleanup_temp()
@@ -92,7 +97,7 @@ if __name__ == "__main__":
server.add_routes()
hijack_progress(server)
threading.Thread(target=prompt_worker, daemon=True, args=(q,server,)).start()
threading.Thread(target=prompt_worker, daemon=True, args=(q, server,)).start()
if args.output_directory:
output_dir = os.path.abspath(args.output_directory)
@@ -106,15 +111,12 @@ if __name__ == "__main__":
if args.auto_launch:
def startup_server(address, port):
import webbrowser
webbrowser.open("http://{}:{}".format(address, port))
webbrowser.open(f"http://{address}:{port}")
call_on_start = startup_server
if os.name == "nt":
try:
loop.run_until_complete(run(server, address=args.listen, port=args.port, verbose=not args.dont_print_server, call_on_start=call_on_start))
except KeyboardInterrupt:
pass
else:
try:
loop.run_until_complete(run(server, address=args.listen, port=args.port, verbose=not args.dont_print_server, call_on_start=call_on_start))
except KeyboardInterrupt:
print("\nStopped server")
cleanup_temp()


@@ -623,11 +623,11 @@ class unCLIPConditioning:
c = []
for t in conditioning:
o = t[1].copy()
x = (clip_vision_output, strength, noise_augmentation)
if "adm" in o:
o["adm"] = o["adm"][:] + [x]
x = {"clip_vision_output": clip_vision_output, "strength": strength, "noise_augmentation": noise_augmentation}
if "unclip_conditioning" in o:
o["unclip_conditioning"] = o["unclip_conditioning"][:] + [x]
else:
o["adm"] = [x]
o["unclip_conditioning"] = [x]
n = [t[0], o]
c.append(n)
return (c, )


@@ -1,132 +1,138 @@
import { app } from "/scripts/app.js";
import {app} from "/scripts/app.js";
// Adds filtering to combo context menus
const id = "Comfy.ContextMenuFilter";
app.registerExtension({
name: id,
const ext = {
name: "Comfy.ContextMenuFilter",
init() {
const ctxMenu = LiteGraph.ContextMenu;
LiteGraph.ContextMenu = function (values, options) {
const ctx = ctxMenu.call(this, values, options);
// If we are a dark menu (only used for combo boxes) then add a filter input
if (options?.className === "dark" && values?.length > 10) {
const filter = document.createElement("input");
Object.assign(filter.style, {
width: "calc(100% - 10px)",
border: "0",
boxSizing: "border-box",
background: "#333",
border: "1px solid #999",
margin: "0 0 5px 5px",
color: "#fff",
});
filter.classList.add("comfy-context-menu-filter");
filter.placeholder = "Filter list";
this.root.prepend(filter);
let selectedIndex = 0;
let items = this.root.querySelectorAll(".litemenu-entry");
let itemCount = items.length;
let selectedItem;
const items = Array.from(this.root.querySelectorAll(".litemenu-entry"));
let displayedItems = [...items];
let itemCount = displayedItems.length;
// Apply highlighting to the selected item
function updateSelected() {
if (selectedItem) {
selectedItem.style.setProperty("background-color", "");
selectedItem.style.setProperty("color", "");
}
selectedItem = items[selectedIndex];
if (selectedItem) {
selectedItem.style.setProperty("background-color", "#ccc", "important");
selectedItem.style.setProperty("color", "#000", "important");
}
}
// We must request an animation frame for the current node of the active canvas to update.
requestAnimationFrame(() => {
const currentNode = LGraphCanvas.active_canvas.current_node;
const clickedComboValue = currentNode.widgets
.filter(w => w.type === "combo" && w.options.values.length === values.length)
.find(w => w.options.values.every((v, i) => v === values[i]))
.value;
const positionList = () => {
const rect = this.root.getBoundingClientRect();
// If the top is off screen then shift the element with scaling applied
if (rect.top < 0) {
const scale = 1 - this.root.getBoundingClientRect().height / this.root.clientHeight;
const shift = (this.root.clientHeight * scale) / 2;
this.root.style.top = -shift + "px";
}
}
updateSelected();
// Arrow up/down to select items
filter.addEventListener("keydown", (e) => {
if (e.key === "ArrowUp") {
if (selectedIndex === 0) {
selectedIndex = itemCount - 1;
} else {
selectedIndex--;
}
updateSelected();
e.preventDefault();
} else if (e.key === "ArrowDown") {
if (selectedIndex === itemCount - 1) {
selectedIndex = 0;
} else {
selectedIndex++;
}
updateSelected();
e.preventDefault();
} else if ((selectedItem && e.key === "Enter") || e.keyCode === 13 || e.keyCode === 10) {
selectedItem.click();
} else if(e.key === "Escape") {
this.close();
}
});
filter.addEventListener("input", () => {
// Hide all items that dont match our filter
const term = filter.value.toLocaleLowerCase();
items = this.root.querySelectorAll(".litemenu-entry");
// When filtering recompute which items are visible for arrow up/down
// Try and maintain selection
let visibleItems = [];
for (const item of items) {
const visible = !term || item.textContent.toLocaleLowerCase().includes(term);
if (visible) {
item.style.display = "block";
if (item === selectedItem) {
selectedIndex = visibleItems.length;
}
visibleItems.push(item);
} else {
item.style.display = "none";
if (item === selectedItem) {
selectedIndex = 0;
}
}
}
items = visibleItems;
let selectedIndex = values.findIndex(v => v === clickedComboValue);
let selectedItem = displayedItems?.[selectedIndex];
updateSelected();
// If we have an event then we can try and position the list under the source
if (options.event) {
let top = options.event.clientY - 10;
const bodyRect = document.body.getBoundingClientRect();
const rootRect = this.root.getBoundingClientRect();
if (bodyRect.height && top > bodyRect.height - rootRect.height - 10) {
top = Math.max(0, bodyRect.height - rootRect.height - 10);
}
this.root.style.top = top + "px";
positionList();
// Apply highlighting to the selected item
function updateSelected() {
selectedItem?.style.setProperty("background-color", "");
selectedItem?.style.setProperty("color", "");
selectedItem = displayedItems[selectedIndex];
selectedItem?.style.setProperty("background-color", "#ccc", "important");
selectedItem?.style.setProperty("color", "#000", "important");
}
});
requestAnimationFrame(() => {
// Focus the filter box when opening
filter.focus();
const positionList = () => {
const rect = this.root.getBoundingClientRect();
positionList();
});
// If the top is off-screen then shift the element with scaling applied
if (rect.top < 0) {
const scale = 1 - this.root.getBoundingClientRect().height / this.root.clientHeight;
const shift = (this.root.clientHeight * scale) / 2;
this.root.style.top = -shift + "px";
}
}
// Arrow up/down to select items
filter.addEventListener("keydown", (event) => {
switch (event.key) {
case "ArrowUp":
event.preventDefault();
if (selectedIndex === 0) {
selectedIndex = itemCount - 1;
} else {
selectedIndex--;
}
updateSelected();
break;
case "ArrowRight":
event.preventDefault();
selectedIndex = itemCount - 1;
updateSelected();
break;
case "ArrowDown":
event.preventDefault();
if (selectedIndex === itemCount - 1) {
selectedIndex = 0;
} else {
selectedIndex++;
}
updateSelected();
break;
case "ArrowLeft":
event.preventDefault();
selectedIndex = 0;
updateSelected();
break;
case "Enter":
selectedItem?.click();
break;
case "Escape":
this.close();
break;
}
});
filter.addEventListener("input", () => {
// Hide all items that don't match our filter
const term = filter.value.toLocaleLowerCase();
// When filtering, recompute which items are visible for arrow up/down and maintain selection.
displayedItems = items.filter(item => {
const isVisible = !term || item.textContent.toLocaleLowerCase().includes(term);
item.style.display = isVisible ? "block" : "none";
return isVisible;
});
selectedIndex = 0;
if (displayedItems.includes(selectedItem)) {
selectedIndex = displayedItems.findIndex(d => d === selectedItem);
}
itemCount = displayedItems.length;
updateSelected();
// If we have an event then we can try and position the list under the source
if (options.event) {
let top = options.event.clientY - 10;
const bodyRect = document.body.getBoundingClientRect();
const rootRect = this.root.getBoundingClientRect();
if (bodyRect.height && top > bodyRect.height - rootRect.height - 10) {
top = Math.max(0, bodyRect.height - rootRect.height - 10);
}
this.root.style.top = top + "px";
positionList();
}
});
requestAnimationFrame(() => {
// Focus the filter box when opening
filter.focus();
positionList();
});
})
}
return ctx;
@@ -134,4 +140,6 @@ app.registerExtension({
LiteGraph.ContextMenu.prototype = ctxMenu.prototype;
},
});
}
app.registerExtension(ext);


@@ -3,6 +3,13 @@ import { app } from "../../scripts/app.js";
// Allows for simple dynamic prompt replacement
// Inputs in the format {a|b} will have a random value of a or b chosen when the prompt is queued.
/*
* Strips C-style line and block comments from a string
*/
function stripComments(str) {
return str.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g,'');
}
app.registerExtension({
name: "Comfy.DynamicPrompts",
nodeCreated(node) {
@@ -15,7 +22,7 @@ app.registerExtension({
for (const widget of widgets) {
// Override the serialization of the value to resolve dynamic prompts for all widgets supporting it in this node
widget.serializeValue = (workflowNode, widgetIndex) => {
let prompt = widget.value;
let prompt = stripComments(widget.value);
while (prompt.replace("\\{", "").includes("{") && prompt.replace("\\}", "").includes("}")) {
const startIndex = prompt.replace("\\{", "00").indexOf("{");
const endIndex = prompt.replace("\\}", "00").indexOf("}");


@@ -200,8 +200,23 @@ app.registerExtension({
applyToGraph() {
if (!this.outputs[0].links?.length) return;
function get_links(node) {
let links = [];
for (const l of node.outputs[0].links) {
const linkInfo = app.graph.links[l];
const n = node.graph.getNodeById(linkInfo.target_id);
if (n.type == "Reroute") {
links = links.concat(get_links(n));
} else {
links.push(l);
}
}
return links;
}
let links = get_links(this);
// For each output link copy our value over the original widget value
for (const l of this.outputs[0].links) {
for (const l of links) {
const linkInfo = app.graph.links[l];
const node = this.graph.getNodeById(linkInfo.target_id);
const input = node.inputs[linkInfo.target_slot];


@@ -125,10 +125,14 @@ export class ComfyApp {
if(ComfyApp.clipspace.imgs && node.imgs) {
if(node.images && ComfyApp.clipspace.images) {
if(ComfyApp.clipspace['img_paste_mode'] == 'selected') {
app.nodeOutputs[node.id + ""].images = node.images = [ComfyApp.clipspace.images[ComfyApp.clipspace['selectedIndex']]];
node.images = [ComfyApp.clipspace.images[ComfyApp.clipspace['selectedIndex']]];
}
else
app.nodeOutputs[node.id + ""].images = node.images = ComfyApp.clipspace.images;
else {
node.images = ComfyApp.clipspace.images;
}
if(app.nodeOutputs[node.id + ""])
app.nodeOutputs[node.id + ""].images = node.images;
}
if(ComfyApp.clipspace.imgs) {
@@ -161,7 +165,16 @@ export class ComfyApp {
if(ComfyApp.clipspace.widgets) {
ComfyApp.clipspace.widgets.forEach(({ type, name, value }) => {
const prop = Object.values(node.widgets).find(obj => obj.type === type && obj.name === name);
if (prop && prop.type != 'button') {
if (prop && prop.type != 'image') {
if(typeof prop.value == "string" && value.filename) {
prop.value = (value.subfolder?value.subfolder+'/':'') + value.filename + (value.type?` [${value.type}]`:'');
}
else {
prop.value = value;
prop.callback(value);
}
}
else if (prop && prop.type != 'button') {
prop.value = value;
prop.callback(value);
}
@@ -365,6 +378,10 @@ export class ComfyApp {
}
node.prototype.setSizeForImage = function () {
if (this.inputHeight) {
this.setSize(this.size);
return;
}
const minHeight = getImageTop(this) + 220;
if (this.size[1] < minHeight) {
this.setSize([this.size[0], minHeight]);


@@ -115,12 +115,12 @@ function addMultilineWidget(node, name, opts, app) {
// See how large each text input can be
freeSpace -= widgetHeight;
freeSpace /= multi.length;
freeSpace /= multi.length + (!!node.imgs?.length);
if (freeSpace < MIN_SIZE) {
// There isnt enough space for all the widgets, increase the size of the node
freeSpace = MIN_SIZE;
node.size[1] = y + widgetHeight + freeSpace * multi.length;
node.size[1] = y + widgetHeight + freeSpace * (multi.length + (!!node.imgs?.length));
node.graph.setDirtyCanvas(true);
}


@@ -50,7 +50,7 @@ body {
padding: 30px 30px 10px 30px;
background-color: var(--comfy-menu-bg); /* Modal background */
color: var(--error-text);
box-shadow: 0px 0px 20px #888888;
box-shadow: 0 0 20px #888888;
border-radius: 10px;
top: 50%;
left: 50%;
@@ -84,7 +84,7 @@ body {
font-size: 15px;
position: absolute;
top: 50%;
right: 0%;
right: 0;
text-align: center;
z-index: 100;
width: 170px;
@@ -252,7 +252,7 @@ button.comfy-queue-btn {
bottom: 0 !important;
left: auto !important;
right: 0 !important;
border-radius: 0px;
border-radius: 0;
}
.comfy-menu span.drag-handle {
visibility:hidden
@@ -291,7 +291,7 @@ button.comfy-queue-btn {
.litegraph .dialog {
z-index: 1;
font-family: Arial;
font-family: Arial, sans-serif;
}
.litegraph .litemenu-entry.has_submenu {
@@ -330,6 +330,13 @@ button.comfy-queue-btn {
color: var(--input-text) !important;
}
.comfy-context-menu-filter {
box-sizing: border-box;
border: 1px solid #999;
margin: 0 0 5px 5px;
width: calc(100% - 10px);
}
/* Search box */
.litegraph.litesearchbox {