Merge branch 'comfyanonymous:master' into master

Jake D 2023-04-25 15:47:37 -04:00 committed by GitHub
commit 397477e877
29 changed files with 1293 additions and 183 deletions


@ -0,0 +1,30 @@
name: "Windows Release cu118 dependencies 2"
on:
workflow_dispatch:
# push:
# branches:
# - master
jobs:
build_dependencies:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10.9'
- shell: bash
run: |
python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers==0.0.19.dev516 --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
python -m pip install --no-cache-dir ./temp_wheel_dir/*
echo installed basic
ls -lah temp_wheel_dir
mv temp_wheel_dir cu118_python_deps
tar cf cu118_python_deps.tar cu118_python_deps
- uses: actions/cache/save@v3
with:
path: cu118_python_deps.tar
key: ${{ runner.os }}-build-cu118


@ -17,6 +17,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
- Can load ckpt, safetensors and diffusers models/checkpoints. Standalone VAEs and CLIP models.
- Embeddings/Textual inversion
- [Loras (regular, locon and loha)](https://comfyanonymous.github.io/ComfyUI_examples/lora/)
- [Hypernetworks](https://comfyanonymous.github.io/ComfyUI_examples/hypernetworks/)
- Loading full workflows (with seeds) from generated PNG files.
- Saving/Loading workflows as Json files.
- Nodes interface can be used to create complex workflows like one for [Hires fix](https://comfyanonymous.github.io/ComfyUI_examples/2_pass_txt2img/) or much more advanced ones.
@ -25,6 +26,7 @@ This ui will let you design and execute advanced stable diffusion pipelines usin
- [ControlNet and T2I-Adapter](https://comfyanonymous.github.io/ComfyUI_examples/controlnet/)
- [Upscale Models (ESRGAN, ESRGAN variants, SwinIR, Swin2SR, etc...)](https://comfyanonymous.github.io/ComfyUI_examples/upscale_models/)
- [unCLIP Models](https://comfyanonymous.github.io/ComfyUI_examples/unclip/)
- [GLIGEN](https://comfyanonymous.github.io/ComfyUI_examples/gligen/)
- Starts up very fast.
- Works fully offline: will never download anything.
- [Config file](extra_model_paths.yaml.example) to set the search paths for models.
@ -83,7 +85,7 @@ Put your VAE in: models/vae
At the time of writing this, pytorch has issues with python versions higher than 3.10, so make sure your python/pip versions are 3.10.
### AMD (Linux only)
### AMD GPUs (Linux only)
AMD users can install rocm and pytorch with pip if they aren't already installed; this is the command to install the stable version:
```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/rocm5.4.2```

comfy/gligen.py (new file, 343 lines)

@ -0,0 +1,343 @@
import math
import torch
from torch import nn, einsum
from ldm.modules.attention import CrossAttention
from inspect import isfunction
def exists(val):
return val is not None
def uniq(arr):
return{el: True for el in arr}.keys()
def default(val, d):
if exists(val):
return val
return d() if isfunction(d) else d
# feedforward
class GEGLU(nn.Module):
def __init__(self, dim_in, dim_out):
super().__init__()
self.proj = nn.Linear(dim_in, dim_out * 2)
def forward(self, x):
x, gate = self.proj(x).chunk(2, dim=-1)
return x * torch.nn.functional.gelu(gate)
class FeedForward(nn.Module):
def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.):
super().__init__()
inner_dim = int(dim * mult)
dim_out = default(dim_out, dim)
project_in = nn.Sequential(
nn.Linear(dim, inner_dim),
nn.GELU()
) if not glu else GEGLU(dim, inner_dim)
self.net = nn.Sequential(
project_in,
nn.Dropout(dropout),
nn.Linear(inner_dim, dim_out)
)
def forward(self, x):
return self.net(x)
class GatedCrossAttentionDense(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, d_head):
super().__init__()
self.attn = CrossAttention(
query_dim=query_dim,
context_dim=context_dim,
heads=n_heads,
dim_head=d_head)
self.ff = FeedForward(query_dim, glu=True)
self.norm1 = nn.LayerNorm(query_dim)
self.norm2 = nn.LayerNorm(query_dim)
self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
# this can be useful: we can externally change the magnitude of tanh(alpha)
# for example, when it is set to 0, the entire model is the same as the
# original one
self.scale = 1
def forward(self, x, objs):
x = x + self.scale * \
torch.tanh(self.alpha_attn) * self.attn(self.norm1(x), objs, objs)
x = x + self.scale * \
torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
return x
class GatedSelfAttentionDense(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, d_head):
super().__init__()
# we need a linear projection since we concatenate the visual feature and
# the object feature
self.linear = nn.Linear(context_dim, query_dim)
self.attn = CrossAttention(
query_dim=query_dim,
context_dim=query_dim,
heads=n_heads,
dim_head=d_head)
self.ff = FeedForward(query_dim, glu=True)
self.norm1 = nn.LayerNorm(query_dim)
self.norm2 = nn.LayerNorm(query_dim)
self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
# this can be useful: we can externally change the magnitude of tanh(alpha)
# for example, when it is set to 0, the entire model is the same as the
# original one
self.scale = 1
def forward(self, x, objs):
N_visual = x.shape[1]
objs = self.linear(objs)
x = x + self.scale * torch.tanh(self.alpha_attn) * self.attn(
self.norm1(torch.cat([x, objs], dim=1)))[:, 0:N_visual, :]
x = x + self.scale * \
torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
return x
class GatedSelfAttentionDense2(nn.Module):
def __init__(self, query_dim, context_dim, n_heads, d_head):
super().__init__()
# we need a linear projection since we concatenate the visual feature and
# the object feature
self.linear = nn.Linear(context_dim, query_dim)
self.attn = CrossAttention(
query_dim=query_dim, context_dim=query_dim, dim_head=d_head)
self.ff = FeedForward(query_dim, glu=True)
self.norm1 = nn.LayerNorm(query_dim)
self.norm2 = nn.LayerNorm(query_dim)
self.register_parameter('alpha_attn', nn.Parameter(torch.tensor(0.)))
self.register_parameter('alpha_dense', nn.Parameter(torch.tensor(0.)))
# this can be useful: we can externally change the magnitude of tanh(alpha)
# for example, when it is set to 0, the entire model is the same as the
# original one
self.scale = 1
def forward(self, x, objs):
B, N_visual, _ = x.shape
B, N_ground, _ = objs.shape
objs = self.linear(objs)
# sanity check
size_v = math.sqrt(N_visual)
size_g = math.sqrt(N_ground)
assert int(size_v) == size_v, "Visual tokens must be square rootable"
assert int(size_g) == size_g, "Grounding tokens must be square rootable"
size_v = int(size_v)
size_g = int(size_g)
# select grounding token and resize it to visual token size as residual
out = self.attn(self.norm1(torch.cat([x, objs], dim=1)))[
:, N_visual:, :]
out = out.permute(0, 2, 1).reshape(B, -1, size_g, size_g)
out = torch.nn.functional.interpolate(
out, (size_v, size_v), mode='bicubic')
residual = out.reshape(B, -1, N_visual).permute(0, 2, 1)
# add residual to visual feature
x = x + self.scale * torch.tanh(self.alpha_attn) * residual
x = x + self.scale * \
torch.tanh(self.alpha_dense) * self.ff(self.norm2(x))
return x
class FourierEmbedder():
def __init__(self, num_freqs=64, temperature=100):
self.num_freqs = num_freqs
self.temperature = temperature
self.freq_bands = temperature ** (torch.arange(num_freqs) / num_freqs)
@torch.no_grad()
def __call__(self, x, cat_dim=-1):
"x: arbitrary shape of tensor. dim: cat dim"
out = []
for freq in self.freq_bands:
out.append(torch.sin(freq * x))
out.append(torch.cos(freq * x))
return torch.cat(out, cat_dim)
class PositionNet(nn.Module):
def __init__(self, in_dim, out_dim, fourier_freqs=8):
super().__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.fourier_embedder = FourierEmbedder(num_freqs=fourier_freqs)
self.position_dim = fourier_freqs * 2 * 4 # 2 is sin&cos, 4 is xyxy
self.linears = nn.Sequential(
nn.Linear(self.in_dim + self.position_dim, 512),
nn.SiLU(),
nn.Linear(512, 512),
nn.SiLU(),
nn.Linear(512, out_dim),
)
self.null_positive_feature = torch.nn.Parameter(
torch.zeros([self.in_dim]))
self.null_position_feature = torch.nn.Parameter(
torch.zeros([self.position_dim]))
def forward(self, boxes, masks, positive_embeddings):
B, N, _ = boxes.shape
masks = masks.unsqueeze(-1)
# embed the position (it may include padding as a placeholder)
xyxy_embedding = self.fourier_embedder(boxes) # B*N*4 --> B*N*C
# learnable null embedding
positive_null = self.null_positive_feature.view(1, 1, -1)
xyxy_null = self.null_position_feature.view(1, 1, -1)
# replace padding with learnable null embedding
positive_embeddings = positive_embeddings * \
masks + (1 - masks) * positive_null
xyxy_embedding = xyxy_embedding * masks + (1 - masks) * xyxy_null
objs = self.linears(
torch.cat([positive_embeddings, xyxy_embedding], dim=-1))
assert objs.shape == torch.Size([B, N, self.out_dim])
return objs
class Gligen(nn.Module):
def __init__(self, modules, position_net, key_dim):
super().__init__()
self.module_list = nn.ModuleList(modules)
self.position_net = position_net
self.key_dim = key_dim
self.max_objs = 30
def _set_position(self, boxes, masks, positive_embeddings):
objs = self.position_net(boxes, masks, positive_embeddings)
def func(key, x):
module = self.module_list[key]
return module(x, objs)
return func
def set_position(self, latent_image_shape, position_params, device):
batch, c, h, w = latent_image_shape
masks = torch.zeros([self.max_objs], device="cpu")
boxes = []
positive_embeddings = []
for p in position_params:
x1 = (p[4]) / w
y1 = (p[3]) / h
x2 = (p[4] + p[2]) / w
y2 = (p[3] + p[1]) / h
masks[len(boxes)] = 1.0
boxes += [torch.tensor((x1, y1, x2, y2)).unsqueeze(0)]
positive_embeddings += [p[0]]
append_boxes = []
append_conds = []
if len(boxes) < self.max_objs:
append_boxes = [torch.zeros(
[self.max_objs - len(boxes), 4], device="cpu")]
append_conds = [torch.zeros(
[self.max_objs - len(boxes), self.key_dim], device="cpu")]
box_out = torch.cat(
boxes + append_boxes).unsqueeze(0).repeat(batch, 1, 1)
masks = masks.unsqueeze(0).repeat(batch, 1)
conds = torch.cat(positive_embeddings +
append_conds).unsqueeze(0).repeat(batch, 1, 1)
return self._set_position(
box_out.to(device),
masks.to(device),
conds.to(device))
def set_empty(self, latent_image_shape, device):
batch, c, h, w = latent_image_shape
masks = torch.zeros([self.max_objs], device="cpu").repeat(batch, 1)
box_out = torch.zeros([self.max_objs, 4],
device="cpu").repeat(batch, 1, 1)
conds = torch.zeros([self.max_objs, self.key_dim],
device="cpu").repeat(batch, 1, 1)
return self._set_position(
box_out.to(device),
masks.to(device),
conds.to(device))
def cleanup(self):
pass
def get_models(self):
return [self]
def load_gligen(sd):
sd_k = sd.keys()
output_list = []
key_dim = 768
for a in ["input_blocks", "middle_block", "output_blocks"]:
for b in range(20):
k_temp = filter(lambda k: "{}.{}.".format(a, b)
in k and ".fuser." in k, sd_k)
k_temp = map(lambda k: (k, k.split(".fuser.")[-1]), k_temp)
n_sd = {}
for k in k_temp:
n_sd[k[1]] = sd[k[0]]
if len(n_sd) > 0:
query_dim = n_sd["linear.weight"].shape[0]
key_dim = n_sd["linear.weight"].shape[1]
if key_dim == 768: # SD1.x
n_heads = 8
d_head = query_dim // n_heads
else:
d_head = 64
n_heads = query_dim // d_head
gated = GatedSelfAttentionDense(
query_dim, key_dim, n_heads, d_head)
gated.load_state_dict(n_sd, strict=False)
output_list.append(gated)
if "position_net.null_positive_feature" in sd_k:
in_dim = sd["position_net.null_positive_feature"].shape[0]
out_dim = sd["position_net.linears.4.weight"].shape[0]
class WeightsLoader(torch.nn.Module):
pass
w = WeightsLoader()
w.position_net = PositionNet(in_dim, out_dim)
w.load_state_dict(sd, strict=False)
gligen = Gligen(output_list, w.position_net, key_dim)
return gligen
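
The loader above returns a `Gligen` module whose `set_position` method produces the patch function consumed by the sampler (see the comfy/samplers.py and nodes.py hunks further down). A minimal usage sketch, assuming a downloaded GLIGEN checkpoint and placeholder tensors:
```
import torch
import comfy.sd

# Path is a placeholder; the notebook cell further down downloads this file.
gligen_model = comfy.sd.load_gligen("models/gligen/gligen_sd14_textbox_pruned_fp16.safetensors")

# GLIGENTextBoxApply stores one tuple per text box:
# (pooled CLIP embedding, height // 8, width // 8, y // 8, x // 8)
cond_pooled = torch.zeros(1, 768)  # stand-in for clip.encode_from_tokens(..., return_pooled=True)[1]
position_params = [(cond_pooled, 64 // 8, 64 // 8, 0, 0)]

latent_shape = (1, 4, 64, 64)  # batch, channels, latent height, latent width
patch = gligen_model.set_position(latent_shape, position_params, torch.device("cpu"))

# The sampler registers `patch` as transformer_options["patches"]["middle_patch"];
# each transformer block then calls patch(current_index, hidden_states).
```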


@ -163,13 +163,17 @@ class CrossAttentionBirchSan(nn.Module):
nn.Dropout(dropout)
)
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
h = self.heads
query = self.to_q(x)
context = default(context, x)
key = self.to_k(context)
value = self.to_v(context)
if value is not None:
value = self.to_v(value)
else:
value = self.to_v(context)
del context, x
query = query.unflatten(-1, (self.heads, -1)).transpose(1,2).flatten(end_dim=1)
@ -256,13 +260,17 @@ class CrossAttentionDoggettx(nn.Module):
nn.Dropout(dropout)
)
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
h = self.heads
q_in = self.to_q(x)
context = default(context, x)
k_in = self.to_k(context)
v_in = self.to_v(context)
if value is not None:
v_in = self.to_v(value)
del value
else:
v_in = self.to_v(context)
del context, x
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q_in, k_in, v_in))
@ -350,13 +358,17 @@ class CrossAttention(nn.Module):
nn.Dropout(dropout)
)
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
h = self.heads
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if value is not None:
v = self.to_v(value)
del value
else:
v = self.to_v(context)
q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))
@ -402,11 +414,15 @@ class MemoryEfficientCrossAttention(nn.Module):
self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout))
self.attention_op: Optional[Any] = None
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if value is not None:
v = self.to_v(value)
del value
else:
v = self.to_v(context)
b, _, _ = q.shape
q, k, v = map(
@ -447,19 +463,19 @@ class CrossAttentionPytorch(nn.Module):
self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout))
self.attention_op: Optional[Any] = None
def forward(self, x, context=None, mask=None):
def forward(self, x, context=None, value=None, mask=None):
q = self.to_q(x)
context = default(context, x)
k = self.to_k(context)
v = self.to_v(context)
if value is not None:
v = self.to_v(value)
del value
else:
v = self.to_v(context)
b, _, _ = q.shape
q, k, v = map(
lambda t: t.unsqueeze(3)
.reshape(b, t.shape[1], self.heads, self.dim_head)
.permute(0, 2, 1, 3)
.reshape(b * self.heads, t.shape[1], self.dim_head)
.contiguous(),
lambda t: t.view(b, -1, self.heads, self.dim_head).transpose(1, 2),
(q, k, v),
)
@ -468,10 +484,7 @@ class CrossAttentionPytorch(nn.Module):
if exists(mask):
raise NotImplementedError
out = (
out.unsqueeze(0)
.reshape(b, self.heads, out.shape[1], self.dim_head)
.permute(0, 2, 1, 3)
.reshape(b, out.shape[1], self.heads * self.dim_head)
out.transpose(1, 2).reshape(b, -1, self.heads * self.dim_head)
)
return self.to_out(out)
@ -510,19 +523,58 @@ class BasicTransformerBlock(nn.Module):
return checkpoint(self._forward, (x, context, transformer_options), self.parameters(), self.checkpoint)
def _forward(self, x, context=None, transformer_options={}):
current_index = None
if "current_index" in transformer_options:
current_index = transformer_options["current_index"]
if "patches" in transformer_options:
transformer_patches = transformer_options["patches"]
else:
transformer_patches = {}
n = self.norm1(x)
if self.disable_self_attn:
context_attn1 = context
else:
context_attn1 = None
value_attn1 = None
if "attn1_patch" in transformer_patches:
patch = transformer_patches["attn1_patch"]
if context_attn1 is None:
context_attn1 = n
value_attn1 = context_attn1
for p in patch:
n, context_attn1, value_attn1 = p(current_index, n, context_attn1, value_attn1)
if "tomesd" in transformer_options:
m, u = tomesd.get_functions(x, transformer_options["tomesd"]["ratio"], transformer_options["original_shape"])
n = u(self.attn1(m(n), context=context if self.disable_self_attn else None))
n = u(self.attn1(m(n), context=context_attn1, value=value_attn1))
else:
n = self.attn1(n, context=context if self.disable_self_attn else None)
n = self.attn1(n, context=context_attn1, value=value_attn1)
x += n
if "middle_patch" in transformer_patches:
patch = transformer_patches["middle_patch"]
for p in patch:
x = p(current_index, x)
n = self.norm2(x)
n = self.attn2(n, context=context)
context_attn2 = context
value_attn2 = None
if "attn2_patch" in transformer_patches:
patch = transformer_patches["attn2_patch"]
value_attn2 = context_attn2
for p in patch:
n, context_attn2, value_attn2 = p(current_index, n, context_attn2, value_attn2)
n = self.attn2(n, context=context_attn2, value=value_attn2)
x += n
x = self.ff(self.norm3(x)) + x
if current_index is not None:
transformer_options["current_index"] += 1
return x
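
The new `attn1_patch`/`attn2_patch` hooks above call each registered patch as `p(current_index, q, k, v)`, where the three tensors are what gets fed to `to_q`, `to_k` and `to_v`, and expect the (possibly modified) triple back. A minimal sketch of a custom patch, with a purely illustrative scaling factor:
```
def dampen_cross_attention(current_index, q, k, v):
    # Illustrative only: scale the tensor that will be projected by to_v.
    return q, k, v * 0.9

# Registered through the ModelPatcher API added in comfy/sd.py further down:
# patched = model.clone()
# patched.set_model_attn2_patch(dampen_cross_attention)
```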


@ -782,6 +782,8 @@ class UNetModel(nn.Module):
:return: an [N x C x ...] Tensor of outputs.
"""
transformer_options["original_shape"] = list(x.shape)
transformer_options["current_index"] = 0
assert (y is not None) == (
self.num_classes is not None
), "must specify y if and only if the model is class-conditional"


@ -133,6 +133,7 @@ def unload_model():
#never unload models from GPU on high vram
if vram_state != VRAMState.HIGH_VRAM:
current_loaded_model.model.cpu()
current_loaded_model.model_patches_to("cpu")
current_loaded_model.unpatch_model()
current_loaded_model = None
@ -156,6 +157,8 @@ def load_model_gpu(model):
except Exception as e:
model.unpatch_model()
raise e
model.model_patches_to(get_torch_device())
current_loaded_model = model
if vram_state == VRAMState.CPU:
pass
@ -176,7 +179,7 @@ def load_model_gpu(model):
model_accelerated = True
return current_loaded_model
def load_controlnet_gpu(models):
def load_controlnet_gpu(control_models):
global current_gpu_controlnets
global vram_state
if vram_state == VRAMState.CPU:
@ -186,6 +189,10 @@ def load_controlnet_gpu(models):
#don't load controlnets like this if low vram because they will be loaded right before running and unloaded right after
return
models = []
for m in control_models:
models += m.get_models()
for m in current_gpu_controlnets:
if m not in models:
m.cpu()

comfy/sample.py (new file, 83 lines)

@ -0,0 +1,83 @@
import torch
import comfy.model_management
import comfy.samplers
import math
def prepare_noise(latent_image, seed, skip=0):
"""
creates random noise given a latent image and a seed.
optional arg skip can be used to skip and discard the first `skip` noise generations for a given seed
"""
generator = torch.manual_seed(seed)
for _ in range(skip):
noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
return noise
def prepare_mask(noise_mask, shape, device):
"""ensures noise mask is of proper dimensions"""
noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear")
noise_mask = noise_mask.round()
noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
if noise_mask.shape[0] < shape[0]:
noise_mask = noise_mask.repeat(math.ceil(shape[0] / noise_mask.shape[0]), 1, 1, 1)[:shape[0]]
noise_mask = noise_mask.to(device)
return noise_mask
def broadcast_cond(cond, batch, device):
"""broadcasts conditioning to the batch size"""
copy = []
for p in cond:
t = p[0]
if t.shape[0] < batch:
t = torch.cat([t] * batch)
t = t.to(device)
copy += [[t] + p[1:]]
return copy
def get_models_from_cond(cond, model_type):
models = []
for c in cond:
if model_type in c[1]:
models += [c[1][model_type]]
return models
def load_additional_models(positive, negative):
"""loads additional models in positive and negative conditioning"""
control_nets = get_models_from_cond(positive, "control") + get_models_from_cond(negative, "control")
gligen = get_models_from_cond(positive, "gligen") + get_models_from_cond(negative, "gligen")
gligen = [x[1] for x in gligen]
models = control_nets + gligen
comfy.model_management.load_controlnet_gpu(models)
return models
def cleanup_additional_models(models):
"""cleanup additional models that were loaded"""
for m in models:
m.cleanup()
def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False, noise_mask=None, sigmas=None):
device = comfy.model_management.get_torch_device()
if noise_mask is not None:
noise_mask = prepare_mask(noise_mask, noise.shape, device)
real_model = None
comfy.model_management.load_model_gpu(model)
real_model = model.model
noise = noise.to(device)
latent_image = latent_image.to(device)
positive_copy = broadcast_cond(positive, noise.shape[0], device)
negative_copy = broadcast_cond(negative, noise.shape[0], device)
models = load_additional_models(positive, negative)
sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas)
samples = samples.cpu()
cleanup_additional_models(models)
return samples
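
A sketch of calling these helpers directly, mirroring the rewritten `common_ksampler` in nodes.py further down; `model`, `positive`, `negative` and `latent` are assumed to come from the usual loader/encode nodes:
```
import comfy.sample

latent_image = latent["samples"]
skip = latent.get("batch_index", 0)
noise = comfy.sample.prepare_noise(latent_image, seed=42, skip=skip)

samples = comfy.sample.sample(model, noise, steps=20, cfg=8.0,
                              sampler_name="euler", scheduler="normal",
                              positive=positive, negative=negative,
                              latent_image=latent_image, denoise=1.0,
                              noise_mask=latent.get("noise_mask"))
```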


@ -7,23 +7,6 @@ from comfy import model_management
from .ldm.models.diffusion.ddim import DDIMSampler
from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
class CFGDenoiser(torch.nn.Module):
def __init__(self, model):
super().__init__()
self.inner_model = model
def forward(self, x, sigma, uncond, cond, cond_scale):
if len(uncond[0]) == len(cond[0]) and x.shape[0] * x.shape[2] * x.shape[3] < (96 * 96): #TODO check memory instead
x_in = torch.cat([x] * 2)
sigma_in = torch.cat([sigma] * 2)
cond_in = torch.cat([uncond, cond])
uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
else:
cond = self.inner_model(x, sigma, cond=cond)
uncond = self.inner_model(x, sigma, cond=uncond)
return uncond + (cond - uncond) * cond_scale
#The main sampling function shared by all the samplers
#Returns predicted noise
def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, cond_concat=None, model_options={}):
@ -36,8 +19,8 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
strength = cond[1]['strength']
adm_cond = None
if 'adm' in cond[1]:
adm_cond = cond[1]['adm']
if 'adm_encoded' in cond[1]:
adm_cond = cond[1]['adm_encoded']
input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
mult = torch.ones_like(input_x) * strength
@ -70,7 +53,21 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
control = None
if 'control' in cond[1]:
control = cond[1]['control']
return (input_x, mult, conditionning, area, control)
patches = None
if 'gligen' in cond[1]:
gligen = cond[1]['gligen']
patches = {}
gligen_type = gligen[0]
gligen_model = gligen[1]
if gligen_type == "position":
gligen_patch = gligen_model.set_position(input_x.shape, gligen[2], input_x.device)
else:
gligen_patch = gligen_model.set_empty(input_x.shape, input_x.device)
patches['middle_patch'] = [gligen_patch]
return (input_x, mult, conditionning, area, control, patches)
def cond_equal_size(c1, c2):
if c1 is c2:
@ -91,12 +88,21 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
def can_concat_cond(c1, c2):
if c1[0].shape != c2[0].shape:
return False
#control
if (c1[4] is None) != (c2[4] is None):
return False
if c1[4] is not None:
if c1[4] is not c2[4]:
return False
#patches
if (c1[5] is None) != (c2[5] is None):
return False
if (c1[5] is not None):
if c1[5] is not c2[5]:
return False
return cond_equal_size(c1[2], c2[2])
def cond_cat(c_list):
@ -166,6 +172,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
cond_or_uncond = []
area = []
control = None
patches = None
for x in to_batch:
o = to_run.pop(x)
p = o[0]
@ -175,6 +182,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
area += [p[3]]
cond_or_uncond += [o[1]]
control = p[4]
patches = p[5]
batch_chunks = len(cond_or_uncond)
input_x = torch.cat(input_x)
@ -184,8 +192,22 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
if control is not None:
c['control'] = control.get_control(input_x, timestep_, c['c_crossattn'], len(cond_or_uncond))
transformer_options = {}
if 'transformer_options' in model_options:
c['transformer_options'] = model_options['transformer_options']
transformer_options = model_options['transformer_options'].copy()
if patches is not None:
if "patches" in transformer_options:
cur_patches = transformer_options["patches"].copy()
for p in patches:
if p in cur_patches:
cur_patches[p] = cur_patches[p] + patches[p]
else:
cur_patches[p] = patches[p]
else:
transformer_options["patches"] = patches
c['transformer_options'] = transformer_options
output = model_function(input_x, timestep_, cond=c).chunk(batch_chunks)
del input_x
@ -211,7 +233,10 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
max_total_area = model_management.maximum_batch_area()
cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
return uncond + (cond - uncond) * cond_scale
if "sampler_cfg_function" in model_options:
return model_options["sampler_cfg_function"](cond, uncond, cond_scale)
else:
return uncond + (cond - uncond) * cond_scale
class CompVisVDenoiser(k_diffusion_external.DiscreteVDDPMDenoiser):
@ -306,8 +331,7 @@ def create_cond_with_same_area_if_none(conds, c):
n = c[1].copy()
conds += [[smallest[0], n]]
def apply_control_net_to_equal_area(conds, uncond):
def apply_empty_x_to_equal_area(conds, uncond, name, uncond_fill_func):
cond_cnets = []
cond_other = []
uncond_cnets = []
@ -315,15 +339,15 @@ def apply_control_net_to_equal_area(conds, uncond):
for t in range(len(conds)):
x = conds[t]
if 'area' not in x[1]:
if 'control' in x[1] and x[1]['control'] is not None:
cond_cnets.append(x[1]['control'])
if name in x[1] and x[1][name] is not None:
cond_cnets.append(x[1][name])
else:
cond_other.append((x, t))
for t in range(len(uncond)):
x = uncond[t]
if 'area' not in x[1]:
if 'control' in x[1] and x[1]['control'] is not None:
uncond_cnets.append(x[1]['control'])
if name in x[1] and x[1][name] is not None:
uncond_cnets.append(x[1][name])
else:
uncond_other.append((x, t))
@ -333,15 +357,16 @@ def apply_control_net_to_equal_area(conds, uncond):
for x in range(len(cond_cnets)):
temp = uncond_other[x % len(uncond_other)]
o = temp[0]
if 'control' in o[1] and o[1]['control'] is not None:
if name in o[1] and o[1][name] is not None:
n = o[1].copy()
n['control'] = cond_cnets[x]
n[name] = uncond_fill_func(cond_cnets, x)
uncond += [[o[0], n]]
else:
n = o[1].copy()
n['control'] = cond_cnets[x]
n[name] = uncond_fill_func(cond_cnets, x)
uncond[temp[1]] = [o[0], n]
def encode_adm(noise_augmentor, conds, batch_size, device):
for t in range(len(conds)):
x = conds[t]
@ -371,10 +396,11 @@ def encode_adm(noise_augmentor, conds, batch_size, device):
else:
adm_out = torch.zeros((1, noise_augmentor.time_embed.dim * 2), device=device)
x[1] = x[1].copy()
x[1]["adm"] = torch.cat([adm_out] * batch_size)
x[1]["adm_encoded"] = torch.cat([adm_out] * batch_size)
return conds
class KSampler:
SCHEDULERS = ["karras", "normal", "simple", "ddim_uniform"]
SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
@ -403,7 +429,7 @@ class KSampler:
self.denoise = denoise
self.model_options = model_options
def _calculate_sigmas(self, steps):
def calculate_sigmas(self, steps):
sigmas = None
discard_penultimate_sigma = False
@ -412,13 +438,13 @@ class KSampler:
discard_penultimate_sigma = True
if self.scheduler == "karras":
sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max, device=self.device)
sigmas = k_diffusion_sampling.get_sigmas_karras(n=steps, sigma_min=self.sigma_min, sigma_max=self.sigma_max)
elif self.scheduler == "normal":
sigmas = self.model_wrap.get_sigmas(steps).to(self.device)
sigmas = self.model_wrap.get_sigmas(steps)
elif self.scheduler == "simple":
sigmas = simple_scheduler(self.model_wrap, steps).to(self.device)
sigmas = simple_scheduler(self.model_wrap, steps)
elif self.scheduler == "ddim_uniform":
sigmas = ddim_scheduler(self.model_wrap, steps).to(self.device)
sigmas = ddim_scheduler(self.model_wrap, steps)
else:
print("error invalid scheduler", self.scheduler)
@ -429,15 +455,16 @@ class KSampler:
def set_steps(self, steps, denoise=None):
self.steps = steps
if denoise is None or denoise > 0.9999:
self.sigmas = self._calculate_sigmas(steps)
self.sigmas = self.calculate_sigmas(steps).to(self.device)
else:
new_steps = int(steps/denoise)
sigmas = self._calculate_sigmas(new_steps)
sigmas = self.calculate_sigmas(new_steps).to(self.device)
self.sigmas = sigmas[-(steps + 1):]
def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None):
sigmas = self.sigmas
def sample(self, noise, positive, negative, cfg, latent_image=None, start_step=None, last_step=None, force_full_denoise=False, denoise_mask=None, sigmas=None):
if sigmas is None:
sigmas = self.sigmas
sigma_min = self.sigma_min
if last_step is not None and last_step < (len(sigmas) - 1):
@ -463,7 +490,8 @@ class KSampler:
for c in negative:
create_cond_with_same_area_if_none(positive, c)
apply_control_net_to_equal_area(positive, negative)
apply_empty_x_to_equal_area(positive, negative, 'control', lambda cond_cnets, x: cond_cnets[x])
apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])
if self.model.model.diffusion_model.dtype == torch.float16:
precision_scope = torch.autocast


@ -13,6 +13,7 @@ from .t2i_adapter import adapter
from . import utils
from . import clip_vision
from . import gligen
def load_model_weights(model, sd, verbose=False, load_state_dict_to=[]):
m, u = model.load_state_dict(sd, strict=False)
@ -250,6 +251,32 @@ class ModelPatcher:
def set_model_tomesd(self, ratio):
self.model_options["transformer_options"]["tomesd"] = {"ratio": ratio}
def set_model_sampler_cfg_function(self, sampler_cfg_function):
self.model_options["sampler_cfg_function"] = sampler_cfg_function
def set_model_patch(self, patch, name):
to = self.model_options["transformer_options"]
if "patches" not in to:
to["patches"] = {}
to["patches"][name] = to["patches"].get(name, []) + [patch]
def set_model_attn1_patch(self, patch):
self.set_model_patch(patch, "attn1_patch")
def set_model_attn2_patch(self, patch):
self.set_model_patch(patch, "attn2_patch")
def model_patches_to(self, device):
to = self.model_options["transformer_options"]
if "patches" in to:
patches = to["patches"]
for name in patches:
patch_list = patches[name]
for i in range(len(patch_list)):
if hasattr(patch_list[i], "to"):
patch_list[i] = patch_list[i].to(device)
def model_dtype(self):
return self.model.diffusion_model.dtype
@ -375,7 +402,7 @@ class CLIP:
def tokenize(self, text, return_word_ids=False):
return self.tokenizer.tokenize_with_weights(text, return_word_ids)
def encode_from_tokens(self, tokens):
def encode_from_tokens(self, tokens, return_pooled=False):
if self.layer_idx is not None:
self.cond_stage_model.clip_layer(self.layer_idx)
try:
@ -385,6 +412,10 @@ class CLIP:
except Exception as e:
self.patcher.unpatch_model()
raise e
if return_pooled:
eos_token_index = max(range(len(tokens[0])), key=tokens[0].__getitem__)
pooled = cond[:, eos_token_index]
return cond, pooled
return cond
def encode(self, text):
@ -561,10 +592,10 @@ class ControlNet:
c.strength = self.strength
return c
def get_control_models(self):
def get_models(self):
out = []
if self.previous_controlnet is not None:
out += self.previous_controlnet.get_control_models()
out += self.previous_controlnet.get_models()
out.append(self.control_model)
return out
@ -734,10 +765,10 @@ class T2IAdapter:
del self.cond_hint
self.cond_hint = None
def get_control_models(self):
def get_models(self):
out = []
if self.previous_controlnet is not None:
out += self.previous_controlnet.get_control_models()
out += self.previous_controlnet.get_models()
return out
def load_t2i_adapter(t2i_data):
@ -784,6 +815,13 @@ def load_clip(ckpt_path, embedding_directory=None):
clip.load_from_state_dict(clip_data)
return clip
def load_gligen(ckpt_path):
data = utils.load_torch_file(ckpt_path)
model = gligen.load_gligen(data)
if model_management.should_use_fp16():
model = model.half()
return model
def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=None):
with open(config_path, 'r') as stream:
config = yaml.safe_load(stream)
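
Among the ModelPatcher additions in this file, `set_model_sampler_cfg_function` plugs into the `sampler_cfg_function` branch of `sampling_function` shown earlier: the callback receives the already-computed `cond` and `uncond` predictions plus `cond_scale` and returns the combined noise prediction. A minimal sketch with an illustrative rescale factor:
```
def rescaled_cfg(cond, uncond, cond_scale):
    # Default behaviour is: uncond + (cond - uncond) * cond_scale
    return uncond + (cond - uncond) * (cond_scale * 0.7)

# patched = model.clone()
# patched.set_model_sampler_cfg_function(rescaled_cfg)
```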


@ -1,11 +1,14 @@
import torch
def load_torch_file(ckpt):
def load_torch_file(ckpt, safe_load=False):
if ckpt.lower().endswith(".safetensors"):
import safetensors.torch
sd = safetensors.torch.load_file(ckpt, device="cpu")
else:
pl_sd = torch.load(ckpt, map_location="cpu")
if safe_load:
pl_sd = torch.load(ckpt, map_location="cpu", weights_only=True)
else:
pl_sd = torch.load(ckpt, map_location="cpu")
if "global_step" in pl_sd:
print(f"Global Step: {pl_sd['global_step']}")
if "state_dict" in pl_sd:


@ -4,7 +4,10 @@
from __future__ import annotations
from collections import OrderedDict
from typing import Literal
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal
import torch
import torch.nn as nn


@ -0,0 +1,109 @@
import comfy.utils
import folder_paths
import torch
def load_hypernetwork_patch(path, strength):
sd = comfy.utils.load_torch_file(path, safe_load=True)
activation_func = sd.get('activation_func', 'linear')
is_layer_norm = sd.get('is_layer_norm', False)
use_dropout = sd.get('use_dropout', False)
activate_output = sd.get('activate_output', False)
last_layer_dropout = sd.get('last_layer_dropout', False)
valid_activation = {
"linear": torch.nn.Identity,
"relu": torch.nn.ReLU,
"leakyrelu": torch.nn.LeakyReLU,
"elu": torch.nn.ELU,
"swish": torch.nn.Hardswish,
"tanh": torch.nn.Tanh,
"sigmoid": torch.nn.Sigmoid,
}
if activation_func not in valid_activation:
print("Unsupported Hypernetwork format, if you report it I might implement it.", path, " ", activation_func, is_layer_norm, use_dropout, activate_output, last_layer_dropout)
return None
out = {}
for d in sd:
try:
dim = int(d)
except:
continue
output = []
for index in [0, 1]:
attn_weights = sd[dim][index]
keys = attn_weights.keys()
linears = filter(lambda a: a.endswith(".weight"), keys)
linears = list(map(lambda a: a[:-len(".weight")], linears))
layers = []
for i in range(len(linears)):
lin_name = linears[i]
last_layer = (i == (len(linears) - 1))
penultimate_layer = (i == (len(linears) - 2))
lin_weight = attn_weights['{}.weight'.format(lin_name)]
lin_bias = attn_weights['{}.bias'.format(lin_name)]
layer = torch.nn.Linear(lin_weight.shape[1], lin_weight.shape[0])
layer.load_state_dict({"weight": lin_weight, "bias": lin_bias})
layers.append(layer)
if activation_func != "linear":
if (not last_layer) or (activate_output):
layers.append(valid_activation[activation_func]())
if is_layer_norm:
layers.append(torch.nn.LayerNorm(lin_weight.shape[0]))
if use_dropout:
if (not last_layer) and (not penultimate_layer or last_layer_dropout):
layers.append(torch.nn.Dropout(p=0.3))
output.append(torch.nn.Sequential(*layers))
out[dim] = torch.nn.ModuleList(output)
class hypernetwork_patch:
def __init__(self, hypernet, strength):
self.hypernet = hypernet
self.strength = strength
def __call__(self, current_index, q, k, v):
dim = k.shape[-1]
if dim in self.hypernet:
hn = self.hypernet[dim]
k = k + hn[0](k) * self.strength
v = v + hn[1](v) * self.strength
return q, k, v
def to(self, device):
for d in self.hypernet.keys():
self.hypernet[d] = self.hypernet[d].to(device)
return self
return hypernetwork_patch(out, strength)
class HypernetworkLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "model": ("MODEL",),
"hypernetwork_name": (folder_paths.get_filename_list("hypernetworks"), ),
"strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
}}
RETURN_TYPES = ("MODEL",)
FUNCTION = "load_hypernetwork"
CATEGORY = "loaders"
def load_hypernetwork(self, model, hypernetwork_name, strength):
hypernetwork_path = folder_paths.get_full_path("hypernetworks", hypernetwork_name)
model_hypernetwork = model.clone()
patch = load_hypernetwork_patch(hypernetwork_path, strength)
if patch is not None:
model_hypernetwork.set_model_attn1_patch(patch)
model_hypernetwork.set_model_attn2_patch(patch)
return (model_hypernetwork,)
NODE_CLASS_MAPPINGS = {
"HypernetworkLoader": HypernetworkLoader
}


@ -40,15 +40,13 @@ def get_input_data(inputs, class_def, unique_id, outputs={}, prompt={}, extra_da
input_data_all[x] = unique_id
return input_data_all
def recursive_execute(server, prompt, outputs, current_item, extra_data={}):
def recursive_execute(server, prompt, outputs, current_item, extra_data, executed):
unique_id = current_item
inputs = prompt[unique_id]['inputs']
class_type = prompt[unique_id]['class_type']
class_def = nodes.NODE_CLASS_MAPPINGS[class_type]
if unique_id in outputs:
return []
executed = []
return
for x in inputs:
input_data = inputs[x]
@ -57,7 +55,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}):
input_unique_id = input_data[0]
output_index = input_data[1]
if input_unique_id not in outputs:
executed += recursive_execute(server, prompt, outputs, input_unique_id, extra_data)
recursive_execute(server, prompt, outputs, input_unique_id, extra_data, executed)
input_data_all = get_input_data(inputs, class_def, unique_id, outputs, prompt, extra_data)
if server.client_id is not None:
@ -72,7 +70,7 @@ def recursive_execute(server, prompt, outputs, current_item, extra_data={}):
server.send_sync("executed", { "node": unique_id, "output": outputs[unique_id]["ui"] }, server.client_id)
if "result" in outputs[unique_id]:
outputs[unique_id] = outputs[unique_id]["result"]
return executed + [unique_id]
executed.add(unique_id)
def recursive_will_execute(prompt, outputs, current_item):
unique_id = current_item
@ -158,7 +156,7 @@ class PromptExecutor:
recursive_output_delete_if_changed(prompt, self.old_prompt, self.outputs, x)
current_outputs = set(self.outputs.keys())
executed = []
executed = set()
try:
to_execute = []
for x in prompt:
@ -181,12 +179,12 @@ class PromptExecutor:
except:
valid = False
if valid:
executed += recursive_execute(self.server, prompt, self.outputs, x, extra_data)
recursive_execute(self.server, prompt, self.outputs, x, extra_data, executed)
except Exception as e:
print(traceback.format_exc())
to_delete = []
for o in self.outputs:
if o not in current_outputs:
if (o not in current_outputs) and (o not in executed):
to_delete += [o]
if o in self.old_prompt:
d = self.old_prompt.pop(o)
@ -194,11 +192,9 @@ class PromptExecutor:
for o in to_delete:
d = self.outputs.pop(o)
del d
else:
executed = set(executed)
finally:
for x in executed:
self.old_prompt[x] = copy.deepcopy(prompt[x])
finally:
self.server.last_node_id = None
if self.server.client_id is not None:
self.server.send_sync("executing", { "node": None }, self.server.client_id)
@ -249,9 +245,15 @@ def validate_inputs(prompt, item):
if "max" in info[1] and val > info[1]["max"]:
return (False, "Value bigger than max. {}, {}".format(class_type, x))
if isinstance(type_input, list):
if val not in type_input:
return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input))
if hasattr(obj_class, "VALIDATE_INPUTS"):
input_data_all = get_input_data(inputs, obj_class, unique_id)
ret = obj_class.VALIDATE_INPUTS(**input_data_all)
if ret != True:
return (False, "{}, {}".format(class_type, ret))
else:
if isinstance(type_input, list):
if val not in type_input:
return (False, "Value not in list. {}, {}: {} not in {}".format(class_type, x, val, type_input))
return (True, "")
def validate_prompt(prompt):
@ -273,7 +275,8 @@ def validate_prompt(prompt):
m = validate_inputs(prompt, o)
valid = m[0]
reason = m[1]
except:
except Exception as e:
print(traceback.format_exc())
valid = False
reason = "Parsing error"
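
The validation change above lets a node class define `VALIDATE_INPUTS`: it is called with the node's input values, returning `True` accepts them, and any other return value is reported as the failure reason. A sketch with a hypothetical node (compare `LoadImage.VALIDATE_INPUTS` further down):
```
class ExampleNode:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"divisor": ("INT", {"default": 8, "min": 1, "max": 64})}}

    RETURN_TYPES = ("INT",)
    FUNCTION = "run"
    CATEGORY = "utils"

    @classmethod
    def VALIDATE_INPUTS(s, divisor):
        if 64 % divisor != 0:
            return "divisor must evenly divide 64, got {}".format(divisor)
        return True

    def run(self, divisor):
        return (64 // divisor,)
```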


@ -13,11 +13,13 @@ a111:
models/ESRGAN
models/SwinIR
embeddings: embeddings
hypernetworks: models/hypernetworks
controlnet: models/ControlNet
#other_ui:
# base_path: path/to/ui
# checkpoints: models/checkpoints
# gligen: models/gligen
# custom_nodes: path/custom_nodes


@ -26,10 +26,13 @@ folder_names_and_paths["embeddings"] = ([os.path.join(models_dir, "embeddings")]
folder_names_and_paths["diffusers"] = ([os.path.join(models_dir, "diffusers")], ["folder"])
folder_names_and_paths["controlnet"] = ([os.path.join(models_dir, "controlnet"), os.path.join(models_dir, "t2i_adapter")], supported_pt_extensions)
folder_names_and_paths["gligen"] = ([os.path.join(models_dir, "gligen")], supported_pt_extensions)
folder_names_and_paths["upscale_models"] = ([os.path.join(models_dir, "upscale_models")], supported_pt_extensions)
folder_names_and_paths["custom_nodes"] = ([os.path.join(base_path, "custom_nodes")], [])
folder_names_and_paths["hypernetworks"] = ([os.path.join(models_dir, "hypernetworks")], supported_pt_extensions)
output_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "output")
temp_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp")
@ -66,6 +69,46 @@ def get_directory_by_type(type_name):
return None
# determine base_dir based on the annotation if name is in 'filename.ext [annotation]' format
# otherwise use default_dir as base_dir
def annotated_filepath(name):
if name.endswith("[output]"):
base_dir = get_output_directory()
name = name[:-9]
elif name.endswith("[input]"):
base_dir = get_input_directory()
name = name[:-8]
elif name.endswith("[temp]"):
base_dir = get_temp_directory()
name = name[:-7]
else:
return name, None
return name, base_dir
def get_annotated_filepath(name, default_dir=None):
name, base_dir = annotated_filepath(name)
if base_dir is None:
if default_dir is not None:
base_dir = default_dir
else:
base_dir = get_input_directory() # fallback path
return os.path.join(base_dir, name)
def exists_annotated_filepath(name):
name, base_dir = annotated_filepath(name)
if base_dir is None:
base_dir = get_input_directory() # fallback path
filepath = os.path.join(base_dir, name)
return os.path.exists(filepath)
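# Illustration (assumed file name) of the annotated-filename convention handled
# by the two helpers above:
#   get_annotated_filepath("photo.png [output]")  ->  <output dir>/photo.png
#   get_annotated_filepath("photo.png [temp]")    ->  <temp dir>/photo.png
#   get_annotated_filepath("photo.png")           ->  <default or input dir>/photo.png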
def add_model_folder_path(folder_name, full_folder_path):
global folder_names_and_paths
if folder_name in folder_names_and_paths:


nodes.py (173 lines changed)

@ -16,6 +16,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "co
import comfy.diffusers_convert
import comfy.samplers
import comfy.sample
import comfy.sd
import comfy.utils
@ -171,24 +172,24 @@ class VAEEncodeForInpaint:
def encode(self, vae, pixels, mask):
x = (pixels.shape[1] // 64) * 64
y = (pixels.shape[2] // 64) * 64
mask = torch.nn.functional.interpolate(mask[None,None,], size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")[0][0]
mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(pixels.shape[1], pixels.shape[2]), mode="bilinear")
pixels = pixels.clone()
if pixels.shape[1] != x or pixels.shape[2] != y:
pixels = pixels[:,:x,:y,:]
mask = mask[:x,:y]
mask = mask[:,:,:x,:y]
#grow mask by a few pixels to keep things seamless in latent space
kernel_tensor = torch.ones((1, 1, 6, 6))
mask_erosion = torch.clamp(torch.nn.functional.conv2d((mask.round())[None], kernel_tensor, padding=3), 0, 1)
m = (1.0 - mask.round())
mask_erosion = torch.clamp(torch.nn.functional.conv2d(mask.round(), kernel_tensor, padding=3), 0, 1)
m = (1.0 - mask.round()).squeeze(1)
for i in range(3):
pixels[:,:,:,i] -= 0.5
pixels[:,:,:,i] *= m
pixels[:,:,:,i] += 0.5
t = vae.encode(pixels)
return ({"samples":t, "noise_mask": (mask_erosion[0][:x,:y].round())}, )
return ({"samples":t, "noise_mask": (mask_erosion[:,:,:x,:y].round())}, )
class CheckpointLoader:
@classmethod
@ -490,6 +491,51 @@ class unCLIPConditioning:
c.append(n)
return (c, )
class GLIGENLoader:
@classmethod
def INPUT_TYPES(s):
return {"required": { "gligen_name": (folder_paths.get_filename_list("gligen"), )}}
RETURN_TYPES = ("GLIGEN",)
FUNCTION = "load_gligen"
CATEGORY = "loaders"
def load_gligen(self, gligen_name):
gligen_path = folder_paths.get_full_path("gligen", gligen_name)
gligen = comfy.sd.load_gligen(gligen_path)
return (gligen,)
class GLIGENTextBoxApply:
@classmethod
def INPUT_TYPES(s):
return {"required": {"conditioning_to": ("CONDITIONING", ),
"clip": ("CLIP", ),
"gligen_textbox_model": ("GLIGEN", ),
"text": ("STRING", {"multiline": True}),
"width": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
"height": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
"x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
"y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
}}
RETURN_TYPES = ("CONDITIONING",)
FUNCTION = "append"
CATEGORY = "conditioning/gligen"
def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y):
c = []
cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True)
for t in conditioning_to:
n = [t[0], t[1].copy()]
position_params = [(cond_pooled, height // 8, width // 8, y // 8, x // 8)]
prev = []
if "gligen" in n[1]:
prev = n[1]['gligen'][2]
n[1]['gligen'] = ("position", gligen_textbox_model, prev + position_params)
c.append(n)
return (c, )
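# Worked example (assumed 512x512 image) of the parameters stored above: with
# width=128, height=64, x=256, y=0, the tuple appended to 'gligen' is
# (cond_pooled, 64 // 8, 128 // 8, 0 // 8, 256 // 8) == (cond_pooled, 8, 16, 0, 32);
# comfy/gligen.py later normalizes these latent-space boxes against the latent size.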
class EmptyLatentImage:
def __init__(self, device="cpu"):
@ -510,6 +556,24 @@ class EmptyLatentImage:
return ({"samples":latent}, )
class LatentFromBatch:
@classmethod
def INPUT_TYPES(s):
return {"required": { "samples": ("LATENT",),
"batch_index": ("INT", {"default": 0, "min": 0, "max": 63}),
}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "rotate"
CATEGORY = "latent"
def rotate(self, samples, batch_index):
s = samples.copy()
s_in = samples["samples"]
batch_index = min(s_in.shape[0] - 1, batch_index)
s["samples"] = s_in[batch_index:batch_index + 1].clone()
s["batch_index"] = batch_index
return (s,)
class LatentUpscale:
upscale_methods = ["nearest-exact", "bilinear", "area"]
@ -676,69 +740,23 @@ class SetLatentNoiseMask:
s["noise_mask"] = mask
return (s,)
def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
latent_image = latent["samples"]
noise_mask = None
device = comfy.model_management.get_torch_device()
latent_image = latent["samples"]
if disable_noise:
noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
else:
noise = torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=torch.manual_seed(seed), device="cpu")
skip = latent["batch_index"] if "batch_index" in latent else 0
noise = comfy.sample.prepare_noise(latent_image, seed, skip)
noise_mask = None
if "noise_mask" in latent:
noise_mask = latent['noise_mask']
noise_mask = torch.nn.functional.interpolate(noise_mask[None,None,], size=(noise.shape[2], noise.shape[3]), mode="bilinear")
noise_mask = noise_mask.round()
noise_mask = torch.cat([noise_mask] * noise.shape[1], dim=1)
noise_mask = torch.cat([noise_mask] * noise.shape[0])
noise_mask = noise_mask.to(device)
real_model = None
comfy.model_management.load_model_gpu(model)
real_model = model.model
noise = noise.to(device)
latent_image = latent_image.to(device)
positive_copy = []
negative_copy = []
control_nets = []
for p in positive:
t = p[0]
if t.shape[0] < noise.shape[0]:
t = torch.cat([t] * noise.shape[0])
t = t.to(device)
if 'control' in p[1]:
control_nets += [p[1]['control']]
positive_copy += [[t] + p[1:]]
for n in negative:
t = n[0]
if t.shape[0] < noise.shape[0]:
t = torch.cat([t] * noise.shape[0])
t = t.to(device)
if 'control' in n[1]:
control_nets += [n[1]['control']]
negative_copy += [[t] + n[1:]]
control_net_models = []
for x in control_nets:
control_net_models += x.get_control_models()
comfy.model_management.load_controlnet_gpu(control_net_models)
if sampler_name in comfy.samplers.KSampler.SAMPLERS:
sampler = comfy.samplers.KSampler(real_model, steps=steps, device=device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
else:
#other samplers
pass
samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask)
samples = samples.cpu()
for c in control_nets:
c.cleanup()
noise_mask = latent["noise_mask"]
samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step,
force_full_denoise=force_full_denoise, noise_mask=noise_mask)
out = latent.copy()
out["samples"] = samples
return (out, )
@ -901,8 +919,7 @@ class LoadImage:
RETURN_TYPES = ("IMAGE", "MASK")
FUNCTION = "load_image"
def load_image(self, image):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
i = Image.open(image_path)
image = i.convert("RGB")
image = np.array(image).astype(np.float32) / 255.0
@ -916,20 +933,27 @@ class LoadImage:
@classmethod
def IS_CHANGED(s, image):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
m = hashlib.sha256()
with open(image_path, 'rb') as f:
m.update(f.read())
return m.digest().hex()
@classmethod
def VALIDATE_INPUTS(s, image):
if not folder_paths.exists_annotated_filepath(image):
return "Invalid image file: {}".format(image)
return True
class LoadImageMask:
_color_channels = ["alpha", "red", "green", "blue"]
@classmethod
def INPUT_TYPES(s):
input_dir = folder_paths.get_input_directory()
return {"required":
{"image": (sorted(os.listdir(input_dir)), ),
"channel": (["alpha", "red", "green", "blue"], ),}
"channel": (s._color_channels, ),}
}
CATEGORY = "mask"
@ -937,8 +961,7 @@ class LoadImageMask:
RETURN_TYPES = ("MASK",)
FUNCTION = "load_image"
def load_image(self, image, channel):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
i = Image.open(image_path)
if i.getbands() != ("R", "G", "B", "A"):
i = i.convert("RGBA")
@ -955,13 +978,22 @@ class LoadImageMask:
@classmethod
def IS_CHANGED(s, image, channel):
input_dir = folder_paths.get_input_directory()
image_path = os.path.join(input_dir, image)
image_path = folder_paths.get_annotated_filepath(image)
m = hashlib.sha256()
with open(image_path, 'rb') as f:
m.update(f.read())
return m.digest().hex()
@classmethod
def VALIDATE_INPUTS(s, image, channel):
if not folder_paths.exists_annotated_filepath(image):
return "Invalid image file: {}".format(image)
if channel not in s._color_channels:
return "Invalid color channel: {}".format(channel)
return True
class ImageScale:
upscale_methods = ["nearest-exact", "bilinear", "area"]
crop_methods = ["disabled", "center"]
@ -1073,6 +1105,7 @@ NODE_CLASS_MAPPINGS = {
"VAELoader": VAELoader,
"EmptyLatentImage": EmptyLatentImage,
"LatentUpscale": LatentUpscale,
"LatentFromBatch": LatentFromBatch,
"SaveImage": SaveImage,
"PreviewImage": PreviewImage,
"LoadImage": LoadImage,
@ -1102,6 +1135,9 @@ NODE_CLASS_MAPPINGS = {
"VAEEncodeTiled": VAEEncodeTiled,
"TomePatchModel": TomePatchModel,
"unCLIPCheckpointLoader": unCLIPCheckpointLoader,
"GLIGENLoader": GLIGENLoader,
"GLIGENTextBoxApply": GLIGENTextBoxApply,
"CheckpointLoader": CheckpointLoader,
"DiffusersLoader": DiffusersLoader,
}
@ -1191,6 +1227,7 @@ def load_custom_nodes():
def init_custom_nodes():
load_custom_nodes()
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_hypernetwork.py"))
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_upscale_model.py"))
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_post_processing.py"))
load_custom_node(os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy_extras"), "nodes_mask.py"))


@ -138,6 +138,11 @@
"# Controlnet Preprocessor nodes by Fannovel16\n",
"#!cd custom_nodes && git clone https://github.com/Fannovel16/comfy_controlnet_preprocessors; cd comfy_controlnet_preprocessors && python install.py\n",
"\n",
"\n",
"# GLIGEN\n",
"#!wget -c https://huggingface.co/comfyanonymous/GLIGEN_pruned_safetensors/resolve/main/gligen_sd14_textbox_pruned_fp16.safetensors -P ./models/gligen/\n",
"\n",
"\n",
"# ESRGAN upscale model\n",
"#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth -P ./models/upscale_models/\n",
"#!wget -c https://huggingface.co/sberbank-ai/Real-ESRGAN/resolve/main/RealESRGAN_x4.pth -P ./models/upscale_models/\n",


@ -112,13 +112,20 @@ class PromptServer():
@routes.post("/upload/image")
async def upload_image(request):
upload_dir = folder_paths.get_input_directory()
post = await request.post()
image = post.get("image")
if post.get("type") is None:
upload_dir = folder_paths.get_input_directory()
elif post.get("type") == "input":
upload_dir = folder_paths.get_input_directory()
elif post.get("type") == "temp":
upload_dir = folder_paths.get_temp_directory()
elif post.get("type") == "output":
upload_dir = folder_paths.get_output_directory()
if not os.path.exists(upload_dir):
os.makedirs(upload_dir)
post = await request.post()
image = post.get("image")
if image and image.file:
filename = image.filename


@ -0,0 +1,144 @@
import { app } from "/scripts/app.js";
// Allows you to edit the attention weight by holding ctrl (or cmd) and using the up/down arrow keys
app.registerExtension({
name: "Comfy.EditAttention",
init() {
const editAttentionDelta = app.ui.settings.addSetting({
id: "Comfy.EditAttention.Delta",
name: "Ctrl+up/down precision",
type: "slider",
attrs: {
min: 0.01,
max: 0.5,
step: 0.01,
},
defaultValue: 0.05,
});
function incrementWeight(weight, delta) {
const floatWeight = parseFloat(weight);
if (isNaN(floatWeight)) return weight;
const newWeight = floatWeight + delta;
if (newWeight < 0) return "0";
return String(Number(newWeight.toFixed(10)));
}
function findNearestEnclosure(text, cursorPos) {
let start = cursorPos, end = cursorPos;
let openCount = 0, closeCount = 0;
// Find opening parenthesis before cursor
while (start >= 0) {
start--;
if (text[start] === "(" && openCount === closeCount) break;
if (text[start] === "(") openCount++;
if (text[start] === ")") closeCount++;
}
if (start < 0) return false;
openCount = 0;
closeCount = 0;
// Find closing parenthesis after cursor
while (end < text.length) {
if (text[end] === ")" && openCount === closeCount) break;
if (text[end] === "(") openCount++;
if (text[end] === ")") closeCount++;
end++;
}
if (end === text.length) return false;
return { start: start + 1, end: end };
}
function addWeightToParentheses(text) {
const parenRegex = /^\((.*)\)$/;
const parenMatch = text.match(parenRegex);
const floatRegex = /:([+-]?(\d*\.)?\d+([eE][+-]?\d+)?)/;
const floatMatch = text.match(floatRegex);
if (parenMatch && !floatMatch) {
return `(${parenMatch[1]}:1.0)`;
} else {
return text;
}
};
function editAttention(event) {
const inputField = event.composedPath()[0];
const delta = parseFloat(editAttentionDelta.value);
if (inputField.tagName !== "TEXTAREA") return;
if (!(event.key === "ArrowUp" || event.key === "ArrowDown")) return;
if (!event.ctrlKey && !event.metaKey) return;
event.preventDefault();
let start = inputField.selectionStart;
let end = inputField.selectionEnd;
let selectedText = inputField.value.substring(start, end);
// If there is no selection, attempt to find the nearest enclosure, or select the current word
if (!selectedText) {
const nearestEnclosure = findNearestEnclosure(inputField.value, start);
if (nearestEnclosure) {
start = nearestEnclosure.start;
end = nearestEnclosure.end;
selectedText = inputField.value.substring(start, end);
} else {
// Select the current word, find the start and end of the word
const delimiters = " .,\\/!?%^*;:{}=-_`~()\r\n\t";
while (!delimiters.includes(inputField.value[start - 1]) && start > 0) {
start--;
}
while (!delimiters.includes(inputField.value[end]) && end < inputField.value.length) {
end++;
}
selectedText = inputField.value.substring(start, end);
if (!selectedText) return;
}
}
// If the selection ends with a space, remove it
if (selectedText[selectedText.length - 1] === " ") {
selectedText = selectedText.substring(0, selectedText.length - 1);
end -= 1;
}
// If there are parentheses left and right of the selection, select them
if (inputField.value[start - 1] === "(" && inputField.value[end] === ")") {
start -= 1;
end += 1;
selectedText = inputField.value.substring(start, end);
}
// If the selection is not enclosed in parentheses, add them
if (selectedText[0] !== "(" || selectedText[selectedText.length - 1] !== ")") {
selectedText = `(${selectedText})`;
}
// If the selection does not have a weight, add a weight of 1.0
selectedText = addWeightToParentheses(selectedText);
// Increment the weight
const weightDelta = event.key === "ArrowUp" ? delta : -delta;
const updatedText = selectedText.replace(/\((.*):(\d+(?:\.\d+)?)\)/, (match, text, weight) => {
weight = incrementWeight(weight, weightDelta);
if (weight == 1) {
return text;
} else {
return `(${text}:${weight})`;
}
});
inputField.setRangeText(updatedText, start, end, "select");
}
window.addEventListener("keydown", editAttention);
},
});

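Not part of the extension code: a minimal sketch of the transformation a single Ctrl+Up press performs, assuming the default 0.05 delta and a selection that the helpers above have already wrapped and weighted.

// Hedged illustration: the same replace pattern the handler uses, applied to a sample selection.
const selection = "(cat:1.0)"; // e.g. after "cat" was selected and addWeightToParentheses ran
const bumped = selection.replace(/\((.*):(\d+(?:\.\d+)?)\)/, (match, text, weight) =>
    `(${text}:${(parseFloat(weight) + 0.05).toFixed(2)})`);
console.log(bumped); // "(cat:1.05)"; Ctrl+Down would subtract the delta instead

The real handler goes two steps further: incrementWeight trims floating-point noise, and the parentheses are dropped entirely once the weight comes back to 1.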
View File

@@ -5,12 +5,6 @@ app.registerExtension({
name: id,
init() {
const keybindListener = function(event) {
const target = event.composedPath()[0];
if (target.tagName === "INPUT" || target.tagName === "TEXTAREA") {
return;
}
const modifierPressed = event.ctrlKey || event.metaKey;
// Queue prompt using ctrl or command + enter
@@ -19,6 +13,12 @@ app.registerExtension({
return;
}
const target = event.composedPath()[0];
if (target.tagName === "INPUT" || target.tagName === "TEXTAREA") {
return;
}
const modifierKeyIdMap = {
"s": "#comfy-save-button",
83: "#comfy-save-button",

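The hunk cuts off after the first two modifierKeyIdMap entries, and the code that consumes the map is outside it, so the dispatch below is only an assumed sketch of the usual pattern: Ctrl or Cmd plus a mapped key clicks the corresponding button.

// Assumed dispatch for illustration only; the extension's real handler is not shown in this hunk.
const modifierKeyIdMap = { s: "#comfy-save-button", 83: "#comfy-save-button" };
function handleShortcut(event) {
    if (!(event.ctrlKey || event.metaKey)) return;
    const selector = modifierKeyIdMap[event.key.toLowerCase()] || modifierKeyIdMap[event.keyCode];
    if (selector) {
        event.preventDefault();
        document.querySelector(selector)?.click(); // e.g. Ctrl+S triggers the Save button
    }
}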
View File

@@ -1,21 +1,72 @@
import { app } from "/scripts/app.js";
import { ComfyWidgets } from "/scripts/widgets.js";
// Adds defaults for quickly adding nodes with middle click on the input/output
app.registerExtension({
name: "Comfy.SlotDefaults",
suggestionsNumber: null,
init() {
LiteGraph.middle_click_slot_add_default_node = true;
LiteGraph.slot_types_default_in = {
MODEL: "CheckpointLoaderSimple",
LATENT: "EmptyLatentImage",
VAE: "VAELoader",
};
LiteGraph.slot_types_default_out = {
LATENT: "VAEDecode",
IMAGE: "SaveImage",
CLIP: "CLIPTextEncode",
};
this.suggestionsNumber = app.ui.settings.addSetting({
id: "Comfy.NodeSuggestions.number",
name: "number of nodes suggestions",
type: "slider",
attrs: {
min: 1,
max: 100,
step: 1,
},
defaultValue: 5,
onChange: (newVal, oldVal) => {
this.setDefaults(newVal);
}
});
},
slot_types_default_out: {},
slot_types_default_in: {},
async beforeRegisterNodeDef(nodeType, nodeData, app) {
var nodeId = nodeData.name;
var inputs = [];
inputs = nodeData["input"]["required"]; //only show required inputs to reduce the mess also not logical to create node with optional inputs
for (const inputKey in inputs) {
var input = (inputs[inputKey]);
if (typeof input[0] !== "string") continue;
var type = input[0]
if (type in ComfyWidgets) {
var customProperties = input[1]
if (!(customProperties?.forceInput)) continue; //ignore widgets that don't force input
}
if (!(type in this.slot_types_default_out)) {
this.slot_types_default_out[type] = ["Reroute"];
}
if (this.slot_types_default_out[type].includes(nodeId)) continue;
this.slot_types_default_out[type].push(nodeId);
}
var outputs = nodeData["output"];
for (const key in outputs) {
var type = outputs[key];
if (!(type in this.slot_types_default_in)) {
this.slot_types_default_in[type] = ["Reroute"];// ["Reroute", "Primitive"]; primitive doesn't always work :'()
}
this.slot_types_default_in[type].push(nodeId);
}
var maxNum = this.suggestionsNumber.value;
this.setDefaults(maxNum);
},
setDefaults(maxNum) {
LiteGraph.slot_types_default_out = {};
LiteGraph.slot_types_default_in = {};
for (const type in this.slot_types_default_out) {
LiteGraph.slot_types_default_out[type] = this.slot_types_default_out[type].slice(0, maxNum);
}
for (const type in this.slot_types_default_in) {
LiteGraph.slot_types_default_in[type] = this.slot_types_default_in[type].slice(0, maxNum);
}
}
});

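For orientation (not part of the extension): after beforeRegisterNodeDef has run for the registered nodes and setDefaults has trimmed the lists, LiteGraph ends up with maps shaped roughly like the sketch below. The entries are examples; the real contents depend on the installed node definitions and the "number of nodes suggestions" setting.

// Illustrative shape only; the lists are examples, not the exhaustive result.
LiteGraph.slot_types_default_out = {
    LATENT: ["Reroute", "VAEDecode"], // dragging from a LATENT output suggests nodes with a required LATENT input
};
LiteGraph.slot_types_default_in = {
    LATENT: ["Reroute", "EmptyLatentImage"], // dragging from a LATENT input suggests nodes that produce a LATENT
};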
View File

@@ -9,7 +9,7 @@ app.registerExtension({
app.ui.settings.addSetting({
id: "Comfy.SnapToGrid.GridSize",
name: "Grid Size",
type: "number",
type: "slider",
attrs: {
min: 1,
max: 500,

View File

@@ -35,7 +35,7 @@ class ComfyApi extends EventTarget {
}
let opened = false;
let existingSession = sessionStorage["Comfy.SessionId"] || "";
let existingSession = window.name;
if (existingSession) {
existingSession = "?clientId=" + existingSession;
}
@@ -75,7 +75,7 @@ class ComfyApi extends EventTarget {
case "status":
if (msg.data.sid) {
this.clientId = msg.data.sid;
sessionStorage["Comfy.SessionId"] = this.clientId;
window.name = this.clientId;
}
this.dispatchEvent(new CustomEvent("status", { detail: msg.data.status }));
break;

View File

@@ -20,6 +20,12 @@ export class ComfyApp {
*/
#processingQueue = false;
/**
* Content Clipboard
* @type {serialized node object}
*/
static clipspace = null;
constructor() {
this.ui = new ComfyUI(this);
@@ -130,6 +136,83 @@ export class ComfyApp {
);
}
}
options.push(
{
content: "Copy (Clipspace)",
callback: (obj) => {
var widgets = null;
if(this.widgets) {
widgets = this.widgets.map(({ type, name, value }) => ({ type, name, value }));
}
let img = new Image();
var imgs = undefined;
if(this.imgs != undefined) {
img.src = this.imgs[0].src;
imgs = [img];
}
ComfyApp.clipspace = {
'widgets': widgets,
'imgs': imgs,
'original_imgs': imgs,
'images': this.images
};
}
});
if(ComfyApp.clipspace != null) {
options.push(
{
content: "Paste (Clipspace)",
callback: () => {
if(ComfyApp.clipspace != null) {
if(ComfyApp.clipspace.widgets != null && this.widgets != null) {
ComfyApp.clipspace.widgets.forEach(({ type, name, value }) => {
const prop = Object.values(this.widgets).find(obj => obj.type === type && obj.name === name);
if (prop) {
prop.callback(value);
}
});
}
// image paste
if(ComfyApp.clipspace.imgs != undefined && this.imgs != undefined && this.widgets != null) {
var filename = "";
if(this.images && ComfyApp.clipspace.images) {
this.images = ComfyApp.clipspace.images;
}
if(ComfyApp.clipspace.images != undefined) {
const clip_image = ComfyApp.clipspace.images[0];
if(clip_image.subfolder != '')
filename = `${clip_image.subfolder}/`;
filename += `${clip_image.filename} [${clip_image.type}]`;
}
else if(ComfyApp.clipspace.widgets != undefined) {
const index_in_clip = ComfyApp.clipspace.widgets.findIndex(obj => obj.name === 'image');
if(index_in_clip >= 0) {
filename = `${ComfyApp.clipspace.widgets[index_in_clip].value}`;
}
}
const index = this.widgets.findIndex(obj => obj.name === 'image');
if(index >= 0 && filename != "" && ComfyApp.clipspace.imgs != undefined) {
this.imgs = ComfyApp.clipspace.imgs;
this.widgets[index].value = filename;
if(this.widgets_values != undefined) {
this.widgets_values[index] = filename;
}
}
}
this.trigger('changed');
}
}
}
);
}
};
}

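For reference, a sketch (filenames and values assumed) of what ComfyApp.clipspace holds after "Copy (Clipspace)" on an image node; the keys mirror the copy callback above, and the preview URL shape follows the image widget code elsewhere in this commit.

// Illustrative contents only; every concrete value here is assumed.
const img = new Image();
img.src = "/view?filename=example.png&type=input";
ComfyApp.clipspace = {
    widgets: [{ type: "combo", name: "image", value: "example.png" }],
    imgs: [img],
    original_imgs: [img],
    images: [{ filename: "example.png", subfolder: "", type: "input" }],
};
// Pasting onto another image node then rewrites its "image" widget to "example.png [input]"
// and reuses the copied preview, per the paste callback above.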
View File

@@ -270,6 +270,30 @@ class ComfySettingsDialog extends ComfyDialog {
]),
]);
break;
case "slider":
element = $el("div", [
$el("label", { textContent: name }, [
$el("input", {
type: "range",
value,
oninput: (e) => {
setter(e.target.value);
e.target.nextElementSibling.value = e.target.value;
},
...attrs
}),
$el("input", {
type: "number",
value,
oninput: (e) => {
setter(e.target.value);
e.target.previousElementSibling.value = e.target.value;
},
...attrs
}),
]),
]);
break;
default:
console.warn("Unsupported setting type, defaulting to text");
element = $el("div", [

View File

@@ -272,6 +272,9 @@ export const ComfyWidgets = {
app.graph.setDirtyCanvas(true);
};
img.src = `/view?filename=${name}&type=input`;
if ((node.size[1] - node.imageOffset) < 100) {
node.size[1] = 250 + node.imageOffset;
}
}
// Add our own callback to the combo widget to render an image when it changes

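Worked numbers for the resize guard above (figures assumed for illustration): with node.imageOffset at 70 and a node height of 120, the visible image area is 120 - 70 = 50 px, which is under the 100 px threshold, so the height is reset to 250 + 70 = 320 px.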
View File

@@ -217,6 +217,14 @@ button.comfy-queue-btn {
z-index: 99;
}
.comfy-modal.comfy-settings input[type="range"] {
vertical-align: middle;
}
.comfy-modal.comfy-settings input[type="range"] + input[type="number"] {
width: 3.5em;
}
.comfy-modal input,
.comfy-modal select {
color: var(--input-text);