Merge branch 'master' of github.com:comfyanonymous/ComfyUI

2026-07-18 12:28:17 +08:00 · 2024-04-29 13:37:03 -07:00 · 2024-04-29 13:37:03 -07:00 · 0862863bc0
commit 0862863bc0
parent 46a712b4d6 059773a6df
4 changed files with 131 additions and 3 deletions
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@ -37,7 +37,7 @@ def get_area_and_mult(conds, x_in, timestep_in):
        mask = conds['mask']
        assert(mask.shape[1] == x_in.shape[2])
        assert(mask.shape[2] == x_in.shape[3])
-        mask = mask[:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength
+        mask = mask[:input_x.shape[0],area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength
        mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1)
    else:
        mask = torch.ones_like(input_x)
--- a/comfy_extras/nodes/nodes_align_your_steps.py
+++ b/comfy_extras/nodes/nodes_align_your_steps.py
@ -25,6 +25,7 @@ class AlignYourStepsScheduler:
        return {"required":
                    {"model_type": (["SD1", "SDXL", "SVD"], ),
                     "steps": ("INT", {"default": 10, "min": 10, "max": 10000}),
+                     "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
                      }
               }
    RETURN_TYPES = ("SIGMAS",)
@ -32,11 +33,18 @@ class AlignYourStepsScheduler:

    FUNCTION = "get_sigmas"

-    def get_sigmas(self, model_type, steps):
+    def get_sigmas(self, model_type, steps, denoise):
+        total_steps = steps
+        if denoise < 1.0:
+            if denoise <= 0.0:
+                return (torch.FloatTensor([]),)
+            total_steps = round(steps * denoise)
+
        sigmas = NOISE_LEVELS[model_type][:]
        if (steps + 1) != len(sigmas):
            sigmas = loglinear_interp(sigmas, steps + 1)

+        sigmas = sigmas[-(total_steps + 1):]
        sigmas[-1] = 0
        return (torch.FloatTensor(sigmas), )

--- a/comfy_extras/nodes/nodes_attention_multiply.py
+++ b/comfy_extras/nodes/nodes_attention_multiply.py
@ -0,0 +1,120 @@
+
+def attention_multiply(attn, model, q, k, v, out):
+    m = model.clone()
+    sd = model.model_state_dict()
+
+    for key in sd:
+        if key.endswith("{}.to_q.bias".format(attn)) or key.endswith("{}.to_q.weight".format(attn)):
+            m.add_patches({key: (None,)}, 0.0, q)
+        if key.endswith("{}.to_k.bias".format(attn)) or key.endswith("{}.to_k.weight".format(attn)):
+            m.add_patches({key: (None,)}, 0.0, k)
+        if key.endswith("{}.to_v.bias".format(attn)) or key.endswith("{}.to_v.weight".format(attn)):
+            m.add_patches({key: (None,)}, 0.0, v)
+        if key.endswith("{}.to_out.0.bias".format(attn)) or key.endswith("{}.to_out.0.weight".format(attn)):
+            m.add_patches({key: (None,)}, 0.0, out)
+
+    return m
+
+
+class UNetSelfAttentionMultiply:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                              "q": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "k": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "v": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "_for_testing/attention_experiments"
+
+    def patch(self, model, q, k, v, out):
+        m = attention_multiply("attn1", model, q, k, v, out)
+        return (m, )
+
+class UNetCrossAttentionMultiply:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                              "q": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "k": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "v": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "_for_testing/attention_experiments"
+
+    def patch(self, model, q, k, v, out):
+        m = attention_multiply("attn2", model, q, k, v, out)
+        return (m, )
+
+class CLIPAttentionMultiply:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "clip": ("CLIP",),
+                              "q": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "k": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "v": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("CLIP",)
+    FUNCTION = "patch"
+
+    CATEGORY = "_for_testing/attention_experiments"
+
+    def patch(self, clip, q, k, v, out):
+        m = clip.clone()
+        sd = m.patcher.model_state_dict()
+
+        for key in sd:
+            if key.endswith("self_attn.q_proj.weight") or key.endswith("self_attn.q_proj.bias"):
+                m.add_patches({key: (None,)}, 0.0, q)
+            if key.endswith("self_attn.k_proj.weight") or key.endswith("self_attn.k_proj.bias"):
+                m.add_patches({key: (None,)}, 0.0, k)
+            if key.endswith("self_attn.v_proj.weight") or key.endswith("self_attn.v_proj.bias"):
+                m.add_patches({key: (None,)}, 0.0, v)
+            if key.endswith("self_attn.out_proj.weight") or key.endswith("self_attn.out_proj.bias"):
+                m.add_patches({key: (None,)}, 0.0, out)
+        return (m, )
+
+class UNetTemporalAttentionMultiply:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "model": ("MODEL",),
+                              "self_structural": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "self_temporal": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "cross_structural": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              "cross_temporal": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                              }}
+    RETURN_TYPES = ("MODEL",)
+    FUNCTION = "patch"
+
+    CATEGORY = "_for_testing/attention_experiments"
+
+    def patch(self, model, self_structural, self_temporal, cross_structural, cross_temporal):
+        m = model.clone()
+        sd = model.model_state_dict()
+
+        for k in sd:
+            if (k.endswith("attn1.to_out.0.bias") or k.endswith("attn1.to_out.0.weight")):
+                if '.time_stack.' in k:
+                    m.add_patches({k: (None,)}, 0.0, self_temporal)
+                else:
+                    m.add_patches({k: (None,)}, 0.0, self_structural)
+            elif (k.endswith("attn2.to_out.0.bias") or k.endswith("attn2.to_out.0.weight")):
+                if '.time_stack.' in k:
+                    m.add_patches({k: (None,)}, 0.0, cross_temporal)
+                else:
+                    m.add_patches({k: (None,)}, 0.0, cross_structural)
+        return (m, )
+
+NODE_CLASS_MAPPINGS = {
+    "UNetSelfAttentionMultiply": UNetSelfAttentionMultiply,
+    "UNetCrossAttentionMultiply": UNetCrossAttentionMultiply,
+    "CLIPAttentionMultiply": CLIPAttentionMultiply,
+    "UNetTemporalAttentionMultiply": UNetTemporalAttentionMultiply,
+}
--- a/comfy_extras/nodes/nodes_upscale_model.py
+++ b/comfy_extras/nodes/nodes_upscale_model.py
@ -42,7 +42,7 @@ class ImageUpscaleWithModel:
        device = model_management.get_torch_device()

        memory_required = model_management.module_size(upscale_model)
-        memory_required += (512 * 512 * 3) * image.element_size() * max(upscale_model.scale, 1.0) * 256.0  # The 256.0 is an estimate of how much some of these models take, TODO: make it more accurate
+        memory_required += (512 * 512 * 3) * image.element_size() * max(upscale_model.scale, 1.0) * 384.0  # The 384.0 is an estimate of how much some of these models take, TODO: make it more accurate
        memory_required += image.nelement() * image.element_size()
        model_management.free_memory(memory_required, device)