diff --git a/comfy/samplers.py b/comfy/samplers.py index 2a81ef43e..8df139efb 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -37,7 +37,7 @@ def get_area_and_mult(conds, x_in, timestep_in): mask = conds['mask'] assert(mask.shape[1] == x_in.shape[2]) assert(mask.shape[2] == x_in.shape[3]) - mask = mask[:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength + mask = mask[:input_x.shape[0],area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1) else: mask = torch.ones_like(input_x) diff --git a/comfy_extras/nodes/nodes_align_your_steps.py b/comfy_extras/nodes/nodes_align_your_steps.py index b59f6945b..3ffe53187 100644 --- a/comfy_extras/nodes/nodes_align_your_steps.py +++ b/comfy_extras/nodes/nodes_align_your_steps.py @@ -25,6 +25,7 @@ class AlignYourStepsScheduler: return {"required": {"model_type": (["SD1", "SDXL", "SVD"], ), "steps": ("INT", {"default": 10, "min": 10, "max": 10000}), + "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), } } RETURN_TYPES = ("SIGMAS",) @@ -32,11 +33,18 @@ class AlignYourStepsScheduler: FUNCTION = "get_sigmas" - def get_sigmas(self, model_type, steps): + def get_sigmas(self, model_type, steps, denoise): + total_steps = steps + if denoise < 1.0: + if denoise <= 0.0: + return (torch.FloatTensor([]),) + total_steps = round(steps * denoise) + sigmas = NOISE_LEVELS[model_type][:] if (steps + 1) != len(sigmas): sigmas = loglinear_interp(sigmas, steps + 1) + sigmas = sigmas[-(total_steps + 1):] sigmas[-1] = 0 return (torch.FloatTensor(sigmas), ) diff --git a/comfy_extras/nodes/nodes_attention_multiply.py b/comfy_extras/nodes/nodes_attention_multiply.py new file mode 100644 index 000000000..4747eb395 --- /dev/null +++ b/comfy_extras/nodes/nodes_attention_multiply.py @@ -0,0 +1,120 @@ + +def attention_multiply(attn, model, q, k, v, out): + m = model.clone() + sd = model.model_state_dict() + + for key in sd: + if key.endswith("{}.to_q.bias".format(attn)) or key.endswith("{}.to_q.weight".format(attn)): + m.add_patches({key: (None,)}, 0.0, q) + if key.endswith("{}.to_k.bias".format(attn)) or key.endswith("{}.to_k.weight".format(attn)): + m.add_patches({key: (None,)}, 0.0, k) + if key.endswith("{}.to_v.bias".format(attn)) or key.endswith("{}.to_v.weight".format(attn)): + m.add_patches({key: (None,)}, 0.0, v) + if key.endswith("{}.to_out.0.bias".format(attn)) or key.endswith("{}.to_out.0.weight".format(attn)): + m.add_patches({key: (None,)}, 0.0, out) + + return m + + +class UNetSelfAttentionMultiply: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "q": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "k": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "v": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing/attention_experiments" + + def patch(self, model, q, k, v, out): + m = attention_multiply("attn1", model, q, k, v, out) + return (m, ) + +class UNetCrossAttentionMultiply: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "q": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "k": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "v": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing/attention_experiments" + + def patch(self, model, q, k, v, out): + m = attention_multiply("attn2", model, q, k, v, out) + return (m, ) + +class CLIPAttentionMultiply: + @classmethod + def INPUT_TYPES(s): + return {"required": { "clip": ("CLIP",), + "q": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "k": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "v": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "out": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("CLIP",) + FUNCTION = "patch" + + CATEGORY = "_for_testing/attention_experiments" + + def patch(self, clip, q, k, v, out): + m = clip.clone() + sd = m.patcher.model_state_dict() + + for key in sd: + if key.endswith("self_attn.q_proj.weight") or key.endswith("self_attn.q_proj.bias"): + m.add_patches({key: (None,)}, 0.0, q) + if key.endswith("self_attn.k_proj.weight") or key.endswith("self_attn.k_proj.bias"): + m.add_patches({key: (None,)}, 0.0, k) + if key.endswith("self_attn.v_proj.weight") or key.endswith("self_attn.v_proj.bias"): + m.add_patches({key: (None,)}, 0.0, v) + if key.endswith("self_attn.out_proj.weight") or key.endswith("self_attn.out_proj.bias"): + m.add_patches({key: (None,)}, 0.0, out) + return (m, ) + +class UNetTemporalAttentionMultiply: + @classmethod + def INPUT_TYPES(s): + return {"required": { "model": ("MODEL",), + "self_structural": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "self_temporal": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "cross_structural": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + "cross_temporal": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing/attention_experiments" + + def patch(self, model, self_structural, self_temporal, cross_structural, cross_temporal): + m = model.clone() + sd = model.model_state_dict() + + for k in sd: + if (k.endswith("attn1.to_out.0.bias") or k.endswith("attn1.to_out.0.weight")): + if '.time_stack.' in k: + m.add_patches({k: (None,)}, 0.0, self_temporal) + else: + m.add_patches({k: (None,)}, 0.0, self_structural) + elif (k.endswith("attn2.to_out.0.bias") or k.endswith("attn2.to_out.0.weight")): + if '.time_stack.' in k: + m.add_patches({k: (None,)}, 0.0, cross_temporal) + else: + m.add_patches({k: (None,)}, 0.0, cross_structural) + return (m, ) + +NODE_CLASS_MAPPINGS = { + "UNetSelfAttentionMultiply": UNetSelfAttentionMultiply, + "UNetCrossAttentionMultiply": UNetCrossAttentionMultiply, + "CLIPAttentionMultiply": CLIPAttentionMultiply, + "UNetTemporalAttentionMultiply": UNetTemporalAttentionMultiply, +} diff --git a/comfy_extras/nodes/nodes_upscale_model.py b/comfy_extras/nodes/nodes_upscale_model.py index 4baf726b0..544ed9e9b 100644 --- a/comfy_extras/nodes/nodes_upscale_model.py +++ b/comfy_extras/nodes/nodes_upscale_model.py @@ -42,7 +42,7 @@ class ImageUpscaleWithModel: device = model_management.get_torch_device() memory_required = model_management.module_size(upscale_model) - memory_required += (512 * 512 * 3) * image.element_size() * max(upscale_model.scale, 1.0) * 256.0 # The 256.0 is an estimate of how much some of these models take, TODO: make it more accurate + memory_required += (512 * 512 * 3) * image.element_size() * max(upscale_model.scale, 1.0) * 384.0 # The 384.0 is an estimate of how much some of these models take, TODO: make it more accurate memory_required += image.nelement() * image.element_size() model_management.free_memory(memory_required, device)