From cb8d0ebccc93d3df6e00da1a57718a86d3dde300 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 3 Jun 2024 19:48:27 -0400
Subject: [PATCH 1/9] Don't load the view coordinates when loading a workflow
 from the history.

I think this makes things slightly less annoying for some users.
---
 web/scripts/app.js | 4 ++--
 web/scripts/ui.js  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/web/scripts/app.js b/web/scripts/app.js
index 4dc011b9f..f96d197a8 100644
--- a/web/scripts/app.js
+++ b/web/scripts/app.js
@@ -1800,7 +1800,7 @@ export class ComfyApp {
     * @param {*} graphData A serialized graph object
     * @param { boolean } clean If the graph state, e.g. images, should be cleared
     */
-    async loadGraphData(graphData, clean = true) {
+    async loadGraphData(graphData, clean = true, restore_view = true) {
        if (clean !== false) {
            this.clean();
        }
@@ -1836,7 +1836,7 @@ export class ComfyApp {
 
        try {
            this.graph.configure(graphData);
-           if (this.enableWorkflowViewRestore.value && graphData.extra?.ds) {
+           if (restore_view && this.enableWorkflowViewRestore.value && graphData.extra?.ds) {
                this.canvas.ds.offset = graphData.extra.ds.offset;
                this.canvas.ds.scale = graphData.extra.ds.scale;
            }
diff --git a/web/scripts/ui.js b/web/scripts/ui.js
index 36fed3238..72e43d357 100644
--- a/web/scripts/ui.js
+++ b/web/scripts/ui.js
@@ -228,7 +228,7 @@ class ComfyList {
                    $el("button", {
                        textContent: "Load",
                        onclick: async () => {
-                           await app.loadGraphData(item.prompt[3].extra_pnginfo.workflow);
+                           await app.loadGraphData(item.prompt[3].extra_pnginfo.workflow, true, false);
                            if (item.outputs) {
                                app.nodeOutputs = item.outputs;
                            }

From 20447e9ec92b7e7e3544a6fd2932c31c90333991 Mon Sep 17 00:00:00 2001
From: Denys Smirnov
Date: Tue, 4 Jun 2024 23:37:11 +0300
Subject: [PATCH 2/9] Fix alpha in PorterDuffImageComposite. (#3411)

There were two bugs in PorterDuffImageComposite.

The first is that it used the mask input directly as alpha, missing the
conversion (`1 - a`). The fix is similar to c16f5744.

The second is that all of the color composition formulas assume
alpha-premultiplied values, while the input is not premultiplied.

This change fixes both of these issues.
---
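Notes: as a self-contained illustration of the corrected pipeline (a sketch,
not code from this patch; it assumes float tensors in [0, 1] and the mask
convention converted below, where alpha = 1 - mask), the DARKEN branch ends
up computing:

    import torch

    def darken_composite(src_image, src_mask, dst_image, dst_mask):
        src_alpha = 1 - src_mask     # bug 1 fix: convert mask to alpha
        dst_alpha = 1 - dst_mask
        src = src_image * src_alpha  # bug 2 fix: premultiply before compositing
        dst = dst_image * dst_alpha
        out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha
        out = (1 - dst_alpha) * src + (1 - src_alpha) * dst + torch.min(src, dst)
        # un-premultiply, then convert alpha back to a mask
        out = torch.where(out_alpha > 1e-5, out / out_alpha, torch.zeros_like(out))
        return torch.clamp(out, 0, 1), 1 - out_alpha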
 comfy_extras/nodes_compositing.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/comfy_extras/nodes_compositing.py b/comfy_extras/nodes_compositing.py
index 181b36ed6..48fe5e3dd 100644
--- a/comfy_extras/nodes_compositing.py
+++ b/comfy_extras/nodes_compositing.py
@@ -28,6 +28,14 @@ class PorterDuffMode(Enum):
 
 
 def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_image: torch.Tensor, dst_alpha: torch.Tensor, mode: PorterDuffMode):
+    # convert mask to alpha
+    src_alpha = 1 - src_alpha
+    dst_alpha = 1 - dst_alpha
+    # premultiply alpha
+    src_image = src_image * src_alpha
+    dst_image = dst_image * dst_alpha
+
+    # composite ops below assume alpha-premultiplied images
     if mode == PorterDuffMode.ADD:
         out_alpha = torch.clamp(src_alpha + dst_alpha, 0, 1)
         out_image = torch.clamp(src_image + dst_image, 0, 1)
@@ -35,7 +43,7 @@ def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_
         out_alpha = torch.zeros_like(dst_alpha)
         out_image = torch.zeros_like(dst_image)
     elif mode == PorterDuffMode.DARKEN:
-        out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha
+        out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha
         out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image + torch.min(src_image, dst_image)
     elif mode == PorterDuffMode.DST:
         out_alpha = dst_alpha
@@ -84,8 +92,13 @@ def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_
         out_alpha = (1 - dst_alpha) * src_alpha + (1 - src_alpha) * dst_alpha
         out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image
     else:
-        out_alpha = None
-        out_image = None
+        return None, None
+
+    # back to non-premultiplied alpha
+    out_image = torch.where(out_alpha > 1e-5, out_image / out_alpha, torch.zeros_like(out_image))
+    out_image = torch.clamp(out_image, 0, 1)
+    # convert alpha to mask
+    out_alpha = 1 - out_alpha
     return out_image, out_alpha

From b1fd26fe9e55163f780bf9e5f56bf9bf5f035c93 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Tue, 4 Jun 2024 17:44:14 -0400
Subject: [PATCH 3/9] pytorch xpu should be flash or mem efficient attention?
---
 comfy/model_management.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 3b9fad362..a5142d305 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -693,6 +693,8 @@ def pytorch_attention_flash_attention():
         #TODO: more reliable way of checking for flash attention?
         if is_nvidia(): #pytorch flash attention only works on Nvidia
             return True
+        if is_intel_xpu():
+            return True
     return False
 
 def force_upcast_attention_dtype():

From 104fcea0c8672b138a9bdd1ae00603c9240867c1 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 5 Jun 2024 19:14:56 -0400
Subject: [PATCH 4/9] Add function to get the list of currently loaded models.
---
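Notes: a brief usage sketch of the new helper (the calling code here is
hypothetical, not part of the patch):

    import comfy.model_management

    # every model currently resident in memory
    all_models = comfy.model_management.loaded_models()

    # only the models flagged as used since the last free_memory() pass
    active_models = comfy.model_management.loaded_models(only_currently_used=True)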
 comfy/model_management.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index a5142d305..57aa8bca2 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -276,6 +276,7 @@ class LoadedModel:
         self.device = model.load_device
         self.weights_loaded = False
         self.real_model = None
+        self.currently_used = True
 
     def model_memory(self):
         return self.model.model_size()
@@ -365,6 +366,7 @@ def free_memory(memory_required, device, keep_loaded=[]):
         if shift_model.device == device:
             if shift_model not in keep_loaded:
                 can_unload.append((sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
+                shift_model.currently_used = False
 
     for x in sorted(can_unload):
         i = x[-1]
@@ -410,6 +412,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False):
                 current_loaded_models.pop(loaded_model_index).model_unload(unpatch_weights=True)
                 loaded = None
             else:
+                loaded.currently_used = True
                 models_already_loaded.append(loaded)
 
         if loaded is None:
@@ -466,6 +469,16 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False):
 def load_model_gpu(model):
     return load_models_gpu([model])
 
+def loaded_models(only_currently_used=False):
+    output = []
+    for m in current_loaded_models:
+        if only_currently_used:
+            if not m.currently_used:
+                continue
+
+        output.append(m.model)
+    return output
+
 def cleanup_models(keep_clone_weights_loaded=False):
     to_delete = []
     for i in range(len(current_loaded_models)):

From 0dccb4617de61b81763321f01ae527dbe3b01202 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Thu, 6 Jun 2024 14:49:45 -0400
Subject: [PATCH 5/9] Remove some unnecessary arguments.
---
 nodes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nodes.py b/nodes.py
index 34821ca3f..f454ff8cd 100644
--- a/nodes.py
+++ b/nodes.py
@@ -496,7 +496,7 @@ class CheckpointLoader:
 
     CATEGORY = "advanced/loaders"
 
-    def load_checkpoint(self, config_name, ckpt_name, output_vae=True, output_clip=True):
+    def load_checkpoint(self, config_name, ckpt_name):
         config_path = folder_paths.get_full_path("configs", config_name)
         ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)
         return comfy.sd.load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings"))
@@ -511,7 +511,7 @@ class CheckpointLoaderSimple:
 
     CATEGORY = "loaders"
 
-    def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True):
+    def load_checkpoint(self, ckpt_name):
         ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)
         out = comfy.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings"))
         return out[:3]

From 56333d48508f95bdef23870cad3239ba0ebdb8a9 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 7 Jun 2024 03:05:23 -0400
Subject: [PATCH 6/9] Use the end token for the text encoder attention mask.
---
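Notes: the loop below keeps every token up to and including the first end
token and masks everything after it. A vectorized sketch of the same rule
(an illustration, not part of the patch):

    import torch

    def end_token_attention_mask(tokens: torch.Tensor, end_token: int) -> torch.Tensor:
        # count end tokens seen strictly before each position; a position is
        # attended to only while that count is zero, which keeps the first
        # end token itself visible (pad commonly reuses the end id, 49407)
        is_end = (tokens == end_token).int()
        seen_before = torch.cumsum(is_end, dim=1) - is_end
        return (seen_before == 0).int()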
 comfy/sd1_clip.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index ff6db0d20..e7ebf046d 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -168,11 +168,11 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         attention_mask = None
         if self.enable_attention_masks:
             attention_mask = torch.zeros_like(tokens)
-            max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1
+            end_token = self.special_tokens.get("end", -1)
             for x in range(attention_mask.shape[0]):
                 for y in range(attention_mask.shape[1]):
                     attention_mask[x, y] = 1
-                    if tokens[x, y] == max_token:
+                    if tokens[x, y] == end_token:
                         break
 
         outputs = self.transformer(tokens, attention_mask, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state)

From 6cd8ffc465ed363b078249b081ea3f975e77cf15 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Sat, 8 Jun 2024 02:16:55 -0400
Subject: [PATCH 7/9] Reshape the empty latent image to the right number of
 channels if needed.
---
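Notes: the new comfy.sample.fix_empty_latent_channels helper only touches
all-zero (empty) latents whose channel count disagrees with the model's
latent_format. A usage sketch with illustrative shapes (a 4-channel empty
latent grown to a 16-channel format such as SC_Prior):

    import torch
    import comfy.utils

    empty = torch.zeros((1, 4, 64, 64))
    # repeat_to_batch_size gains a dim argument, so the helper that grows
    # batches along dim 0 can now grow channels along dim 1:
    grown = comfy.utils.repeat_to_batch_size(empty, 16, dim=1)
    assert grown.shape == (1, 16, 64, 64)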
 comfy/latent_formats.py              |  2 ++
 comfy/sample.py                      |  6 ++++++
 comfy/utils.py                       | 10 +++++-----
 comfy_extras/nodes_custom_sampler.py |  2 ++
 nodes.py                             |  2 ++
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py
index 4ca466d9a..69192bc62 100644
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -2,6 +2,7 @@ import torch
 
 class LatentFormat:
     scale_factor = 1.0
+    latent_channels = 4
     latent_rgb_factors = None
     taesd_decoder_name = None
 
@@ -72,6 +73,7 @@ class SD_X4(LatentFormat):
         ]
 
 class SC_Prior(LatentFormat):
+    latent_channels = 16
     def __init__(self):
         self.scale_factor = 1.0
         self.latent_rgb_factors = [
diff --git a/comfy/sample.py b/comfy/sample.py
index e51bd67d6..98dcaca7f 100644
--- a/comfy/sample.py
+++ b/comfy/sample.py
@@ -24,6 +24,12 @@ def prepare_noise(latent_image, seed, noise_inds=None):
     noises = torch.cat(noises, axis=0)
     return noises
 
+def fix_empty_latent_channels(model, latent_image):
+    latent_channels = model.get_model_object("latent_format").latent_channels #Resize the empty latent image so it has the right number of channels
+    if latent_channels != latent_image.shape[1] and torch.count_nonzero(latent_image) == 0:
+        latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_channels, dim=1)
+    return latent_image
+
 def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
     logging.warning("Warning: comfy.sample.prepare_sampling isn't used anymore and can be removed")
     return model, positive, negative, noise_mask, []
diff --git a/comfy/utils.py b/comfy/utils.py
index ab47b8f28..884404cce 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -249,11 +249,11 @@ def unet_to_diffusers(unet_config):
 
     return diffusers_unet_map
 
-def repeat_to_batch_size(tensor, batch_size):
-    if tensor.shape[0] > batch_size:
-        return tensor[:batch_size]
-    elif tensor.shape[0] < batch_size:
-        return tensor.repeat([math.ceil(batch_size / tensor.shape[0])] + [1] * (len(tensor.shape) - 1))[:batch_size]
+def repeat_to_batch_size(tensor, batch_size, dim=0):
+    if tensor.shape[dim] > batch_size:
+        return tensor.narrow(dim, 0, batch_size)
+    elif tensor.shape[dim] < batch_size:
+        return tensor.repeat(dim * [1] + [math.ceil(batch_size / tensor.shape[dim])] + [1] * (len(tensor.shape) - 1 - dim)).narrow(dim, 0, batch_size)
     return tensor
 
 def resize_to_batch_size(tensor, batch_size):
diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py
index 47f08bf60..45ef8cf40 100644
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@@ -380,6 +380,7 @@ class SamplerCustom:
     def sample(self, model, add_noise, noise_seed, cfg, positive, negative, sampler, sigmas, latent_image):
         latent = latent_image
         latent_image = latent["samples"]
+        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
         if not add_noise:
             noise = Noise_EmptyNoise().generate_noise(latent)
         else:
@@ -538,6 +539,7 @@ class SamplerCustomAdvanced:
     def sample(self, noise, guider, sampler, sigmas, latent_image):
         latent = latent_image
         latent_image = latent["samples"]
+        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image)
 
         noise_mask = None
         if "noise_mask" in latent:
diff --git a/nodes.py b/nodes.py
index f454ff8cd..b744b53f0 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1299,6 +1299,8 @@ class SetLatentNoiseMask:
 
 def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
     latent_image = latent["samples"]
+    latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
+
     if disable_noise:
         noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
     else:

From 742d5720d1b128c78266bfd7156fb578d664a95a Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Sun, 9 Jun 2024 16:41:04 -0400
Subject: [PATCH 8/9] Support zeroing out text embeddings with the attention
 mask.
---
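Notes: in isolation, the effect of the new zero_out_masked flag is the
following (a standalone sketch with illustrative shapes, not code from
this patch):

    import torch

    z = torch.randn(1, 77, 768)                # token embeddings
    attention_mask = torch.zeros(1, 77, dtype=torch.long)
    attention_mask[:, :10] = 1                 # only the first 10 tokens are real
    # masked positions are forced to exactly zero instead of keeping whatever
    # the transformer produced for the padding tokens
    z = z * attention_mask.unsqueeze(-1).float()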
 comfy/sd1_clip.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index e7ebf046d..2729f14d8 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -68,7 +68,8 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
     ]
     def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77,
                  freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=comfy.clip_model.CLIPTextModel,
-                 special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, return_projected_pooled=True):  # clip-vit-base-patch32
+                 special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, zero_out_masked=False,
+                 return_projected_pooled=True):  # clip-vit-base-patch32
         super().__init__()
         assert layer in self.LAYERS
@@ -90,6 +91,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
             self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055))
 
         self.enable_attention_masks = enable_attention_masks
+        self.zero_out_masked = zero_out_masked
 
         self.layer_norm_hidden_state = layer_norm_hidden_state
         self.return_projected_pooled = return_projected_pooled
@@ -179,9 +181,12 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         self.transformer.set_input_embeddings(backup_embeds)
 
         if self.layer == "last":
-            z = outputs[0]
+            z = outputs[0].float()
         else:
-            z = outputs[1]
+            z = outputs[1].float()
+
+        if self.zero_out_masked and attention_mask is not None:
+            z *= attention_mask.unsqueeze(-1).float()
 
         pooled_output = None
         if len(outputs) >= 3:
@@ -190,7 +195,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
             elif outputs[2] is not None:
                 pooled_output = outputs[2].float()
 
-        return z.float(), pooled_output
+        return z, pooled_output
 
     def encode(self, tokens):
         return self(tokens)

From a5e6a632f9f16e5b3c72c428820bce67b05446bf Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 10 Jun 2024 01:05:53 -0400
Subject: [PATCH 9/9] Support sampling non-2D latents.
---
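Notes: the generalized area tuple packs N sizes followed by N offsets,
(size_1, ..., size_N, offset_1, ..., offset_N), over the spatial dims of a
(batch, channels, *spatial) tensor. A sketch of the cropping rule applied
below with torch.narrow (illustrative, not part of the patch):

    import torch

    def crop_to_area(x, area):
        dims = len(area) // 2
        for i in range(dims):
            x = x.narrow(i + 2, area[dims + i], area[i])
        return x

    x = torch.randn(1, 4, 32, 32)
    crop = crop_to_area(x, (16, 16, 8, 8))  # a 16x16 window at offset (8, 8)
    assert crop.shape == (1, 4, 16, 16)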
 comfy/samplers.py | 93 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 63 insertions(+), 30 deletions(-)

diff --git a/comfy/samplers.py b/comfy/samplers.py
index 29962a916..656e0a28f 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -8,7 +8,8 @@ import logging
 import comfy.sampler_helpers
 
 def get_area_and_mult(conds, x_in, timestep_in):
-    area = (x_in.shape[2], x_in.shape[3], 0, 0)
+    dims = tuple(x_in.shape[2:])
+    area = None
     strength = 1.0
 
     if 'timestep_start' in conds:
@@ -20,11 +21,16 @@ def get_area_and_mult(conds, x_in, timestep_in):
         if timestep_in[0] < timestep_end:
             return None
     if 'area' in conds:
-        area = conds['area']
+        area = list(conds['area'])
     if 'strength' in conds:
         strength = conds['strength']
 
-    input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
+    input_x = x_in
+    if area is not None:
+        for i in range(len(dims)):
+            area[i] = min(input_x.shape[i + 2] - area[len(dims) + i], area[i])
+            input_x = input_x.narrow(i + 2, area[len(dims) + i], area[i])
+
     if 'mask' in conds:
         # Scale the mask to the size of the input
         # The mask should have been resized as we began the sampling process
@@ -32,28 +38,30 @@ def get_area_and_mult(conds, x_in, timestep_in):
         if "mask_strength" in conds:
             mask_strength = conds["mask_strength"]
         mask = conds['mask']
-        assert(mask.shape[1] == x_in.shape[2])
-        assert(mask.shape[2] == x_in.shape[3])
-        mask = mask[:input_x.shape[0],area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength
+        assert(mask.shape[1:] == x_in.shape[2:])
+
+        mask = mask[:input_x.shape[0]]
+        if area is not None:
+            for i in range(len(dims)):
+                mask = mask.narrow(i + 1, area[len(dims) + i], area[i])
+
+        mask = mask * mask_strength
         mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1)
     else:
         mask = torch.ones_like(input_x)
     mult = mask * strength
 
-    if 'mask' not in conds:
+    if 'mask' not in conds and area is not None:
         rr = 8
-        if area[2] != 0:
-            for t in range(rr):
-                mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1))
-        if (area[0] + area[2]) < x_in.shape[2]:
-            for t in range(rr):
-                mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1))
-        if area[3] != 0:
-            for t in range(rr):
-                mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1))
-        if (area[1] + area[3]) < x_in.shape[3]:
-            for t in range(rr):
-                mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1))
+        for i in range(len(dims)):
+            if area[len(dims) + i] != 0:
+                for t in range(rr):
+                    m = mult.narrow(i + 2, t, 1)
+                    m *= ((1.0/rr) * (t + 1))
+            if (area[i] + area[len(dims) + i]) < x_in.shape[i + 2]:
+                for t in range(rr):
+                    m = mult.narrow(i + 2, area[i] - 1 - t, 1)
+                    m *= ((1.0/rr) * (t + 1))
 
     conditioning = {}
     model_conds = conds["model_conds"]
@@ -219,8 +227,19 @@ def calc_cond_batch(model, conds, x_in, timestep, model_options):
 
         for o in range(batch_chunks):
             cond_index = cond_or_uncond[o]
-            out_conds[cond_index][:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
-            out_counts[cond_index][:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o]
+            a = area[o]
+            if a is None:
+                out_conds[cond_index] += output[o] * mult[o]
+                out_counts[cond_index] += mult[o]
+            else:
+                out_c = out_conds[cond_index]
+                out_cts = out_counts[cond_index]
+                dims = len(a) // 2
+                for i in range(dims):
+                    out_c = out_c.narrow(i + 2, a[i + dims], a[i])
+                    out_cts = out_cts.narrow(i + 2, a[i + dims], a[i])
+                out_c += output[o] * mult[o]
+                out_cts += mult[o]
 
     for i in range(len(out_conds)):
         out_conds[i] /= out_counts[i]
@@ -335,7 +354,7 @@ def get_mask_aabb(masks):
 
     return bounding_boxes, is_empty
 
-def resolve_areas_and_cond_masks(conditions, h, w, device):
+def resolve_areas_and_cond_masks_multidim(conditions, dims, device):
     # We need to decide on an area outside the sampling loop in order to properly generate opposite areas of equal sizes.
     # While we're doing this, we can also resolve the mask device and scaling for performance reasons
     for i in range(len(conditions)):
@@ -344,7 +363,14 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
             area = c['area']
             if area[0] == "percentage":
                 modified = c.copy()
-                area = (max(1, round(area[1] * h)), max(1, round(area[2] * w)), round(area[3] * h), round(area[4] * w))
+                a = area[1:]
+                a_len = len(a) // 2
+                area = ()
+                for d in range(len(dims)):
+                    area += (max(1, round(a[d] * dims[d])),)
+                for d in range(len(dims)):
+                    area += (round(a[d + a_len] * dims[d]),)
+
                 modified['area'] = area
                 c = modified
                 conditions[i] = c
@@ -353,12 +379,12 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
             mask = c['mask']
             mask = mask.to(device=device)
             modified = c.copy()
-            if len(mask.shape) == 2:
+            if len(mask.shape) == len(dims):
                 mask = mask.unsqueeze(0)
-            if mask.shape[1] != h or mask.shape[2] != w:
-                mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(h, w), mode='bilinear', align_corners=False).squeeze(1)
+            if mask.shape[1:] != dims:
+                mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=dims, mode='bilinear', align_corners=False).squeeze(1)
 
-            if modified.get("set_area_to_bounds", False):
+            if modified.get("set_area_to_bounds", False): #TODO: handle dim != 2
                 bounds = torch.max(torch.abs(mask),dim=0).values.unsqueeze(0)
                 boxes, is_empty = get_mask_aabb(bounds)
                 if is_empty[0]:
@@ -375,7 +401,11 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
             modified['mask'] = mask
             conditions[i] = modified
 
-def create_cond_with_same_area_if_none(conds, c):
+def resolve_areas_and_cond_masks(conditions, h, w, device):
+    logging.warning("WARNING: The comfy.samplers.resolve_areas_and_cond_masks function is deprecated please use the resolve_areas_and_cond_masks_multidim one instead.")
+    return resolve_areas_and_cond_masks_multidim(conditions, [h, w], device)
+
+def create_cond_with_same_area_if_none(conds, c): #TODO: handle dim != 2
     if 'area' not in c:
         return
@@ -479,7 +509,10 @@ def encode_model_conds(model_function, conds, noise, device, prompt_type, **kwar
         params = x.copy()
         params["device"] = device
         params["noise"] = noise
-        params["width"] = params.get("width", noise.shape[3] * 8)
+        default_width = None
+        if len(noise.shape) >= 4: #TODO: 8 multiple should be set by the model
+            default_width = noise.shape[3] * 8
+        params["width"] = params.get("width", default_width)
         params["height"] = params.get("height", noise.shape[2] * 8)
         params["prompt_type"] = params.get("prompt_type", prompt_type)
         for k in kwargs:
@@ -567,7 +600,7 @@ def ksampler(sampler_name, extra_options={}, inpaint_options={}):
 def process_conds(model, noise, conds, device, latent_image=None, denoise_mask=None, seed=None):
     for k in conds:
         conds[k] = conds[k][:]
-        resolve_areas_and_cond_masks(conds[k], noise.shape[2], noise.shape[3], device)
+        resolve_areas_and_cond_masks_multidim(conds[k], noise.shape[2:], device)
 
     for k in conds:
         calculate_start_end_timesteps(model, conds[k])