From cb8d0ebccc93d3df6e00da1a57718a86d3dde300 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 3 Jun 2024 19:48:27 -0400
Subject: [PATCH 1/9] Don't load the view coordinates when loading a workflow
 from the history.

I think this makes things slightly less annoying for some users.
---
 web/scripts/app.js | 4 ++--
 web/scripts/ui.js  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/web/scripts/app.js b/web/scripts/app.js
index 4dc011b9f..f96d197a8 100644
--- a/web/scripts/app.js
+++ b/web/scripts/app.js
@@ -1800,7 +1800,7 @@ export class ComfyApp {
     * @param {*} graphData A serialized graph object
     * @param { boolean } clean If the graph state, e.g. images, should be cleared
     */
-    async loadGraphData(graphData, clean = true) {
+    async loadGraphData(graphData, clean = true, restore_view = true) {
        if (clean !== false) {
            this.clean();
        }
@@ -1836,7 +1836,7 @@ export class ComfyApp {
 
        try {
            this.graph.configure(graphData);
-           if (this.enableWorkflowViewRestore.value && graphData.extra?.ds) {
+           if (restore_view && this.enableWorkflowViewRestore.value && graphData.extra?.ds) {
                this.canvas.ds.offset = graphData.extra.ds.offset;
                this.canvas.ds.scale = graphData.extra.ds.scale;
            }
diff --git a/web/scripts/ui.js b/web/scripts/ui.js
index 36fed3238..72e43d357 100644
--- a/web/scripts/ui.js
+++ b/web/scripts/ui.js
@@ -228,7 +228,7 @@ class ComfyList {
                    $el("button", {
                        textContent: "Load",
                        onclick: async () => {
-                           await app.loadGraphData(item.prompt[3].extra_pnginfo.workflow);
+                           await app.loadGraphData(item.prompt[3].extra_pnginfo.workflow, true, false);
                            if (item.outputs) {
                                app.nodeOutputs = item.outputs;
                            }

From 20447e9ec92b7e7e3544a6fd2932c31c90333991 Mon Sep 17 00:00:00 2001
From: Denys Smirnov
Date: Tue, 4 Jun 2024 23:37:11 +0300
Subject: [PATCH 2/9] Fix alpha in PorterDuffImageComposite. (#3411)

There were two bugs in PorterDuffImageComposite.

The first is that it used the mask input directly as alpha, missing the
conversion (`1 - a`). The fix is similar to c16f5744.

The second is that all of the color composition formulas assume
alpha-premultiplied values, while the input is not premultiplied.

This change fixes both of these issues.
---
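Notes: as a self-contained illustration of the corrected pipeline (a sketch,
not code from this patch; it assumes float tensors in [0, 1] and the mask
convention converted below, where alpha = 1 - mask), the DARKEN branch ends
up computing:

    import torch

    def darken_composite(src_image, src_mask, dst_image, dst_mask):
        src_alpha = 1 - src_mask     # bug 1 fix: convert mask to alpha
        dst_alpha = 1 - dst_mask
        src = src_image * src_alpha  # bug 2 fix: premultiply before compositing
        dst = dst_image * dst_alpha
        out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha
        out = (1 - dst_alpha) * src + (1 - src_alpha) * dst + torch.min(src, dst)
        # un-premultiply, then convert alpha back to a mask
        out = torch.where(out_alpha > 1e-5, out / out_alpha, torch.zeros_like(out))
        return torch.clamp(out, 0, 1), 1 - out_alpha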
 comfy_extras/nodes_compositing.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/comfy_extras/nodes_compositing.py b/comfy_extras/nodes_compositing.py
index 181b36ed6..48fe5e3dd 100644
--- a/comfy_extras/nodes_compositing.py
+++ b/comfy_extras/nodes_compositing.py
@@ -28,6 +28,14 @@ class PorterDuffMode(Enum):
 
 
 def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_image: torch.Tensor, dst_alpha: torch.Tensor, mode: PorterDuffMode):
+    # convert mask to alpha
+    src_alpha = 1 - src_alpha
+    dst_alpha = 1 - dst_alpha
+    # premultiply alpha
+    src_image = src_image * src_alpha
+    dst_image = dst_image * dst_alpha
+
+    # composite ops below assume alpha-premultiplied images
     if mode == PorterDuffMode.ADD:
         out_alpha = torch.clamp(src_alpha + dst_alpha, 0, 1)
         out_image = torch.clamp(src_image + dst_image, 0, 1)
@@ -35,7 +43,7 @@ def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_
         out_alpha = torch.zeros_like(dst_alpha)
         out_image = torch.zeros_like(dst_image)
     elif mode == PorterDuffMode.DARKEN:
-        out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha
+        out_alpha = src_alpha + dst_alpha - src_alpha * dst_alpha
         out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image + torch.min(src_image, dst_image)
     elif mode == PorterDuffMode.DST:
         out_alpha = dst_alpha
@@ -84,8 +92,13 @@ def porter_duff_composite(src_image: torch.Tensor, src_alpha: torch.Tensor, dst_
         out_alpha = (1 - dst_alpha) * src_alpha + (1 - src_alpha) * dst_alpha
         out_image = (1 - dst_alpha) * src_image + (1 - src_alpha) * dst_image
     else:
-        out_alpha = None
-        out_image = None
+        return None, None
+
+    # back to non-premultiplied alpha
+    out_image = torch.where(out_alpha > 1e-5, out_image / out_alpha, torch.zeros_like(out_image))
+    out_image = torch.clamp(out_image, 0, 1)
+    # convert alpha to mask
+    out_alpha = 1 - out_alpha
     return out_image, out_alpha

From b1fd26fe9e55163f780bf9e5f56bf9bf5f035c93 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Tue, 4 Jun 2024 17:44:14 -0400
Subject: [PATCH 3/9] pytorch xpu should be flash or mem efficient attention?
---
 comfy/model_management.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 3b9fad362..a5142d305 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -693,6 +693,8 @@ def pytorch_attention_flash_attention():
         #TODO: more reliable way of checking for flash attention?
         if is_nvidia(): #pytorch flash attention only works on Nvidia
             return True
+        if is_intel_xpu():
+            return True
     return False
 
 def force_upcast_attention_dtype():

From 104fcea0c8672b138a9bdd1ae00603c9240867c1 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 5 Jun 2024 19:14:56 -0400
Subject: [PATCH 4/9] Add function to get the list of currently loaded models.
---
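Notes: a brief usage sketch of the new helper (the calling code here is
hypothetical, not part of the patch):

    import comfy.model_management

    # every model currently resident in memory
    all_models = comfy.model_management.loaded_models()

    # only the models flagged as used since the last free_memory() pass
    active_models = comfy.model_management.loaded_models(only_currently_used=True)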
 comfy/model_management.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index a5142d305..57aa8bca2 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -276,6 +276,7 @@ class LoadedModel:
         self.device = model.load_device
         self.weights_loaded = False
         self.real_model = None
+        self.currently_used = True
 
     def model_memory(self):
         return self.model.model_size()
@@ -365,6 +366,7 @@ def free_memory(memory_required, device, keep_loaded=[]):
         if shift_model.device == device:
             if shift_model not in keep_loaded:
                 can_unload.append((sys.getrefcount(shift_model.model), shift_model.model_memory(), i))
+                shift_model.currently_used = False
 
     for x in sorted(can_unload):
         i = x[-1]
@@ -410,6 +412,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False):
                 current_loaded_models.pop(loaded_model_index).model_unload(unpatch_weights=True)
                 loaded = None
             else:
+                loaded.currently_used = True
                 models_already_loaded.append(loaded)
 
         if loaded is None:
@@ -466,6 +469,16 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False):
 def load_model_gpu(model):
     return load_models_gpu([model])
 
+def loaded_models(only_currently_used=False):
+    output = []
+    for m in current_loaded_models:
+        if only_currently_used:
+            if not m.currently_used:
+                continue
+
+        output.append(m.model)
+    return output
+
 def cleanup_models(keep_clone_weights_loaded=False):
     to_delete = []
     for i in range(len(current_loaded_models)):

From 0dccb4617de61b81763321f01ae527dbe3b01202 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Thu, 6 Jun 2024 14:49:45 -0400
Subject: [PATCH 5/9] Remove some unnecessary arguments.
---
 nodes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nodes.py b/nodes.py
index 34821ca3f..f454ff8cd 100644
--- a/nodes.py
+++ b/nodes.py
@@ -496,7 +496,7 @@ class CheckpointLoader:
 
     CATEGORY = "advanced/loaders"
 
-    def load_checkpoint(self, config_name, ckpt_name, output_vae=True, output_clip=True):
+    def load_checkpoint(self, config_name, ckpt_name):
         config_path = folder_paths.get_full_path("configs", config_name)
         ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)
         return comfy.sd.load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings"))
@@ -511,7 +511,7 @@ class CheckpointLoaderSimple:
 
     CATEGORY = "loaders"
 
-    def load_checkpoint(self, ckpt_name, output_vae=True, output_clip=True):
+    def load_checkpoint(self, ckpt_name):
         ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)
         out = comfy.sd.load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, embedding_directory=folder_paths.get_folder_paths("embeddings"))
         return out[:3]

From 56333d48508f95bdef23870cad3239ba0ebdb8a9 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 7 Jun 2024 03:05:23 -0400
Subject: [PATCH 6/9] Use the end token for the text encoder attention mask.
---
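Notes: the loop below keeps every token up to and including the first end
token and masks everything after it. A vectorized sketch of the same rule
(an illustration, not part of the patch):

    import torch

    def end_token_attention_mask(tokens: torch.Tensor, end_token: int) -> torch.Tensor:
        # count end tokens seen strictly before each position; a position is
        # attended to only while that count is zero, which keeps the first
        # end token itself visible (pad commonly reuses the end id, 49407)
        is_end = (tokens == end_token).int()
        seen_before = torch.cumsum(is_end, dim=1) - is_end
        return (seen_before == 0).int()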
 comfy/sd1_clip.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index ff6db0d20..e7ebf046d 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -168,11 +168,11 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         attention_mask = None
         if self.enable_attention_masks:
             attention_mask = torch.zeros_like(tokens)
-            max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1
+            end_token = self.special_tokens.get("end", -1)
             for x in range(attention_mask.shape[0]):
                 for y in range(attention_mask.shape[1]):
                     attention_mask[x, y] = 1
-                    if tokens[x, y] == max_token:
+                    if tokens[x, y] == end_token:
                         break
 
         outputs = self.transformer(tokens, attention_mask, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state)

From 6cd8ffc465ed363b078249b081ea3f975e77cf15 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Sat, 8 Jun 2024 02:16:55 -0400
Subject: [PATCH 7/9] Reshape the empty latent image to the right number of
 channels if needed.
---
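Notes: the new comfy.sample.fix_empty_latent_channels helper only touches
all-zero (empty) latents whose channel count disagrees with the model's
latent_format. A usage sketch with illustrative shapes (a 4-channel empty
latent grown to a 16-channel format such as SC_Prior):

    import torch
    import comfy.utils

    empty = torch.zeros((1, 4, 64, 64))
    # repeat_to_batch_size gains a dim argument, so the helper that grows
    # batches along dim 0 can now grow channels along dim 1:
    grown = comfy.utils.repeat_to_batch_size(empty, 16, dim=1)
    assert grown.shape == (1, 16, 64, 64)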
 comfy/latent_formats.py              |  2 ++
 comfy/sample.py                      |  6 ++++++
 comfy/utils.py                       | 10 +++++-----
 comfy_extras/nodes_custom_sampler.py |  2 ++
 nodes.py                             |  2 ++
 5 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py
index 4ca466d9a..69192bc62 100644
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -2,6 +2,7 @@ import torch
 
 class LatentFormat:
     scale_factor = 1.0
+    latent_channels = 4
     latent_rgb_factors = None
     taesd_decoder_name = None
 
@@ -72,6 +73,7 @@ class SD_X4(LatentFormat):
         ]
 
 class SC_Prior(LatentFormat):
+    latent_channels = 16
     def __init__(self):
         self.scale_factor = 1.0
         self.latent_rgb_factors = [
diff --git a/comfy/sample.py b/comfy/sample.py
index e51bd67d6..98dcaca7f 100644
--- a/comfy/sample.py
+++ b/comfy/sample.py
@@ -24,6 +24,12 @@ def prepare_noise(latent_image, seed, noise_inds=None):
     noises = torch.cat(noises, axis=0)
     return noises
 
+def fix_empty_latent_channels(model, latent_image):
+    latent_channels = model.get_model_object("latent_format").latent_channels #Resize the empty latent image so it has the right number of channels
+    if latent_channels != latent_image.shape[1] and torch.count_nonzero(latent_image) == 0:
+        latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_channels, dim=1)
+    return latent_image
+
 def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
     logging.warning("Warning: comfy.sample.prepare_sampling isn't used anymore and can be removed")
     return model, positive, negative, noise_mask, []
diff --git a/comfy/utils.py b/comfy/utils.py
index ab47b8f28..884404cce 100644
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -249,11 +249,11 @@ def unet_to_diffusers(unet_config):
 
     return diffusers_unet_map
 
-def repeat_to_batch_size(tensor, batch_size):
-    if tensor.shape[0] > batch_size:
-        return tensor[:batch_size]
-    elif tensor.shape[0] < batch_size:
-        return tensor.repeat([math.ceil(batch_size / tensor.shape[0])] + [1] * (len(tensor.shape) - 1))[:batch_size]
+def repeat_to_batch_size(tensor, batch_size, dim=0):
+    if tensor.shape[dim] > batch_size:
+        return tensor.narrow(dim, 0, batch_size)
+    elif tensor.shape[dim] < batch_size:
+        return tensor.repeat(dim * [1] + [math.ceil(batch_size / tensor.shape[dim])] + [1] * (len(tensor.shape) - 1 - dim)).narrow(dim, 0, batch_size)
     return tensor
 
 def resize_to_batch_size(tensor, batch_size):
diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py
index 47f08bf60..45ef8cf40 100644
--- a/comfy_extras/nodes_custom_sampler.py
+++ b/comfy_extras/nodes_custom_sampler.py
@@ -380,6 +380,7 @@ class SamplerCustom:
     def sample(self, model, add_noise, noise_seed, cfg, positive, negative, sampler, sigmas, latent_image):
         latent = latent_image
         latent_image = latent["samples"]
+        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
         if not add_noise:
             noise = Noise_EmptyNoise().generate_noise(latent)
         else:
@@ -538,6 +539,7 @@ class SamplerCustomAdvanced:
     def sample(self, noise, guider, sampler, sigmas, latent_image):
         latent = latent_image
         latent_image = latent["samples"]
+        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image)
 
         noise_mask = None
         if "noise_mask" in latent:
diff --git a/nodes.py b/nodes.py
index f454ff8cd..b744b53f0 100644
--- a/nodes.py
+++ b/nodes.py
@@ -1299,6 +1299,8 @@ class SetLatentNoiseMask:
 
 def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
     latent_image = latent["samples"]
+    latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
+
     if disable_noise:
         noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
     else:

From 742d5720d1b128c78266bfd7156fb578d664a95a Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Sun, 9 Jun 2024 16:41:04 -0400
Subject: [PATCH 8/9] Support zeroing out text embeddings with the attention
 mask.
---
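Notes: in isolation, the effect of the new zero_out_masked flag is the
following (a standalone sketch with illustrative shapes, not code from
this patch):

    import torch

    z = torch.randn(1, 77, 768)                # token embeddings
    attention_mask = torch.zeros(1, 77, dtype=torch.long)
    attention_mask[:, :10] = 1                 # only the first 10 tokens are real
    # masked positions are forced to exactly zero instead of keeping whatever
    # the transformer produced for the padding tokens
    z = z * attention_mask.unsqueeze(-1).float()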
 comfy/sd1_clip.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index e7ebf046d..2729f14d8 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -68,7 +68,8 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
     ]
     def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77,
                  freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=comfy.clip_model.CLIPTextModel,
-                 special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, return_projected_pooled=True):  # clip-vit-base-patch32
+                 special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True, enable_attention_masks=False, zero_out_masked=False,
+                 return_projected_pooled=True):  # clip-vit-base-patch32
         super().__init__()
         assert layer in self.LAYERS
@@ -90,6 +91,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
             self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055))
 
         self.enable_attention_masks = enable_attention_masks
+        self.zero_out_masked = zero_out_masked
 
         self.layer_norm_hidden_state = layer_norm_hidden_state
         self.return_projected_pooled = return_projected_pooled
@@ -179,9 +181,12 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         self.transformer.set_input_embeddings(backup_embeds)
 
         if self.layer == "last":
-            z = outputs[0]
+            z = outputs[0].float()
         else:
-            z = outputs[1]
+            z = outputs[1].float()
+
+        if self.zero_out_masked and attention_mask is not None:
+            z *= attention_mask.unsqueeze(-1).float()
 
         pooled_output = None
         if len(outputs) >= 3:
@@ -190,7 +195,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
             elif outputs[2] is not None:
                 pooled_output = outputs[2].float()
 
-        return z.float(), pooled_output
+        return z, pooled_output
 
     def encode(self, tokens):
         return self(tokens)

From a5e6a632f9f16e5b3c72c428820bce67b05446bf Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 10 Jun 2024 01:05:53 -0400
Subject: [PATCH 9/9] Support sampling non-2D latents.
---
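Notes: the generalized area tuple packs N sizes followed by N offsets,
(size_1, ..., size_N, offset_1, ..., offset_N), over the spatial dims of a
(batch, channels, *spatial) tensor. A sketch of the cropping rule applied
below with torch.narrow (illustrative, not part of the patch):

    import torch

    def crop_to_area(x, area):
        dims = len(area) // 2
        for i in range(dims):
            x = x.narrow(i + 2, area[dims + i], area[i])
        return x

    x = torch.randn(1, 4, 32, 32)
    crop = crop_to_area(x, (16, 16, 8, 8))  # a 16x16 window at offset (8, 8)
    assert crop.shape == (1, 4, 16, 16)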
 comfy/samplers.py | 93 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 63 insertions(+), 30 deletions(-)

diff --git a/comfy/samplers.py b/comfy/samplers.py
index 29962a916..656e0a28f 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -8,7 +8,8 @@ import logging
 import comfy.sampler_helpers
 
 def get_area_and_mult(conds, x_in, timestep_in):
-    area = (x_in.shape[2], x_in.shape[3], 0, 0)
+    dims = tuple(x_in.shape[2:])
+    area = None
     strength = 1.0
 
     if 'timestep_start' in conds:
@@ -20,11 +21,16 @@ def get_area_and_mult(conds, x_in, timestep_in):
         if timestep_in[0] < timestep_end:
             return None
     if 'area' in conds:
-        area = conds['area']
+        area = list(conds['area'])
     if 'strength' in conds:
         strength = conds['strength']
 
-    input_x = x_in[:,:,area[2]:area[0] + area[2],area[3]:area[1] + area[3]]
+    input_x = x_in
+    if area is not None:
+        for i in range(len(dims)):
+            area[i] = min(input_x.shape[i + 2] - area[len(dims) + i], area[i])
+            input_x = input_x.narrow(i + 2, area[len(dims) + i], area[i])
+
     if 'mask' in conds:
         # Scale the mask to the size of the input
         # The mask should have been resized as we began the sampling process
@@ -32,28 +38,30 @@ def get_area_and_mult(conds, x_in, timestep_in):
         if "mask_strength" in conds:
             mask_strength = conds["mask_strength"]
         mask = conds['mask']
-        assert(mask.shape[1] == x_in.shape[2])
-        assert(mask.shape[2] == x_in.shape[3])
-        mask = mask[:input_x.shape[0],area[2]:area[0] + area[2],area[3]:area[1] + area[3]] * mask_strength
+        assert(mask.shape[1:] == x_in.shape[2:])
+
+        mask = mask[:input_x.shape[0]]
+        if area is not None:
+            for i in range(len(dims)):
+                mask = mask.narrow(i + 1, area[len(dims) + i], area[i])
+
+        mask = mask * mask_strength
         mask = mask.unsqueeze(1).repeat(input_x.shape[0] // mask.shape[0], input_x.shape[1], 1, 1)
     else:
         mask = torch.ones_like(input_x)
     mult = mask * strength
 
-    if 'mask' not in conds:
+    if 'mask' not in conds and area is not None:
         rr = 8
-        if area[2] != 0:
-            for t in range(rr):
-                mult[:,:,t:1+t,:] *= ((1.0/rr) * (t + 1))
-        if (area[0] + area[2]) < x_in.shape[2]:
-            for t in range(rr):
-                mult[:,:,area[0] - 1 - t:area[0] - t,:] *= ((1.0/rr) * (t + 1))
-        if area[3] != 0:
-            for t in range(rr):
-                mult[:,:,:,t:1+t] *= ((1.0/rr) * (t + 1))
-        if (area[1] + area[3]) < x_in.shape[3]:
-            for t in range(rr):
-                mult[:,:,:,area[1] - 1 - t:area[1] - t] *= ((1.0/rr) * (t + 1))
+        for i in range(len(dims)):
+            if area[len(dims) + i] != 0:
+                for t in range(rr):
+                    m = mult.narrow(i + 2, t, 1)
+                    m *= ((1.0/rr) * (t + 1))
+            if (area[i] + area[len(dims) + i]) < x_in.shape[i + 2]:
+                for t in range(rr):
+                    m = mult.narrow(i + 2, area[i] - 1 - t, 1)
+                    m *= ((1.0/rr) * (t + 1))
 
     conditioning = {}
     model_conds = conds["model_conds"]
@@ -219,8 +227,19 @@ def calc_cond_batch(model, conds, x_in, timestep, model_options):
 
         for o in range(batch_chunks):
             cond_index = cond_or_uncond[o]
-            out_conds[cond_index][:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += output[o] * mult[o]
-            out_counts[cond_index][:,:,area[o][2]:area[o][0] + area[o][2],area[o][3]:area[o][1] + area[o][3]] += mult[o]
+            a = area[o]
+            if a is None:
+                out_conds[cond_index] += output[o] * mult[o]
+                out_counts[cond_index] += mult[o]
+            else:
+                out_c = out_conds[cond_index]
+                out_cts = out_counts[cond_index]
+                dims = len(a) // 2
+                for i in range(dims):
+                    out_c = out_c.narrow(i + 2, a[i + dims], a[i])
+                    out_cts = out_cts.narrow(i + 2, a[i + dims], a[i])
+                out_c += output[o] * mult[o]
+                out_cts += mult[o]
 
     for i in range(len(out_conds)):
         out_conds[i] /= out_counts[i]
@@ -335,7 +354,7 @@ def get_mask_aabb(masks):
 
     return bounding_boxes, is_empty
 
-def resolve_areas_and_cond_masks(conditions, h, w, device):
+def resolve_areas_and_cond_masks_multidim(conditions, dims, device):
     # We need to decide on an area outside the sampling loop in order to properly generate opposite areas of equal sizes.
     # While we're doing this, we can also resolve the mask device and scaling for performance reasons
     for i in range(len(conditions)):
@@ -344,7 +363,14 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
             area = c['area']
             if area[0] == "percentage":
                 modified = c.copy()
-                area = (max(1, round(area[1] * h)), max(1, round(area[2] * w)), round(area[3] * h), round(area[4] * w))
+                a = area[1:]
+                a_len = len(a) // 2
+                area = ()
+                for d in range(len(dims)):
+                    area += (max(1, round(a[d] * dims[d])),)
+                for d in range(len(dims)):
+                    area += (round(a[d + a_len] * dims[d]),)
+
                 modified['area'] = area
                 c = modified
                 conditions[i] = c
@@ -353,12 +379,12 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
             mask = c['mask']
             mask = mask.to(device=device)
             modified = c.copy()
-            if len(mask.shape) == 2:
+            if len(mask.shape) == len(dims):
                 mask = mask.unsqueeze(0)
-            if mask.shape[1] != h or mask.shape[2] != w:
-                mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=(h, w), mode='bilinear', align_corners=False).squeeze(1)
+            if mask.shape[1:] != dims:
+                mask = torch.nn.functional.interpolate(mask.unsqueeze(1), size=dims, mode='bilinear', align_corners=False).squeeze(1)
 
-            if modified.get("set_area_to_bounds", False):
+            if modified.get("set_area_to_bounds", False): #TODO: handle dim != 2
                 bounds = torch.max(torch.abs(mask),dim=0).values.unsqueeze(0)
                 boxes, is_empty = get_mask_aabb(bounds)
                 if is_empty[0]:
@@ -375,7 +401,11 @@ def resolve_areas_and_cond_masks(conditions, h, w, device):
             modified['mask'] = mask
             conditions[i] = modified
 
-def create_cond_with_same_area_if_none(conds, c):
+def resolve_areas_and_cond_masks(conditions, h, w, device):
+    logging.warning("WARNING: The comfy.samplers.resolve_areas_and_cond_masks function is deprecated please use the resolve_areas_and_cond_masks_multidim one instead.")
+    return resolve_areas_and_cond_masks_multidim(conditions, [h, w], device)
+
+def create_cond_with_same_area_if_none(conds, c): #TODO: handle dim != 2
     if 'area' not in c:
         return
@@ -479,7 +509,10 @@ def encode_model_conds(model_function, conds, noise, device, prompt_type, **kwar
         params = x.copy()
         params["device"] = device
         params["noise"] = noise
-        params["width"] = params.get("width", noise.shape[3] * 8)
+        default_width = None
+        if len(noise.shape) >= 4: #TODO: 8 multiple should be set by the model
+            default_width = noise.shape[3] * 8
+        params["width"] = params.get("width", default_width)
         params["height"] = params.get("height", noise.shape[2] * 8)
         params["prompt_type"] = params.get("prompt_type", prompt_type)
         for k in kwargs:
@@ -567,7 +600,7 @@ def ksampler(sampler_name, extra_options={}, inpaint_options={}):
 def process_conds(model, noise, conds, device, latent_image=None, denoise_mask=None, seed=None):
     for k in conds:
         conds[k] = conds[k][:]
-        resolve_areas_and_cond_masks(conds[k], noise.shape[2], noise.shape[3], device)
+        resolve_areas_and_cond_masks_multidim(conds[k], noise.shape[2:], device)
 
     for k in conds:
         calculate_start_end_timesteps(model, conds[k])