diff --git a/comfy/cli_args.py b/comfy/cli_args.py
index 81bbc4796..ec7d34a55 100644
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -38,6 +38,7 @@ parser.add_argument("--port", type=int, default=8188, help="Set the listen port.
 parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
 parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
 parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
+parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).")
 parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
 parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py
index dd234435f..beaa623f3 100644
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@@ -631,23 +631,78 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
                 elif solver_type == 'midpoint':
                     x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)
 
-            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise
+            if eta:
+                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise
 
         old_denoised = denoised
         h_last = h
     return x
 
+@torch.no_grad()
+def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
+    """DPM-Solver++(3M) SDE."""
+
+    seed = extra_args.get("seed", None)
+    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
+    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler
+    extra_args = {} if extra_args is None else extra_args
+    s_in = x.new_ones([x.shape[0]])
+
+    denoised_1, denoised_2 = None, None
+    h_1, h_2 = None, None
+
+    for i in trange(len(sigmas) - 1, disable=disable):
+        denoised = model(x, sigmas[i] * s_in, **extra_args)
+        if callback is not None:
+            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
+        if sigmas[i + 1] == 0:
+            # Denoising step
+            x = denoised
+        else:
+            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
+            h = s - t
+            h_eta = h * (eta + 1)
+
+            x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised
+
+            if h_2 is not None:
+                r0 = h_1 / h
+                r1 = h_2 / h
+                d1_0 = (denoised - denoised_1) / r0
+                d1_1 = (denoised_1 - denoised_2) / r1
+                d1 = d1_0 + (d1_0 - d1_1) * r0 / (r0 + r1)
+                d2 = (d1_0 - d1_1) / (r0 + r1)
+                phi_2 = h_eta.neg().expm1() / h_eta + 1
+                phi_3 = phi_2 / h_eta - 0.5
+                x = x + phi_2 * d1 - phi_3 * d2
+            elif h_1 is not None:
+                r = h_1 / h
+                d = (denoised - denoised_1) / r
+                phi_2 = h_eta.neg().expm1() / h_eta + 1
+                x = x + phi_2 * d
+
+            if eta:
+                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise
+
+        denoised_1, denoised_2 = denoised, denoised_1
+        h_1, h_2 = h, h_1
+    return x
+
+@torch.no_grad()
+def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
+    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
+    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
+    return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler)
+
 @torch.no_grad()
 def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
     sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
     noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
     return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type)
 
-
 @torch.no_grad()
 def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
     sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
     noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler
     return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r)
-
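The new `sample_dpmpp_3m_sde` is a three-step multistep SDE solver: it carries the two previous denoised predictions (`denoised_1`, `denoised_2`) and step sizes (`h_1`, `h_2`), applies a third-order correction through the `phi_2`/`phi_3` coefficients once two steps of history exist, falls back to a second-order correction with one step of history, and otherwise takes a plain first-order step. One caveat visible in the hunk: `extra_args.get("seed", None)` runs before the `extra_args = {} if extra_args is None else extra_args` guard, so calling the function with the default `extra_args=None` raises `AttributeError`. A minimal smoke test, assuming a ComfyUI checkout with torch and torchsde installed (the lambda denoiser and all shapes are made up; normal use goes through `KSampler` via the new `"dpmpp_3m_sde"` name):

```python
import torch
from comfy.k_diffusion.sampling import get_sigmas_karras, sample_dpmpp_3m_sde

sigmas = get_sigmas_karras(n=20, sigma_min=0.03, sigma_max=14.6)
x = torch.randn(1, 4, 64, 64) * sigmas[0]

# Stand-in denoiser: accepts (x, sigma, **extra) like the real model wrapper
# and simply predicts "the clean image is all zeros".
denoiser = lambda x, sigma, **extra: torch.zeros_like(x)

# extra_args must be a dict here because "seed" is read before the None guard.
out = sample_dpmpp_3m_sde(denoiser, x, sigmas, extra_args={"seed": 42})
print(out.shape)  # torch.Size([1, 4, 64, 64])
```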
diff --git a/comfy/model_detection.py b/comfy/model_detection.py
index 691d4c6c4..49ee9ea70 100644
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -113,6 +113,7 @@ def model_config_from_unet_config(unet_config):
         if model_config.matches(unet_config):
             return model_config(unet_config)
 
+    print("no match", unet_config)
     return None
 
 def model_config_from_unet(state_dict, unet_key_prefix, use_fp16):
diff --git a/comfy/samplers.py b/comfy/samplers.py
index 044d518a5..28cd46667 100644
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -189,12 +189,13 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
                 continue
             to_run += [(p, COND)]
 
-        for x in uncond:
-            p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
-            if p is None:
-                continue
+        if uncond is not None:
+            for x in uncond:
+                p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
+                if p is None:
+                    continue
 
-            to_run += [(p, UNCOND)]
+                to_run += [(p, UNCOND)]
 
         while len(to_run) > 0:
             first = to_run[0]
@@ -282,6 +283,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
 
     max_total_area = model_management.maximum_batch_area()
 
+    if math.isclose(cond_scale, 1.0):
+        uncond = None
+
     cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
     if "sampler_cfg_function" in model_options:
         args = {"cond": cond, "uncond": uncond, "cond_scale": cond_scale, "timestep": timestep}
@@ -343,6 +347,17 @@ def ddim_scheduler(model, steps):
     sigs += [0.0]
     return torch.FloatTensor(sigs)
 
+def sgm_scheduler(model, steps):
+    sigs = []
+    timesteps = torch.linspace(model.inner_model.inner_model.num_timesteps - 1, 0, steps + 1)[:-1].type(torch.int)
+    for x in range(len(timesteps)):
+        ts = timesteps[x]
+        if ts > 999:
+            ts = 999
+        sigs.append(model.t_to_sigma(torch.tensor(ts)))
+    sigs += [0.0]
+    return torch.FloatTensor(sigs)
+
 def blank_inpaint_image_like(latent_image):
     blank_image = torch.ones_like(latent_image)
     # these are the values for "zero" in pixel space translated to latent space
@@ -521,10 +536,10 @@ def encode_adm(model, conds, batch_size, width, height, device, prompt_type):
 
 class KSampler:
-    SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"]
+    SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"]
     SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                 "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
-                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"]
+                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"]
 
     def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
         self.model = model
@@ -566,6 +581,8 @@ class KSampler:
             sigmas = simple_scheduler(self.model_wrap, steps)
         elif self.scheduler == "ddim_uniform":
             sigmas = ddim_scheduler(self.model_wrap, steps)
+        elif self.scheduler == "sgm_uniform":
+            sigmas = sgm_scheduler(self.model_wrap, steps)
         else:
             print("error invalid scheduler", self.scheduler)
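The `math.isclose(cond_scale, 1.0)` shortcut works because ComfyUI combines the two model passes as `uncond + (cond - uncond) * cond_scale`; at a scale of exactly 1 that expression collapses to `cond`, so the negative-prompt batch can be dropped and each step does roughly half the work, which the `uncond is not None` guard above makes safe. A two-line check of the identity on toy tensors (not ComfyUI code):

```python
import torch

cond, uncond = torch.randn(4), torch.randn(4)
cfg = uncond + (cond - uncond) * 1.0  # the CFG combination at cond_scale == 1
assert torch.allclose(cfg, cond)      # identical to skipping uncond entirely
```

The new `sgm_uniform` scheduler appears to mirror the uniform integer-timestep spacing used by Stability's generative-models (sgm) reference sampler, clamping timesteps to 999 before converting them to sigmas.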
diff --git a/comfy/sd.py b/comfy/sd.py
index 2996a938b..bff9ee141 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -72,6 +72,7 @@ def load_lora(lora, to_load):
 
         regular_lora = "{}.lora_up.weight".format(x)
         diffusers_lora = "{}_lora.up.weight".format(x)
+        transformers_lora = "{}.lora_linear_layer.up.weight".format(x)
         A_name = None
 
         if regular_lora in lora.keys():
@@ -82,6 +83,10 @@ def load_lora(lora, to_load):
             A_name = diffusers_lora
             B_name = "{}_lora.down.weight".format(x)
             mid_name = None
+        elif transformers_lora in lora.keys():
+            A_name = transformers_lora
+            B_name = "{}.lora_linear_layer.down.weight".format(x)
+            mid_name = None
 
         if A_name is not None:
             mid = None
@@ -181,20 +186,29 @@ def model_lora_keys_clip(model, key_map={}):
                 key_map[lora_key] = k
                 lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c])
                 key_map[lora_key] = k
+                lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+                key_map[lora_key] = k
 
             k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
             if k in sdk:
                 lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
                 key_map[lora_key] = k
                 clip_l_present = True
+                lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+                key_map[lora_key] = k
 
             k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
             if k in sdk:
                 if clip_l_present:
                     lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
+                    key_map[lora_key] = k
+                    lora_key = "text_encoder_2.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+                    key_map[lora_key] = k
                 else:
                     lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner
-                key_map[lora_key] = k
+                    key_map[lora_key] = k
+                    lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
+                    key_map[lora_key] = k
 
     return key_map
@@ -209,13 +223,16 @@ def model_lora_keys_unet(model, key_map={}):
     diffusers_keys = utils.unet_to_diffusers(model.model_config.unet_config)
     for k in diffusers_keys:
         if k.endswith(".weight"):
+            unet_key = "diffusion_model.{}".format(diffusers_keys[k])
             key_lora = k[:-len(".weight")].replace(".", "_")
-            key_map["lora_unet_{}".format(key_lora)] = "diffusion_model.{}".format(diffusers_keys[k])
+            key_map["lora_unet_{}".format(key_lora)] = unet_key
 
-            diffusers_lora_key = "unet.{}".format(k[:-len(".weight")].replace(".to_", ".processor.to_"))
-            if diffusers_lora_key.endswith(".to_out.0"):
-                diffusers_lora_key = diffusers_lora_key[:-2]
-            key_map[diffusers_lora_key] = "diffusion_model.{}".format(diffusers_keys[k])
+            diffusers_lora_prefix = ["", "unet."]
+            for p in diffusers_lora_prefix:
+                diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_"))
+                if diffusers_lora_key.endswith(".to_out.0"):
+                    diffusers_lora_key = diffusers_lora_key[:-2]
+                key_map[diffusers_lora_key] = unet_key
     return key_map
 
 def set_attr(obj, attr, value):
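These sd.py changes teach the LoRA loader a third naming scheme: besides kohya-style `{}.lora_up.weight` and the diffusers attention-processor `{}_lora.up.weight`, it now matches the `{}.lora_linear_layer.up.weight` names that diffusers/transformers emit for text-encoder LoRAs, and the `model_lora_keys_*` functions register the matching dotted prefixes (plus the hoisted `unet_key` and the `["", "unet."]` prefix loop, which accepts UNet keys with or without the `unet.` prefix). A toy illustration of the probe order in `load_lora`, with a made-up two-entry state dict:

```python
x = "text_encoder.text_model.encoder.layers.0.self_attn.q_proj"  # a mapped prefix

lora = {  # pretend LoRA state dict using the transformers-style names
    "{}.lora_linear_layer.up.weight".format(x): "A-tensor",
    "{}.lora_linear_layer.down.weight".format(x): "B-tensor",
}

regular_lora = "{}.lora_up.weight".format(x)                    # kohya
diffusers_lora = "{}_lora.up.weight".format(x)                  # diffusers
transformers_lora = "{}.lora_linear_layer.up.weight".format(x)  # transformers
assert regular_lora not in lora and diffusers_lora not in lora
assert transformers_lora in lora  # so the new elif branch picks the A/B names
```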
"diffusion_model.{}".format(diffusers_keys[k]) key_lora = k[:-len(".weight")].replace(".", "_") - key_map["lora_unet_{}".format(key_lora)] = "diffusion_model.{}".format(diffusers_keys[k]) + key_map["lora_unet_{}".format(key_lora)] = unet_key - diffusers_lora_key = "unet.{}".format(k[:-len(".weight")].replace(".to_", ".processor.to_")) - if diffusers_lora_key.endswith(".to_out.0"): - diffusers_lora_key = diffusers_lora_key[:-2] - key_map[diffusers_lora_key] = "diffusion_model.{}".format(diffusers_keys[k]) + diffusers_lora_prefix = ["", "unet."] + for p in diffusers_lora_prefix: + diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_")) + if diffusers_lora_key.endswith(".to_out.0"): + diffusers_lora_key = diffusers_lora_key[:-2] + key_map[diffusers_lora_key] = unet_key return key_map def set_attr(obj, attr, value): diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index 15377af14..b80c8b9a2 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -2,6 +2,35 @@ import torch from nodes import MAX_RESOLUTION +def composite(destination, source, x, y, mask = None, multiplier = 8): + x = max(-source.shape[3] * multiplier, min(x, destination.shape[3] * multiplier)) + y = max(-source.shape[2] * multiplier, min(y, destination.shape[2] * multiplier)) + + left, top = (x // multiplier, y // multiplier) + right, bottom = (left + source.shape[3], top + source.shape[2],) + + + if mask is None: + mask = torch.ones_like(source) + else: + mask = mask.clone() + mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear") + mask = mask.repeat((source.shape[0], source.shape[1], 1, 1)) + + # calculate the bounds of the source that will be overlapping the destination + # this prevents the source trying to overwrite latent pixels that are out of bounds + # of the destination + visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),) + + mask = mask[:, :, :visible_height, :visible_width] + inverse_mask = torch.ones_like(mask) - mask + + source_portion = mask * source[:, :, :visible_height, :visible_width] + destination_portion = inverse_mask * destination[:, :, top:bottom, left:right] + + destination[:, :, top:bottom, left:right] = source_portion + destination_portion + return destination + class LatentCompositeMasked: @classmethod def INPUT_TYPES(s): @@ -25,36 +54,31 @@ class LatentCompositeMasked: output = destination.copy() destination = destination["samples"].clone() source = source["samples"] + output["samples"] = composite(destination, source, x, y, mask, 8) + return (output,) - x = max(-source.shape[3] * 8, min(x, destination.shape[3] * 8)) - y = max(-source.shape[2] * 8, min(y, destination.shape[2] * 8)) +class ImageCompositeMasked: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "destination": ("IMAGE",), + "source": ("IMAGE",), + "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}), + }, + "optional": { + "mask": ("MASK",), + } + } + RETURN_TYPES = ("IMAGE",) + FUNCTION = "composite" - left, top = (x // 8, y // 8) - right, bottom = (left + source.shape[3], top + source.shape[2],) - - - if mask is None: - mask = torch.ones_like(source) - else: - mask = mask.clone() - mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear") - mask = 
diff --git a/cuda_malloc.py b/cuda_malloc.py
index d033529cc..144cdacd3 100644
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@@ -36,13 +36,15 @@ def get_gpu_names():
     else:
         return set()
 
-def cuda_malloc_supported():
-    blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
-                 "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
-                 "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
-                 "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000",
-                 "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M"}
+blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
+             "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
+             "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
+             "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000",
+             "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M",
+             "GeForce GTX 1650", "GeForce GTX 1630"
+             }
 
+def cuda_malloc_supported():
     try:
         names = get_gpu_names()
     except:
diff --git a/folder_paths.py b/folder_paths.py
index eb7d39b88..e321690dd 100644
--- a/folder_paths.py
+++ b/folder_paths.py
@@ -43,6 +43,10 @@ def set_output_directory(output_dir):
     global output_directory
     output_directory = output_dir
 
+def set_temp_directory(temp_dir):
+    global temp_directory
+    temp_directory = temp_dir
+
 def get_output_directory():
     global output_directory
     return output_directory
@@ -111,6 +115,8 @@ def add_model_folder_path(folder_name, full_folder_path):
     global folder_names_and_paths
     if folder_name in folder_names_and_paths:
         folder_names_and_paths[folder_name][0].append(full_folder_path)
+    else:
+        folder_names_and_paths[folder_name] = ([full_folder_path], set())
 
 def get_folder_paths(folder_name):
     return folder_names_and_paths[folder_name][0][:]
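Two behavioral changes here. Hoisting `blacklist` to module scope lets `main.py` import it for the startup warning added below, and the new GTX 1650/1630 entries make those cards default to the non-async allocator. In `folder_paths`, `add_model_folder_path` used to drop paths for unknown folder names silently; the `else` branch now registers the category with an empty extension set, so custom nodes can introduce their own model folders. A sketch of the new behavior (the category name is made up; before this change `get_folder_paths` would raise `KeyError`):

```python
import folder_paths

folder_paths.add_model_folder_path("custom_upscalers", "/data/models/upscale")
print(folder_paths.get_folder_paths("custom_upscalers"))  # ['/data/models/upscale']
```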
diff --git a/main.py b/main.py
index 07ebbd701..a4038db4b 100644
--- a/main.py
+++ b/main.py
@@ -72,6 +72,17 @@ from server import BinaryEventTypes
 from nodes import init_custom_nodes
 import comfy.model_management
 
+def cuda_malloc_warning():
+    device = comfy.model_management.get_torch_device()
+    device_name = comfy.model_management.get_torch_device_name(device)
+    cuda_malloc_warning = False
+    if "cudaMallocAsync" in device_name:
+        for b in cuda_malloc.blacklist:
+            if b in device_name:
+                cuda_malloc_warning = True
+        if cuda_malloc_warning:
+            print("\nWARNING: this card most likely does not support cuda-malloc, if you get \"CUDA error\" please run ComfyUI with: --disable-cuda-malloc\n")
+
 def prompt_worker(q, server):
     e = execution.PromptExecutor(server)
     while True:
@@ -100,7 +111,7 @@ def hijack_progress(server):
 
 
 def cleanup_temp():
-    temp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp")
+    temp_dir = folder_paths.get_temp_directory()
     if os.path.exists(temp_dir):
         shutil.rmtree(temp_dir, ignore_errors=True)
 
@@ -127,6 +138,10 @@ def load_extra_path_config(yaml_path):
 
 if __name__ == "__main__":
+    if args.temp_directory:
+        temp_dir = os.path.join(os.path.abspath(args.temp_directory), "temp")
+        print(f"Setting temp directory to: {temp_dir}")
+        folder_paths.set_temp_directory(temp_dir)
     cleanup_temp()
 
     loop = asyncio.new_event_loop()
@@ -143,6 +158,9 @@ if __name__ == "__main__":
             load_extra_path_config(config_path)
 
     init_custom_nodes()
+
+    cuda_malloc_warning()
+
     server.add_routes()
     hijack_progress(server)
diff --git a/web/lib/litegraph.core.js b/web/lib/litegraph.core.js
index 2682ff309..356c71ac2 100644
--- a/web/lib/litegraph.core.js
+++ b/web/lib/litegraph.core.js
@@ -9766,6 +9766,7 @@ LGraphNode.prototype.executeAction = function(action)
 
             switch (w.type) {
                 case "button":
+                    ctx.fillStyle = background_color;
                     if (w.clicked) {
                         ctx.fillStyle = "#AAA";
                         w.clicked = false;
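A usage note for the new flag: the value passed to `--temp-directory` is not used verbatim. `main.py` appends a `temp` subfolder to its absolute form before handing it to `folder_paths.set_temp_directory`, and `cleanup_temp()` deletes that folder at startup, so the flag should not point at a directory whose contents you want to keep. The resulting path is computed exactly as in the hunk above (stand-in value for `args.temp_directory`):

```python
import os

args_temp_directory = "/scratch/comfy"  # stand-in for args.temp_directory
temp_dir = os.path.join(os.path.abspath(args_temp_directory), "temp")
print(temp_dir)  # /scratch/comfy/temp, which is what cleanup_temp() wipes
```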