Merge branch 'master' into rework-dynamic-prompts

2026-06-22 07:49:33 +08:00 · 2023-08-15 22:10:22 +10:00 · 2023-08-15 22:10:22 +10:00 · 7cdeffe955
commit 7cdeffe955
parent 1504f55cdf a2ce9655ca
20 changed files with 269 additions and 89 deletions
--- a/.github/workflows/windows_release_cu118_dependencies_2.yml
+++ b/.github/workflows/windows_release_cu118_dependencies_2.yml
@ -2,6 +2,13 @@ name: "Windows Release cu118 dependencies 2"
 on:
  workflow_dispatch:
    inputs:
      xformers:
        description: 'xformers version'
        required: true
        type: string
        default: "xformers"
 #  push:
 #    branches:
 #      - master
@ -17,7 +24,7 @@ jobs:
        - shell: bash
          run: |
-            python -m pip wheel --no-cache-dir torch torchvision torchaudio xformers --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
+            python -m pip wheel --no-cache-dir torch torchvision torchaudio ${{ inputs.xformers }} --extra-index-url https://download.pytorch.org/whl/cu118 -r requirements.txt pygit2 -w ./temp_wheel_dir
            python -m pip install --no-cache-dir ./temp_wheel_dir/*
            echo installed basic
            ls -lah temp_wheel_dir
--- a/1
+++ b/1
@ -0,0 +1 @@
 *       @comfyanonymous
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@ -38,7 +38,9 @@ parser.add_argument("--port", type=int, default=8188, help="Set the listen port.
 parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
 parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
 parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.")
 parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).")
 parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.")
 parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
 parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
 cm_group = parser.add_mutually_exclusive_group()
 cm_group.add_argument("--cuda-malloc", action="store_true", help="Enable cudaMallocAsync (enabled by default for torch 2.0 and up).")
@ -90,3 +92,6 @@ args = parser.parse_args()
 if args.windows_standalone_build:
    args.auto_launch = True
 if args.disable_auto_launch:
    args.auto_launch = False
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@ -24,8 +24,8 @@ class ClipVisionModel():
        return self.model.load_state_dict(sd, strict=False)
    def encode_image(self, image):
-        img = torch.clip((255. * image[0]), 0, 255).round().int()
+        img = torch.clip((255. * image), 0, 255).round().int()
-        inputs = self.processor(images=[img], return_tensors="pt")
+        inputs = self.processor(images=img, return_tensors="pt")
        outputs = self.model(**inputs)
        return outputs
--- a/comfy/k_diffusion/sampling.py
+++ b/comfy/k_diffusion/sampling.py
@ -631,23 +631,78 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl
                elif solver_type == 'midpoint':
                    x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)
            if eta:
                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise
        old_denoised = denoised
        h_last = h
    return x
@torch.no_grad()
 def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """DPM-Solver++(3M) SDE.

    Evaluates the model once per sigma interval and corrects each step with
    up to two previous denoised estimates, optionally adding noise after the
    step.

    Args:
        model: Callable invoked as ``model(x, sigma * s_in, **extra_args)``;
            its result is used as the denoised estimate.
        x: Starting tensor; ``x.shape[0]`` sizes the per-sample sigma vector.
        sigmas: Tensor of noise levels; step ``i`` moves from ``sigmas[i]``
            to ``sigmas[i + 1]``.
        extra_args: Optional dict of keyword arguments forwarded to
            ``model``. Its ``"seed"`` entry, if present, seeds the default
            noise sampler.
        callback: Optional callable that receives a dict with the keys
            ``x``, ``i``, ``sigma``, ``sigma_hat`` and ``denoised`` after
            every model evaluation.
        disable: Passed to ``trange`` as ``disable``.
        eta: Scales the stochastic part of the step; a falsy value skips the
            noise injection entirely.
        s_noise: Multiplier applied to the injected noise.
        noise_sampler: Optional callable ``(sigma, sigma_next) -> noise``.
            When omitted, a ``BrownianTreeNoiseSampler`` with ``cpu=True`` is
            built from the sigma range.

    Returns:
        The tensor ``x`` after the last step.
    """
    # Normalise extra_args before anything reads from it, so the documented
    # default of None is actually usable.
    extra_args = {} if extra_args is None else extra_args
    if noise_sampler is None:
        # The sigma range is only needed to build the default sampler.
        sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
        noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=True)
    s_in = x.new_ones([x.shape[0]])

    denoised_1, denoised_2 = None, None
    # h is initialised too: if the very first step already ends at sigma 0
    # the else-branch below never assigns it, and the history update at the
    # end of the loop body would otherwise hit an unbound local.
    h, h_1, h_2 = None, None, None

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        if sigmas[i + 1] == 0:
            # Denoising step
            x = denoised
        else:
            # Work in negative log-sigma; h is the step size in that space.
            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
            h = s - t
            h_eta = h * (eta + 1)

            # First-order update.
            x = torch.exp(-h_eta) * x + (-h_eta).expm1().neg() * denoised

            if h_2 is not None:
                # Two previous steps available: third-order correction.
                r0 = h_1 / h
                r1 = h_2 / h
                d1_0 = (denoised - denoised_1) / r0
                d1_1 = (denoised_1 - denoised_2) / r1
                d1 = d1_0 + (d1_0 - d1_1) * r0 / (r0 + r1)
                d2 = (d1_0 - d1_1) / (r0 + r1)
                phi_2 = h_eta.neg().expm1() / h_eta + 1
                phi_3 = phi_2 / h_eta - 0.5
                x = x + phi_2 * d1 - phi_3 * d2
            elif h_1 is not None:
                # Only one previous step available: second-order correction.
                r = h_1 / h
                d = (denoised - denoised_1) / r
                phi_2 = h_eta.neg().expm1() / h_eta + 1
                x = x + phi_2 * d

            if eta:
                x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * h * eta).expm1().neg().sqrt() * s_noise

        # Shift the multistep history by one step.
        denoised_1, denoised_2 = denoised, denoised_1
        h_1, h_2 = h, h_1
    return x
@torch.no_grad()
 def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """Run ``sample_dpmpp_3m_sde`` with a default noise sampler built with ``cpu=False``.

    All arguments are forwarded unchanged to ``sample_dpmpp_3m_sde``. The
    only difference is the default sampler: when ``noise_sampler`` is None a
    ``BrownianTreeNoiseSampler`` is created with ``cpu=False`` (presumably so
    the noise is generated on the device of ``x`` -- confirm against the
    sampler's implementation).

    Returns:
        Whatever ``sample_dpmpp_3m_sde`` returns.
    """
    # extra_args defaults to None; normalise it so the seed lookup below and
    # the wrapped sampler both receive a dict.
    extra_args = {} if extra_args is None else extra_args
    if noise_sampler is None:
        sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
        noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False)
    return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler)
@torch.no_grad()
 def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
    """Run ``sample_dpmpp_2m_sde`` with a default noise sampler built with ``cpu=False``.

    All arguments, including ``solver_type``, are forwarded unchanged to
    ``sample_dpmpp_2m_sde``. When ``noise_sampler`` is None a
    ``BrownianTreeNoiseSampler`` is created with ``cpu=False`` and seeded
    from ``extra_args["seed"]`` if that key is present.

    Returns:
        Whatever ``sample_dpmpp_2m_sde`` returns.
    """
    # extra_args defaults to None; normalise it so the seed lookup below and
    # the wrapped sampler both receive a dict.
    extra_args = {} if extra_args is None else extra_args
    if noise_sampler is None:
        sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
        noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False)
    return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type)
@torch.no_grad()
 def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
    """Run ``sample_dpmpp_sde`` with a default noise sampler built with ``cpu=False``.

    All arguments, including ``r``, are forwarded unchanged to
    ``sample_dpmpp_sde``. When ``noise_sampler`` is None a
    ``BrownianTreeNoiseSampler`` is created with ``cpu=False`` and seeded
    from ``extra_args["seed"]`` if that key is present.

    Returns:
        Whatever ``sample_dpmpp_sde`` returns.
    """
    # extra_args defaults to None; normalise it so the seed lookup below and
    # the wrapped sampler both receive a dict.
    extra_args = {} if extra_args is None else extra_args
    if noise_sampler is None:
        sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
        noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False)
    return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=r)
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@ -105,6 +105,29 @@ class BaseModel(torch.nn.Module):
        return {**unet_state_dict, **vae_state_dict, **clip_state_dict}
 def unclip_adm(unclip_conditioning, device, noise_augmentor, noise_augment_merge=0.0):
    """Turn unCLIP conditioning entries into a single ADM tensor.

    Every image embedding of every entry is noise-augmented, concatenated
    with its noise-level embedding along dim 1 and scaled by the entry's
    strength. If more than one embedding was processed, the results are
    summed, the first ``noise_augmentor.time_embed.dim`` columns are
    augmented again using ``noise_augment_merge`` and re-concatenated with
    the new noise-level embedding.

    Args:
        unclip_conditioning: Iterable of dicts providing
            ``"clip_vision_output"`` (an object with ``image_embeds``),
            ``"strength"`` and ``"noise_augmentation"``.
        device: Device the embeddings and noise-level tensors are put on.
        noise_augmentor: Callable ``(x, noise_level=...)`` returning the
            augmented embedding and the noise-level embedding; must expose
            ``max_noise_level`` and ``time_embed.dim``.
        noise_augment_merge: Augmentation amount for the merge pass.

    Returns:
        The last single result, or the merged tensor when several
        embeddings were processed.
    """
    def _augment(embedding, amount):
        # Map the fractional amount onto the augmentor's integer noise levels.
        level = round((noise_augmentor.max_noise_level - 1) * amount)
        augmented, level_emb = noise_augmentor(embedding, noise_level=torch.tensor([level], device=device))
        return torch.cat((augmented, level_emb), 1)

    adm_inputs = []
    for entry in unclip_conditioning:
        for image_embed in entry["clip_vision_output"].image_embeds:
            strength = entry["strength"]
            amount = entry["noise_augmentation"]
            adm_out = _augment(image_embed.to(device), amount) * strength
            adm_inputs.append(adm_out)

    if len(adm_inputs) > 1:
        # Several embeddings: sum them, then re-augment the embedding part.
        summed = torch.stack(adm_inputs).sum(0)
        adm_out = _augment(summed[:, :noise_augmentor.time_embed.dim], noise_augment_merge)

    return adm_out
 class SD21UNCLIP(BaseModel):
    def __init__(self, model_config, noise_aug_config, model_type=ModelType.V_PREDICTION, device=None):
@ -114,33 +137,11 @@ class SD21UNCLIP(BaseModel):
    def encode_adm(self, **kwargs):
        unclip_conditioning = kwargs.get("unclip_conditioning", None)
        device = kwargs["device"]
-
+        if unclip_conditioning is None:
-        if unclip_conditioning is not None:
+            return torch.zeros((1, self.adm_channels))
            adm_inputs = []
            weights = []
            noise_aug = []
            for unclip_cond in unclip_conditioning:
                adm_cond = unclip_cond["clip_vision_output"].image_embeds
                weight = unclip_cond["strength"]
                noise_augment = unclip_cond["noise_augmentation"]
                noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = self.noise_augmentor(adm_cond.to(device), noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1) * weight
                weights.append(weight)
                noise_aug.append(noise_augment)
                adm_inputs.append(adm_out)
            if len(noise_aug) > 1:
                adm_out = torch.stack(adm_inputs).sum(0)
                #TODO: add a way to control this
                noise_augment = 0.05
                noise_level = round((self.noise_augmentor.max_noise_level - 1) * noise_augment)
                c_adm, noise_level_emb = self.noise_augmentor(adm_out[:, :self.noise_augmentor.time_embed.dim], noise_level=torch.tensor([noise_level], device=device))
                adm_out = torch.cat((c_adm, noise_level_emb), 1)
        else:
-            adm_out = torch.zeros((1, self.adm_channels))
+            return unclip_adm(unclip_conditioning, device, self.noise_augmentor, kwargs.get("unclip_noise_augment_merge", 0.05))
        return adm_out
 class SDInpaint(BaseModel):
    def __init__(self, model_config, model_type=ModelType.EPS, device=None):
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@ -113,6 +113,7 @@ def model_config_from_unet_config(unet_config):
        if model_config.matches(unet_config):
            return model_config(unet_config)
    print("no match", unet_config)
    return None
 def model_config_from_unet(state_dict, unet_key_prefix, use_fp16):
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@ -189,6 +189,7 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
                    continue
                to_run += [(p, COND)]
            if uncond is not None:
                for x in uncond:
                    p = get_area_and_mult(x, x_in, cond_concat_in, timestep)
                    if p is None:
@ -282,6 +283,9 @@ def sampling_function(model_function, x, timestep, uncond, cond, cond_scale, con
        max_total_area = model_management.maximum_batch_area()
        if math.isclose(cond_scale, 1.0):
            uncond = None
        cond, uncond = calc_cond_uncond_batch(model_function, cond, uncond, x, timestep, max_total_area, cond_concat, model_options)
        if "sampler_cfg_function" in model_options:
            args = {"cond": cond, "uncond": uncond, "cond_scale": cond_scale, "timestep": timestep}
@ -343,6 +347,17 @@ def ddim_scheduler(model, steps):
    sigs += [0.0]
    return torch.FloatTensor(sigs)
 def sgm_scheduler(model, steps):
    """Build a sigma schedule from evenly spaced integer timesteps.

    ``steps`` timesteps are taken from a linear ramp that starts at
    ``num_timesteps - 1`` and heads towards 0 (the final 0 itself is
    dropped), truncated to integers and capped at 999. Each one is converted
    with ``model.t_to_sigma`` and a trailing ``0.0`` is appended.

    Args:
        model: Object exposing ``inner_model.inner_model.num_timesteps`` and
            a ``t_to_sigma`` callable.
        steps: Number of non-zero sigmas to produce.

    Returns:
        A ``torch.FloatTensor`` with ``steps + 1`` entries.
    """
    last_timestep = model.inner_model.inner_model.num_timesteps - 1
    timesteps = torch.linspace(last_timestep, 0, steps + 1)[:-1].type(torch.int)
    # Timesteps above 999 are clamped before the sigma lookup.
    sigmas = [model.t_to_sigma(torch.tensor(999 if ts > 999 else ts)) for ts in timesteps]
    sigmas.append(0.0)
    return torch.FloatTensor(sigmas)
 def blank_inpaint_image_like(latent_image):
    blank_image = torch.ones_like(latent_image)
    # these are the values for "zero" in pixel space translated to latent space
@ -521,10 +536,10 @@ def encode_adm(model, conds, batch_size, width, height, device, prompt_type):
 class KSampler:
-    SCHEDULERS = ["normal", "karras", "exponential", "simple", "ddim_uniform"]
+    SCHEDULERS = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform"]
    SAMPLERS = ["euler", "euler_ancestral", "heun", "dpm_2", "dpm_2_ancestral",
                "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu",
-                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"]
+                "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddim", "uni_pc", "uni_pc_bh2"]
    def __init__(self, model, steps, device, sampler=None, scheduler=None, denoise=None, model_options={}):
        self.model = model
@ -566,6 +581,8 @@ class KSampler:
            sigmas = simple_scheduler(self.model_wrap, steps)
        elif self.scheduler == "ddim_uniform":
            sigmas = ddim_scheduler(self.model_wrap, steps)
        elif self.scheduler == "sgm_uniform":
            sigmas = sgm_scheduler(self.model_wrap, steps)
        else:
            print("error invalid scheduler", self.scheduler)
--- a/comfy/sd.py
+++ b/comfy/sd.py
@ -72,6 +72,7 @@ def load_lora(lora, to_load):
        regular_lora = "{}.lora_up.weight".format(x)
        diffusers_lora = "{}_lora.up.weight".format(x)
        transformers_lora = "{}.lora_linear_layer.up.weight".format(x)
        A_name = None
        if regular_lora in lora.keys():
@ -82,6 +83,10 @@ def load_lora(lora, to_load):
            A_name = diffusers_lora
            B_name = "{}_lora.down.weight".format(x)
            mid_name = None
        elif transformers_lora in lora.keys():
            A_name = transformers_lora
            B_name ="{}.lora_linear_layer.down.weight".format(x)
            mid_name = None
        if A_name is not None:
            mid = None
@ -181,20 +186,29 @@ def model_lora_keys_clip(model, key_map={}):
                key_map[lora_key] = k
                lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c])
                key_map[lora_key] = k
                lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
                key_map[lora_key] = k
            k = "clip_l.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
            if k in sdk:
                lora_key = "lora_te1_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
                key_map[lora_key] = k
                clip_l_present = True
                lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
                key_map[lora_key] = k
            k = "clip_g.transformer.text_model.encoder.layers.{}.{}.weight".format(b, c)
            if k in sdk:
                if clip_l_present:
                    lora_key = "lora_te2_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #SDXL base
                    key_map[lora_key] = k
                    lora_key = "text_encoder_2.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
                    key_map[lora_key] = k
                else:
                    lora_key = "lora_te_text_model_encoder_layers_{}_{}".format(b, LORA_CLIP_MAP[c]) #TODO: test if this is correct for SDXL-Refiner
                    key_map[lora_key] = k
                    lora_key = "text_encoder.text_model.encoder.layers.{}.{}".format(b, c) #diffusers lora
                    key_map[lora_key] = k
    return key_map
@ -209,13 +223,16 @@ def model_lora_keys_unet(model, key_map={}):
    diffusers_keys = utils.unet_to_diffusers(model.model_config.unet_config)
    for k in diffusers_keys:
        if k.endswith(".weight"):
            unet_key = "diffusion_model.{}".format(diffusers_keys[k])
            key_lora = k[:-len(".weight")].replace(".", "_")
-            key_map["lora_unet_{}".format(key_lora)] = "diffusion_model.{}".format(diffusers_keys[k])
+            key_map["lora_unet_{}".format(key_lora)] = unet_key
-            diffusers_lora_key = "unet.{}".format(k[:-len(".weight")].replace(".to_", ".processor.to_"))
+            diffusers_lora_prefix = ["", "unet."]
            for p in diffusers_lora_prefix:
                diffusers_lora_key = "{}{}".format(p, k[:-len(".weight")].replace(".to_", ".processor.to_"))
                if diffusers_lora_key.endswith(".to_out.0"):
                    diffusers_lora_key = diffusers_lora_key[:-2]
-            key_map[diffusers_lora_key] = "diffusion_model.{}".format(diffusers_keys[k])
+                key_map[diffusers_lora_key] = unet_key
    return key_map
 def set_attr(obj, attr, value):
@ -878,7 +895,7 @@ def load_controlnet(ckpt_path, model=None):
        use_fp16 = model_management.should_use_fp16()
        controlnet_config = model_detection.model_config_from_unet(controlnet_data, prefix, use_fp16).unet_config
    controlnet_config.pop("out_channels")
-    controlnet_config["hint_channels"] = 3
+    controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1]
    control_model = cldm.ControlNet(**controlnet_config)
    if pth:
--- a/comfy_extras/nodes_mask.py
+++ b/comfy_extras/nodes_mask.py
@ -2,6 +2,35 @@ import torch
 from nodes import MAX_RESOLUTION
 def composite(destination, source, x, y, mask = None, multiplier = 8):
    """Paste ``source`` onto ``destination`` in place, optionally through a mask.

    Both tensors are indexed as (batch, channel, height, width). ``x`` and
    ``y`` are divided by ``multiplier`` to obtain the paste offset in
    destination elements. The part of ``source`` that would fall past the
    right or bottom edge of ``destination`` is cropped away.

    Args:
        destination: Tensor that is modified in place and returned.
        source: Tensor to paste.
        x: Horizontal offset, in units of ``multiplier`` destination columns.
        y: Vertical offset, in units of ``multiplier`` destination rows.
        mask: Optional 2-D blend weights; resized bilinearly to the source's
            height and width and repeated over batch and channels. ``None``
            means the source fully replaces the destination.
        multiplier: Scale between ``x``/``y`` and destination indices.

    Returns:
        ``destination`` (the same tensor object).
    """
    src_h, src_w = source.shape[2], source.shape[3]
    dst_h, dst_w = destination.shape[2], destination.shape[3]

    # Clamp the requested offset to the range the original code allows.
    x = max(-src_w * multiplier, min(x, dst_w * multiplier))
    y = max(-src_h * multiplier, min(y, dst_h * multiplier))

    left = x // multiplier
    top = y // multiplier
    region = (slice(None), slice(None), slice(top, top + src_h), slice(left, left + src_w))

    if mask is not None:
        mask = mask.clone()
        mask = torch.nn.functional.interpolate(mask[None, None], size=(src_h, src_w), mode="bilinear")
        mask = mask.repeat((source.shape[0], source.shape[1], 1, 1))
    else:
        mask = torch.ones_like(source)

    # calculate the bounds of the source that will be overlapping the destination
    # this prevents the source trying to overwrite latent pixels that are out of bounds
    # of the destination
    visible_width = dst_w - left + min(0, x)
    visible_height = dst_h - top + min(0, y)

    mask = mask[:, :, :visible_height, :visible_width]
    keep = torch.ones_like(mask) - mask

    pasted = mask * source[:, :, :visible_height, :visible_width]
    kept = keep * destination[region]
    destination[region] = pasted + kept
    return destination
 class LatentCompositeMasked:
    @classmethod
    def INPUT_TYPES(s):
@ -25,36 +54,31 @@ class LatentCompositeMasked:
        output = destination.copy()
        destination = destination["samples"].clone()
        source = source["samples"]
        output["samples"] = composite(destination, source, x, y, mask, 8)
        return (output,)
-        x = max(-source.shape[3] * 8, min(x, destination.shape[3] * 8))
+class ImageCompositeMasked:
-        y = max(-source.shape[2] * 8, min(y, destination.shape[2] * 8))
+    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "destination": ("IMAGE",),
                "source": ("IMAGE",),
                "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}),
                "y": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 1}),
            },
            "optional": {
                "mask": ("MASK",),
            }
        }
    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "composite"
-        left, top = (x // 8, y // 8)
+    CATEGORY = "image"
        right, bottom = (left + source.shape[3], top + source.shape[2],)
        if mask is None:
            mask = torch.ones_like(source)
        else:
            mask = mask.clone()
            mask = torch.nn.functional.interpolate(mask[None, None], size=(source.shape[2], source.shape[3]), mode="bilinear")
            mask = mask.repeat((source.shape[0], source.shape[1], 1, 1))
        # calculate the bounds of the source that will be overlapping the destination
        # this prevents the source trying to overwrite latent pixels that are out of bounds
        # of the destination
        visible_width, visible_height = (destination.shape[3] - left + min(0, x), destination.shape[2] - top + min(0, y),)
        mask = mask[:, :, :visible_height, :visible_width]
        inverse_mask = torch.ones_like(mask) - mask
        source_portion = mask * source[:, :, :visible_height, :visible_width]
        destination_portion = inverse_mask  * destination[:, :, top:bottom, left:right]
        destination[:, :, top:bottom, left:right] = source_portion + destination_portion
        output["samples"] = destination
    def composite(self, destination, source, x, y, mask = None):
        destination = destination.clone().movedim(-1, 1)
        output = composite(destination, source.movedim(-1, 1), x, y, mask, 1).movedim(1, -1)
        return (output,)
 class MaskToImage:
@ -253,6 +277,7 @@ class FeatherMask:
 NODE_CLASS_MAPPINGS = {
    "LatentCompositeMasked": LatentCompositeMasked,
    "ImageCompositeMasked": ImageCompositeMasked,
    "MaskToImage": MaskToImage,
    "ImageToMask": ImageToMask,
    "SolidMask": SolidMask,
--- a/comfy_extras/nodes_post_processing.py
+++ b/comfy_extras/nodes_post_processing.py
@ -59,8 +59,8 @@ class Blend:
    def g(self, x):
        return torch.where(x <= 0.25, ((16 * x - 12) * x + 4) * x, torch.sqrt(x))
-def gaussian_kernel(kernel_size: int, sigma: float):
+def gaussian_kernel(kernel_size: int, sigma: float, device=None):
-    x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size), torch.linspace(-1, 1, kernel_size), indexing="ij")
+    x, y = torch.meshgrid(torch.linspace(-1, 1, kernel_size, device=device), torch.linspace(-1, 1, kernel_size, device=device), indexing="ij")
    d = torch.sqrt(x * x + y * y)
    g = torch.exp(-(d * d) / (2.0 * sigma * sigma))
    return g / g.sum()
@ -101,7 +101,7 @@ class Blur:
        batch_size, height, width, channels = image.shape
        kernel_size = blur_radius * 2 + 1
-        kernel = gaussian_kernel(kernel_size, sigma).repeat(channels, 1, 1).unsqueeze(1)
+        kernel = gaussian_kernel(kernel_size, sigma, device=image.device).repeat(channels, 1, 1).unsqueeze(1)
        image = image.permute(0, 3, 1, 2) # Torch wants (B, C, H, W) we use (B, H, W, C)
        padded_image = F.pad(image, (blur_radius,blur_radius,blur_radius,blur_radius), 'reflect')
--- a/cuda_malloc.py
+++ b/cuda_malloc.py
@ -36,13 +36,15 @@ def get_gpu_names():
    else:
        return set()
 def cuda_malloc_supported():
 blacklist = {"GeForce GTX TITAN X", "GeForce GTX 980", "GeForce GTX 970", "GeForce GTX 960", "GeForce GTX 950", "GeForce 945M",
                "GeForce 940M", "GeForce 930M", "GeForce 920M", "GeForce 910M", "GeForce GTX 750", "GeForce GTX 745", "Quadro K620",
                "Quadro K1200", "Quadro K2200", "Quadro M500", "Quadro M520", "Quadro M600", "Quadro M620", "Quadro M1000",
                "Quadro M1200", "Quadro M2000", "Quadro M2200", "Quadro M3000", "Quadro M4000", "Quadro M5000", "Quadro M5500", "Quadro M6000",
-                 "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M"}
+                "GeForce MX110", "GeForce MX130", "GeForce 830M", "GeForce 840M", "GeForce GTX 850M", "GeForce GTX 860M",
                "GeForce GTX 1650", "GeForce GTX 1630"
                }
 def cuda_malloc_supported():
    try:
        names = get_gpu_names()
    except:
--- a/custom_nodes/example_node.py.example
+++ b/custom_nodes/example_node.py.example
@ -51,9 +51,10 @@ class Example:
                    "default": 0, 
                    "min": 0, #Minimum value
                    "max": 4096, #Maximum value
-                    "step": 64 #Slider's step
+                    "step": 64, #Slider's step
                    "display": "number" # Cosmetic only: display as "number" or "slider"
                }),
-                "float_field": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
+                "float_field": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01, "display": "number"}),
                "print_to_screen": (["enable", "disable"],),
                "string_field": ("STRING", {
                    "multiline": False, #True if you want the field to look like the one on the ClipTextEncode node
--- a/folder_paths.py
+++ b/folder_paths.py
@ -43,6 +43,10 @@ def set_output_directory(output_dir):
    global output_directory
    output_directory = output_dir
 def set_temp_directory(temp_dir):
    """Replace the module-level ``temp_directory`` with ``temp_dir``."""
    global temp_directory
    temp_directory = temp_dir
 def get_output_directory():
    global output_directory
    return output_directory
@ -111,6 +115,8 @@ def add_model_folder_path(folder_name, full_folder_path):
    global folder_names_and_paths
    if folder_name in folder_names_and_paths:
        folder_names_and_paths[folder_name][0].append(full_folder_path)
    else:
        folder_names_and_paths[folder_name] = ([full_folder_path], set())
 def get_folder_paths(folder_name):
    return folder_names_and_paths[folder_name][0][:]
--- a/main.py
+++ b/main.py
@ -72,6 +72,17 @@ from server import BinaryEventTypes
 from nodes import init_custom_nodes
 import comfy.model_management
 def cuda_malloc_warning():
    """Print a warning when cudaMallocAsync is active on a blacklisted card.

    The torch device name is inspected: if it mentions ``cudaMallocAsync``
    and contains any entry of ``cuda_malloc.blacklist``, a hint to restart
    with ``--disable-cuda-malloc`` is printed. Otherwise nothing happens.
    """
    device = comfy.model_management.get_torch_device()
    device_name = comfy.model_management.get_torch_device_name(device)
    if "cudaMallocAsync" not in device_name:
        return
    # One blacklisted substring in the device name is enough to warn.
    if any(card in device_name for card in cuda_malloc.blacklist):
        print("\nWARNING: this card most likely does not support cuda-malloc, if you get \"CUDA error\" please run ComfyUI with: --disable-cuda-malloc\n")
 def prompt_worker(q, server):
    e = execution.PromptExecutor(server)
    while True:
@ -100,7 +111,7 @@ def hijack_progress(server):
 def cleanup_temp():
-    temp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "temp")
+    temp_dir = folder_paths.get_temp_directory()
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir, ignore_errors=True)
@ -127,6 +138,10 @@ def load_extra_path_config(yaml_path):
 if __name__ == "__main__":
    if args.temp_directory:
        temp_dir = os.path.join(os.path.abspath(args.temp_directory), "temp")
        print(f"Setting temp directory to: {temp_dir}")
        folder_paths.set_temp_directory(temp_dir)
    cleanup_temp()
    loop = asyncio.new_event_loop()
@ -143,6 +158,9 @@ if __name__ == "__main__":
            load_extra_path_config(config_path)
    init_custom_nodes()
    cuda_malloc_warning()
    server.add_routes()
    hijack_progress(server)
--- a/nodes.py
+++ b/nodes.py
@ -773,7 +773,7 @@ class StyleModelApply:
    CATEGORY = "conditioning/style_model"
    def apply_stylemodel(self, clip_vision_output, style_model, conditioning):
-        cond = style_model.get_cond(clip_vision_output)
+        cond = style_model.get_cond(clip_vision_output).flatten(start_dim=0, end_dim=1).unsqueeze(dim=0)
        c = []
        for t in conditioning:
            n = [torch.cat((t[0], cond), dim=1), t[1].copy()]
@ -1450,6 +1450,22 @@ class ImageInvert:
        s = 1.0 - image
        return (s,)
 class ImageBatch:
    """Node that concatenates two image tensors along dimension 0."""

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "batch"
    CATEGORY = "image"

    @classmethod
    def INPUT_TYPES(s):
        # Both inputs are required and share the same socket type.
        image_input = ("IMAGE",)
        return {"required": {"image1": image_input, "image2": image_input}}

    def batch(self, image1, image2):
        """Return a 1-tuple holding ``image1`` and ``image2`` joined on dim 0.

        When the two tensors differ in any dimension after the first,
        ``image2`` is first resized with ``comfy.utils.common_upscale``
        (bilinear, center crop) using ``image1.shape[2]`` and
        ``image1.shape[1]`` as the target size.
        """
        if image1.shape[1:] != image2.shape[1:]:
            # common_upscale is fed a channels-first view; move the last
            # dimension to position 1 and back again afterwards.
            channels_first = image2.movedim(-1,1)
            resized = comfy.utils.common_upscale(channels_first, image1.shape[2], image1.shape[1], "bilinear", "center")
            image2 = resized.movedim(1,-1)
        return (torch.cat((image1, image2), dim=0),)
 class ImagePadForOutpaint:
@ -1555,6 +1571,7 @@ NODE_CLASS_MAPPINGS = {
    "ImageScale": ImageScale,
    "ImageScaleBy": ImageScaleBy,
    "ImageInvert": ImageInvert,
    "ImageBatch": ImageBatch,
    "ImagePadForOutpaint": ImagePadForOutpaint,
    "ConditioningAverage ": ConditioningAverage ,
    "ConditioningCombine": ConditioningCombine,
@ -1651,6 +1668,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
    "ImageUpscaleWithModel": "Upscale Image (using Model)",
    "ImageInvert": "Invert Image",
    "ImagePadForOutpaint": "Pad Image for Outpainting",
    "ImageBatch": "Batch Images",
    # Prompts
    "DynamicPrompt": "Dynamic Prompt",
    # _for_testing
--- a/requirements.txt
+++ b/requirements.txt
@ -9,3 +9,4 @@ pyyaml
 Pillow
 scipy
 tqdm
 psutil
--- a/web/extensions/core/linkRenderMode.js
+++ b/web/extensions/core/linkRenderMode.js
@ -1,4 +1,4 @@
-import { app } from "/scripts/app.js";
+import { app } from "../../scripts/app.js";
 const id = "Comfy.LinkRenderMode";
 const ext = {
--- a/web/lib/litegraph.core.js
+++ b/web/lib/litegraph.core.js
@ -9766,6 +9766,7 @@ LGraphNode.prototype.executeAction = function(action)
            switch (w.type) {
                case "button":
                    ctx.fillStyle = background_color;
                    if (w.clicked) {
                        ctx.fillStyle = "#AAA";
                        w.clicked = false;
@ -9835,7 +9836,11 @@ LGraphNode.prototype.executeAction = function(action)
                        ctx.textAlign = "center";
                        ctx.fillStyle = text_color;
                        ctx.fillText(
-                            w.label || w.name + "  " + Number(w.value).toFixed(3),
+                            w.label || w.name + "  " + Number(w.value).toFixed(
                                                            w.options.precision != null
                                                                ? w.options.precision
                                                                : 3
                                                        ),
                            widget_width * 0.5,
                            y + H * 0.7
                        );
@ -13835,7 +13840,7 @@ LGraphNode.prototype.executeAction = function(action)
        if (!disabled) {
            element.addEventListener("click", inner_onclick);
        }
-        if (options.autoopen) {
+        if (!disabled && options.autoopen) {
 			LiteGraph.pointerListenerAdd(element,"enter",inner_over);
        }
--- a/web/scripts/widgets.js
+++ b/web/scripts/widgets.js
@ -433,7 +433,7 @@ export const ComfyWidgets = {
 		// Add handler to check if an image is being dragged over our node
 		node.onDragOver = function (e) {
 			if (e.dataTransfer && e.dataTransfer.items) {
-				const image = [...e.dataTransfer.items].find((f) => f.kind === "file" && f.type.startsWith("image/"));
+				const image = [...e.dataTransfer.items].find((f) => f.kind === "file");
 				return !!image;
 			}