Compare commits

1 commit

Author: Johnpaul Chiwetelu
SHA1: c31f6531d4
Message: Merge 2d3a8d523d into e89b22993a
Date: 2026-01-23 21:44:00 +01:00
7 changed files with 22 additions and 43 deletions

View File

@@ -8,7 +8,6 @@ class LatentFormat:
     latent_rgb_factors_bias = None
     latent_rgb_factors_reshape = None
     taesd_decoder_name = None
-    spacial_downscale_ratio = 8
 
     def process_in(self, latent):
         return latent * self.scale_factor
@@ -182,7 +181,6 @@ class Flux(SD3):
 
 class Flux2(LatentFormat):
     latent_channels = 128
-    spacial_downscale_ratio = 16
 
     def __init__(self):
         self.latent_rgb_factors =[
@@ -751,7 +749,6 @@ class ACEAudio(LatentFormat):
 
 class ChromaRadiance(LatentFormat):
     latent_channels = 3
-    spacial_downscale_ratio = 1
 
     def __init__(self):
         self.latent_rgb_factors = [
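
The removed spacial_downscale_ratio recorded how many pixels each latent cell covered per spatial axis: 8 for the LatentFormat base class, 16 for Flux2, and 1 for ChromaRadiance, which works at pixel resolution. A minimal sketch of the relationship, using a hypothetical helper that is not part of the codebase:

# Minimal sketch: how a spatial downscale ratio maps pixel dimensions to
# latent dimensions. latent_spatial_size is a hypothetical helper, not a
# ComfyUI function; the ratios mirror the values removed in this diff.
def latent_spatial_size(width, height, spacial_downscale_ratio=8):
    return (width // spacial_downscale_ratio, height // spacial_downscale_ratio)

print(latent_spatial_size(1024, 1024))      # (128, 128) with the default ratio of 8
print(latent_spatial_size(1024, 1024, 16))  # (64, 64) for a Flux2-style latent
print(latent_spatial_size(1024, 1024, 1))   # (1024, 1024) for ChromaRadiance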

View File

@@ -5,7 +5,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
-from comfy.ldm.modules.diffusionmodules.model import vae_attention, torch_cat_if_needed
+from comfy.ldm.modules.diffusionmodules.model import vae_attention
 
 import comfy.ops
 ops = comfy.ops.disable_weight_init
@@ -20,29 +20,22 @@ class CausalConv3d(ops.Conv3d):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self._padding = 2 * self.padding[0]
-        self.padding = (0, self.padding[1], self.padding[2])
+        self._padding = (self.padding[2], self.padding[2], self.padding[1],
+                         self.padding[1], 2 * self.padding[0], 0)
+        self.padding = (0, 0, 0)
 
     def forward(self, x, cache_x=None, cache_list=None, cache_idx=None):
         if cache_list is not None:
             cache_x = cache_list[cache_idx]
             cache_list[cache_idx] = None
 
-        if cache_x is None and x.shape[2] == 1:
-            #Fast path - the op will pad for us by truncating the weight
-            #and save math on a pile of zeros.
-            return super().forward(x, autopad="causal_zero")
-
-        if self._padding > 0:
-            padding_needed = self._padding
-            if cache_x is not None:
-                cache_x = cache_x.to(x.device)
-                padding_needed = max(0, padding_needed - cache_x.shape[2])
-            padding_shape = list(x.shape)
-            padding_shape[2] = padding_needed
-            padding = torch.zeros(padding_shape, device=x.device, dtype=x.dtype)
-            x = torch_cat_if_needed([padding, cache_x, x], dim=2)
+        padding = list(self._padding)
+        if cache_x is not None and self._padding[4] > 0:
+            cache_x = cache_x.to(x.device)
+            x = torch.cat([cache_x, x], dim=2)
+            padding[4] -= cache_x.shape[2]
+            del cache_x
+        x = F.pad(x, padding)
 
         return super().forward(x)
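
The restored __init__ re-expresses the layer's symmetric padding as an explicit F.pad specification. F.pad takes pairs ordered from the last dimension inward, so for an (N, C, T, H, W) tensor the six-tuple reads (W_left, W_right, H_top, H_bottom, T_front, T_back); putting 2 * self.padding[0] in the T_front slot and 0 in T_back pads only past frames, which is what keeps the convolution causal. A small standalone check of that padding layout (plain torch, not the class above):

# Standalone sketch of causal temporal padding with F.pad, assuming a
# temporal kernel of 3 (symmetric padding would be 1 per side, so the
# causal version pads 2 frames in front and none behind).
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 5, 8, 8)    # (batch, channels, T, H, W)
pad = (1, 1, 1, 1, 2, 0)          # (W_left, W_right, H_top, H_bottom, T_front, T_back)
y = F.pad(x, pad)
print(y.shape)                    # torch.Size([1, 4, 7, 10, 10]): time grows only at the front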

View File

@@ -203,9 +203,7 @@ class disable_weight_init:
        def reset_parameters(self):
            return None
 
-        def _conv_forward(self, input, weight, bias, autopad=None, *args, **kwargs):
-            if autopad == "causal_zero":
-                weight = weight[:, :, -input.shape[2]:, :, :]
+        def _conv_forward(self, input, weight, bias, *args, **kwargs):
            if NVIDIA_MEMORY_CONV_BUG_WORKAROUND and weight.dtype in (torch.float16, torch.bfloat16):
                out = torch.cudnn_convolution(input, weight, self.padding, self.stride, self.dilation, self.groups, benchmark=False, deterministic=False, allow_tf32=True)
                if bias is not None:
@@ -214,15 +212,15 @@ class disable_weight_init:
            else:
                return super()._conv_forward(input, weight, bias, *args, **kwargs)
 
-        def forward_comfy_cast_weights(self, input, autopad=None):
+        def forward_comfy_cast_weights(self, input):
            weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
-            x = self._conv_forward(input, weight, bias, autopad=autopad)
+            x = self._conv_forward(input, weight, bias)
            uncast_bias_weight(self, weight, bias, offload_stream)
            return x
 
        def forward(self, *args, **kwargs):
            run_every_op()
-            if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0 or "autopad" in kwargs:
+            if self.comfy_cast_weights or len(self.weight_function) > 0 or len(self.bias_function) > 0:
                return self.forward_comfy_cast_weights(*args, **kwargs)
            else:
                return super().forward(*args, **kwargs)
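
The removed causal_zero branch relied on the fact that zero frames padded onto the front of the time axis contribute nothing to the convolution, so slicing the kernel down to the input's temporal depth gives the same single-frame result without materializing the zeros. A sketch verifying that equivalence (plain torch, not the comfy.ops classes above):

# Sketch of the equivalence behind the removed fast path: for a one-frame
# input, convolving with only the last temporal slice of the kernel matches
# zero-padding two frames at the front. Plain torch, not comfy.ops.
import torch
import torch.nn.functional as F

weight = torch.randn(8, 4, 3, 3, 3)               # temporal kernel size 3
x = torch.randn(1, 4, 1, 8, 8)                    # a single frame

padded = F.pad(x, (0, 0, 0, 0, 2, 0))             # causal zero padding in time
full = F.conv3d(padded, weight, padding=(0, 1, 1))

sliced = F.conv3d(x, weight[:, :, -1:], padding=(0, 1, 1))
print(torch.allclose(full, sliced, atol=1e-5))    # True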

View File

@@ -37,18 +37,12 @@ def prepare_noise(latent_image, seed, noise_inds=None):
     return noises
 
-def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None):
+def fix_empty_latent_channels(model, latent_image):
     if latent_image.is_nested:
         return latent_image
     latent_format = model.get_model_object("latent_format") #Resize the empty latent image so it has the right number of channels
-    if torch.count_nonzero(latent_image) == 0:
-        if latent_format.latent_channels != latent_image.shape[1]:
-            latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_format.latent_channels, dim=1)
-        if downscale_ratio_spacial is not None:
-            if downscale_ratio_spacial != latent_format.spacial_downscale_ratio:
-                ratio = downscale_ratio_spacial / latent_format.spacial_downscale_ratio
-                latent_image = comfy.utils.common_upscale(latent_image, round(latent_image.shape[-1] * ratio), round(latent_image.shape[-2] * ratio), "nearest-exact", crop="disabled")
+    if latent_format.latent_channels != latent_image.shape[1] and torch.count_nonzero(latent_image) == 0:
+        latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_format.latent_channels, dim=1)
     if latent_format.latent_dimensions == 3 and latent_image.ndim == 4:
         latent_image = latent_image.unsqueeze(2)
     return latent_image
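
With the rescaling path reverted, fix_empty_latent_channels is back to two jobs: repeating the channel dimension of an all-zero latent until it matches the model's latent format, and adding a temporal axis when a 3D (video) format receives a 4-D latent. A rough standalone approximation of the channel repeat (not the comfy.utils implementation):

# Rough approximation of the channel fix-up: an all-zero 4-channel latent
# destined for a 16-channel model is tiled up to 16 channels. This mimics
# comfy.utils.repeat_to_batch_size along dim=1; it is not the real code.
import torch

def repeat_to_size(t, size, dim=1):
    reps = [1] * t.ndim
    reps[dim] = -(-size // t.shape[dim])   # ceil division
    return t.repeat(reps).narrow(dim, 0, size)

empty = torch.zeros(1, 4, 64, 64)
print(repeat_to_size(empty, 16).shape)     # torch.Size([1, 16, 64, 64])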

View File

@@ -741,7 +741,7 @@ class SamplerCustom(io.ComfyNode):
         latent = latent_image
         latent_image = latent["samples"]
         latent = latent.copy()
-        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None))
+        latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
         latent["samples"] = latent_image
 
         if not add_noise:
@@ -760,7 +760,6 @@ class SamplerCustom(io.ComfyNode):
         samples = comfy.sample.sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent_image, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=noise_seed)
 
         out = latent.copy()
-        out.pop("downscale_ratio_spacial", None)
         out["samples"] = samples
         if "x0" in x0_output:
             x0_out = model.model.process_latent_out(x0_output["x0"].cpu())
@@ -940,7 +939,7 @@ class SamplerCustomAdvanced(io.ComfyNode):
         latent = latent_image
         latent_image = latent["samples"]
         latent = latent.copy()
-        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image, latent.get("downscale_ratio_spacial", None))
+        latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image)
         latent["samples"] = latent_image
 
         noise_mask = None
@@ -955,7 +954,6 @@ class SamplerCustomAdvanced(io.ComfyNode):
         samples = samples.to(comfy.model_management.intermediate_device())
 
         out = latent.copy()
-        out.pop("downscale_ratio_spacial", None)
         out["samples"] = samples
         if "x0" in x0_output:
             x0_out = guider.model_patcher.model.process_latent_out(x0_output["x0"].cpu())
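
Both nodes keep the same dict-copy pattern around sampling: the incoming latent dict is shallow-copied, only the samples entry is replaced, and any other keys (noise masks, batch indices) ride through untouched. The out.pop calls disappear simply because nothing writes downscale_ratio_spacial anymore. A toy illustration of that pass-through, with stand-in names:

# Toy version of the pass-through pattern: copy the dict, swap "samples",
# and let every other key survive. sample_stub stands in for
# comfy.sample.sample_custom; none of these names are the node's real inputs.
import torch

def sample_stub(x):
    return x + torch.randn_like(x)

latent = {"samples": torch.zeros(1, 4, 8, 8), "noise_mask": torch.ones(1, 1, 8, 8)}
out = latent.copy()
out["samples"] = sample_stub(latent["samples"])
print(sorted(out))                         # ['noise_mask', 'samples']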

View File

@@ -55,7 +55,7 @@ class EmptySD3LatentImage(io.ComfyNode):
     @classmethod
     def execute(cls, width, height, batch_size=1) -> io.NodeOutput:
         latent = torch.zeros([batch_size, 16, height // 8, width // 8], device=comfy.model_management.intermediate_device())
-        return io.NodeOutput({"samples": latent, "downscale_ratio_spacial": 8})
+        return io.NodeOutput({"samples":latent})
 
     generate = execute # TODO: remove
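
The node now returns the bare samples dict; the 16-channel, 1/8-resolution tensor it allocates is all downstream code needs. A quick shape check of that allocation:

# Shape check for an SD3-style empty latent: 16 channels at 1/8 the
# requested pixel size (plain torch, without the device handling above).
import torch

width, height, batch_size = 1024, 1024, 1
latent = torch.zeros([batch_size, 16, height // 8, width // 8])
print(latent.shape)                        # torch.Size([1, 16, 128, 128])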

View File

@@ -1230,7 +1230,7 @@ class EmptyLatentImage:
 
     def generate(self, width, height, batch_size=1):
         latent = torch.zeros([batch_size, 4, height // 8, width // 8], device=self.device)
-        return ({"samples": latent, "downscale_ratio_spacial": 8}, )
+        return ({"samples":latent}, )
 
 
 class LatentFromBatch:
@@ -1538,7 +1538,7 @@ class SetLatentNoiseMask:
 
 def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False):
     latent_image = latent["samples"]
-    latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None))
+    latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image)
 
     if disable_noise:
         noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu")
@@ -1556,7 +1556,6 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive,
                                   denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step,
                                   force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
     out = latent.copy()
-    out.pop("downscale_ratio_spacial", None)
     out["samples"] = samples
     return (out, )
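
common_ksampler's disable_noise branch, unchanged here apart from the call-site revert, allocates zero noise with the latent's dtype and layout on the CPU, so the sampler's add-noise step becomes a no-op without needing a separate code path. A quick check of that behavior:

# Quick check of the disable_noise idea: zero noise matching the latent's
# dtype and layout leaves the latent unchanged when added.
import torch

latent_image = torch.zeros(1, 4, 64, 64)
noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype,
                    layout=latent_image.layout, device="cpu")
print(torch.equal(latent_image + noise, latent_image))   # True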