From 990a7ae7f20df5a5092200fad129007f84252ae0 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 18 May 2026 20:01:43 -0700 Subject: [PATCH] Initial work to make downscale_ratio_temporal work. (#13972) --- comfy/sample.py | 12 ++++++++++-- comfy_extras/nodes_custom_sampler.py | 6 ++++-- nodes.py | 3 ++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/comfy/sample.py b/comfy/sample.py index 653829582..2be0cae5f 100644 --- a/comfy/sample.py +++ b/comfy/sample.py @@ -37,11 +37,12 @@ def prepare_noise(latent_image, seed, noise_inds=None): return noises -def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None): +def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None, downscale_ratio_temporal=None): if latent_image.is_nested: return latent_image latent_format = model.get_model_object("latent_format") #Resize the empty latent image so it has the right number of channels - if torch.count_nonzero(latent_image) == 0: + is_empty = torch.count_nonzero(latent_image) == 0 + if is_empty: if latent_format.latent_channels != latent_image.shape[1]: latent_image = comfy.utils.repeat_to_batch_size(latent_image, latent_format.latent_channels, dim=1) if downscale_ratio_spacial is not None: @@ -51,6 +52,13 @@ def fix_empty_latent_channels(model, latent_image, downscale_ratio_spacial=None) if latent_format.latent_dimensions == 3 and latent_image.ndim == 4: latent_image = latent_image.unsqueeze(2) + + if is_empty and downscale_ratio_temporal is not None: + if downscale_ratio_temporal != latent_format.temporal_downscale_ratio: + ratio = downscale_ratio_temporal / latent_format.temporal_downscale_ratio + new_t = max(1, round(latent_image.shape[2] * ratio)) + latent_image = comfy.utils.repeat_to_batch_size(latent_image, new_t, dim=2) + return latent_image def prepare_sampling(model, noise_shape, positive, negative, noise_mask): diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index c67145d2d..02fb9385f 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -750,7 +750,7 @@ class SamplerCustom(io.ComfyNode): latent = latent_image latent_image = latent["samples"] latent = latent.copy() - latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None)) + latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None), latent.get("downscale_ratio_temporal", None)) latent["samples"] = latent_image if not add_noise: @@ -770,6 +770,7 @@ class SamplerCustom(io.ComfyNode): out = latent.copy() out.pop("downscale_ratio_spacial", None) + out.pop("downscale_ratio_temporal", None) out["samples"] = samples if "x0" in x0_output: x0_out = model.model.process_latent_out(x0_output["x0"].cpu()) @@ -949,7 +950,7 @@ class SamplerCustomAdvanced(io.ComfyNode): latent = latent_image latent_image = latent["samples"] latent = latent.copy() - latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image, latent.get("downscale_ratio_spacial", None)) + latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image, latent.get("downscale_ratio_spacial", None), latent.get("downscale_ratio_temporal", None)) latent["samples"] = latent_image noise_mask = None @@ -965,6 +966,7 @@ class SamplerCustomAdvanced(io.ComfyNode): out = latent.copy() out.pop("downscale_ratio_spacial", None) + out.pop("downscale_ratio_temporal", None) out["samples"] = samples if "x0" in x0_output: x0_out = guider.model_patcher.model.process_latent_out(x0_output["x0"].cpu()) diff --git a/nodes.py b/nodes.py index 374217eea..42fb8fd56 100644 --- a/nodes.py +++ b/nodes.py @@ -1524,7 +1524,7 @@ class SetLatentNoiseMask: def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent, denoise=1.0, disable_noise=False, start_step=None, last_step=None, force_full_denoise=False): latent_image = latent["samples"] - latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None)) + latent_image = comfy.sample.fix_empty_latent_channels(model, latent_image, latent.get("downscale_ratio_spacial", None), latent.get("downscale_ratio_temporal", None)) if disable_noise: noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") @@ -1543,6 +1543,7 @@ def common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed) out = latent.copy() out.pop("downscale_ratio_spacial", None) + out.pop("downscale_ratio_temporal", None) out["samples"] = samples return (out, )