Use temporal downscale to make empty audio latent nodes more reusable. (#13975)

2026-06-29 19:29:24 +08:00 · 2026-05-18 21:14:30 -07:00 · 2026-05-18 21:14:30 -07:00 · a4382e056e
commit a4382e056e
parent d71cc1c8f2
3 changed files with 4 additions and 2 deletions
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -150,6 +150,7 @@ class SD3(LatentFormat):
 class StableAudio1(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
+    temporal_downscale_ratio = 2048

 class Flux(SD3):
    latent_channels = 16
@@ -766,6 +767,7 @@ class ACEAudio(LatentFormat):
 class ACEAudio15(LatentFormat):
    latent_channels = 64
    latent_dimensions = 1
+    temporal_downscale_ratio = 1764

 class ChromaRadiance(LatentFormat):
    latent_channels = 3
--- a/comfy_extras/nodes_ace.py
+++ b/comfy_extras/nodes_ace.py
@@ -104,7 +104,7 @@ class EmptyAceStep15LatentAudio(IO.ComfyNode):
    def execute(cls, seconds, batch_size) -> IO.NodeOutput:
        length = round((seconds * 48000 / 1920))
        latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
-        return IO.NodeOutput({"samples": latent, "type": "audio"})
+        return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 1764})

 class ReferenceAudio(IO.ComfyNode):
    @classmethod
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -33,7 +33,7 @@ class EmptyLatentAudio(IO.ComfyNode):
    def execute(cls, seconds, batch_size) -> IO.NodeOutput:
        length = round((seconds * 44100 / 2048) / 2) * 2
        latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device())
-        return IO.NodeOutput({"samples":latent, "type": "audio"})
+        return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 2048})

    generate = execute  # TODO: remove