From a4382e056e348533a7a8ef6d74f6f75b93c1a247 Mon Sep 17 00:00:00 2001
From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com>
Date: Mon, 18 May 2026 21:14:30 -0700
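Subject: [PATCH] Use temporal downscale to make empty audio latent nodes more
 reusable. (#13975)

Declare temporal_downscale_ratio on the StableAudio1 (2048) and ACEAudio15
(1764) latent formats, and have the EmptyLatentAudio and
EmptyAceStep15LatentAudio nodes report the same values under the
"downscale_ratio_temporal" key of the latent dict they return.
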
---
 comfy/latent_formats.py     | 2 ++
 comfy_extras/nodes_ace.py   | 2 +-
 comfy_extras/nodes_audio.py | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py
index d527eec4a..6e37080bb 100644
--- a/comfy/latent_formats.py
+++ b/comfy/latent_formats.py
@@ -150,6 +150,7 @@ class SD3(LatentFormat):
 class StableAudio1(LatentFormat):
     latent_channels = 64
     latent_dimensions = 1
+    temporal_downscale_ratio = 2048
 
 class Flux(SD3):
     latent_channels = 16
@@ -766,6 +767,7 @@ class ACEAudio(LatentFormat):
 class ACEAudio15(LatentFormat):
     latent_channels = 64
     latent_dimensions = 1
+    temporal_downscale_ratio = 1764
 
 class ChromaRadiance(LatentFormat):
     latent_channels = 3
diff --git a/comfy_extras/nodes_ace.py b/comfy_extras/nodes_ace.py
index affcf3b71..247d9ae8a 100644
--- a/comfy_extras/nodes_ace.py
+++ b/comfy_extras/nodes_ace.py
@@ -104,7 +104,7 @@ class EmptyAceStep15LatentAudio(IO.ComfyNode):
     def execute(cls, seconds, batch_size) -> IO.NodeOutput:
         length = round((seconds * 48000 / 1920))
         latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
-        return IO.NodeOutput({"samples": latent, "type": "audio"})
+        return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 1764})
 
 class ReferenceAudio(IO.ComfyNode):
     @classmethod
diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py
index fcc1c34d5..2d6b3c7ea 100644
--- a/comfy_extras/nodes_audio.py
+++ b/comfy_extras/nodes_audio.py
@@ -33,7 +33,7 @@ class EmptyLatentAudio(IO.ComfyNode):
     def execute(cls, seconds, batch_size) -> IO.NodeOutput:
         length = round((seconds * 44100 / 2048) / 2) * 2
         latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device())
-        return IO.NodeOutput({"samples":latent, "type": "audio"})
+        return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 2048})
 
     generate = execute  # TODO: remove