Use temporal downscale to make empty audio latent nodes more reusable. (#13975)

This commit is contained in:
comfyanonymous 2026-05-18 21:14:30 -07:00 committed by GitHub
parent d71cc1c8f2
commit a4382e056e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 4 additions and 2 deletions

View File

@@ -150,6 +150,7 @@ class SD3(LatentFormat):
class StableAudio1(LatentFormat):
latent_channels = 64
latent_dimensions = 1
+temporal_downscale_ratio = 2048
class Flux(SD3):
latent_channels = 16
@@ -766,6 +767,7 @@ class ACEAudio(LatentFormat):
class ACEAudio15(LatentFormat):
latent_channels = 64
latent_dimensions = 1
+temporal_downscale_ratio = 1764
class ChromaRadiance(LatentFormat):
latent_channels = 3

View File

@@ -104,7 +104,7 @@ class EmptyAceStep15LatentAudio(IO.ComfyNode):
def execute(cls, seconds, batch_size) -> IO.NodeOutput:
length = round((seconds * 48000 / 1920))
latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
-return IO.NodeOutput({"samples": latent, "type": "audio"})
+return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 1764})
class ReferenceAudio(IO.ComfyNode):
@classmethod

View File

@@ -33,7 +33,7 @@ class EmptyLatentAudio(IO.ComfyNode):
def execute(cls, seconds, batch_size) -> IO.NodeOutput:
length = round((seconds * 44100 / 2048) / 2) * 2
latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device())
-return IO.NodeOutput({"samples":latent, "type": "audio"})
+return IO.NodeOutput({"samples": latent, "type": "audio", "downscale_ratio_temporal": 2048})
generate = execute # TODO: remove