diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 3dac5be18..60c0dfd7e 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -9,6 +9,7 @@ class LatentFormat: latent_rgb_factors_reshape = None taesd_decoder_name = None spacial_downscale_ratio = 8 + temporal_downscale_ratio = 1 def process_in(self, latent): return latent * self.scale_factor @@ -235,6 +236,7 @@ class Flux2(LatentFormat): class Mochi(LatentFormat): latent_channels = 12 latent_dimensions = 3 + temporal_downscale_ratio = 6 def __init__(self): self.scale_factor = 1.0 @@ -278,6 +280,7 @@ class LTXV(LatentFormat): latent_channels = 128 latent_dimensions = 3 spacial_downscale_ratio = 32 + temporal_downscale_ratio = 8 def __init__(self): self.latent_rgb_factors = [ @@ -421,6 +424,7 @@ class LTXAV(LTXV): class HunyuanVideo(LatentFormat): latent_channels = 16 latent_dimensions = 3 + temporal_downscale_ratio = 4 scale_factor = 0.476986 latent_rgb_factors = [ [-0.0395, -0.0331, 0.0445], @@ -447,6 +451,7 @@ class HunyuanVideo(LatentFormat): class Cosmos1CV8x8x8(LatentFormat): latent_channels = 16 latent_dimensions = 3 + temporal_downscale_ratio = 8 latent_rgb_factors = [ [ 0.1817, 0.2284, 0.2423], @@ -472,6 +477,7 @@ class Cosmos1CV8x8x8(LatentFormat): class Wan21(LatentFormat): latent_channels = 16 latent_dimensions = 3 + temporal_downscale_ratio = 4 latent_rgb_factors = [ [-0.1299, -0.1692, 0.2932], @@ -734,6 +740,7 @@ class HunyuanVideo15(LatentFormat): latent_channels = 32 latent_dimensions = 3 spacial_downscale_ratio = 16 + temporal_downscale_ratio = 4 scale_factor = 1.03682 taesd_decoder_name = "lighttaehy1_5" @@ -788,6 +795,7 @@ class ZImagePixelSpace(ChromaRadiance): class CogVideoX(LatentFormat): latent_channels = 16 latent_dimensions = 3 + temporal_downscale_ratio = 4 def __init__(self): self.scale_factor = 1.15258426