diff --git a/comfy/ldm/lightricks/vae/audio_vae.py b/comfy/ldm/lightricks/vae/audio_vae.py index dd5320c8f..b11cd7d17 100644 --- a/comfy/ldm/lightricks/vae/audio_vae.py +++ b/comfy/ldm/lightricks/vae/audio_vae.py @@ -154,7 +154,7 @@ class AudioVAE(torch.nn.Module): waveform, waveform_sample_rate, device=waveform.device ) - latents = self.autoencoder.encode(mel_spec) + latents = self.autoencoder.encode(mel_spec.to(dtype=waveform.dtype)) posterior = DiagonalGaussianDistribution(latents) latent_mode = posterior.mode()