diff --git a/comfy/ldm/lightricks/vae/audio_vae.py b/comfy/ldm/lightricks/vae/audio_vae.py index fa0a00748..4019e32fd 100644 --- a/comfy/ldm/lightricks/vae/audio_vae.py +++ b/comfy/ldm/lightricks/vae/audio_vae.py @@ -193,7 +193,7 @@ class AudioVAE(torch.nn.Module): waveform, waveform_sample_rate, device=self.device_manager.load_device ) - latents = self.autoencoder.encode(mel_spec) + latents = self.autoencoder.encode(mel_spec.to(dtype=waveform.dtype)) posterior = DiagonalGaussianDistribution(latents) latent_mode = posterior.mode()