From eb2222538739c4ebd396cd0a40cb6d80befd04fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Tue, 21 Apr 2026 20:46:37 +0300 Subject: [PATCH] Support standalone LTXV audio VAEs (#13499) --- comfy/sd.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comfy/sd.py b/comfy/sd.py index a4d3ee269..736fe35de 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -807,6 +807,7 @@ class VAE: self.memory_used_encode = lambda shape, dtype: (700 * (max(1, (shape[-3] ** 0.66 * 0.11)) * shape[-2] * shape[-1]) * model_management.dtype_size(dtype)) self.memory_used_decode = lambda shape, dtype: (50 * (max(1, (shape[-3] ** 0.65 * 0.26)) * shape[-2] * shape[-1] * 32 * 32) * model_management.dtype_size(dtype)) elif "vocoder.resblocks.0.convs1.0.weight" in sd or "vocoder.vocoder.resblocks.0.convs1.0.weight" in sd: # LTX Audio + sd = comfy.utils.state_dict_prefix_replace(sd, {"audio_vae.": "autoencoder."}) self.first_stage_model = comfy.ldm.lightricks.vae.audio_vae.AudioVAE(metadata=metadata) self.memory_used_encode = lambda shape, dtype: (shape[2] * 330) * model_management.dtype_size(dtype) self.memory_used_decode = lambda shape, dtype: (shape[2] * shape[3] * 87000) * model_management.dtype_size(dtype)