From 2beca418adf32443753d7b5a46dbfc1f47ec045b Mon Sep 17 00:00:00 2001 From: Dustin <6962246+djdarcy@users.noreply.github.com> Date: Tue, 7 Apr 2026 06:07:26 -0400 Subject: [PATCH] Fix noise/latent tensor mismatch when latent is nested but noise is not When using LTXAV (audio+video) workflows, latent_image is a NestedTensor but noise may be a regular tensor. Calling unbind() on a non-nested noise tensor splits it along dim=0 (the tensor's first dimension) rather than into per-component latents, producing a shape mismatch at noise_scaling. Check whether noise is nested before unbinding. If not, pad with zero-noise for additional components (e.g. audio), which is semantically correct since those components don't need denoising in the video sampler. --- comfy/samplers.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/comfy/samplers.py b/comfy/samplers.py index 0a4d062db..34d4a9a48 100755 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -1006,8 +1006,16 @@ class CFGGuider: return latent_image if latent_image.is_nested: - latent_image, latent_shapes = comfy.utils.pack_latents(latent_image.unbind()) - noise, _ = comfy.utils.pack_latents(noise.unbind()) + li_tensors = latent_image.unbind() + if noise.is_nested: + n_tensors = noise.unbind() + else: + # Noise only covers video -- pad remaining components (audio) with zeros + n_tensors = [noise] + for i in range(1, len(li_tensors)): + n_tensors.append(torch.zeros_like(li_tensors[i])) + latent_image, latent_shapes = comfy.utils.pack_latents(li_tensors) + noise, _ = comfy.utils.pack_latents(n_tensors) else: latent_shapes = [latent_image.shape]