Merge 0388ac4309 into 65045730a6

Defensively truncate/pad nested noise components to match latent
When noise.is_nested is true but noise has a different number of components than latent_image, truncate the extra components or pad the missing ones with torch.zeros_like, mirroring the denoise_mask handling pattern below. Addresses CodeRabbit nitpick on #13318.
2026-05-10 01:02:56 +08:00 · 2026-05-08 16:24:59 -04:00 · 2026-05-03 23:52:08 -04:00 · 2026-04-07 06:07:26 -04:00
1 changed files with 13 additions and 2 deletions
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -1006,8 +1006,19 @@ class CFGGuider:
            return latent_image

        if latent_image.is_nested:
-            latent_image, latent_shapes = comfy.utils.pack_latents(latent_image.unbind())
-            noise, _ = comfy.utils.pack_latents(noise.unbind())
+            li_tensors = latent_image.unbind()
+            if noise.is_nested:
+                # Truncate extra noise components, pad missing ones with zeros
+                n_tensors = list(noise.unbind()[:len(li_tensors)])
+                for i in range(len(n_tensors), len(li_tensors)):
+                    n_tensors.append(torch.zeros_like(li_tensors[i]))
+            else:
+                # Noise only covers video -- pad remaining components (audio) with zeros
+                n_tensors = [noise]
+                for i in range(1, len(li_tensors)):
+                    n_tensors.append(torch.zeros_like(li_tensors[i]))
+            latent_image, latent_shapes = comfy.utils.pack_latents(li_tensors)
+            noise, _ = comfy.utils.pack_latents(n_tensors)
        else:
            latent_shapes = [latent_image.shape]