mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-04-15 13:02:35 +08:00
Fix noise/latent tensor mismatch when latent is nested but noise is not
When using LTXAV (audio+video) workflows, latent_image is a NestedTensor but noise may be a regular tensor. Calling unbind() on a non-nested noise tensor splits it along dim=0 (its first dimension) instead of separating the per-component tensors, which produces a shape mismatch at noise_scaling. The fix checks whether noise is nested before unbinding. If it is not, the noise is used as the first (video) component and each additional component (e.g. audio) is padded with zero noise shaped like the corresponding latent, which is semantically correct since those components don't need denoising in the video sampler.
This commit is contained in:
parent
b615af1c65
commit
2beca418ad
@ -1006,8 +1006,16 @@ class CFGGuider:
|
|||||||
return latent_image
|
return latent_image
|
||||||
|
|
||||||
if latent_image.is_nested:
|
if latent_image.is_nested:
|
||||||
latent_image, latent_shapes = comfy.utils.pack_latents(latent_image.unbind())
|
li_tensors = latent_image.unbind()
|
||||||
noise, _ = comfy.utils.pack_latents(noise.unbind())
|
if noise.is_nested:
|
||||||
|
n_tensors = noise.unbind()
|
||||||
|
else:
|
||||||
|
# Noise only covers video -- pad remaining components (audio) with zeros
|
||||||
|
n_tensors = [noise]
|
||||||
|
for i in range(1, len(li_tensors)):
|
||||||
|
n_tensors.append(torch.zeros_like(li_tensors[i]))
|
||||||
|
latent_image, latent_shapes = comfy.utils.pack_latents(li_tensors)
|
||||||
|
noise, _ = comfy.utils.pack_latents(n_tensors)
|
||||||
else:
|
else:
|
||||||
latent_shapes = [latent_image.shape]
|
latent_shapes = [latent_image.shape]
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user