"""Regression: ``comfy.ldm.seedvr.vae.VideoAutoencoderKL.forward`` must honor the actual tensor/tuple return contract of ``encode()`` and ``decode_()`` and must NOT dereference diffusers-style ``.latent_dist`` or ``.sample`` attributes on those returns. The pre-fix body raised ``AttributeError: 'Tensor' object has no attribute 'latent_dist'`` for ``mode in {"encode", "all"}`` and ``AttributeError: 'VideoAutoencoderKL' object has no attribute 'decode'`` for ``mode == "decode"`` (the class only defines ``decode_`` with a trailing underscore). The post-fix body unwraps the optional one-element tuple shape that ``return_dict=False`` produces and returns the tensor directly. Tests construct a stub subclass of ``VideoAutoencoderKL`` that bypasses the heavy ``__init__`` via ``torch.nn.Module.__init__(self)`` and overrides ``encode``/``decode_`` with known tensors so the contract can be probed without loading any real VAE weights. """ import inspect import re import torch import torch.nn as nn from comfy.cli_args import args as cli_args if not torch.cuda.is_available(): cli_args.cpu = True from comfy.ldm.seedvr.vae import VideoAutoencoderKL # noqa: E402 _LATENT_SHAPE = (1, 16, 2, 2, 2) _DECODED_SHAPE = (1, 3, 5, 16, 16) _INPUT_ENCODE_SHAPE = (1, 3, 5, 16, 16) _INPUT_DECODE_SHAPE = (1, 16, 2, 2, 2) class _StubVAE(VideoAutoencoderKL): def __init__(self): nn.Module.__init__(self) self._encode_out = torch.zeros(*_LATENT_SHAPE) self._decode_out = torch.zeros(*_DECODED_SHAPE) def encode(self, x, return_dict=True): return self._encode_out def decode_(self, z, return_dict=True): return self._decode_out def test_forward_encode_returns_tensor(): vae = _StubVAE() x = torch.zeros(*_INPUT_ENCODE_SHAPE) result = vae.forward(x, mode="encode") assert type(result) is torch.Tensor assert result.shape == torch.Size(_LATENT_SHAPE) def test_forward_decode_returns_tensor(): vae = _StubVAE() z = torch.zeros(*_INPUT_DECODE_SHAPE) result = vae.forward(z, mode="decode") assert type(result) is torch.Tensor assert result.shape == torch.Size(_DECODED_SHAPE) def test_forward_all_returns_tensor(): vae = _StubVAE() x = torch.zeros(*_INPUT_ENCODE_SHAPE) result = vae.forward(x, mode="all") assert type(result) is torch.Tensor assert result.shape == torch.Size(_DECODED_SHAPE) def test_forward_source_has_no_diffusers_attr_access(): src = inspect.getsource(VideoAutoencoderKL.forward) assert ".latent_dist" not in src assert ".sample" not in src assert re.search(r"self\.decode\(", src) is None class _TupleReturningStubVAE(VideoAutoencoderKL): """Stub variant whose ``encode``/``decode_`` return the ``(tensor,)`` one-element tuple shape ``return_dict=False`` produces in the parent class. Exercises the unwrap branch of ``VideoAutoencoderKL.forward``. """ def __init__(self): nn.Module.__init__(self) self._encode_tensor = torch.zeros(*_LATENT_SHAPE) self._decode_tensor = torch.zeros(*_DECODED_SHAPE) def encode(self, x, return_dict=True): return (self._encode_tensor,) def decode_(self, z, return_dict=True): return (self._decode_tensor,) def test_forward_encode_unwraps_one_tuple(): vae = _TupleReturningStubVAE() x = torch.zeros(*_INPUT_ENCODE_SHAPE) result = vae.forward(x, mode="encode") assert type(result) is torch.Tensor assert result.shape == torch.Size(_LATENT_SHAPE) def test_forward_decode_unwraps_one_tuple(): vae = _TupleReturningStubVAE() z = torch.zeros(*_INPUT_DECODE_SHAPE) result = vae.forward(z, mode="decode") assert type(result) is torch.Tensor assert result.shape == torch.Size(_DECODED_SHAPE) def test_forward_all_unwraps_one_tuple_at_each_step(): vae = _TupleReturningStubVAE() x = torch.zeros(*_INPUT_ENCODE_SHAPE) result = vae.forward(x, mode="all") assert type(result) is torch.Tensor assert result.shape == torch.Size(_DECODED_SHAPE)