This commit is contained in:
brucew4yn3rp 2026-07-02 13:27:40 +01:00 committed by GitHub
commit b4a857b7aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 27 additions and 9 deletions

View File

@ -1131,13 +1131,14 @@ class VAE:
output = self.decode_tiled_3d(samples, **args)
return output.movedim(1, -1)
def encode(self, pixel_samples):
def encode(self, pixel_samples, not_video=None):
self.throw_exception_if_invalid()
pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
pixel_samples = pixel_samples.movedim(-1, 1)
do_tile = False
_not_video = self.not_video if not_video is None else not_video
if self.latent_dim == 3 and pixel_samples.ndim < 5:
if not self.not_video:
if not _not_video:
pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
else:
pixel_samples = pixel_samples.unsqueeze(2)
@ -1184,13 +1185,14 @@ class VAE:
return samples
def encode_tiled(self, pixel_samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None):
def encode_tiled(self, pixel_samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None, not_video=None):
self.throw_exception_if_invalid()
pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
dims = self.latent_dim
pixel_samples = pixel_samples.movedim(-1, 1)
_not_video = self.not_video if not_video is None else not_video
if dims == 3:
if not self.not_video:
if not _not_video:
pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
else:
pixel_samples = pixel_samples.unsqueeze(2)
@ -1909,6 +1911,8 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
vae_sd = model_config.process_vae_state_dict(vae_sd)
vae_device = model_options.get("load_device", None)
vae = VAE(sd=vae_sd, metadata=metadata, device=vae_device)
if getattr(model_config, 'vae_not_video', None) is not None: # <-- add
vae.not_video = model_config.vae_not_video
if output_clip:
if te_model_options.get("custom_operations", None) is None:

View File

@ -1867,6 +1867,7 @@ class QwenImage(supported_models_base.BASE):
vae_key_prefix = ["vae."]
text_encoder_key_prefix = ["text_encoders."]
vae_not_video = True
def get_model(self, state_dict, prefix="", device=None):
out = model_base.QwenImage(self, device=device)

View File

@ -373,15 +373,22 @@ class VAEDecodeTiled:
class VAEEncode:
    """Node that encodes an IMAGE batch into a LATENT with the supplied VAE."""

    @classmethod
    def INPUT_TYPES(s):
        """Declare the node inputs: the pixels, the VAE and the ``encode_as`` mode."""
        return {"required": {
            "pixels": ("IMAGE", ),
            "vae": ("VAE", ),
            "encode_as": (["Auto", "Video Frames", "Individual Images"], {"default": "Auto", "advanced": True, "tooltip": "For 3D/video VAEs: 'Video Frames' merges the batch into a temporal sequence, 'Individual Images' encodes each image independently. 'Auto' uses the VAE default."}),
        }}

    RETURN_TYPES = ("LATENT",)
    FUNCTION = "encode"

    CATEGORY = "model/latent"
    SEARCH_ALIASES = ["encode", "encode image", "image to latent"]

    def encode(self, vae, pixels, encode_as="Auto"):
        """Encode ``pixels`` with ``vae`` and wrap the result as a latent dict.

        Args:
            vae: Object exposing ``encode(pixels, not_video=...)``.
            pixels: Image batch forwarded unchanged to ``vae.encode``.
            encode_as: One of the ``encode_as`` choices declared in
                ``INPUT_TYPES``. Defaults to ``"Auto"`` so callers that do
                not pass the argument keep the previous behavior.

        Returns:
            A one-element tuple holding ``{"samples": <encoded latent>}``.
        """
        # None means "no override": the VAE falls back to its own setting.
        # Any value other than the two explicit modes is treated like "Auto".
        not_video = None
        if encode_as == "Individual Images":
            not_video = True
        elif encode_as == "Video Frames":
            not_video = False
        t = vae.encode(pixels, not_video=not_video)
        return ({"samples": t}, )
class VAEEncodeTiled:
@ -392,14 +399,20 @@ class VAEEncodeTiled:
"overlap": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 32, "advanced": True}),
"temporal_size": ("INT", {"default": 64, "min": 8, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to encode at a time.", "advanced": True}),
"temporal_overlap": ("INT", {"default": 8, "min": 4, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to overlap.", "advanced": True}),
"encode_as": (["Auto", "Video Frames", "Individual Images"], {"default": "Auto", "advanced": True, "tooltip": "For 3D/video VAEs: 'Video Frames' merges the batch into a temporal sequence, 'Individual Images' encodes each image independently. 'auto' uses the VAE default."}),
}}
RETURN_TYPES = ("LATENT",)
FUNCTION = "encode"
CATEGORY = "model/latent"
def encode(self, vae, pixels, tile_size, overlap, temporal_size=64, temporal_overlap=8):
t = vae.encode_tiled(pixels, tile_x=tile_size, tile_y=tile_size, overlap=overlap, tile_t=temporal_size, overlap_t=temporal_overlap)
def encode(self, vae, pixels, tile_size, overlap, temporal_size=64, temporal_overlap=8, encode_as="Auto"):
not_video = None
if encode_as == "Individual Images":
not_video = True
elif encode_as == "Video Frames":
not_video = False
t = vae.encode_tiled(pixels, tile_x=tile_size, tile_y=tile_size, overlap=overlap, tile_t=temporal_size, overlap_t=temporal_overlap, not_video=not_video)
return ({"samples": t}, )
class VAEEncodeForInpaint: