mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-07-03 13:19:23 +08:00
Merge 77cb84873a into 694815f498
This commit is contained in:
commit
b4a857b7aa
12
comfy/sd.py
12
comfy/sd.py
@ -1131,13 +1131,14 @@ class VAE:
|
||||
output = self.decode_tiled_3d(samples, **args)
|
||||
return output.movedim(1, -1)
|
||||
|
||||
def encode(self, pixel_samples):
|
||||
def encode(self, pixel_samples, not_video=None):
|
||||
self.throw_exception_if_invalid()
|
||||
pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
|
||||
pixel_samples = pixel_samples.movedim(-1, 1)
|
||||
do_tile = False
|
||||
_not_video = self.not_video if not_video is None else not_video
|
||||
if self.latent_dim == 3 and pixel_samples.ndim < 5:
|
||||
if not self.not_video:
|
||||
if not _not_video:
|
||||
pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
|
||||
else:
|
||||
pixel_samples = pixel_samples.unsqueeze(2)
|
||||
@ -1184,13 +1185,14 @@ class VAE:
|
||||
|
||||
return samples
|
||||
|
||||
def encode_tiled(self, pixel_samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None):
|
||||
def encode_tiled(self, pixel_samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None, not_video=None):
|
||||
self.throw_exception_if_invalid()
|
||||
pixel_samples = self.vae_encode_crop_pixels(pixel_samples)
|
||||
dims = self.latent_dim
|
||||
pixel_samples = pixel_samples.movedim(-1, 1)
|
||||
_not_video = self.not_video if not_video is None else not_video
|
||||
if dims == 3:
|
||||
if not self.not_video:
|
||||
if not _not_video:
|
||||
pixel_samples = pixel_samples.movedim(1, 0).unsqueeze(0)
|
||||
else:
|
||||
pixel_samples = pixel_samples.unsqueeze(2)
|
||||
@ -1909,6 +1911,8 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
|
||||
vae_sd = model_config.process_vae_state_dict(vae_sd)
|
||||
vae_device = model_options.get("load_device", None)
|
||||
vae = VAE(sd=vae_sd, metadata=metadata, device=vae_device)
|
||||
if getattr(model_config, 'vae_not_video', None) is not None: # <-- add
|
||||
vae.not_video = model_config.vae_not_video
|
||||
|
||||
if output_clip:
|
||||
if te_model_options.get("custom_operations", None) is None:
|
||||
|
||||
@ -1867,6 +1867,7 @@ class QwenImage(supported_models_base.BASE):
|
||||
|
||||
vae_key_prefix = ["vae."]
|
||||
text_encoder_key_prefix = ["text_encoders."]
|
||||
vae_not_video = True
|
||||
|
||||
def get_model(self, state_dict, prefix="", device=None):
|
||||
out = model_base.QwenImage(self, device=device)
|
||||
|
||||
23
nodes.py
23
nodes.py
@ -373,15 +373,22 @@ class VAEDecodeTiled:
|
||||
class VAEEncode:
|
||||
@classmethod
|
||||
def INPUT_TYPES(s):
|
||||
return {"required": { "pixels": ("IMAGE", ), "vae": ("VAE", )}}
|
||||
return {"required": { "pixels": ("IMAGE", ), "vae": ("VAE", ),
|
||||
"encode_as": (["Auto", "Video Frames", "Individual Images"], {"default": "Auto", "advanced": True, "tooltip": "For 3D/video VAEs: 'Video Frames' merges the batch into a temporal sequence, 'Individual Images' encodes each image independently. 'auto' uses the VAE default."}),
|
||||
}}
|
||||
RETURN_TYPES = ("LATENT",)
|
||||
FUNCTION = "encode"
|
||||
|
||||
CATEGORY = "model/latent"
|
||||
SEARCH_ALIASES = ["encode", "encode image", "image to latent"]
|
||||
|
||||
def encode(self, vae, pixels):
|
||||
t = vae.encode(pixels)
|
||||
def encode(self, vae, pixels, encode_as="Auto"):
|
||||
not_video = None
|
||||
if encode_as == "Individual Images":
|
||||
not_video = True
|
||||
elif encode_as == "Video Frames":
|
||||
not_video = False
|
||||
t = vae.encode(pixels, not_video=not_video)
|
||||
return ({"samples":t}, )
|
||||
|
||||
class VAEEncodeTiled:
|
||||
@ -392,14 +399,20 @@ class VAEEncodeTiled:
|
||||
"overlap": ("INT", {"default": 64, "min": 0, "max": 4096, "step": 32, "advanced": True}),
|
||||
"temporal_size": ("INT", {"default": 64, "min": 8, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to encode at a time.", "advanced": True}),
|
||||
"temporal_overlap": ("INT", {"default": 8, "min": 4, "max": 4096, "step": 4, "tooltip": "Only used for video VAEs: Amount of frames to overlap.", "advanced": True}),
|
||||
"encode_as": (["Auto", "Video Frames", "Individual Images"], {"default": "Auto", "advanced": True, "tooltip": "For 3D/video VAEs: 'Video Frames' merges the batch into a temporal sequence, 'Individual Images' encodes each image independently. 'auto' uses the VAE default."}),
|
||||
}}
|
||||
RETURN_TYPES = ("LATENT",)
|
||||
FUNCTION = "encode"
|
||||
|
||||
CATEGORY = "model/latent"
|
||||
|
||||
def encode(self, vae, pixels, tile_size, overlap, temporal_size=64, temporal_overlap=8):
|
||||
t = vae.encode_tiled(pixels, tile_x=tile_size, tile_y=tile_size, overlap=overlap, tile_t=temporal_size, overlap_t=temporal_overlap)
|
||||
def encode(self, vae, pixels, tile_size, overlap, temporal_size=64, temporal_overlap=8, encode_as="Auto"):
|
||||
not_video = None
|
||||
if encode_as == "Individual Images":
|
||||
not_video = True
|
||||
elif encode_as == "Video Frames":
|
||||
not_video = False
|
||||
t = vae.encode_tiled(pixels, tile_x=tile_size, tile_y=tile_size, overlap=overlap, tile_t=temporal_size, overlap_t=temporal_overlap, not_video=not_video)
|
||||
return ({"samples": t}, )
|
||||
|
||||
class VAEEncodeForInpaint:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user