diff --git a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py index 043ca0496..f91870d71 100644 --- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py +++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py @@ -695,7 +695,7 @@ class DepthToSpaceUpsample(nn.Module): class LayerNorm(nn.Module): def __init__(self, dim, eps, elementwise_affine=True) -> None: super().__init__() - self.norm = nn.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine) + self.norm = ops.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine) def forward(self, x): x = rearrange(x, "b c d h w -> b d h w c") diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 392550727..0c132c706 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -762,7 +762,7 @@ class LTXV(supported_models_base.BASE): unet_extra_config = {} latent_format = latent_formats.LTXV - memory_usage_factor = 2.7 + memory_usage_factor = 5.5 # TODO: img2vid is about 2x vs txt2vid supported_inference_dtypes = [torch.bfloat16, torch.float32] diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index 8bd548bcd..b608b9407 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -194,11 +194,6 @@ class LTXVAddGuide: frame_idx, latent_idx = self.get_latent_index(positive, latent_length, frame_idx, scale_factors) assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence." - if frame_idx == 0: - latent_image, noise_mask = self.replace_latent_frames(latent_image, noise_mask, t, latent_idx, strength) - return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},) - - num_prefix_frames = min(self._num_prefix_frames, t.shape[2]) positive, negative, latent_image, noise_mask = self.append_keyframe( @@ -252,6 +247,8 @@ class LTXVCropGuides: noise_mask = get_noise_mask(latent) _, num_keyframes = get_keyframe_idxs(positive) + if num_keyframes == 0: + return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},) latent_image = latent_image[:, :, :-num_keyframes] noise_mask = noise_mask[:, :, :-num_keyframes] @@ -413,7 +410,7 @@ def preprocess(image: torch.Tensor, crf=29): if crf == 0: return image - image_array = (image * 255.0).byte().cpu().numpy() + image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy() with io.BytesIO() as output_file: encode_single_frame(output_file, image_array, crf) video_bytes = output_file.getvalue() @@ -447,12 +444,11 @@ class LTXVPreprocess: CATEGORY = "image" def preprocess(self, image, img_compression): - output_image = image if img_compression > 0: - output_image = torch.zeros_like(image) + output_images = [] for i in range(image.shape[0]): - output_image[i] = preprocess(image[i], img_compression) - return (output_image,) + output_images.append(preprocess(image[i], img_compression)) + return (torch.stack(output_images),) NODE_CLASS_MAPPINGS = { diff --git a/comfyui_version.py b/comfyui_version.py index 5ded466ad..0e50db99b 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.19" +__version__ = "0.3.22" diff --git a/pyproject.toml b/pyproject.toml index 444a1efc1..9dbbe7cc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.19" +version = "0.3.22" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9"