Merge branch 'comfyanonymous:master' into master

2026-01-22 12:20:16 +08:00 · 2025-03-05 19:11:25 +03:00 · 2025-03-05 19:11:25 +03:00 · c36a942c12
commit c36a942c12
parent 93001919fa 889519971f
5 changed files with 10 additions and 14 deletions
--- a/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
+++ b/comfy/ldm/lightricks/vae/causal_video_autoencoder.py
@@ -695,7 +695,7 @@ class DepthToSpaceUpsample(nn.Module):
 class LayerNorm(nn.Module):
    def __init__(self, dim, eps, elementwise_affine=True) -> None:
        super().__init__()
-        self.norm = nn.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
+        self.norm = ops.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
    def forward(self, x):
        x = rearrange(x, "b c d h w -> b d h w c")
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -762,7 +762,7 @@ class LTXV(supported_models_base.BASE):
    unet_extra_config = {}
    latent_format = latent_formats.LTXV
-    memory_usage_factor = 2.7
+    memory_usage_factor = 5.5 # TODO: img2vid is about 2x vs txt2vid
    supported_inference_dtypes = [torch.bfloat16, torch.float32]
--- a/comfy_extras/nodes_lt.py
+++ b/comfy_extras/nodes_lt.py
@@ -194,11 +194,6 @@ class LTXVAddGuide:
        frame_idx, latent_idx = self.get_latent_index(positive, latent_length, frame_idx, scale_factors)
        assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence."
        if frame_idx == 0:
            latent_image, noise_mask = self.replace_latent_frames(latent_image, noise_mask, t, latent_idx, strength)
            return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
        num_prefix_frames = min(self._num_prefix_frames, t.shape[2])
        positive, negative, latent_image, noise_mask = self.append_keyframe(
@@ -252,6 +247,8 @@ class LTXVCropGuides:
        noise_mask = get_noise_mask(latent)
        _, num_keyframes = get_keyframe_idxs(positive)
        if num_keyframes == 0:
            return (positive, negative, {"samples": latent_image, "noise_mask": noise_mask},)
        latent_image = latent_image[:, :, :-num_keyframes]
        noise_mask = noise_mask[:, :, :-num_keyframes]
@@ -413,7 +410,7 @@ def preprocess(image: torch.Tensor, crf=29):
    if crf == 0:
        return image
-    image_array = (image * 255.0).byte().cpu().numpy()
+    image_array = (image[:(image.shape[0] // 2) * 2, :(image.shape[1] // 2) * 2] * 255.0).byte().cpu().numpy()
    with io.BytesIO() as output_file:
        encode_single_frame(output_file, image_array, crf)
        video_bytes = output_file.getvalue()
@@ -447,12 +444,11 @@ class LTXVPreprocess:
    CATEGORY = "image"
    def preprocess(self, image, img_compression):
        output_image = image
        if img_compression > 0:
-            output_image = torch.zeros_like(image)
+            output_images = []
            for i in range(image.shape[0]):
-                output_image[i] = preprocess(image[i], img_compression)
+                output_images.append(preprocess(image[i], img_compression))
-        return (output_image,)
+        return (torch.stack(output_images),)
 NODE_CLASS_MAPPINGS = {
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.19"
+__version__ = "0.3.22"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.19"
+version = "0.3.22"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"