mirror of https://github.com/comfyanonymous/ComfyUI.git
ltx: vae: scale the chunk size with the user's VRAM

Scale this linearly down for users with low VRAM.

This commit is contained in:
parent 7a16e8aa4e
commit d4a3cfc649
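In short: the decoder's working-chunk budget is no longer a fixed 128 MiB. As the diff below shows, it is clamped to [32 MiB, 128 MiB] and interpolated linearly against total VRAM between 6 GiB and 24 GiB:

    chunk_size = MIN_CHUNK_SIZE + (vram - 6 GiB) / (24 GiB - 6 GiB) * (MAX_CHUNK_SIZE - MIN_CHUNK_SIZE)

so a card at or below 6 GiB gets 32 MiB chunks, a card at or above 24 GiB keeps the old 128 MiB, and everything in between scales proportionally.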
@@ -297,7 +297,23 @@ class Encoder(nn.Module):
         module.temporal_cache_state.pop(tid, None)
 
-MAX_CHUNK_SIZE=(128 * 1024 ** 2)
+MIN_VRAM_FOR_CHUNK_SCALING = 6 * 1024 ** 3
+MAX_VRAM_FOR_CHUNK_SCALING = 24 * 1024 ** 3
+MIN_CHUNK_SIZE = 32 * 1024 ** 2
+MAX_CHUNK_SIZE = 128 * 1024 ** 2
+
+def get_max_chunk_size(device: torch.device) -> int:
+    total_memory = comfy.model_management.get_total_memory(dev=device)
+
+    if total_memory <= MIN_VRAM_FOR_CHUNK_SCALING:
+        return MIN_CHUNK_SIZE
+    if total_memory >= MAX_VRAM_FOR_CHUNK_SCALING:
+        return MAX_CHUNK_SIZE
+
+    interp = (total_memory - MIN_VRAM_FOR_CHUNK_SCALING) / (
+        MAX_VRAM_FOR_CHUNK_SCALING - MIN_VRAM_FOR_CHUNK_SCALING
+    )
+    return int(MIN_CHUNK_SIZE + interp * (MAX_CHUNK_SIZE - MIN_CHUNK_SIZE))
 
 
 class Decoder(nn.Module):
     r"""
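For a feel of the curve, here is a minimal standalone sketch of the function above; the only change is that comfy.model_management.get_total_memory is replaced with hardcoded VRAM totals, purely for illustration:

# Standalone sketch of the scaling curve in the diff above; real VRAM
# detection is swapped out for hardcoded totals.
MIN_VRAM_FOR_CHUNK_SCALING = 6 * 1024 ** 3
MAX_VRAM_FOR_CHUNK_SCALING = 24 * 1024 ** 3
MIN_CHUNK_SIZE = 32 * 1024 ** 2
MAX_CHUNK_SIZE = 128 * 1024 ** 2

def chunk_size_for(total_memory: int) -> int:
    # Clamp below 6 GiB and above 24 GiB, interpolate linearly in between.
    if total_memory <= MIN_VRAM_FOR_CHUNK_SCALING:
        return MIN_CHUNK_SIZE
    if total_memory >= MAX_VRAM_FOR_CHUNK_SCALING:
        return MAX_CHUNK_SIZE
    interp = (total_memory - MIN_VRAM_FOR_CHUNK_SCALING) / (
        MAX_VRAM_FOR_CHUNK_SCALING - MIN_VRAM_FOR_CHUNK_SCALING
    )
    return int(MIN_CHUNK_SIZE + interp * (MAX_CHUNK_SIZE - MIN_CHUNK_SIZE))

for gib in (4, 6, 8, 12, 16, 24, 48):
    print(f"{gib:>2} GiB VRAM -> {chunk_size_for(gib * 1024 ** 3) / 1024 ** 2:.0f} MiB chunks")

A 12 GiB card, for example, sits a third of the way along the 6-24 GiB range and therefore gets 64 MiB chunks.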
@@ -525,6 +541,7 @@ class Decoder(nn.Module):
         timestep_shift_scale = ada_values.unbind(dim=1)
 
         output = []
+        max_chunk_size = get_max_chunk_size(sample.device)
 
         def run_up(idx, sample, ended):
            if idx >= len(self.up_blocks):
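Note that max_chunk_size is looked up once per decode call, from sample.device rather than from a module-level constant, so presumably a multi-GPU setup gets a budget matched to whichever device the latent actually lives on.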
@@ -554,7 +571,7 @@ class Decoder(nn.Module):
                 return
 
             total_bytes = sample.numel() * sample.element_size()
-            num_chunks = (total_bytes + MAX_CHUNK_SIZE - 1) // MAX_CHUNK_SIZE
+            num_chunks = (total_bytes + max_chunk_size - 1) // max_chunk_size
             samples = torch.chunk(sample, chunks=num_chunks, dim=2)
 
             for chunk_idx, sample1 in enumerate(samples):
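The (total_bytes + max_chunk_size - 1) // max_chunk_size expression is integer ceiling division: it yields the smallest chunk count that keeps every chunk at or under the budget. A minimal standalone sketch of that arithmetic with a made-up latent shape (the real decoder gets its shapes from the LTX latent, not from this example); dim=2 is the temporal axis in a (batch, channels, time, height, width) layout:

import torch

max_chunk_size = 64 * 1024 ** 2           # e.g. the budget picked for ~12 GiB VRAM
sample = torch.zeros(1, 128, 96, 64, 64)  # hypothetical float32 latent, 192 MiB

total_bytes = sample.numel() * sample.element_size()
num_chunks = (total_bytes + max_chunk_size - 1) // max_chunk_size  # ceil(192 / 64) = 3
samples = torch.chunk(sample, chunks=num_chunks, dim=2)

print(num_chunks, [s.shape[2] for s in samples])  # 3 [32, 32, 32]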