From d8635dcb399d4f874345b836a481378ff9ed2264 Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski <kosinkadink1@gmail.com>
Date: Sun, 14 Jun 2026 23:31:41 -0700
Subject: [PATCH] Cube3D: keep disable_offload=True (VQ decode needs full
 residency)

The VQ bottleneck reads raw parameters outside any hooked forward() call, so
the streaming-offload cast hooks cannot relocate them, and decode fails with a
device mismatch under partial load. Setting disable_offload is the standard
declarative way to mark a VAE that needs full residency (the audio VAEs do the
same), and decode still flows through the managed comfy.sd.VAE.decode path.

Amp-Thread-ID: https://ampcode.com/threads/T-019ec361-addb-70d8-a74b-438ce8a1e096
Co-authored-by: Amp <amp@ampcode.com>
---
 comfy/sd.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/comfy/sd.py b/comfy/sd.py
index c2ececeeb..74e388553 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -783,6 +783,12 @@ class VAE:
             elif "bottleneck.block.codebook.weight" in sd:
                 self.cube3d = True
                 self.latent_dim = 1
+                # The VQ bottleneck (get_codebook/lookup_codebook) reads raw parameters
+                # outside any hooked forward, so the streaming-offload cast hooks can't
+                # relocate them; the model must be fully resident to decode. This is a
+                # correctness requirement, declared via the standard flag (like the audio
+                # VAEs) rather than managed manually in the node.
+                self.disable_offload = True
                 embed_dim = sd["bottleneck.block.codebook.weight"].shape[1]
                 num_codes = sd["bottleneck.block.codebook.weight"].shape[0]
                 width = sd["bottleneck.block.c_out.weight"].shape[0]