From d8635dcb399d4f874345b836a481378ff9ed2264 Mon Sep 17 00:00:00 2001
From: Jedrzej Kosinski
Date: Sun, 14 Jun 2026 23:31:41 -0700
Subject: [PATCH] Cube3D: keep disable_offload=True (VQ decode needs full residency)

The VQ bottleneck reads raw parameters outside any hooked forward, so the
streaming-offload cast hooks cannot relocate them and decode fails with a
device mismatch under partial load. disable_offload is the standard
declarative flag for VAEs that need full residency (audio VAEs do the
same), and the decode still flows through the managed comfy.sd.VAE.decode
path.

Amp-Thread-ID: https://ampcode.com/threads/T-019ec361-addb-70d8-a74b-438ce8a1e096
Co-authored-by: Amp
---
 comfy/sd.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/comfy/sd.py b/comfy/sd.py
index c2ececeeb..74e388553 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -783,6 +783,12 @@ class VAE:
             elif "bottleneck.block.codebook.weight" in sd:
                 self.cube3d = True
                 self.latent_dim = 1
+                # The VQ bottleneck (get_codebook/lookup_codebook) reads raw parameters
+                # outside any hooked forward, so the streaming-offload cast hooks can't
+                # relocate them; the model must be fully resident to decode. This is a
+                # correctness requirement, declared via the standard flag (like the audio
+                # VAEs) rather than managed manually in the node.
+                self.disable_offload = True
                 embed_dim = sd["bottleneck.block.codebook.weight"].shape[1]
                 num_codes = sd["bottleneck.block.codebook.weight"].shape[0]
                 width = sd["bottleneck.block.c_out.weight"].shape[0]