""" Nodes for native Roblox Cube3D text-to-3D support. Graph: CLIPLoader(clip-l) -> CLIPTextEncode -> CONDITIONING UNETLoader(shape_gpt) -> MODEL --\ VAELoader(shape_tokenizer) -> VAE -> CubeCodebookPatch -> MODEL CFGGuider(MODEL, pos, neg, cfg) + SamplerCube + (trivial sigmas) + EmptyCubeLatent -> SamplerCustomAdvanced -> LATENT (token IDs) VAEDecodeCube(VAE, LATENT) -> MESH -> SaveGLB """ import numpy as np import torch from typing_extensions import override import comfy.ldm.cube.vae import comfy.model_management import comfy.samplers from comfy_api.latest import ComfyExtension, IO, Types from comfy_extras.nodes_save_3d import pack_variable_mesh_batch class EmptyCubeLatent(IO.ComfyNode): @classmethod def define_schema(cls): return IO.Schema( node_id="EmptyCubeLatent", category="latent/3d", inputs=[ IO.Int.Input("num_tokens", default=1024, min=1, max=8192, tooltip="Shape token sequence length. Must match the tokenizer " "(1024 for cube3d-v0.5, 512 for v0.1)."), IO.Int.Input("batch_size", default=1, min=1, max=64), ], outputs=[IO.Latent.Output()], ) @classmethod def execute(cls, num_tokens, batch_size) -> IO.NodeOutput: # Channels-first 1D latent (B, 1, num_tokens), mirroring Hunyuan3Dv2's (B, C, L) # convention (latent_channels=1). The sampler only uses the sequence length. latent = torch.zeros([batch_size, 1, num_tokens], device=comfy.model_management.intermediate_device()) return IO.NodeOutput({"samples": latent, "type": "cube_tokens"}) class CubeCodebookPatch(IO.ComfyNode): """Inject the projected VQ codebook into the GPT token-embedding table. Upstream copies shape_proj(tokenizer.codebook) into wte.weight[:num_codes] at load time; without it generation is garbage. Done here as a ModelPatcher object patch so it composes with normal model loading/offload.""" @classmethod def define_schema(cls): return IO.Schema( node_id="CubeCodebookPatch", display_name="Cube Codebook Patch", category="advanced/model", inputs=[ IO.Model.Input("model"), IO.Vae.Input("vae"), ], outputs=[IO.Model.Output()], ) @classmethod def execute(cls, model, vae) -> IO.NodeOutput: gpt = model.get_model_object("diffusion_model") codebook = vae.first_stage_model.bottleneck.block.get_codebook() # (num_codes, embed_dim) fp32 w = gpt.shape_proj.weight proj = gpt.shape_proj(codebook.to(device=w.device, dtype=w.dtype)) # (num_codes, n_embd) old = model.get_model_object("diffusion_model.transformer.wte.weight") new = old.clone() new[:proj.shape[0]] = proj.to(device=new.device, dtype=new.dtype) m = model.clone() m.add_object_patch("diffusion_model.transformer.wte.weight", torch.nn.Parameter(new, requires_grad=False)) return IO.NodeOutput(m) class SamplerCube(IO.ComfyNode): @classmethod def define_schema(cls): return IO.Schema( node_id="SamplerCube", display_name="Sampler Cube (autoregressive)", category="sampling/custom_sampling/samplers", inputs=[ IO.Float.Input("top_p", default=1.0, min=0.0, max=1.0, step=0.01, tooltip="1.0 = deterministic greedy (upstream default). " "<1.0 enables nucleus sampling."), ], outputs=[IO.Sampler.Output()], ) @classmethod def execute(cls, top_p) -> IO.NodeOutput: return IO.NodeOutput(comfy.samplers.ksampler("cube", {"top_p": top_p})) class VAEDecodeCube(IO.ComfyNode): @classmethod def define_schema(cls): return IO.Schema( node_id="VAEDecodeCube", display_name="VAE Decode Cube (3D)", category="latent/3d", inputs=[ IO.Vae.Input("vae"), IO.Latent.Input("samples"), IO.Float.Input("resolution_base", default=8.0, min=4.0, max=10.0, step=0.5, tooltip="Grid cells per axis = 2^resolution_base. 8.0 matches " "upstream default (257^3 grid)."), IO.Int.Input("chunk_size", default=100000, min=1000, max=2000000, advanced=True), ], outputs=[IO.Mesh.Output()], ) @classmethod def execute(cls, vae, samples, resolution_base, chunk_size) -> IO.NodeOutput: # Managed decode: comfy.sd.VAE.decode handles model loading + device/dtype and # returns the occupancy grid logits (B, gx, gy, gz). Marching cubes runs here. grid = vae.decode(samples["samples"], vae_options={"resolution_base": resolution_base, "chunk_size": chunk_size}) bounds = vae.first_stage_model.decode_bounds bbox_min = np.array(bounds[0:3]) bbox_size = np.array(bounds[3:6]) - bbox_min grid_size = list(grid.shape[1:]) verts_list, faces_list = [], [] for i in range(grid.shape[0]): v, f = comfy.ldm.cube.vae.grid_logits_to_mesh(grid[i], grid_size, bbox_size, bbox_min) verts_list.append(torch.from_numpy(v)) faces_list.append(torch.from_numpy(f.astype(np.int64))) mesh = pack_variable_mesh_batch(verts_list, faces_list) return IO.NodeOutput(mesh) class CubeExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ EmptyCubeLatent, CubeCodebookPatch, SamplerCube, VAEDecodeCube, ] async def comfy_entrypoint() -> CubeExtension: return CubeExtension()