From 1d95ed211e0d971a1bfa76330a079d03eff9c802 Mon Sep 17 00:00:00 2001 From: drozbay <17261091+drozbay@users.noreply.github.com> Date: Tue, 12 May 2026 16:57:31 -0600 Subject: [PATCH 01/13] Fix LTXV mid-video multi-frame guide alignment (CORE-129) (#13625) --- comfy_extras/nodes_lt.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index a4c85db77..3dc1199c2 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -338,8 +338,25 @@ class LTXVAddGuide(io.ComfyNode): noise_mask = get_noise_mask(latent) _, _, latent_length, latent_height, latent_width = latent_image.shape + + # For mid-video multi-frame guides, prepend+strip a throwaway first frame so the VAE's "first latent = 1 pixel frame" asymmetry lands on the discarded slot + time_scale_factor = scale_factors[0] + num_frames_to_keep = ((image.shape[0] - 1) // time_scale_factor) * time_scale_factor + 1 + resolved_frame_idx = frame_idx + if frame_idx < 0: + _, num_keyframes = get_keyframe_idxs(positive) + resolved_frame_idx = max((latent_length - num_keyframes - 1) * time_scale_factor + 1 + frame_idx, 0) + causal_fix = resolved_frame_idx == 0 or num_frames_to_keep == 1 + + if not causal_fix: + image = torch.cat([image[:1], image], dim=0) + image, t = cls.encode(vae, latent_width, latent_height, image, scale_factors) + if not causal_fix: + t = t[:, :, 1:, :, :] + image = image[1:] + frame_idx, latent_idx = cls.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors) assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence." @@ -352,6 +369,7 @@ class LTXVAddGuide(io.ComfyNode): t, strength, scale_factors, + causal_fix=causal_fix, ) # Track this guide for per-reference attention control. From 300b6c8c9186cfcd4b2b2c51ec0afd4449e7fbb7 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 12 May 2026 17:28:20 -0700 Subject: [PATCH 02/13] Revert some breaking changes. (#13861) --- comfy_extras/nodes_mask.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index c9b2a84d9..96ee1a0f8 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -40,23 +40,13 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_sou inverse_mask = torch.ones_like(mask) - mask - source_rgb = source[:, :3, :visible_height, :visible_width] - dest_slice = destination[..., top:bottom, left:right] - - if destination.shape[1] == 4: - if torch.max(dest_slice) == 0: - destination[:, :3, top:bottom, left:right] = source_rgb - destination[:, 3:4, top:bottom, left:right] = mask - else: - destination[:, :3, top:bottom, left:right] = (mask * source_rgb) + (inverse_mask * dest_slice[:, :3]) - destination[:, 3:4, top:bottom, left:right] = torch.max(mask, dest_slice[:, 3:4]) - else: - source_portion = mask * source_rgb - destination_portion = inverse_mask * dest_slice - destination[..., top:bottom, left:right] = source_portion + destination_portion + source_portion = mask * source[..., :visible_height, :visible_width] + destination_portion = inverse_mask * destination[..., top:bottom, left:right] + destination[..., top:bottom, left:right] = source_portion + destination_portion return destination + class LatentCompositeMasked(IO.ComfyNode): @classmethod def define_schema(cls): @@ -95,23 +85,18 @@ class ImageCompositeMasked(IO.ComfyNode): display_name="Image Composite Masked", category="image", inputs=[ + IO.Image.Input("destination"), IO.Image.Input("source"), IO.Int.Input("x", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1), IO.Int.Input("y", default=0, min=0, max=nodes.MAX_RESOLUTION, step=1), IO.Boolean.Input("resize_source", default=False), - IO.Image.Input("destination", optional=True), IO.Mask.Input("mask", optional=True), ], outputs=[IO.Image.Output()], ) @classmethod - def execute(cls, source, x, y, resize_source, destination = None, mask = None) -> IO.NodeOutput: - if destination is None: # transparent rgba - B, H, W, C = source.shape - destination = torch.zeros((B, H, W, 4), dtype=source.dtype, device=source.device) - if C == 3: - source = torch.nn.functional.pad(source, (0, 1), value=1.0) + def execute(cls, destination, source, x, y, resize_source, mask = None) -> IO.NodeOutput: destination, source = node_helpers.image_alpha_fix(destination, source) destination = destination.clone().movedim(-1, 1) output = composite(destination, source.movedim(-1, 1), x, y, mask, 1, resize_source).movedim(1, -1) From cccb697aa3d4f560a45b68d45f12369ca265079e Mon Sep 17 00:00:00 2001 From: angad777 Date: Wed, 13 May 2026 12:41:07 +1000 Subject: [PATCH 03/13] fix: create input directory if missing in LoadAudio define_schema (#13834) --- comfy_extras/nodes_audio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 5f514716f..6382dd618 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -297,6 +297,7 @@ class LoadAudio(IO.ComfyNode): @classmethod def define_schema(cls): input_dir = folder_paths.get_input_directory() + os.makedirs(input_dir, exist_ok=True) files = folder_paths.filter_files_content_types(os.listdir(input_dir), ["audio", "video"]) return IO.Schema( node_id="LoadAudio", From 2bd65f2091f0276e9ff6e18380d452d4f505fc27 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Tue, 12 May 2026 20:55:38 -0700 Subject: [PATCH 04/13] Better Hidream O1 mem usage factor for non dynamic vram. (#13864) --- comfy/supported_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 8d2e02f68..1e4434fd5 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -1443,7 +1443,7 @@ class HiDreamO1(supported_models_base.BASE): } latent_format = latent_formats.HiDreamO1Pixel - memory_usage_factor = 0.6 + memory_usage_factor = 0.033 # fp16 not supported: LM MLP down_proj activations fp16 overflow, causing NaNs supported_inference_dtypes = [torch.bfloat16, torch.float32] From 240363f11e6605f8e864ff0491297d55b9793e91 Mon Sep 17 00:00:00 2001 From: "Daxiong (Lin)" Date: Wed, 13 May 2026 13:33:29 +0800 Subject: [PATCH 05/13] chore: update embedded docs to v0.5.0 (#13865) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c5a6f4cec..86c0a3c72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ comfyui-frontend-package==1.43.18 comfyui-workflow-templates==0.9.73 -comfyui-embedded-docs==0.4.4 +comfyui-embedded-docs==0.5.0 torch torchsde torchvision From a5189fed515a96b71cf2b743fb93eaa3d42bc881 Mon Sep 17 00:00:00 2001 From: AustinMroz Date: Tue, 12 May 2026 23:42:31 -0700 Subject: [PATCH 06/13] Add Create Video to the essentials tab (#13863) --- comfy_extras/nodes_video.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comfy_extras/nodes_video.py b/comfy_extras/nodes_video.py index 719acf2f1..78a2a28f8 100644 --- a/comfy_extras/nodes_video.py +++ b/comfy_extras/nodes_video.py @@ -123,6 +123,7 @@ class CreateVideo(io.ComfyNode): search_aliases=["images to video"], display_name="Create Video", category="video", + essentials_category="Video Tools", description="Create a video from images.", inputs=[ io.Image.Input("images", tooltip="The images to create a video from."), From 8505abf52e42f4441d9d53baf4c31a2ec7123400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jukka=20Sepp=C3=A4nen?= <40791699+kijai@users.noreply.github.com> Date: Wed, 13 May 2026 18:33:53 +0300 Subject: [PATCH 07/13] feat: Extend Save3D to save vertex colors and textures (CORE-189) (#13824) Split GLB save logic out of nodes_hunyuan3d.py into a new nodes_save_3d.py, and extend the writer to support UVs, per-vertex colors, and embedded baseColor textures. Extend the MESH type with optional uvs, vertex_colors, and texture fields so meshes can carry texture data through the graph. Add pack_variable_mesh_batch / get_mesh_batch_item helpers and switch VoxelToMesh / VoxelToMeshBasic to use them so batches with differing vertex/face counts no longer fail at torch.stack. --- comfy_api/latest/_util/geometry_types.py | 21 +- comfy_extras/nodes_hunyuan3d.py | 211 +----------- comfy_extras/nodes_save_3d.py | 396 +++++++++++++++++++++++ nodes.py | 1 + 4 files changed, 422 insertions(+), 207 deletions(-) create mode 100644 comfy_extras/nodes_save_3d.py diff --git a/comfy_api/latest/_util/geometry_types.py b/comfy_api/latest/_util/geometry_types.py index b586fceb3..cdde60b10 100644 --- a/comfy_api/latest/_util/geometry_types.py +++ b/comfy_api/latest/_util/geometry_types.py @@ -12,9 +12,24 @@ class VOXEL: class MESH: - def __init__(self, vertices: torch.Tensor, faces: torch.Tensor): - self.vertices = vertices - self.faces = faces + def __init__(self, vertices: torch.Tensor, faces: torch.Tensor, + uvs: torch.Tensor | None = None, + vertex_colors: torch.Tensor | None = None, + texture: torch.Tensor | None = None, + vertex_counts: torch.Tensor | None = None, + face_counts: torch.Tensor | None = None): + + assert (vertex_counts is None) == (face_counts is None), \ + "vertex_counts and face_counts must be provided together (both or neither)" + self.vertices = vertices # vertices: (B, N, 3) + self.faces = faces # faces: (B, M, 3) + self.uvs = uvs # uvs: (B, N, 2) + self.vertex_colors = vertex_colors # vertex_colors: (B, N, 3 or 4) + self.texture = texture # texture: (B, H, W, 3) + # When vertices/faces are zero-padded to a common N/M across the batch (variable-size mesh batch), + # these hold the real per-item lengths (B,). None means rows are uniform and no slicing is needed. + self.vertex_counts = vertex_counts + self.face_counts = face_counts class File3D: diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index bf18ecb88..403eb855b 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -1,12 +1,7 @@ import torch -import os -import json -import struct -import numpy as np from comfy.ldm.modules.diffusionmodules.mmdit import get_1d_sincos_pos_embed_from_grid_torch -import folder_paths import comfy.model_management -from comfy.cli_args import args +from comfy_extras.nodes_save_3d import pack_variable_mesh_batch from typing_extensions import override from comfy_api.latest import ComfyExtension, IO, Types from comfy_api.latest._util import MESH, VOXEL # only for backward compatibility if someone import it from this file (will be removed later) # noqa @@ -444,7 +439,9 @@ class VoxelToMeshBasic(IO.ComfyNode): vertices.append(v) faces.append(f) - return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces): + return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces)) decode = execute # TODO: remove @@ -481,206 +478,13 @@ class VoxelToMesh(IO.ComfyNode): vertices.append(v) faces.append(f) - return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + if vertices and all(v.shape == vertices[0].shape for v in vertices) and all(f.shape == faces[0].shape for f in faces): + return IO.NodeOutput(Types.MESH(torch.stack(vertices), torch.stack(faces))) + return IO.NodeOutput(pack_variable_mesh_batch(vertices, faces)) decode = execute # TODO: remove -def save_glb(vertices, faces, filepath, metadata=None): - """ - Save PyTorch tensor vertices and faces as a GLB file without external dependencies. - - Parameters: - vertices: torch.Tensor of shape (N, 3) - The vertex coordinates - faces: torch.Tensor of shape (M, 3) - The face indices (triangle faces) - filepath: str - Output filepath (should end with .glb) - """ - - # Convert tensors to numpy arrays - vertices_np = vertices.cpu().numpy().astype(np.float32) - faces_np = faces.cpu().numpy().astype(np.uint32) - - vertices_buffer = vertices_np.tobytes() - indices_buffer = faces_np.tobytes() - - def pad_to_4_bytes(buffer): - padding_length = (4 - (len(buffer) % 4)) % 4 - return buffer + b'\x00' * padding_length - - vertices_buffer_padded = pad_to_4_bytes(vertices_buffer) - indices_buffer_padded = pad_to_4_bytes(indices_buffer) - - buffer_data = vertices_buffer_padded + indices_buffer_padded - - vertices_byte_length = len(vertices_buffer) - vertices_byte_offset = 0 - indices_byte_length = len(indices_buffer) - indices_byte_offset = len(vertices_buffer_padded) - - gltf = { - "asset": {"version": "2.0", "generator": "ComfyUI"}, - "buffers": [ - { - "byteLength": len(buffer_data) - } - ], - "bufferViews": [ - { - "buffer": 0, - "byteOffset": vertices_byte_offset, - "byteLength": vertices_byte_length, - "target": 34962 # ARRAY_BUFFER - }, - { - "buffer": 0, - "byteOffset": indices_byte_offset, - "byteLength": indices_byte_length, - "target": 34963 # ELEMENT_ARRAY_BUFFER - } - ], - "accessors": [ - { - "bufferView": 0, - "byteOffset": 0, - "componentType": 5126, # FLOAT - "count": len(vertices_np), - "type": "VEC3", - "max": vertices_np.max(axis=0).tolist(), - "min": vertices_np.min(axis=0).tolist() - }, - { - "bufferView": 1, - "byteOffset": 0, - "componentType": 5125, # UNSIGNED_INT - "count": faces_np.size, - "type": "SCALAR" - } - ], - "meshes": [ - { - "primitives": [ - { - "attributes": { - "POSITION": 0 - }, - "indices": 1, - "mode": 4 # TRIANGLES - } - ] - } - ], - "nodes": [ - { - "mesh": 0 - } - ], - "scenes": [ - { - "nodes": [0] - } - ], - "scene": 0 - } - - if metadata is not None: - gltf["asset"]["extras"] = metadata - - # Convert the JSON to bytes - gltf_json = json.dumps(gltf).encode('utf8') - - def pad_json_to_4_bytes(buffer): - padding_length = (4 - (len(buffer) % 4)) % 4 - return buffer + b' ' * padding_length - - gltf_json_padded = pad_json_to_4_bytes(gltf_json) - - # Create the GLB header - # Magic glTF - glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data)) - - # Create JSON chunk header (chunk type 0) - json_chunk_header = struct.pack('