diff --git a/comfy/bg_removal_model.py b/comfy/bg_removal_model.py index 6dec65e63..c772c5f6a 100644 --- a/comfy/bg_removal_model.py +++ b/comfy/bg_removal_model.py @@ -55,12 +55,7 @@ class BackgroundRemovalModel(): out = torch.nn.functional.interpolate(out, size=(H, W), mode="bicubic", antialias=False) mask = out.sigmoid().to(device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype()) - if mask.ndim == 3: - mask = mask.unsqueeze(0) - if mask.shape[1] != 1: - mask = mask.movedim(-1, 1) - - return mask + return mask.squeeze(1) # (B, 1, H, W) -> (B, H, W) def load_background_removal_model(sd): diff --git a/comfy/cli_args.py b/comfy/cli_args.py index 9bda414d1..a4cabcc65 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -149,6 +149,7 @@ parser.add_argument("--async-offload", nargs='?', const=2, type=int, default=Non parser.add_argument("--disable-async-offload", action="store_true", help="Disable async weight offloading.") parser.add_argument("--disable-dynamic-vram", action="store_true", help="Disable dynamic VRAM and use estimate based model loading.") parser.add_argument("--enable-dynamic-vram", action="store_true", help="Enable dynamic VRAM on systems where it's not enabled by default.") +parser.add_argument("--fast-disk", action="store_true", help="Prefer disk-backed dynamic loading and offload over unpinned RAM. Can be faster for users with fast NVME disks.") parser.add_argument("--force-non-blocking", action="store_true", help="Force ComfyUI to use non-blocking operations for all applicable tensors. This may improve performance on some non-Nvidia systems but can cause issues with some workflows.") diff --git a/comfy/ldm/cosmos/predict2.py b/comfy/ldm/cosmos/predict2.py index 2268bff38..671fe834d 100644 --- a/comfy/ldm/cosmos/predict2.py +++ b/comfy/ldm/cosmos/predict2.py @@ -14,15 +14,7 @@ from torchvision import transforms import comfy.patcher_extension from comfy.ldm.modules.attention import optimized_attention import comfy.ldm.common_dit - -def apply_rotary_pos_emb( - t: torch.Tensor, - freqs: torch.Tensor, -) -> torch.Tensor: - t_ = t.reshape(*t.shape[:-1], 2, -1).movedim(-2, -1).unsqueeze(-2).float() - t_out = freqs[..., 0] * t_[..., 0] + freqs[..., 1] * t_[..., 1] - t_out = t_out.movedim(-1, -2).reshape(*t.shape).type_as(t) - return t_out +import comfy.quant_ops # ---------------------- Feed Forward Network ----------------------- @@ -173,8 +165,7 @@ class Attention(nn.Module): k = self.k_norm(k) v = self.v_norm(v) if self.is_selfattn and rope_emb is not None: # only apply to self-attention! - q = apply_rotary_pos_emb(q, rope_emb) - k = apply_rotary_pos_emb(k, rope_emb) + q, k = comfy.quant_ops.ck.apply_rope_split_half(q, k, rope_emb) return q, k, v q, k, v = apply_norm_and_rotary_pos_emb(q, k, v, rope_emb) diff --git a/comfy/ldm/ernie/model.py b/comfy/ldm/ernie/model.py index eba661aec..f158ca1d2 100644 --- a/comfy/ldm/ernie/model.py +++ b/comfy/ldm/ernie/model.py @@ -5,6 +5,7 @@ import torch.nn.functional as F from comfy.ldm.modules.attention import optimized_attention import comfy.model_management +import comfy.quant_ops def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: assert dim % 2 == 0 @@ -19,15 +20,6 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor: out = torch.stack([torch.cos(out), torch.sin(out)], dim=0) return out.to(dtype=torch.float32, device=pos.device) -def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor: - rot_dim = freqs_cis.shape[-1] - x, x_pass = x_in[..., :rot_dim], x_in[..., rot_dim:] - cos_ = freqs_cis[0] - sin_ = freqs_cis[1] - x1, x2 = x.chunk(2, dim=-1) - x_rotated = torch.cat((-x2, x1), dim=-1) - return torch.cat((x * cos_ + x_rotated * sin_, x_pass), dim=-1) - class ErnieImageEmbedND3(nn.Module): def __init__(self, dim: int, theta: int, axes_dim: tuple): super().__init__() @@ -37,8 +29,16 @@ class ErnieImageEmbedND3(nn.Module): def forward(self, ids: torch.Tensor) -> torch.Tensor: emb = torch.cat([rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(3)], dim=-1) - emb = emb.unsqueeze(3) # [2, B, S, 1, head_dim//2] - return torch.stack([emb, emb], dim=-1).reshape(*emb.shape[:-1], -1) # [B, S, 1, head_dim] + cos_ = emb[0] + sin_ = emb[1] + N = cos_.shape[-1] + half = N // 2 + cos_top = cos_[..., :half].repeat_interleave(2, dim=-1) + sin_top = sin_[..., :half].repeat_interleave(2, dim=-1) + cos_bot = cos_[..., half:].repeat_interleave(2, dim=-1) + sin_bot = sin_[..., half:].repeat_interleave(2, dim=-1) + rot = torch.stack([cos_top, -sin_top, sin_bot, cos_bot], dim=-1) + return rot.reshape(*rot.shape[:-1], 2, 2).unsqueeze(2) class ErnieImagePatchEmbedDynamic(nn.Module): def __init__(self, in_channels: int, embed_dim: int, patch_size: int, operations, device=None, dtype=None): @@ -115,8 +115,7 @@ class ErnieImageAttention(nn.Module): key = self.norm_k(key) if image_rotary_emb is not None: - query = apply_rotary_emb(query, image_rotary_emb) - key = apply_rotary_emb(key, image_rotary_emb) + query, key = comfy.quant_ops.ck.apply_rope_split_half(query, key, image_rotary_emb) q_flat = query.reshape(B, S, -1) k_flat = key.reshape(B, S, -1) @@ -274,7 +273,7 @@ class ErnieImageModel(nn.Module): image_ids = image_ids.view(1, N_img, 3).expand(B, -1, -1) - rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype) + rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)) del image_ids, text_ids sample = self.time_proj(timesteps).to(dtype) diff --git a/comfy/ldm/qwen_image/model.py b/comfy/ldm/qwen_image/model.py index 0862f72f7..3462d8108 100644 --- a/comfy/ldm/qwen_image/model.py +++ b/comfy/ldm/qwen_image/model.py @@ -51,15 +51,6 @@ class FeedForward(nn.Module): return hidden_states -def apply_rotary_emb(x, freqs_cis): - if x.shape[1] == 0: - return x - - t_ = x.reshape(*x.shape[:-1], -1, 1, 2) - t_out = freqs_cis[..., 0] * t_[..., 0] + freqs_cis[..., 1] * t_[..., 1] - return t_out.reshape(*x.shape) - - class QwenTimestepProjEmbeddings(nn.Module): def __init__(self, embedding_dim, pooled_projection_dim, use_additional_t_cond=False, dtype=None, device=None, operations=None): super().__init__() diff --git a/comfy/memory_management.py b/comfy/memory_management.py index 962addb27..e032b7dcd 100644 --- a/comfy/memory_management.py +++ b/comfy/memory_management.py @@ -4,6 +4,7 @@ import dataclasses import torch from typing import NamedTuple +import comfy_aimdo.host_buffer from comfy.quant_ops import QuantizedTensor @@ -17,21 +18,18 @@ class TensorFileSlice(NamedTuple): def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=None): if isinstance(tensor, QuantizedTensor): - if not isinstance(destination, QuantizedTensor): - return False - if tensor._layout_cls != destination._layout_cls: - return False - - if not read_tensor_file_slice_into(tensor._qdata, destination._qdata, stream=stream, + if not read_tensor_file_slice_into(tensor._qdata, + destination._qdata if destination is not None else None, stream=stream, destination2=(destination2._qdata if destination2 is not None else None)): return False - dst_orig_dtype = destination._params.orig_dtype - destination._params.copy_from(tensor._params, non_blocking=False) - destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype) + if destination is not None: + dst_orig_dtype = destination._params.orig_dtype + destination._params.copy_from(tensor._params, non_blocking=False) + destination._params = dataclasses.replace(destination._params, orig_dtype=dst_orig_dtype) if destination2 is not None: dst_orig_dtype = destination2._params.orig_dtype - destination2._params.copy_from(destination._params, non_blocking=True) + destination2._params.copy_from(destination._params if destination is not None else tensor._params, non_blocking=True) destination2._params = dataclasses.replace(destination2._params, orig_dtype=dst_orig_dtype) return True @@ -39,10 +37,15 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N if info is None: return False + if destination is not None and destination.device.type != "cpu" and destination2 is None: + destination2 = destination + destination = None + file_obj = info.file_ref - if (destination.device.type != "cpu" - or file_obj is None - or destination.numel() * destination.element_size() < info.size + if (file_obj is None + or (destination is None and destination2 is None) + or (destination is not None and (destination.device.type != "cpu" or destination.numel() * destination.element_size() < info.size)) + or (destination2 is not None and (destination2.device.type == "cpu" or destination2.numel() * destination2.element_size() < info.size)) or tensor.numel() * tensor.element_size() != info.size or tensor.storage_offset() != 0 or not tensor.is_contiguous()): @@ -51,6 +54,14 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N if info.size == 0: return True + if destination is None: + stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0 + comfy_aimdo.host_buffer.read_file_to_device(file_obj, info.offset, info.size, + stream_ptr, destination2.data_ptr(), + destination2.device.index, + mark_cold=False) + return True + hostbuf = getattr(destination.untyped_storage(), "_comfy_hostbuf", None) if hostbuf is not None: stream_ptr = getattr(stream, "cuda_stream", 0) if stream is not None else 0 @@ -63,6 +74,9 @@ def read_tensor_file_slice_into(tensor, destination, stream=None, destination2=N device=None if destination2 is None else destination2.device.index) return True + if not hasattr(file_obj, "seek") or not hasattr(file_obj, "readinto"): + return False + buf_type = ctypes.c_ubyte * info.size view = memoryview(buf_type.from_address(destination.data_ptr())) diff --git a/comfy/model_management.py b/comfy/model_management.py index b01c4d7fa..dfd58bf1b 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -641,14 +641,17 @@ def free_pins(size, evict_active=False): return freed_total def ensure_pin_budget(size, evict_active=False): - shortfall = size + comfy.memory_management.RAM_CACHE_HEADROOM / 2 - psutil.virtual_memory().available + if args.fast_disk: + shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY + else: + shortfall = size + max(comfy.memory_management.RAM_CACHE_HEADROOM / 2, 2048 * 1024 ** 2) - psutil.virtual_memory().available if shortfall <= 0: return True to_free = shortfall + PIN_PRESSURE_HYSTERESIS return free_pins(to_free, evict_active=evict_active) >= shortfall -def ensure_pin_registerable(size, evict_active=False): +def ensure_pin_registerable(size, evict_active=True): shortfall = TOTAL_PINNED_MEMORY + size - MAX_PINNED_MEMORY if MAX_PINNED_MEMORY <= 0: return False @@ -658,10 +661,17 @@ def ensure_pin_registerable(size, evict_active=False): shortfall += REGISTERABLE_PIN_HYSTERESIS for loaded_model in reversed(current_loaded_models): model = loaded_model.model - if model is not None and model.is_dynamic() and (evict_active or not model.model.dynamic_pins[model.load_device]["active"]): + if model is not None and model.is_dynamic() and not model.model.dynamic_pins[model.load_device]["active"]: shortfall -= model.unregister_inactive_pins(shortfall) if shortfall <= 0: return True + if evict_active: + for loaded_model in current_loaded_models: + model = loaded_model.model + if model is not None and model.is_dynamic() and model.model.dynamic_pins[model.load_device]["active"]: + shortfall -= model.unregister_inactive_pins(shortfall) + if shortfall <= 0: + return True return shortfall <= REGISTERABLE_PIN_HYSTERESIS class LoadedModel: @@ -803,9 +813,9 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins for x in can_unload_sorted: i = x[-1] memory_to_free = 1e32 - if current_loaded_models[i].model.is_dynamic() and (not DISABLE_SMART_MEMORY or device is None): + if not DISABLE_SMART_MEMORY or device is None: memory_to_free = 0 if device is None else memory_required - get_free_memory(device) - if for_dynamic: + if current_loaded_models[i].model.is_dynamic() and for_dynamic: #don't actually unload dynamic models for the sake of other dynamic models #as that works on-demand. memory_required -= current_loaded_models[i].model.loaded_size() @@ -817,6 +827,10 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins for i in sorted(unloaded_model, reverse=True): unloaded_models.append(current_loaded_models.pop(i)) + if not for_dynamic and pins_required > 0: + ensure_pin_budget(pins_required) + ensure_pin_registerable(pins_required) + if len(unloaded_model) > 0: soft_empty_cache() elif device is not None: @@ -879,15 +893,19 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu model_to_unload.model_finalizer.detach() total_memory_required = {} + total_pins_required = {} for loaded_model in models_to_load: device = loaded_model.device total_memory_required[device] = total_memory_required.get(device, 0) + loaded_model.model_memory_required(device) + if not loaded_model.model.is_dynamic(): + total_pins_required[device] = total_pins_required.get(device, 0) + loaded_model.model_memory() for device in total_memory_required: if device != torch.device("cpu"): free_memory(total_memory_required[device] * 1.1 + extra_mem, device, - for_dynamic=free_for_dynamic) + for_dynamic=free_for_dynamic, + pins_required=total_pins_required.get(device, 0)) for device in total_memory_required: if device != torch.device("cpu"): @@ -1283,7 +1301,6 @@ STREAM_CAST_BUFFERS = {} LARGEST_CASTED_WEIGHT = (None, 0) STREAM_AIMDO_CAST_BUFFERS = {} LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) -STREAM_PIN_BUFFERS = {} DEFAULT_AIMDO_CAST_BUFFER_RESERVATION_SIZE = 16 * 1024 ** 3 @@ -1326,42 +1343,13 @@ def get_aimdo_cast_buffer(offload_stream, device): STREAM_AIMDO_CAST_BUFFERS[offload_stream] = cast_buffer return cast_buffer -def get_pin_buffer(offload_stream): - pin_buffer = STREAM_PIN_BUFFERS.get(offload_stream, None) - if pin_buffer is None: - pin_buffer = comfy_aimdo.host_buffer.HostBuffer(0, 0, pinned_hostbuf_size(8 * 1024**3), mark_cold=False) - STREAM_PIN_BUFFERS[offload_stream] = pin_buffer - elif offload_stream is not None: - event = getattr(pin_buffer, "_comfy_event", None) - if event is not None: - event.synchronize() - delattr(pin_buffer, "_comfy_event") - return pin_buffer - -def resize_pin_buffer(pin_buffer, size): - global TOTAL_PINNED_MEMORY - old_size = pin_buffer.size - if size <= old_size: - return True - growth = size - old_size - comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM) - ensure_pin_budget(growth, evict_active=True) - ensure_pin_registerable(growth, evict_active=True) - try: - pin_buffer.extend(size=size, reallocate=True) - except RuntimeError: - return False - TOTAL_PINNED_MEMORY += pin_buffer.size - old_size - return True - def reset_cast_buffers(): - global TOTAL_PINNED_MEMORY global LARGEST_CASTED_WEIGHT global LARGEST_AIMDO_CASTED_WEIGHT LARGEST_CASTED_WEIGHT = (None, 0) LARGEST_AIMDO_CASTED_WEIGHT = (None, 0) - for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS) | set(STREAM_PIN_BUFFERS): + for offload_stream in set(STREAM_CAST_BUFFERS) | set(STREAM_AIMDO_CAST_BUFFERS): if offload_stream is not None: offload_stream.synchronize() synchronize() @@ -1370,20 +1358,24 @@ def reset_cast_buffers(): mmap_obj.bounce() DIRTY_MMAPS.clear() - for pin_buffer in STREAM_PIN_BUFFERS.values(): - TOTAL_PINNED_MEMORY -= pin_buffer.size - TOTAL_PINNED_MEMORY = max(0, TOTAL_PINNED_MEMORY) - for loaded_model in current_loaded_models: model = loaded_model.model if model is not None and model.is_dynamic(): - model.model.dynamic_pins[model.load_device]["active"] = False + pin_state = model.model.dynamic_pins[model.load_device] + + if pin_state["active"]: + *_, buckets = pin_state["weights"] + for size, bucket in list(buckets.items()): + bucket[:] = [ entry for entry in bucket if entry[-1] is not None ] + if not bucket: + del buckets[size] + + pin_state["active"] = False model.partially_unload_ram(1e30, subsets=[ "patches" ]) - model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0]) + model.model.dynamic_pins[model.load_device]["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, pinned_hostbuf_size(model.model_size())), [], [-1], [0], [0], {}) STREAM_CAST_BUFFERS.clear() STREAM_AIMDO_CAST_BUFFERS.clear() - STREAM_PIN_BUFFERS.clear() soft_empty_cache() def get_offload_stream(device): @@ -1436,7 +1428,7 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None, r2=None): if hasattr(wf_context, "as_context"): wf_context = wf_context.as_context(stream) - dest_views = comfy.memory_management.interpret_gathered_like(tensors, r) + dest_views = comfy.memory_management.interpret_gathered_like(tensors, r) if r is not None else [None] * len(tensors) dest2_views = comfy.memory_management.interpret_gathered_like(tensors, r2) if r2 is not None else None with wf_context: for tensor in tensors: @@ -1448,9 +1440,10 @@ def cast_to_gathered(tensors, r, non_blocking=False, stream=None, r2=None): continue storage = tensor._qdata.untyped_storage() if isinstance(tensor, comfy.quant_ops.QuantizedTensor) else tensor.untyped_storage() mark_mmap_dirty(storage) - dest_view.copy_(tensor, non_blocking=non_blocking) + if dest_view is not None: + dest_view.copy_(tensor, non_blocking=non_blocking) if dest2_view is not None: - dest2_view.copy_(dest_view, non_blocking=non_blocking) + dest2_view.copy_(tensor if dest_view is None else dest_view, non_blocking=non_blocking) def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, stream=None, r=None): @@ -1723,6 +1716,13 @@ def is_device_xpu(device): def is_device_cuda(device): return is_device_type(device, 'cuda') +def set_torch_device(device): + """Set the current device for the given torch device. Supports CUDA and XPU.""" + if is_device_cuda(device): + torch.cuda.set_device(device) + elif is_device_xpu(device): + torch.xpu.set_device(device) + def is_directml_enabled(): global directml_enabled if directml_enabled: diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index 00a15fa63..b716a69e2 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -1721,8 +1721,8 @@ class ModelPatcherDynamic(ModelPatcher): """ if device not in self.model.dynamic_pins: self.model.dynamic_pins[device] = { - "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]), - "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0]), + "weights": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0], [0], {}), + "patches": (comfy_aimdo.host_buffer.HostBuffer(0, 0, 0), [], [-1], [0], [0], {}), "hostbufs_initialized": False, "failed": False, "active": False, @@ -1799,8 +1799,8 @@ class ModelPatcherDynamic(ModelPatcher): pin_state = self.model.dynamic_pins[self.load_device] if not pin_state["hostbufs_initialized"]: hostbuf_size = comfy.model_management.pinned_hostbuf_size(self.model_size()) - pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0]) - pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0]) + pin_state["weights"] = (comfy_aimdo.host_buffer.HostBuffer(0, 64 * 1024 * 1024, hostbuf_size), [], [-1], [0], [0], {}) + pin_state["patches"] = (comfy_aimdo.host_buffer.HostBuffer(0, 8 * 1024 * 1024, hostbuf_size), [], [-1], [0], [0], {}) pin_state["hostbufs_initialized"] = True pin_state["failed"] = False pin_state["active"] = True @@ -1942,18 +1942,16 @@ class ModelPatcherDynamic(ModelPatcher): return freed def loaded_ram_size(self): - return (self.model.dynamic_pins[self.load_device]["weights"][0].size + - self.model.dynamic_pins[self.load_device]["patches"][0].size) + return (self.model.dynamic_pins[self.load_device]["weights"][0].size) def pinned_memory_size(self): - return (self.model.dynamic_pins[self.load_device]["weights"][3][0] + - self.model.dynamic_pins[self.load_device]["patches"][3][0]) + return (self.model.dynamic_pins[self.load_device]["weights"][3][0]) def unregister_inactive_pins(self, ram_to_unload, subsets=[ "weights", "patches" ]): freed = 0 pin_state = self.model.dynamic_pins[self.load_device] for subset in subsets: - hostbuf, stack, stack_split, pinned_size = pin_state[subset] + hostbuf, stack, stack_split, pinned_size, *_ = pin_state[subset] split = stack_split[0] while split >= 0: module, offset = stack[split] @@ -1978,10 +1976,12 @@ class ModelPatcherDynamic(ModelPatcher): freed = 0 pin_state = self.model.dynamic_pins[self.load_device] for subset in subsets: - hostbuf, stack, stack_split, pinned_size = pin_state[subset] + hostbuf, stack, stack_split, pinned_size, *_ = pin_state[subset] while len(stack) > 0: module, offset = stack.pop() size = module._pin.numel() * module._pin.element_size() + module._pin_balancer_entry[-1] = None + del module._pin_balancer_entry del module._pin hostbuf.truncate(offset, do_unregister=module._pin_registered) stack_split[0] = min(stack_split[0], len(stack) - 1) diff --git a/comfy/model_prefetch.py b/comfy/model_prefetch.py index 72e11dec6..aa6d22d77 100644 --- a/comfy/model_prefetch.py +++ b/comfy/model_prefetch.py @@ -1,4 +1,5 @@ import comfy_aimdo.model_vbar +import comfy.memory_management import comfy.model_management import comfy.ops @@ -50,7 +51,17 @@ def prefetch_queue_pop(queue, device, module): if hasattr(s, "_v"): comfy_modules.append(s) + registerable_size = 0 + for s in comfy_modules: + registerable_size += comfy.memory_management.vram_aligned_size([s.weight, s.bias]) + for param_key in ("weight", "bias"): + lowvram_fn = getattr(s, param_key + "_lowvram_function", None) + if lowvram_fn is not None: + registerable_size += lowvram_fn.memory_required() + offload_stream = comfy.ops.cast_modules_with_vbar(comfy_modules, None, device, None, True) + if not comfy.model_management.args.fast_disk: + comfy.model_management.ensure_pin_registerable(registerable_size) comfy.model_management.sync_stream(device, offload_stream) queue[0] = (offload_stream, (prefetch, comfy_modules)) diff --git a/comfy/multigpu.py b/comfy/multigpu.py index e7f5b3d6f..bb9d334d3 100644 --- a/comfy/multigpu.py +++ b/comfy/multigpu.py @@ -17,7 +17,7 @@ class MultiGPUThreadPool: """Persistent thread pool for multi-GPU work distribution. Maintains one worker thread per extra GPU device. Each thread calls - torch.cuda.set_device() once at startup so that compiled kernel caches + set_torch_device() once at startup so that compiled kernel caches (inductor/triton) stay warm across diffusion steps. """ @@ -37,7 +37,7 @@ class MultiGPUThreadPool: def _worker_loop(self, device: torch.device, work_q: queue.Queue, result_q: queue.Queue): try: - torch.cuda.set_device(device) + comfy.model_management.set_torch_device(device) except Exception as e: logging.error(f"MultiGPUThreadPool: failed to set device {device}: {e}") while True: diff --git a/comfy/ops.py b/comfy/ops.py index 56445be8d..119177c37 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -76,8 +76,6 @@ except: cast_to = comfy.model_management.cast_to #TODO: remove once no more references -STREAM_PIN_BUFFER_HEADROOM = 8 * 1024 * 1024 - def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) @@ -94,9 +92,6 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin offload_stream = None cast_buffer = None cast_buffer_offset = 0 - stream_pin_hostbuf = None - stream_pin_offset = 0 - stream_pin_queue = [] def ensure_offload_stream(module, required_size, check_largest): nonlocal offload_stream @@ -130,22 +125,6 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin cast_buffer_offset += buffer_size return buffer - def get_stream_pin_buffer_offset(buffer_size): - nonlocal stream_pin_hostbuf - nonlocal stream_pin_offset - - if buffer_size == 0 or offload_stream is None: - return None - - if stream_pin_hostbuf is None: - stream_pin_hostbuf = comfy.model_management.get_pin_buffer(offload_stream) - if stream_pin_hostbuf is None: - return None - - offset = stream_pin_offset - stream_pin_offset += buffer_size - return offset - for s in comfy_modules: signature = comfy_aimdo.model_vbar.vbar_fault(s._v) resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) @@ -184,12 +163,18 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin if xfer_dest is None: xfer_dest = get_cast_buffer(dest_size) - def cast_maybe_lowvram_patch(xfer_source, xfer_dest, stream): + def cast_maybe_lowvram_patch(xfer_source, xfer_dest, stream, xfer_dest2=None): if xfer_source is not None: if getattr(xfer_source, "is_lowvram_patch", False): - xfer_source.prepare(xfer_dest, stream, copy=True, commit=False) - else: - comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=stream) + if xfer_dest is not None: + xfer_source.prepare(xfer_dest, stream, copy=True, commit=False) + xfer_source = [ xfer_dest ] + xfer_dest = xfer_dest2 + xfer_dest2 = None + elif xfer_dest2 is not None: + xfer_source.prepare(xfer_dest2, stream, copy=True, commit=False) + return + comfy.model_management.cast_to_gathered(xfer_source, xfer_dest, non_blocking=non_blocking, stream=stream, r2=xfer_dest2) def handle_pin(m, pin, source, dest, subset="weights", size=None): if pin is not None: @@ -198,19 +183,7 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin if signature is None: comfy.pinned_memory.pin_memory(m, subset=subset, size=size) pin = comfy.pinned_memory.get_pin(m, subset=subset) - if pin is not None: - if isinstance(source, list): - comfy.model_management.cast_to_gathered(source, pin, non_blocking=non_blocking, stream=offload_stream, r2=dest) - else: - cast_maybe_lowvram_patch(source, pin, None) - cast_maybe_lowvram_patch([ pin ], dest, offload_stream) - return - if pin is None: - pin_offset = get_stream_pin_buffer_offset(size) - if pin_offset is not None: - stream_pin_queue.append((source, pin_offset, size, dest)) - return - cast_maybe_lowvram_patch(source, dest, offload_stream) + cast_maybe_lowvram_patch(source, pin, offload_stream, xfer_dest2=dest) handle_pin(s, pin, xfer_source, xfer_dest, size=dest_size) @@ -232,23 +205,6 @@ def cast_modules_with_vbar(comfy_modules, dtype, device, bias_dtype, non_blockin prefetch["needs_cast"] = needs_cast s._prefetch = prefetch - if stream_pin_offset > 0: - if stream_pin_hostbuf.size < stream_pin_offset: - if not comfy.model_management.resize_pin_buffer(stream_pin_hostbuf, stream_pin_offset + STREAM_PIN_BUFFER_HEADROOM): - for xfer_source, _, _, xfer_dest in stream_pin_queue: - cast_maybe_lowvram_patch(xfer_source, xfer_dest, offload_stream) - return offload_stream - stream_pin_tensor = comfy_aimdo.torch.hostbuf_to_tensor(stream_pin_hostbuf) - stream_pin_tensor.untyped_storage()._comfy_hostbuf = stream_pin_hostbuf - for xfer_source, pin_offset, pin_size, xfer_dest in stream_pin_queue: - pin = stream_pin_tensor[pin_offset:pin_offset + pin_size] - if isinstance(xfer_source, list): - comfy.model_management.cast_to_gathered(xfer_source, pin, non_blocking=non_blocking, stream=offload_stream, r2=xfer_dest) - else: - cast_maybe_lowvram_patch(xfer_source, pin, None) - comfy.model_management.cast_to_gathered([ pin ], xfer_dest, non_blocking=non_blocking, stream=offload_stream) - stream_pin_hostbuf._comfy_event = offload_stream.record_event() - return offload_stream diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py index 0e8f573ba..ffe12e0dc 100644 --- a/comfy/pinned_memory.py +++ b/comfy/pinned_memory.py @@ -1,17 +1,55 @@ +import bisect + import comfy.model_management import comfy.memory_management +import comfy.utils import comfy_aimdo.host_buffer import comfy_aimdo.torch import torch from comfy.cli_args import args +def _add_to_bucket(module, buckets, size, priority): + bucket = buckets.setdefault(size, []) + entry = [-priority, 0, module] + entry[1] = id(entry) + bisect.insort(bucket, entry) + module._pin_balancer_entry = entry + +def _steal_pin(module, stack, buckets, size, priority): + bucket = buckets.get(size) + if bucket is None: + return False + + while bucket and bucket[-1][-1] is None: + bucket.pop() + if not bucket: + del buckets[size] + return False + + if priority <= -bucket[-1][0]: + return False + + *_, victim = bucket.pop() + module._pin = victim._pin + module._pin_registered = victim._pin_registered + module._pin_stack_index = victim._pin_stack_index + stack[module._pin_stack_index] = (module, stack[module._pin_stack_index][1]) + + victim._pin_registered = False + del victim._pin + del victim._pin_stack_index + del victim._pin_balancer_entry + + _add_to_bucket(module, buckets, size, priority) + return True + def get_pin(module, subset="weights"): pin = getattr(module, "_pin", None) if pin is None or module._pin_registered or args.disable_pinned_memory: return pin - _, _, stack_split, pinned_size = module._pin_state[subset] + _, _, stack_split, pinned_size, *_ = module._pin_state[subset] size = pin.nbytes comfy.model_management.ensure_pin_registerable(size) @@ -31,26 +69,30 @@ def pin_memory(module, subset="weights", size=None): return pin = get_pin(module, subset) - if pin is not None or pin_state["failed"]: + if pin is not None: return - hostbuf, stack, stack_split, pinned_size = pin_state[subset] + hostbuf, stack, stack_split, pinned_size, counter, buckets = pin_state[subset] if size is None: size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ]) offset = hostbuf.size - registerable_size = size + max(0, hostbuf.size - pinned_size[0]) + registerable_size = size + priority = getattr(module, "_pin_balancer_priority", None) + + if priority is None: + priority = comfy.utils.bit_reverse_range(counter[0], 16) + counter[0] += 1 + module._pin_balancer_priority = priority comfy.memory_management.extra_ram_release(comfy.memory_management.RAM_CACHE_HEADROOM) if (not comfy.model_management.ensure_pin_budget(size) or not comfy.model_management.ensure_pin_registerable(registerable_size)): - pin_state["failed"] = True - return False + return _steal_pin(module, stack, buckets, size, priority) try: hostbuf.extend(size=size) except RuntimeError: - pin_state["failed"] = True - return False + return _steal_pin(module, stack, buckets, size, priority) module._pin = comfy_aimdo.torch.hostbuf_to_tensor(hostbuf)[offset:offset + size] module._pin.untyped_storage()._comfy_hostbuf = hostbuf @@ -60,4 +102,5 @@ def pin_memory(module, subset="weights", size=None): stack_split[0] = max(stack_split[0], module._pin_stack_index) comfy.model_management.TOTAL_PINNED_MEMORY += size pinned_size[0] += size + _add_to_bucket(module, buckets, size, priority) return True diff --git a/comfy/samplers.py b/comfy/samplers.py index e31277f7b..25c5a855f 100755 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -464,10 +464,7 @@ def _calc_cond_batch_multigpu(model: BaseModel, conds: list[list[dict]], x_in: t def _handle_batch(device: torch.device, batch_tuple: tuple[comfy.hooks.HookGroup, tuple], results: list[thread_result]): try: - # TODO: non-NVIDIA support -- guard with `if device.type == "cuda":` once - # we extend multigpu QA beyond CUDA. Unconditional call crashes on - # XPU/NPU/MPS/CPU/DirectML backends. - torch.cuda.set_device(device) + comfy.model_management.set_torch_device(device) model_current: BaseModel = model_options["multigpu_clones"][device].model # run every hooked_to_run separately with torch.no_grad(): diff --git a/comfy/utils.py b/comfy/utils.py index 430923389..e3a4ed7be 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -85,9 +85,9 @@ _TYPES = { def load_safetensors(ckpt): import comfy_aimdo.model_mmap - f = open(ckpt, "rb", buffering=0) file_lock = threading.Lock() model_mmap = comfy_aimdo.model_mmap.ModelMMAP(ckpt) + f = model_mmap.get_file_handle() file_size = os.path.getsize(ckpt) mv = memoryview((ctypes.c_uint8 * file_size).from_address(model_mmap.get())) @@ -1463,3 +1463,10 @@ def deepcopy_list_dict(obj, memo=None): memo[obj_id] = res return res + +def bit_reverse_range(index, bits): + result = 0 + for _ in range(bits): + result = (result << 1) | (index & 1) + index >>= 1 + return result diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index 19d8176b0..e03bafcde 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -727,6 +727,30 @@ class File3DUSDZ(ComfyTypeIO): Type = File3D +@comfytype(io_type="FILE_3D_PLY") +class File3DPLY(ComfyTypeIO): + """PLY format 3D file - point cloud or Gaussian splat.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_SPLAT") +class File3DSPLAT(ComfyTypeIO): + """SPLAT format 3D file - 3D Gaussian splat.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_SPZ") +class File3DSPZ(ComfyTypeIO): + """SPZ format 3D file - compressed 3D Gaussian splat.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_KSPLAT") +class File3DKSPLAT(ComfyTypeIO): + """KSPLAT format 3D file - 3D Gaussian splat.""" + Type = File3D + + @comfytype(io_type="HOOKS") class Hooks(ComfyTypeIO): if TYPE_CHECKING: @@ -2303,6 +2327,10 @@ __all__ = [ "File3DOBJ", "File3DSTL", "File3DUSDZ", + "File3DPLY", + "File3DSPLAT", + "File3DSPZ", + "File3DKSPLAT", "Hooks", "HookKeyframes", "TimestepsRange", diff --git a/comfy_api/latest/_ui.py b/comfy_api/latest/_ui.py index e238cdf3c..6592f6b1d 100644 --- a/comfy_api/latest/_ui.py +++ b/comfy_api/latest/_ui.py @@ -452,6 +452,16 @@ class PreviewUI3D(_UIOutput): return {"result": [self.model_file, self.camera_info, self.bg_image_path]} +class PreviewUI3DAdvanced(_UIOutput): + def __init__(self, model_file, camera_info, model_3d_info): + self.model_file = model_file + self.camera_info = camera_info + self.model_3d_info = model_3d_info + + def as_dict(self): + return {"result": [self.model_file, self.camera_info, self.model_3d_info]} + + class PreviewText(_UIOutput): def __init__(self, value: str, **kwargs): self.value = value @@ -471,5 +481,6 @@ __all__ = [ "PreviewAudio", "PreviewVideo", "PreviewUI3D", + "PreviewUI3DAdvanced", "PreviewText", ] diff --git a/comfy_api_nodes/apis/tripo.py b/comfy_api_nodes/apis/tripo.py index bce6b0e89..7ac81d42c 100644 --- a/comfy_api_nodes/apis/tripo.py +++ b/comfy_api_nodes/apis/tripo.py @@ -1,25 +1,25 @@ from enum import Enum -from typing import Optional, Any +from typing import Any from pydantic import BaseModel, Field, RootModel class TripoModelVersion(str, Enum): - v3_1_20260211 = 'v3.1-20260211' - v3_0_20250812 = 'v3.0-20250812' - v2_5_20250123 = 'v2.5-20250123' - v2_0_20240919 = 'v2.0-20240919' - v1_4_20240625 = 'v1.4-20240625' + v3_1_20260211 = "v3.1-20260211" + v3_0_20250812 = "v3.0-20250812" + v2_5_20250123 = "v2.5-20250123" + v2_0_20240919 = "v2.0-20240919" + v1_4_20240625 = "v1.4-20240625" class TripoGeometryQuality(str, Enum): - standard = 'standard' - detailed = 'detailed' + standard = "standard" + detailed = "detailed" class TripoTextureQuality(str, Enum): - standard = 'standard' - detailed = 'detailed' + standard = "standard" + detailed = "detailed" class TripoStyle(str, Enum): @@ -33,6 +33,7 @@ class TripoStyle(str, Enum): ANCIENT_BRONZE = "ancient_bronze" NONE = "None" + class TripoTaskType(str, Enum): TEXT_TO_MODEL = "text_to_model" IMAGE_TO_MODEL = "image_to_model" @@ -45,26 +46,27 @@ class TripoTaskType(str, Enum): STYLIZE_MODEL = "stylize_model" CONVERT_MODEL = "convert_model" + class TripoTextureAlignment(str, Enum): ORIGINAL_IMAGE = "original_image" GEOMETRY = "geometry" + class TripoOrientation(str, Enum): ALIGN_IMAGE = "align_image" DEFAULT = "default" + class TripoOutFormat(str, Enum): GLB = "glb" FBX = "fbx" -class TripoTopology(str, Enum): - BIP = "bip" - QUAD = "quad" class TripoSpec(str, Enum): MIXAMO = "mixamo" TRIPO = "tripo" + class TripoAnimation(str, Enum): IDLE = "preset:idle" WALK = "preset:walk" @@ -83,11 +85,6 @@ class TripoAnimation(str, Enum): SERPENTINE_MARCH = "preset:serpentine:march" AQUATIC_MARCH = "preset:aquatic:march" -class TripoStylizeStyle(str, Enum): - LEGO = "lego" - VOXEL = "voxel" - VORONOI = "voronoi" - MINECRAFT = "minecraft" class TripoConvertFormat(str, Enum): GLTF = "GLTF" @@ -97,6 +94,7 @@ class TripoConvertFormat(str, Enum): STL = "STL" _3MF = "3MF" + class TripoTextureFormat(str, Enum): BMP = "BMP" DPX = "DPX" @@ -108,6 +106,7 @@ class TripoTextureFormat(str, Enum): TIFF = "TIFF" WEBP = "WEBP" + class TripoTaskStatus(str, Enum): QUEUED = "queued" RUNNING = "running" @@ -118,183 +117,223 @@ class TripoTaskStatus(str, Enum): BANNED = "banned" EXPIRED = "expired" + class TripoFbxPreset(str, Enum): BLENDER = "blender" MIXAMO = "mixamo" _3DSMAX = "3dsmax" + class TripoFileTokenReference(BaseModel): - type: Optional[str] = Field(None, description='The type of the reference') + type: str | None = Field(None, description="The type of the reference") file_token: str + class TripoUrlReference(BaseModel): - type: Optional[str] = Field(None, description='The type of the reference') + type: str | None = Field(None, description="The type of the reference") url: str + class TripoObjectStorage(BaseModel): bucket: str key: str + class TripoObjectReference(BaseModel): type: str object: TripoObjectStorage + class TripoFileEmptyReference(BaseModel): pass + class TripoFileReference(RootModel): root: TripoFileTokenReference | TripoUrlReference | TripoObjectReference | TripoFileEmptyReference -class TripoGetStsTokenRequest(BaseModel): - format: str = Field(..., description='The format of the image') class TripoTextToModelRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description='Type of task') - prompt: str = Field(..., description='The text prompt describing the model to generate', max_length=1024) - negative_prompt: Optional[str] = Field(None, description='The negative text prompt', max_length=1024) - model_version: Optional[TripoModelVersion] = TripoModelVersion.v2_5_20250123 - face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') - texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model') - pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model') - image_seed: Optional[int] = Field(None, description='The seed for the text') - model_seed: Optional[int] = Field(None, description='The seed for the model') - texture_seed: Optional[int] = Field(None, description='The seed for the texture') - texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard - geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard - style: Optional[TripoStyle] = None - auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model') - quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model') + type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description="Type of task") + prompt: str = Field(..., description="The text prompt describing the model to generate", max_length=1024) + negative_prompt: str | None = Field(None, description="The negative text prompt", max_length=1024) + model_version: TripoModelVersion | None = TripoModelVersion.v2_5_20250123 + face_limit: int | None = Field(None, description="The number of faces to limit the generation to") + texture: bool | None = Field(True, description="Whether to apply texture to the generated model") + pbr: bool | None = Field(True, description="Whether to apply PBR to the generated model") + image_seed: int | None = Field(None, description="The seed for the text") + model_seed: int | None = Field(None, description="The seed for the model") + texture_seed: int | None = Field(None, description="The seed for the texture") + texture_quality: TripoTextureQuality | None = TripoTextureQuality.standard + geometry_quality: TripoGeometryQuality | None = TripoGeometryQuality.standard + style: TripoStyle | None = None + auto_size: bool | None = Field(False, description="Whether to auto-size the model") + quad: bool | None = Field(False, description="Whether to apply quad to the generated model") + class TripoImageToModelRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.IMAGE_TO_MODEL, description='Type of task') - file: TripoFileReference = Field(..., description='The file reference to convert to a model') - model_version: Optional[TripoModelVersion] = Field(None, description='The model version to use for generation') - face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') - texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model') - pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model') - model_seed: Optional[int] = Field(None, description='The seed for the model') - texture_seed: Optional[int] = Field(None, description='The seed for the texture') - texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard - geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard - texture_alignment: Optional[TripoTextureAlignment] = Field(TripoTextureAlignment.ORIGINAL_IMAGE, description='The texture alignment method') - style: Optional[TripoStyle] = Field(None, description='The style to apply to the generated model') - auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model') - orientation: Optional[TripoOrientation] = TripoOrientation.DEFAULT - quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model') + type: TripoTaskType = Field(TripoTaskType.IMAGE_TO_MODEL, description="Type of task") + file: TripoFileReference = Field(..., description="The file reference to convert to a model") + model_version: TripoModelVersion | None = Field(None, description="The model version to use for generation") + face_limit: int | None = Field(None, description="The number of faces to limit the generation to") + texture: bool | None = Field(True, description="Whether to apply texture to the generated model") + pbr: bool | None = Field(True, description="Whether to apply PBR to the generated model") + model_seed: int | None = Field(None, description="The seed for the model") + texture_seed: int | None = Field(None, description="The seed for the texture") + texture_quality: TripoTextureQuality | None = TripoTextureQuality.standard + geometry_quality: TripoGeometryQuality | None = TripoGeometryQuality.standard + texture_alignment: TripoTextureAlignment | None = Field( + TripoTextureAlignment.ORIGINAL_IMAGE, description="The texture alignment method" + ) + style: TripoStyle | None = Field(None, description="The style to apply to the generated model") + auto_size: bool | None = Field(False, description="Whether to auto-size the model") + orientation: TripoOrientation | None = TripoOrientation.DEFAULT + quad: bool | None = Field(False, description="Whether to apply quad to the generated model") + class TripoMultiviewToModelRequest(BaseModel): type: TripoTaskType = TripoTaskType.MULTIVIEW_TO_MODEL - files: list[TripoFileReference] = Field(..., description='The file references to convert to a model') - model_version: Optional[TripoModelVersion] = Field(None, description='The model version to use for generation') - orthographic_projection: Optional[bool] = Field(False, description='Whether to use orthographic projection') - face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') - texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model') - pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model') - model_seed: Optional[int] = Field(None, description='The seed for the model') - texture_seed: Optional[int] = Field(None, description='The seed for the texture') - texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard - geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard - texture_alignment: Optional[TripoTextureAlignment] = TripoTextureAlignment.ORIGINAL_IMAGE - auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model') - orientation: Optional[TripoOrientation] = Field(TripoOrientation.DEFAULT, description='The orientation for the model') - quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model') + files: list[TripoFileReference] = Field(..., description="The file references to convert to a model") + model_version: TripoModelVersion | None = Field(None, description="The model version to use for generation") + orthographic_projection: bool | None = Field(False, description="Whether to use orthographic projection") + face_limit: int | None = Field(None, description="The number of faces to limit the generation to") + texture: bool | None = Field(True, description="Whether to apply texture to the generated model") + pbr: bool | None = Field(True, description="Whether to apply PBR to the generated model") + model_seed: int | None = Field(None, description="The seed for the model") + texture_seed: int | None = Field(None, description="The seed for the texture") + texture_quality: TripoTextureQuality | None = TripoTextureQuality.standard + geometry_quality: TripoGeometryQuality | None = TripoGeometryQuality.standard + texture_alignment: TripoTextureAlignment | None = TripoTextureAlignment.ORIGINAL_IMAGE + auto_size: bool | None = Field(False, description="Whether to auto-size the model") + orientation: TripoOrientation | None = Field(TripoOrientation.DEFAULT, description="The orientation for the model") + quad: bool | None = Field(False, description="Whether to apply quad to the generated model") + class TripoTextureModelRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.TEXTURE_MODEL, description='Type of task') - original_model_task_id: str = Field(..., description='The task ID of the original model') - texture: Optional[bool] = Field(True, description='Whether to apply texture to the model') - pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the model') - model_seed: Optional[int] = Field(None, description='The seed for the model') - texture_seed: Optional[int] = Field(None, description='The seed for the texture') - texture_quality: Optional[TripoTextureQuality] = Field(None, description='The quality of the texture') - texture_alignment: Optional[TripoTextureAlignment] = Field(TripoTextureAlignment.ORIGINAL_IMAGE, description='The texture alignment method') + type: TripoTaskType = Field(TripoTaskType.TEXTURE_MODEL, description="Type of task") + original_model_task_id: str = Field(..., description="The task ID of the original model") + texture: bool | None = Field(True, description="Whether to apply texture to the model") + pbr: bool | None = Field(True, description="Whether to apply PBR to the model") + model_seed: int | None = Field(None, description="The seed for the model") + texture_seed: int | None = Field(None, description="The seed for the texture") + texture_quality: TripoTextureQuality | None = Field(None, description="The quality of the texture") + texture_alignment: TripoTextureAlignment | None = Field( + TripoTextureAlignment.ORIGINAL_IMAGE, description="The texture alignment method" + ) + class TripoRefineModelRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.REFINE_MODEL, description='Type of task') - draft_model_task_id: str = Field(..., description='The task ID of the draft model') + type: TripoTaskType = Field(TripoTaskType.REFINE_MODEL, description="Type of task") + draft_model_task_id: str = Field(..., description="The task ID of the draft model") -class TripoAnimatePrerigcheckRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.ANIMATE_PRERIGCHECK, description='Type of task') - original_model_task_id: str = Field(..., description='The task ID of the original model') class TripoAnimateRigRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.ANIMATE_RIG, description='Type of task') - original_model_task_id: str = Field(..., description='The task ID of the original model') - out_format: Optional[TripoOutFormat] = Field(TripoOutFormat.GLB, description='The output format') - spec: Optional[TripoSpec] = Field(TripoSpec.TRIPO, description='The specification for rigging') + type: TripoTaskType = Field(TripoTaskType.ANIMATE_RIG, description="Type of task") + original_model_task_id: str = Field(..., description="The task ID of the original model") + out_format: TripoOutFormat | None = Field(TripoOutFormat.GLB, description="The output format") + spec: TripoSpec | None = Field(TripoSpec.TRIPO, description="The specification for rigging") + class TripoAnimateRetargetRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.ANIMATE_RETARGET, description='Type of task') - original_model_task_id: str = Field(..., description='The task ID of the original model') - animation: TripoAnimation = Field(..., description='The animation to apply') - out_format: Optional[TripoOutFormat] = Field(TripoOutFormat.GLB, description='The output format') - bake_animation: Optional[bool] = Field(True, description='Whether to bake the animation') + type: TripoTaskType = Field(TripoTaskType.ANIMATE_RETARGET, description="Type of task") + original_model_task_id: str = Field(..., description="The task ID of the original model") + animation: TripoAnimation = Field(..., description="The animation to apply") + out_format: TripoOutFormat | None = Field(TripoOutFormat.GLB, description="The output format") + bake_animation: bool | None = Field(True, description="Whether to bake the animation") -class TripoStylizeModelRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.STYLIZE_MODEL, description='Type of task') - style: TripoStylizeStyle = Field(..., description='The style to apply to the model') - original_model_task_id: str = Field(..., description='The task ID of the original model') - block_size: Optional[int] = Field(80, description='The block size for stylization') class TripoConvertModelRequest(BaseModel): - type: TripoTaskType = Field(TripoTaskType.CONVERT_MODEL, description='Type of task') - format: TripoConvertFormat = Field(..., description='The format to convert to') - original_model_task_id: str = Field(..., description='The task ID of the original model') - quad: Optional[bool] = Field(None, description='Whether to apply quad to the model') - force_symmetry: Optional[bool] = Field(None, description='Whether to force symmetry') - face_limit: Optional[int] = Field(None, description='The number of faces to limit the conversion to') - flatten_bottom: Optional[bool] = Field(None, description='Whether to flatten the bottom of the model') - flatten_bottom_threshold: Optional[float] = Field(None, description='The threshold for flattening the bottom') - texture_size: Optional[int] = Field(None, description='The size of the texture') - texture_format: Optional[TripoTextureFormat] = Field(TripoTextureFormat.JPEG, description='The format of the texture') - pivot_to_center_bottom: Optional[bool] = Field(None, description='Whether to pivot to the center bottom') - scale_factor: Optional[float] = Field(None, description='The scale factor for the model') - with_animation: Optional[bool] = Field(None, description='Whether to include animations') - pack_uv: Optional[bool] = Field(None, description='Whether to pack the UVs') - bake: Optional[bool] = Field(None, description='Whether to bake the model') - part_names: Optional[list[str]] = Field(None, description='The names of the parts to include') - fbx_preset: Optional[TripoFbxPreset] = Field(None, description='The preset for the FBX export') - export_vertex_colors: Optional[bool] = Field(None, description='Whether to export the vertex colors') - export_orientation: Optional[TripoOrientation] = Field(None, description='The orientation for the export') - animate_in_place: Optional[bool] = Field(None, description='Whether to animate in place') + type: TripoTaskType = Field(TripoTaskType.CONVERT_MODEL, description="Type of task") + format: TripoConvertFormat = Field(..., description="The format to convert to") + original_model_task_id: str = Field(..., description="The task ID of the original model") + quad: bool | None = Field(None, description="Whether to apply quad to the model") + force_symmetry: bool | None = Field(None, description="Whether to force symmetry") + face_limit: int | None = Field(None, description="The number of faces to limit the conversion to") + flatten_bottom: bool | None = Field(None, description="Whether to flatten the bottom of the model") + flatten_bottom_threshold: float | None = Field(None, description="The threshold for flattening the bottom") + texture_size: int | None = Field(None, description="The size of the texture") + texture_format: TripoTextureFormat | None = Field(TripoTextureFormat.JPEG, description="The format of the texture") + pivot_to_center_bottom: bool | None = Field(None, description="Whether to pivot to the center bottom") + scale_factor: float | None = Field(None, description="The scale factor for the model") + with_animation: bool | None = Field(None, description="Whether to include animations") + pack_uv: bool | None = Field(None, description="Whether to pack the UVs") + bake: bool | None = Field(None, description="Whether to bake the model") + part_names: list[str] | None = Field(None, description="The names of the parts to include") + fbx_preset: TripoFbxPreset | None = Field(None, description="The preset for the FBX export") + export_vertex_colors: bool | None = Field(None, description="Whether to export the vertex colors") + export_orientation: TripoOrientation | None = Field(None, description="The orientation for the export") + animate_in_place: bool | None = Field(None, description="Whether to animate in place") + + +class TripoP1CommonRequest(BaseModel): + """Fields supported by Tripo P1 across all input types.""" + + model_version: str = Field("P1-20260311") + model_seed: int | None = Field(None, description="Random seed for geometry generation") + face_limit: int | None = Field(None, ge=48, le=20000, description="Target face count (48-20000)") + texture: bool | None = Field(None, description="Enable texturing; pbr=True forces this true") + pbr: bool | None = Field(None, description="Enable PBR maps; when true, texture is also enabled") + texture_seed: int | None = Field(None, description="Random seed for texture generation") + texture_quality: str | None = Field(None, description='"standard" or "detailed"') + auto_size: bool | None = Field(None, description="Scale to real-world meters") + compress: str | None = Field(None, description='Only "geometry" is supported') + export_uv: bool | None = Field(None, description="Perform UV unwrapping during generation") + + +class TripoP1TextToModelRequest(TripoP1CommonRequest): + type: str = "text_to_model" + prompt: str = Field(..., max_length=1024) + negative_prompt: str | None = Field(None, max_length=255) + image_seed: int | None = None + + +class TripoP1ImageToModelRequest(TripoP1CommonRequest): + type: str = "image_to_model" + file: TripoFileReference + enable_image_autofix: bool | None = None + texture_alignment: str | None = Field(None, description='"original_image" or "geometry"') + orientation: str | None = Field(None, description='"default" or "align_image"; needs texture=true') + + +class TripoP1MultiviewToModelRequest(TripoP1CommonRequest): + """P1 multiview generation. + + Tripo requires `files` to be exactly four entries in [front, left, back, right] order with `{}` + (TripoFileEmptyReference) for omitted slots; front is required and at least two images total must be provided. + """ + + type: str = "multiview_to_model" + files: list[TripoFileReference] + texture_alignment: str | None = None + orientation: str | None = None class TripoTaskOutput(BaseModel): - model: Optional[str] = Field(None, description='URL to the model') - base_model: Optional[str] = Field(None, description='URL to the base model') - pbr_model: Optional[str] = Field(None, description='URL to the PBR model') - rendered_image: Optional[str] = Field(None, description='URL to the rendered image') - riggable: Optional[bool] = Field(None, description='Whether the model is riggable') + model: str | None = Field(None, description="URL to the model") + base_model: str | None = Field(None, description="URL to the base model") + pbr_model: str | None = Field(None, description="URL to the PBR model") + rendered_image: str | None = Field(None, description="URL to the rendered image") + riggable: bool | None = Field(None, description="Whether the model is riggable") + class TripoTask(BaseModel): - task_id: str = Field(..., description='The task ID') - type: Optional[str] = Field(None, description='The type of task') - status: Optional[TripoTaskStatus] = Field(None, description='The status of the task') - input: Optional[dict[str, Any]] = Field(None, description='The input parameters for the task') - output: Optional[TripoTaskOutput] = Field(None, description='The output of the task') - progress: Optional[int] = Field(None, description='The progress of the task', ge=0, le=100) - create_time: Optional[int] = Field(None, description='The creation time of the task') - running_left_time: Optional[int] = Field(None, description='The estimated time left for the task') - queue_position: Optional[int] = Field(None, description='The position in the queue') + task_id: str = Field(..., description="The task ID") + type: str | None = Field(None, description="The type of task") + status: TripoTaskStatus | None = Field(None, description="The status of the task") + input: dict[str, Any] | None = Field(None, description="The input parameters for the task") + output: TripoTaskOutput | None = Field(None, description="The output of the task") + progress: int | None = Field(None, description="The progress of the task", ge=0, le=100) + create_time: int | None = Field(None, description="The creation time of the task") + running_left_time: int | None = Field(None, description="The estimated time left for the task") + queue_position: int | None = Field(None, description="The position in the queue") consumed_credit: int | None = Field(None) + class TripoTaskResponse(BaseModel): - code: int = Field(0, description='The response code') - data: TripoTask = Field(..., description='The task data') + code: int = Field(0, description="The response code") + data: TripoTask = Field(..., description="The task data") -class TripoGeneralResponse(BaseModel): - code: int = Field(0, description='The response code') - data: dict[str, str] = Field(..., description='The task ID data') - -class TripoBalanceData(BaseModel): - balance: float = Field(..., description='The account balance') - frozen: float = Field(..., description='The frozen balance') - -class TripoBalanceResponse(BaseModel): - code: int = Field(0, description='The response code') - data: TripoBalanceData = Field(..., description='The balance data') class TripoErrorResponse(BaseModel): - code: int = Field(..., description='The error code') - message: str = Field(..., description='The error message') - suggestion: str = Field(..., description='The suggestion for fixing the error') + code: int = Field(..., description="The error code") + message: str = Field(..., description="The error message") + suggestion: str = Field(..., description="The suggestion for fixing the error") diff --git a/comfy_api_nodes/nodes_grok.py b/comfy_api_nodes/nodes_grok.py index 43e3cdc26..a41da42f3 100644 --- a/comfy_api_nodes/nodes_grok.py +++ b/comfy_api_nodes/nodes_grok.py @@ -58,7 +58,6 @@ class GrokImageNode(IO.ComfyNode): "grok-imagine-image-quality", "grok-imagine-image-pro", "grok-imagine-image", - "grok-imagine-image-beta", ], ), IO.String.Input( @@ -233,7 +232,6 @@ class GrokImageEditNode(IO.ComfyNode): "grok-imagine-image-quality", "grok-imagine-image-pro", "grok-imagine-image", - "grok-imagine-image-beta", ], ), IO.Image.Input("image", display_name="images"), @@ -506,7 +504,7 @@ class GrokVideoNode(IO.ComfyNode): category="video/partner/Grok", description="Generate video from a prompt or an image", inputs=[ - IO.Combo.Input("model", options=["grok-imagine-video", "grok-imagine-video-beta"]), + IO.Combo.Input("model", options=["grok-imagine-video"]), IO.String.Input( "prompt", multiline=True, @@ -576,8 +574,6 @@ class GrokVideoNode(IO.ComfyNode): seed: int, image: Input.Image | None = None, ) -> IO.NodeOutput: - if model == "grok-imagine-video-beta": - model = "grok-imagine-video" image_url = None if image is not None: if get_number_of_images(image) != 1: @@ -618,7 +614,7 @@ class GrokVideoEditNode(IO.ComfyNode): category="video/partner/Grok", description="Edit an existing video based on a text prompt.", inputs=[ - IO.Combo.Input("model", options=["grok-imagine-video", "grok-imagine-video-beta"]), + IO.Combo.Input("model", options=["grok-imagine-video"]), IO.String.Input( "prompt", multiline=True, diff --git a/comfy_api_nodes/nodes_tripo.py b/comfy_api_nodes/nodes_tripo.py index 6ee674a18..4820e26c1 100644 --- a/comfy_api_nodes/nodes_tripo.py +++ b/comfy_api_nodes/nodes_tripo.py @@ -11,6 +11,9 @@ from comfy_api_nodes.apis.tripo import ( TripoModelVersion, TripoMultiviewToModelRequest, TripoOrientation, + TripoP1ImageToModelRequest, + TripoP1MultiviewToModelRequest, + TripoP1TextToModelRequest, TripoRefineModelRequest, TripoStyle, TripoTaskResponse, @@ -93,10 +96,22 @@ class TripoTextToModelNode(IO.ComfyNode): IO.Int.Input("image_seed", default=42, optional=True, advanced=True), IO.Int.Input("model_seed", default=42, optional=True, advanced=True), IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), + IO.Combo.Input( + "texture_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), IO.Int.Input("face_limit", default=-1, min=-1, max=2000000, optional=True, advanced=True), IO.Boolean.Input("quad", default=False, optional=True, advanced=True), - IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), + IO.Combo.Input( + "geometry_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), ], outputs=[ IO.String.Output(display_name="model_file"), # for backward compatibility only @@ -209,16 +224,36 @@ class TripoImageToModelNode(IO.ComfyNode): IO.Boolean.Input("pbr", default=True, optional=True), IO.Int.Input("model_seed", default=42, optional=True, advanced=True), IO.Combo.Input( - "orientation", options=TripoOrientation, default=TripoOrientation.DEFAULT, optional=True, advanced=True + "orientation", + options=TripoOrientation, + default=TripoOrientation.DEFAULT, + optional=True, + advanced=True, ), IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), IO.Combo.Input( - "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True + "texture_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), + IO.Combo.Input( + "texture_alignment", + default="original_image", + options=["original_image", "geometry"], + optional=True, + advanced=True, ), IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True, advanced=True), IO.Boolean.Input("quad", default=False, optional=True, advanced=True), - IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), + IO.Combo.Input( + "geometry_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), ], outputs=[ IO.String.Output(display_name="model_file"), # for backward compatibility only @@ -346,13 +381,35 @@ class TripoMultiviewToModelNode(IO.ComfyNode): IO.Boolean.Input("pbr", default=True, optional=True), IO.Int.Input("model_seed", default=42, optional=True, advanced=True), IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), IO.Combo.Input( - "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True + "texture_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), + IO.Combo.Input( + "texture_alignment", + default="original_image", + options=["original_image", "geometry"], + optional=True, + advanced=True, ), IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True, advanced=True), - IO.Boolean.Input("quad", default=False, optional=True, advanced=True, tooltip="This parameter is deprecated and does nothing."), - IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), + IO.Boolean.Input( + "quad", + default=False, + optional=True, + advanced=True, + tooltip="This parameter is deprecated and does nothing.", + ), + IO.Combo.Input( + "geometry_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), ], outputs=[ IO.String.Output(display_name="model_file"), # for backward compatibility only @@ -467,9 +524,19 @@ class TripoTextureNode(IO.ComfyNode): IO.Boolean.Input("texture", default=True, optional=True), IO.Boolean.Input("pbr", default=True, optional=True), IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), IO.Combo.Input( - "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True + "texture_quality", + default="standard", + options=["standard", "detailed"], + optional=True, + advanced=True, + ), + IO.Combo.Input( + "texture_alignment", + default="original_image", + options=["original_image", "geometry"], + optional=True, + advanced=True, ), ], outputs=[ @@ -626,7 +693,7 @@ class TripoRetargetNode(IO.ComfyNode): "preset:hexapod:walk", "preset:octopod:walk", "preset:serpentine:march", - "preset:aquatic:march" + "preset:aquatic:march", ], ), ], @@ -817,7 +884,7 @@ class TripoConversionNode(IO.ComfyNode): # Parse part_names from comma-separated string to list part_names_list = None if part_names and part_names.strip(): - part_names_list = [name.strip() for name in part_names.split(',') if name.strip()] + part_names_list = [name.strip() for name in part_names.split(",") if name.strip()] response = await sync_op( cls, @@ -848,6 +915,373 @@ class TripoConversionNode(IO.ComfyNode): return await poll_until_finished(cls, response, average_duration=30) +def _p1_price_expr(*, geometry_credits: int, textured_credits: int, detailed_credits: int) -> str: + return ( + "(" + " $mode := widgets.output_mode;" + ' $detailed := $lookup(widgets, "output_mode.texture_quality") = "detailed";' + f' $credits := $mode = "geometry only" ? {geometry_credits} : ($detailed ? {detailed_credits} : {textured_credits});' + ' {"type":"usd","usd": $credits * 0.01, "format": {"approximate": true}}' + ")" + ) + + +def _p1_textured_inputs(*, include_image_alignment: bool) -> list: + """Inputs shown inside the 'Textured' branch of the P1 output_mode DynamicCombo.""" + inputs: list = [ + IO.Boolean.Input("pbr", default=True, tooltip="Include PBR maps. When on, base texture is forced on too."), + IO.Combo.Input("texture_quality", options=["standard", "detailed"], default="standard"), + ] + if include_image_alignment: + inputs.extend( + [ + IO.Combo.Input( + "texture_alignment", + options=["original_image", "geometry"], + default="original_image", + tooltip="Prioritize visual fidelity to the source image, or alignment to the mesh geometry.", + ), + IO.Combo.Input( + "orientation", + options=["default", "align_image"], + default="default", + tooltip="Rotate the output to match the source image. Only applies when textured.", + ), + ] + ) + inputs.append(IO.Int.Input("texture_seed", default=42, advanced=True)) + return inputs + + +def _build_p1_output_mode(*, include_image_alignment: bool) -> IO.DynamicCombo.Input: + return IO.DynamicCombo.Input( + "output_mode", + options=[ + IO.DynamicCombo.Option("Geometry only", []), + IO.DynamicCombo.Option("Textured", _p1_textured_inputs(include_image_alignment=include_image_alignment)), + ], + tooltip='"Geometry only" returns an untextured mesh. "Textured" adds color/PBR maps.', + ) + + +def _resolve_p1_texture_fields(output_mode: dict) -> dict: + """Translate the output_mode DynamicCombo payload into P1 request fields. + + pbr=true forces texture=true server-side, but we send both explicitly so the + intent is visible in the request body and logs. + """ + mode = output_mode["output_mode"] + if mode == "Geometry only": + return {"texture": False, "pbr": False} + out = { + "texture": True, + "pbr": bool(output_mode.get("pbr", True)), + "texture_quality": output_mode.get("texture_quality", "standard"), + "texture_seed": output_mode.get("texture_seed"), + } + if "texture_alignment" in output_mode: + out["texture_alignment"] = output_mode["texture_alignment"] + if "orientation" in output_mode: + out["orientation"] = output_mode["orientation"] + return out + + +def _p1_common_inputs() -> list: + """Inputs shared by all P1 nodes (placed after output_mode).""" + return [ + IO.Int.Input( + "face_limit", + default=-1, + min=-1, + max=20000, + optional=True, + advanced=True, + tooltip="Target face count, 48-20000. -1 lets Tripo pick adaptively.", + ), + IO.Int.Input("model_seed", default=42, optional=True, advanced=True), + IO.Boolean.Input( + "auto_size", + default=False, + optional=True, + advanced=True, + tooltip="Scale the output to approximate real-world meters.", + ), + IO.Boolean.Input( + "export_uv", + default=True, + optional=True, + advanced=True, + tooltip="UV unwrap during generation. Turn off for faster geometry-only runs.", + ), + IO.Boolean.Input( + "compress_geometry", + default=False, + optional=True, + advanced=True, + tooltip="Apply geometry-based compression. Decompress before editing.", + ), + ] + + +def _build_p1_request_kwargs( + *, + output_mode: dict, + face_limit: int, + model_seed: int, + auto_size: bool, + export_uv: bool, + compress_geometry: bool, +) -> dict: + """Common P1 request fields shared by all three node types.""" + kwargs: dict = { + "model_seed": model_seed, + "face_limit": face_limit if face_limit != -1 else None, + "auto_size": auto_size, + "export_uv": export_uv, + "compress": "geometry" if compress_geometry else None, + } + kwargs.update(_resolve_p1_texture_fields(output_mode)) + return kwargs + + +class TripoP1TextToModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TripoP1TextToModelNode", + display_name="Tripo P1: Text to Model", + category="3d/partner/Tripo", + description="Tripo P1 text-to-3D. Optimized for low-poly, game-ready meshes with stable topology.", + inputs=[ + IO.String.Input("prompt", multiline=True, tooltip="Up to 1024 characters."), + IO.String.Input("negative_prompt", multiline=True, optional=True, tooltip="Up to 255 characters."), + _build_p1_output_mode(include_image_alignment=False), + IO.Int.Input("image_seed", default=42, optional=True, advanced=True), + *_p1_common_inputs(), + ], + outputs=[ + IO.String.Output(display_name="model_file"), # for backward compatibility only + IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["output_mode", "output_mode.texture_quality"]), + expr=_p1_price_expr(geometry_credits=30, textured_credits=40, detailed_credits=50), + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + output_mode: dict, + negative_prompt: str | None = None, + image_seed: int | None = None, + face_limit: int = -1, + model_seed: int | None = None, + auto_size: bool = False, + export_uv: bool = True, + compress_geometry: bool = False, + ) -> IO.NodeOutput: + if not prompt: + raise RuntimeError("Prompt is required") + common = _build_p1_request_kwargs( + output_mode=output_mode, + face_limit=face_limit, + model_seed=model_seed, + auto_size=auto_size, + export_uv=export_uv, + compress_geometry=compress_geometry, + ) + request = TripoP1TextToModelRequest( + prompt=prompt, + negative_prompt=negative_prompt or None, + image_seed=image_seed, + **common, + ) + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/task", method="POST"), + response_model=TripoTaskResponse, + data=request, + ) + return await poll_until_finished(cls, response, average_duration=60) + + +class TripoP1ImageToModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TripoP1ImageToModelNode", + display_name="Tripo P1: Image to Model", + category="3d/partner/Tripo", + description="Tripo P1 image-to-3D. Optimized for low-poly, game-ready meshes.", + inputs=[ + IO.Image.Input("image"), + _build_p1_output_mode(include_image_alignment=True), + IO.Boolean.Input( + "enable_image_autofix", + default=False, + optional=True, + advanced=True, + tooltip="Pre-process the input image for better generation quality.", + ), + *_p1_common_inputs(), + ], + outputs=[ + IO.String.Output(display_name="model_file"), # for backward compatibility only + IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["output_mode", "output_mode.texture_quality"]), + expr=_p1_price_expr(geometry_credits=40, textured_credits=50, detailed_credits=60), + ), + ) + + @classmethod + async def execute( + cls, + image: Input.Image, + output_mode: dict, + enable_image_autofix: bool = False, + face_limit: int = -1, + model_seed: int | None = None, + auto_size: bool = False, + export_uv: bool = True, + compress_geometry: bool = False, + ) -> IO.NodeOutput: + if image is None: + raise RuntimeError("Image is required") + tripo_file = TripoFileReference( + root=TripoUrlReference( + url=(await upload_images_to_comfyapi(cls, image, max_images=1))[0], + type="jpeg", + ) + ) + common = _build_p1_request_kwargs( + output_mode=output_mode, + face_limit=face_limit, + model_seed=model_seed, + auto_size=auto_size, + export_uv=export_uv, + compress_geometry=compress_geometry, + ) + request = TripoP1ImageToModelRequest( + file=tripo_file, + enable_image_autofix=enable_image_autofix, + **common, + ) + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/task", method="POST"), + response_model=TripoTaskResponse, + data=request, + ) + return await poll_until_finished(cls, response, average_duration=60) + + +class TripoP1MultiviewToModelNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TripoP1MultiviewToModelNode", + display_name="Tripo P1: Multiview to Model", + category="3d/partner/Tripo", + description="Tripo P1 multiview-to-3D from 2-4 reference images in [front, left, back, right] order. " + "Front is required; any combination of the other three may be omitted.", + inputs=[ + IO.Image.Input("image", tooltip="Front view (0°). Required."), + IO.Image.Input( + "image_left", + optional=True, + tooltip="Left view (90°), i.e. the subject's left side.", + ), + IO.Image.Input("image_back", optional=True, tooltip="Back view (180°)."), + IO.Image.Input( + "image_right", + optional=True, + tooltip="Right view (270°), i.e. the subject's right side.", + ), + _build_p1_output_mode(include_image_alignment=True), + *_p1_common_inputs(), + ], + outputs=[ + IO.String.Output(display_name="model_file"), # for backward compatibility only + IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["output_mode", "output_mode.texture_quality"]), + expr=_p1_price_expr(geometry_credits=40, textured_credits=50, detailed_credits=60), + ), + ) + + @classmethod + async def execute( + cls, + image: Input.Image, + output_mode: dict, + image_left: Input.Image | None = None, + image_back: Input.Image | None = None, + image_right: Input.Image | None = None, + face_limit: int = -1, + model_seed: int | None = None, + auto_size: bool = False, + export_uv: bool = True, + compress_geometry: bool = False, + ) -> IO.NodeOutput: + views = [image, image_left, image_back, image_right] + if sum(1 for v in views if v is not None) < 2: + raise RuntimeError("Tripo P1 multiview requires at least 2 images (front plus one of left/back/right).") + + files: list[TripoFileReference] = [] + for view in views: + if view is None: + files.append(TripoFileReference(root=TripoFileEmptyReference())) + continue + url = (await upload_images_to_comfyapi(cls, view, max_images=1))[0] + files.append(TripoFileReference(root=TripoUrlReference(url=url, type="jpeg"))) + + common = _build_p1_request_kwargs( + output_mode=output_mode, + face_limit=face_limit, + model_seed=model_seed, + auto_size=auto_size, + export_uv=export_uv, + compress_geometry=compress_geometry, + ) + request = TripoP1MultiviewToModelRequest(files=files, **common) + response = await sync_op( + cls, + endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/task", method="POST"), + response_model=TripoTaskResponse, + data=request, + ) + return await poll_until_finished(cls, response, average_duration=80) + + class TripoExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -855,6 +1289,9 @@ class TripoExtension(ComfyExtension): TripoTextToModelNode, TripoImageToModelNode, TripoMultiviewToModelNode, + TripoP1TextToModelNode, + TripoP1ImageToModelNode, + TripoP1MultiviewToModelNode, TripoTextureNode, TripoRefineNode, TripoRigNode, diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py index 6f05f050e..b339dc4ff 100644 --- a/comfy_extras/nodes_load_3d.py +++ b/comfy_extras/nodes_load_3d.py @@ -124,12 +124,71 @@ class Preview3D(IO.ComfyNode): process = execute # TODO: remove +class Preview3DAdvanced(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Preview3DAdvanced", + display_name="Preview 3D (Advanced)", + search_aliases=["preview 3d", "3d viewer", "view mesh", "frame 3d", "3d camera output"], + category="3d", + is_experimental=True, + is_output_node=True, + inputs=[ + IO.MultiType.Input( + "model_file", + types=[ + IO.File3DGLB, + IO.File3DGLTF, + IO.File3DFBX, + IO.File3DOBJ, + IO.File3DSTL, + IO.File3DUSDZ, + IO.File3DAny, + ], + tooltip="3D model file from an upstream 3D node.", + ), + IO.Load3D.Input("image"), + IO.Load3DCamera.Input("camera_info", optional=True, advanced=True), + IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True), + IO.Int.Input("width", default=1024, min=1, max=4096, step=1), + IO.Int.Input("height", default=1024, min=1, max=4096, step=1), + ], + outputs=[ + IO.File3DAny.Output(display_name="model_file"), + IO.Load3DCamera.Output(display_name="camera_info"), + IO.Load3DModelInfo.Output(display_name="model_3d_info"), + IO.Int.Output(display_name="width"), + IO.Int.Output(display_name="height"), + ], + ) + + @classmethod + def execute(cls, model_file: Types.File3D, image, width: int, height: int, **kwargs) -> IO.NodeOutput: + filename = f"preview3d_advanced_{uuid.uuid4().hex}.{model_file.format}" + model_file.save_to(os.path.join(folder_paths.get_output_directory(), filename)) + + camera_info_input = kwargs.get("camera_info", None) + camera_info = camera_info_input if camera_info_input is not None else image['camera_info'] + model_3d_info_input = kwargs.get("model_3d_info", None) + model_3d_info = model_3d_info_input if model_3d_info_input is not None else image.get('model_3d_info', []) + return IO.NodeOutput( + model_file, + camera_info, + model_3d_info, + width, + height, + ui=UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info), + ) + + class Load3DExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ Load3D, Preview3D, + Preview3DAdvanced, ] diff --git a/requirements.txt b/requirements.txt index 0617667e1..14bba1437 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ comfyui-frontend-package==1.44.19 comfyui-workflow-templates==0.9.91 -comfyui-embedded-docs==0.5.1 +comfyui-embedded-docs==0.5.2 torch torchsde torchvision @@ -22,8 +22,8 @@ alembic SQLAlchemy>=2.0.0 filelock av>=16.0.0 -comfy-kitchen==0.2.9 -comfy-aimdo==0.4.5 +comfy-kitchen==0.2.10 +comfy-aimdo==0.4.7 requests simpleeval>=1.0.0 blake3