mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-02-07 03:52:32 +08:00
Compare commits
6 Commits
eedecee439
...
3fc1c55323
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3fc1c55323 | ||
|
|
de9ada6a41 | ||
|
|
37f711d4a1 | ||
|
|
f76e3a11b5 | ||
|
|
614b167994 | ||
|
|
23474ce816 |
@ -1,9 +1,9 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.nn as nn
|
||||
from vae import SparseTensor, SparseLinear, sparse_cat, VarLenTensor
|
||||
from comfy.ldm.trellis2.vae import SparseTensor, SparseLinear, sparse_cat, VarLenTensor
|
||||
from typing import Optional, Tuple, Literal, Union, List
|
||||
from attention import sparse_windowed_scaled_dot_product_self_attention, sparse_scaled_dot_product_attention
|
||||
from comfy.ldm.trellis2.attention import sparse_windowed_scaled_dot_product_self_attention, sparse_scaled_dot_product_attention
|
||||
from comfy.ldm.genmo.joint_model.layers import TimestepEmbedder
|
||||
|
||||
class SparseGELU(nn.GELU):
|
||||
|
||||
@ -19,7 +19,8 @@
|
||||
import psutil
|
||||
import logging
|
||||
from enum import Enum
|
||||
from comfy.cli_args import args, PerformanceFeature
|
||||
from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
|
||||
import threading
|
||||
import torch
|
||||
import sys
|
||||
import platform
|
||||
@ -650,7 +651,7 @@ def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, ram_
|
||||
soft_empty_cache()
|
||||
return unloaded_models
|
||||
|
||||
def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
|
||||
def load_models_gpu_orig(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
|
||||
cleanup_models_gc()
|
||||
global vram_state
|
||||
|
||||
@ -746,8 +747,25 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu
|
||||
current_loaded_models.insert(0, loaded_model)
|
||||
return
|
||||
|
||||
def load_model_gpu(model):
|
||||
return load_models_gpu([model])
|
||||
def load_models_gpu_thread(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load):
    """Thread target that forwards its arguments to load_models_gpu_orig.

    The load runs under torch.inference_mode(); soft_empty_cache() is then
    called from the same thread before the function returns.
    """
    with torch.inference_mode():
        load_models_gpu_orig(
            models,
            memory_required=memory_required,
            force_patch_weights=force_patch_weights,
            minimum_memory_required=minimum_memory_required,
            force_full_load=force_full_load,
        )
        soft_empty_cache()
|
||||
|
||||
def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimum_memory_required=None, force_full_load=False):
    """Load ``models`` through load_models_gpu_orig, optionally on a helper thread.

    When enables_dynamic_vram() is true the load is executed on a short-lived
    thread that is joined immediately, so the call stays synchronous. An
    exception raised on that thread is captured and re-raised here; a bare
    Thread would only hand it to threading.excepthook and this function would
    return as if the load had succeeded.

    Args:
        models: forwarded unchanged to load_models_gpu_orig.
        memory_required: forwarded unchanged.
        force_patch_weights: forwarded unchanged.
        minimum_memory_required: forwarded unchanged.
        force_full_load: forwarded unchanged.

    Raises:
        BaseException: whatever the underlying load raised, on either path.
    """
    if not enables_dynamic_vram():
        load_models_gpu_orig(models, memory_required=memory_required, force_patch_weights=force_patch_weights,
                             minimum_memory_required=minimum_memory_required, force_full_load=force_full_load)
        return

    #Deliberately load models outside of the Aimdo mempool so they can be retained across
    #nodes. Use a dummy thread to do it as pytorch documents that mempool contexts are
    #thread local. So exploit that to escape context
    failure = []

    def _run():
        try:
            load_models_gpu_thread(models, memory_required, force_patch_weights, minimum_memory_required, force_full_load)
        except BaseException as exc:  # handed back to the calling thread below
            failure.append(exc)

    worker = threading.Thread(target=_run)
    worker.start()
    worker.join()
    if failure:
        raise failure[0]
|
||||
|
||||
def loaded_models(only_currently_used=False):
|
||||
output = []
|
||||
@ -1112,11 +1130,11 @@ def get_cast_buffer(offload_stream, device, size, ref):
|
||||
return None
|
||||
if cast_buffer is not None and cast_buffer.numel() > 50 * (1024 ** 2):
|
||||
#I want my wrongly sized 50MB+ of VRAM back from the caching allocator right now
|
||||
torch.cuda.synchronize()
|
||||
synchronize()
|
||||
del STREAM_CAST_BUFFERS[offload_stream]
|
||||
del cast_buffer
|
||||
#FIXME: This doesn't work in Aimdo because mempool cant clear cache
|
||||
torch.cuda.empty_cache()
|
||||
soft_empty_cache()
|
||||
with wf_context:
|
||||
cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
|
||||
STREAM_CAST_BUFFERS[offload_stream] = cast_buffer
|
||||
@ -1132,9 +1150,7 @@ def reset_cast_buffers():
|
||||
for offload_stream in STREAM_CAST_BUFFERS:
|
||||
offload_stream.synchronize()
|
||||
STREAM_CAST_BUFFERS.clear()
|
||||
if comfy.memory_management.aimdo_allocator is None:
|
||||
#Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist
|
||||
torch.cuda.empty_cache()
|
||||
soft_empty_cache()
|
||||
|
||||
def get_offload_stream(device):
|
||||
stream_counter = stream_counters.get(device, 0)
|
||||
@ -1284,7 +1300,7 @@ def discard_cuda_async_error():
|
||||
a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
|
||||
b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device())
|
||||
_ = a + b
|
||||
torch.cuda.synchronize()
|
||||
synchronize()
|
||||
except torch.AcceleratorError:
|
||||
#Dump it! We already know about it from the synchronous return
|
||||
pass
|
||||
@ -1688,6 +1704,12 @@ def lora_compute_dtype(device):
|
||||
LORA_COMPUTE_DTYPES[device] = dtype
|
||||
return dtype
|
||||
|
||||
def synchronize():
    """Call the synchronize() of the backend in use.

    Uses torch.xpu when is_intel_xpu() is true, otherwise torch.cuda when
    CUDA is available; does nothing for any other configuration.
    """
    if is_intel_xpu():
        torch.xpu.synchronize()
        return
    if torch.cuda.is_available():
        torch.cuda.synchronize()
|
||||
|
||||
def soft_empty_cache(force=False):
|
||||
global cpu_state
|
||||
if cpu_state == CPUState.MPS:
|
||||
@ -1713,9 +1735,6 @@ def debug_memory_summary():
|
||||
return torch.cuda.memory.memory_summary()
|
||||
return ""
|
||||
|
||||
#TODO: might be cleaner to put this somewhere else
|
||||
import threading
|
||||
|
||||
class InterruptProcessingException(Exception):
    """Plain Exception subclass; adds no state or behavior of its own."""
|
||||
|
||||
|
||||
@ -1597,7 +1597,7 @@ class ModelPatcherDynamic(ModelPatcher):
|
||||
|
||||
if unpatch_weights:
|
||||
self.partially_unload_ram(1e32)
|
||||
self.partially_unload(None)
|
||||
self.partially_unload(None, 1e32)
|
||||
|
||||
def partially_load(self, device_to, extra_memory=0, force_patch_weights=False):
|
||||
assert not force_patch_weights #See above
|
||||
|
||||
@ -193,7 +193,50 @@ class Trellis2Conditioning(IO.ComfyNode):
|
||||
negative = [[conditioning["cond_neg"], {embeds}]]
|
||||
return IO.NodeOutput(positive, negative)
|
||||
|
||||
class EmptyLatentTrellis2(IO.ComfyNode):
|
||||
class EmptyShapeLatentTrellis2(IO.ComfyNode):
    """Node that creates a random SparseTensor latent over a structure output's coordinates."""

    @classmethod
    def define_schema(cls):
        """Declare one latent input named ``structure_output`` and one latent output."""
        return IO.Schema(
            # The id was left as "EmptyLatentTrellis2" after the class rename and
            # was shared with EmptyTextureLatentTrellis2; give this node its own id.
            node_id="EmptyShapeLatentTrellis2",
            category="latent/3d",
            inputs=[
                IO.Latent.Input("structure_output"),
            ],
            outputs=[
                IO.Latent.Output(),
            ]
        )

    @classmethod
    def execute(cls, structure_output):
        """Return a latent with 32 random feature channels per coordinate row.

        Args:
            structure_output: either an object exposing ``.coords`` or the
                coordinates themselves.
                NOTE(review): the accepted forms are inferred from the original
                expression; confirm against the node that produces this input.
        """
        # The original `structure_output or structure_output.coords` returned the
        # object itself whenever it was truthy (and raised on multi-element
        # tensors), so `.coords` was never usefully reached.
        coords = getattr(structure_output, "coords", structure_output)
        in_channels = 32
        latent = SparseTensor(feats=torch.randn(coords.shape[0], in_channels), coords=coords)
        return IO.NodeOutput({"samples": latent, "type": "trellis2"})
|
||||
|
||||
class EmptyTextureLatentTrellis2(IO.ComfyNode):
    """Node that replaces a structure output's feats with random texture features."""

    @classmethod
    def define_schema(cls):
        """Declare one latent input named ``structure_output`` and one latent output."""
        return IO.Schema(
            # Was copy-pasted as "EmptyLatentTrellis2", colliding with the shape
            # node's id; each node class needs its own id.
            node_id="EmptyTextureLatentTrellis2",
            category="latent/3d",
            inputs=[
                IO.Latent.Input("structure_output"),
            ],
            outputs=[
                IO.Latent.Output(),
            ]
        )

    @classmethod
    def execute(cls, structure_output):
        """Return ``structure_output`` with its feats replaced by random values.

        The new feats have one row per coordinate row and
        ``32 - structure_output.feats.shape[1]`` columns.
        """
        # TODO (carried over from the original, which did not say what remains)
        in_channels = 32
        row_count = structure_output.coords.shape[0]
        new_width = in_channels - structure_output.feats.shape[1]
        latent = structure_output.replace(feats=torch.randn(row_count, new_width))
        return IO.NodeOutput({"samples": latent, "type": "trellis2"})
|
||||
|
||||
class EmptyStructureLatentTrellis2(IO.ComfyNode):
|
||||
@classmethod
|
||||
def define_schema(cls):
|
||||
return IO.Schema(
|
||||
@ -202,35 +245,26 @@ class EmptyLatentTrellis2(IO.ComfyNode):
|
||||
inputs=[
|
||||
IO.Int.Input("resolution", default=3072, min=1, max=8192),
|
||||
IO.Int.Input("batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch."),
|
||||
IO.Vae.Input("vae"),
|
||||
IO.Boolean.Input("shape_generation", tooltip="Setting to false will generate texture."),
|
||||
IO.MultiCombo.Input("generation_type", options=["structure_generation", "shape_generation", "texture_generation"])
|
||||
],
|
||||
outputs=[
|
||||
IO.Latent.Output(),
|
||||
]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def execute(cls, batch_size, coords, vae, generation_type) -> IO.NodeOutput:
|
||||
# TODO: i will probably update how shape/texture is generated
|
||||
# could split this too
|
||||
def execute(cls, res, batch_size):
|
||||
in_channels = 32
|
||||
shape_generation = generation_type == "shape_generation"
|
||||
device = comfy.model_management.intermediate_device()
|
||||
if shape_generation:
|
||||
latent = SparseTensor(feats=torch.randn(batch_size, in_channels).to(device), coords=coords)
|
||||
else:
|
||||
# coords = shape_slat in txt gen case
|
||||
latent = coords.replace(feats=torch.randn(coords.coords.shape[0], in_channels - coords.feats.shape[1]).to(device))
|
||||
latent = torch.randn(batch_size, in_channels, res, res, res)
|
||||
return IO.NodeOutput({"samples": latent, "type": "trellis2"})
|
||||
|
||||
|
||||
class Trellis2Extension(ComfyExtension):
    """Extension exposing the Trellis2 node classes defined in this module."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return the node classes this extension registers."""
        # EmptyLatentTrellis2 is no longer listed: that class was renamed to
        # EmptyShapeLatentTrellis2, so the old name would raise NameError here.
        return [
            Trellis2Conditioning,
            EmptyShapeLatentTrellis2,
            EmptyStructureLatentTrellis2,
            EmptyTextureLatentTrellis2,
            VaeDecodeTextureTrellis,
            VaeDecodeShapeTrellis
        ]
|
||||
Loading…
Reference in New Issue
Block a user