mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-05 05:47:27 +08:00
Merge branch 'master' into gaussians
This commit is contained in:
commit
1a8043d137
@ -14,6 +14,7 @@ from torchvision import transforms
|
|||||||
import comfy.patcher_extension
|
import comfy.patcher_extension
|
||||||
from comfy.ldm.modules.attention import optimized_attention
|
from comfy.ldm.modules.attention import optimized_attention
|
||||||
import comfy.ldm.common_dit
|
import comfy.ldm.common_dit
|
||||||
|
import comfy.quant_ops
|
||||||
|
|
||||||
|
|
||||||
# ---------------------- Feed Forward Network -----------------------
|
# ---------------------- Feed Forward Network -----------------------
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import torch.nn.functional as F
|
|||||||
|
|
||||||
from comfy.ldm.modules.attention import optimized_attention
|
from comfy.ldm.modules.attention import optimized_attention
|
||||||
import comfy.model_management
|
import comfy.model_management
|
||||||
|
import comfy.quant_ops
|
||||||
|
|
||||||
def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
|
def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
|
||||||
assert dim % 2 == 0
|
assert dim % 2 == 0
|
||||||
@ -19,15 +20,6 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
|
|||||||
out = torch.stack([torch.cos(out), torch.sin(out)], dim=0)
|
out = torch.stack([torch.cos(out), torch.sin(out)], dim=0)
|
||||||
return out.to(dtype=torch.float32, device=pos.device)
|
return out.to(dtype=torch.float32, device=pos.device)
|
||||||
|
|
||||||
def apply_rotary_emb(x_in: torch.Tensor, freqs_cis: torch.Tensor) -> torch.Tensor:
    """Rotate the leading channels of ``x_in`` with stacked cos/sin tables.

    The first ``freqs_cis.shape[-1]`` entries along the last axis of ``x_in``
    are cut into two halves ``(x1, x2)`` and combined as
    ``x * cos + cat(-x2, x1) * sin``. Any remaining trailing entries are
    passed through unchanged.

    Args:
        x_in: Input tensor; the rotation acts on its last axis.
        freqs_cis: Tensor whose index 0 along the first axis is used as the
            cosine table and index 1 as the sine table. The size of its last
            axis sets the rotated width, and both tables must broadcast
            against the rotated slice of ``x_in``.

    Returns:
        A tensor with the same last-axis size as ``x_in``.

    Raises:
        ValueError: If the rotated width is odd. ``chunk(2)`` would then
            return unequal halves and the rotation would be silently
            misaligned instead of failing.
    """
    rot_dim = freqs_cis.shape[-1]
    if rot_dim % 2 != 0:
        raise ValueError(f"rotary width must be even, got {rot_dim}")

    x, x_pass = x_in[..., :rot_dim], x_in[..., rot_dim:]
    cos_ = freqs_cis[0]
    sin_ = freqs_cis[1]

    # Split-half layout: (x1, x2) -> (-x2, x1).
    x1, x2 = x.chunk(2, dim=-1)
    x_rotated = torch.cat((-x2, x1), dim=-1)
    return torch.cat((x * cos_ + x_rotated * sin_, x_pass), dim=-1)
|
|
||||||
|
|
||||||
class ErnieImageEmbedND3(nn.Module):
|
class ErnieImageEmbedND3(nn.Module):
|
||||||
def __init__(self, dim: int, theta: int, axes_dim: tuple):
|
def __init__(self, dim: int, theta: int, axes_dim: tuple):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@ -37,8 +29,16 @@ class ErnieImageEmbedND3(nn.Module):
|
|||||||
|
|
||||||
def forward(self, ids: torch.Tensor) -> torch.Tensor:
|
def forward(self, ids: torch.Tensor) -> torch.Tensor:
|
||||||
emb = torch.cat([rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(3)], dim=-1)
|
emb = torch.cat([rope(ids[..., i], self.axes_dim[i], self.theta) for i in range(3)], dim=-1)
|
||||||
emb = emb.unsqueeze(3) # [2, B, S, 1, head_dim//2]
|
cos_ = emb[0]
|
||||||
return torch.stack([emb, emb], dim=-1).reshape(*emb.shape[:-1], -1) # [B, S, 1, head_dim]
|
sin_ = emb[1]
|
||||||
|
N = cos_.shape[-1]
|
||||||
|
half = N // 2
|
||||||
|
cos_top = cos_[..., :half].repeat_interleave(2, dim=-1)
|
||||||
|
sin_top = sin_[..., :half].repeat_interleave(2, dim=-1)
|
||||||
|
cos_bot = cos_[..., half:].repeat_interleave(2, dim=-1)
|
||||||
|
sin_bot = sin_[..., half:].repeat_interleave(2, dim=-1)
|
||||||
|
rot = torch.stack([cos_top, -sin_top, sin_bot, cos_bot], dim=-1)
|
||||||
|
return rot.reshape(*rot.shape[:-1], 2, 2).unsqueeze(2)
|
||||||
|
|
||||||
class ErnieImagePatchEmbedDynamic(nn.Module):
|
class ErnieImagePatchEmbedDynamic(nn.Module):
|
||||||
def __init__(self, in_channels: int, embed_dim: int, patch_size: int, operations, device=None, dtype=None):
|
def __init__(self, in_channels: int, embed_dim: int, patch_size: int, operations, device=None, dtype=None):
|
||||||
@ -115,8 +115,7 @@ class ErnieImageAttention(nn.Module):
|
|||||||
key = self.norm_k(key)
|
key = self.norm_k(key)
|
||||||
|
|
||||||
if image_rotary_emb is not None:
|
if image_rotary_emb is not None:
|
||||||
query = apply_rotary_emb(query, image_rotary_emb)
|
query, key = comfy.quant_ops.ck.apply_rope_split_half(query, key, image_rotary_emb)
|
||||||
key = apply_rotary_emb(key, image_rotary_emb)
|
|
||||||
|
|
||||||
q_flat = query.reshape(B, S, -1)
|
q_flat = query.reshape(B, S, -1)
|
||||||
k_flat = key.reshape(B, S, -1)
|
k_flat = key.reshape(B, S, -1)
|
||||||
@ -274,7 +273,7 @@ class ErnieImageModel(nn.Module):
|
|||||||
|
|
||||||
image_ids = image_ids.view(1, N_img, 3).expand(B, -1, -1)
|
image_ids = image_ids.view(1, N_img, 3).expand(B, -1, -1)
|
||||||
|
|
||||||
rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1)).to(x.dtype)
|
rotary_pos_emb = self.pos_embed(torch.cat([image_ids, text_ids], dim=1))
|
||||||
del image_ids, text_ids
|
del image_ids, text_ids
|
||||||
|
|
||||||
sample = self.time_proj(timesteps).to(dtype)
|
sample = self.time_proj(timesteps).to(dtype)
|
||||||
|
|||||||
@ -452,6 +452,16 @@ class PreviewUI3D(_UIOutput):
|
|||||||
return {"result": [self.model_file, self.camera_info, self.bg_image_path]}
|
return {"result": [self.model_file, self.camera_info, self.bg_image_path]}
|
||||||
|
|
||||||
|
|
||||||
|
class PreviewUI3DAdvanced(_UIOutput):
    """UI output that bundles a 3D model file with camera and model-info data."""

    def __init__(self, model_file, camera_info, model_3d_info):
        # Values are stored untouched; as_dict() emits them in this same order.
        self.model_file, self.camera_info, self.model_3d_info = (
            model_file,
            camera_info,
            model_3d_info,
        )

    def as_dict(self):
        """Return the three stored values as a list under the ``"result"`` key."""
        payload = [self.model_file, self.camera_info, self.model_3d_info]
        return {"result": payload}
|
||||||
|
|
||||||
|
|
||||||
class PreviewText(_UIOutput):
|
class PreviewText(_UIOutput):
|
||||||
def __init__(self, value: str, **kwargs):
|
def __init__(self, value: str, **kwargs):
|
||||||
self.value = value
|
self.value = value
|
||||||
@ -471,5 +481,6 @@ __all__ = [
|
|||||||
"PreviewAudio",
|
"PreviewAudio",
|
||||||
"PreviewVideo",
|
"PreviewVideo",
|
||||||
"PreviewUI3D",
|
"PreviewUI3D",
|
||||||
|
"PreviewUI3DAdvanced",
|
||||||
"PreviewText",
|
"PreviewText",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -124,12 +124,71 @@ class Preview3D(IO.ComfyNode):
|
|||||||
process = execute # TODO: remove
|
process = execute # TODO: remove
|
||||||
|
|
||||||
|
|
||||||
|
class Preview3DAdvanced(IO.ComfyNode):
    """Experimental output node that previews a 3D model and passes its inputs through."""

    @classmethod
    def define_schema(cls):
        """Describe the node: a 3D model input, a Load3D ``image`` input,
        optional camera/model-info overrides, and a width/height pair.
        The model, camera info, model info, width and height are also outputs.
        """
        accepted_model_types = [
            IO.File3DGLB,
            IO.File3DGLTF,
            IO.File3DFBX,
            IO.File3DOBJ,
            IO.File3DSTL,
            IO.File3DUSDZ,
            IO.File3DAny,
        ]
        node_inputs = [
            IO.MultiType.Input(
                "model_file",
                types=accepted_model_types,
                tooltip="3D model file from an upstream 3D node.",
            ),
            IO.Load3D.Input("image"),
            IO.Load3DCamera.Input("camera_info", optional=True, advanced=True),
            IO.Load3DModelInfo.Input("model_3d_info", optional=True, advanced=True),
            IO.Int.Input("width", default=1024, min=1, max=4096, step=1),
            IO.Int.Input("height", default=1024, min=1, max=4096, step=1),
        ]
        node_outputs = [
            IO.File3DAny.Output(display_name="model_file"),
            IO.Load3DCamera.Output(display_name="camera_info"),
            IO.Load3DModelInfo.Output(display_name="model_3d_info"),
            IO.Int.Output(display_name="width"),
            IO.Int.Output(display_name="height"),
        ]
        return IO.Schema(
            node_id="Preview3DAdvanced",
            display_name="Preview 3D (Advanced)",
            search_aliases=["preview 3d", "3d viewer", "view mesh", "frame 3d", "3d camera output"],
            category="3d",
            is_experimental=True,
            is_output_node=True,
            inputs=node_inputs,
            outputs=node_outputs,
        )

    @classmethod
    def execute(cls, model_file: Types.File3D, image, width: int, height: int, **kwargs) -> IO.NodeOutput:
        """Save the model for preview and forward the resolved inputs.

        The model is written into the directory returned by
        ``folder_paths.get_output_directory()`` under a
        ``preview3d_advanced_<uuid hex>.<format>`` name. ``camera_info`` and
        ``model_3d_info`` passed in ``kwargs`` win when they are not ``None``;
        otherwise the values come from ``image`` (``model_3d_info`` falls back
        to an empty list when ``image`` has no such key).
        """
        unique_suffix = uuid.uuid4().hex
        filename = f"preview3d_advanced_{unique_suffix}.{model_file.format}"
        destination = os.path.join(folder_paths.get_output_directory(), filename)
        model_file.save_to(destination)

        camera_info = kwargs.get("camera_info")
        if camera_info is None:
            camera_info = image['camera_info']

        model_3d_info = kwargs.get("model_3d_info")
        if model_3d_info is None:
            model_3d_info = image.get('model_3d_info', [])

        preview = UI.PreviewUI3DAdvanced(filename, camera_info, model_3d_info)
        return IO.NodeOutput(model_file, camera_info, model_3d_info, width, height, ui=preview)
|
||||||
|
|
||||||
|
|
||||||
class Load3DExtension(ComfyExtension):
    """Extension exposing the 3D node classes listed in ``get_node_list``."""

    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
        """Return the node classes provided by this extension."""
        node_classes = [Load3D, Preview3D, Preview3DAdvanced]
        return node_classes
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
comfyui-frontend-package==1.44.19
|
comfyui-frontend-package==1.44.19
|
||||||
comfyui-workflow-templates==0.9.91
|
comfyui-workflow-templates==0.9.91
|
||||||
comfyui-embedded-docs==0.5.1
|
comfyui-embedded-docs==0.5.2
|
||||||
torch
|
torch
|
||||||
torchsde
|
torchsde
|
||||||
torchvision
|
torchvision
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user