mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-03 21:07:27 +08:00
Merge branch 'master' into feat/point-cloud-gaussian-splat-nodes-v2
This commit is contained in:
commit
1141620177
@ -105,7 +105,7 @@ class WindowAttention(nn.Module):
|
|||||||
|
|
||||||
relative_position_bias = self.relative_position_bias_table[self.relative_position_index.long().view(-1)].view(
|
relative_position_bias = self.relative_position_bias_table[self.relative_position_index.long().view(-1)].view(
|
||||||
self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH
|
self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH
|
||||||
relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
|
relative_position_bias = comfy.ops.cast_to_input(relative_position_bias.permute(2, 0, 1).contiguous(), attn) # nH, Wh*Ww, Wh*Ww
|
||||||
attn = attn + relative_position_bias.unsqueeze(0)
|
attn = attn + relative_position_bias.unsqueeze(0)
|
||||||
|
|
||||||
if mask is not None:
|
if mask is not None:
|
||||||
|
|||||||
@ -2,7 +2,6 @@ from .utils import load_torch_file, transformers_convert, state_dict_prefix_repl
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import torch
|
|
||||||
|
|
||||||
import comfy.ops
|
import comfy.ops
|
||||||
import comfy.model_patcher
|
import comfy.model_patcher
|
||||||
@ -50,10 +49,6 @@ class ClipVisionModel():
|
|||||||
self.load_device = comfy.model_management.text_encoder_device()
|
self.load_device = comfy.model_management.text_encoder_device()
|
||||||
offload_device = comfy.model_management.text_encoder_offload_device()
|
offload_device = comfy.model_management.text_encoder_offload_device()
|
||||||
self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
|
self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
|
||||||
if self.model_type == "dinov3" and self.dtype == torch.float16:
|
|
||||||
# DINOv3's activations borderline fits fp16, preferring bf16 if available for better stability #TODO: further fp16 tests in practice
|
|
||||||
if comfy.model_management.should_use_bf16(self.load_device, prioritize_performance=True):
|
|
||||||
self.dtype = torch.bfloat16
|
|
||||||
self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
|
self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast)
|
||||||
self.model.eval()
|
self.model.eval()
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
|
import comfy.ops
|
||||||
from comfy.ldm.modules.attention import optimized_attention_for_device
|
from comfy.ldm.modules.attention import optimized_attention_for_device
|
||||||
from comfy.image_encoders.dino2 import LayerScale as DINOv3ViTLayerScale
|
from comfy.image_encoders.dino2 import LayerScale as DINOv3ViTLayerScale
|
||||||
|
|
||||||
@ -166,17 +167,16 @@ class DINOv3ViTEmbeddings(nn.Module):
|
|||||||
|
|
||||||
def forward(self, pixel_values, bool_masked_pos=None):
|
def forward(self, pixel_values, bool_masked_pos=None):
|
||||||
batch_size = pixel_values.shape[0]
|
batch_size = pixel_values.shape[0]
|
||||||
target_dtype = self.patch_embeddings.weight.dtype
|
|
||||||
|
|
||||||
patch_embeddings = self.patch_embeddings(pixel_values.to(dtype=target_dtype))
|
patch_embeddings = self.patch_embeddings(pixel_values)
|
||||||
patch_embeddings = patch_embeddings.flatten(2).transpose(1, 2)
|
patch_embeddings = patch_embeddings.flatten(2).transpose(1, 2)
|
||||||
|
|
||||||
if bool_masked_pos is not None:
|
if bool_masked_pos is not None:
|
||||||
mask_token = self.mask_token.to(patch_embeddings.dtype)
|
mask_token = comfy.ops.cast_to_input(self.mask_token, patch_embeddings)
|
||||||
patch_embeddings = torch.where(bool_masked_pos.unsqueeze(-1), mask_token, patch_embeddings)
|
patch_embeddings = torch.where(bool_masked_pos.unsqueeze(-1), mask_token, patch_embeddings)
|
||||||
|
|
||||||
cls_token = self.cls_token.expand(batch_size, -1, -1).to(patch_embeddings.device)
|
cls_token = comfy.ops.cast_to_input(self.cls_token.expand(batch_size, -1, -1), patch_embeddings)
|
||||||
register_tokens = self.register_tokens.expand(batch_size, -1, -1).to(patch_embeddings.device)
|
register_tokens = comfy.ops.cast_to_input(self.register_tokens.expand(batch_size, -1, -1), patch_embeddings)
|
||||||
embeddings = torch.cat([cls_token, register_tokens, patch_embeddings], dim=1)
|
embeddings = torch.cat([cls_token, register_tokens, patch_embeddings], dim=1)
|
||||||
return embeddings
|
return embeddings
|
||||||
|
|
||||||
@ -244,7 +244,6 @@ class DINOv3ViTModel(nn.Module):
|
|||||||
return self.embeddings.patch_embeddings
|
return self.embeddings.patch_embeddings
|
||||||
|
|
||||||
def forward(self, pixel_values, bool_masked_pos=None, **kwargs):
|
def forward(self, pixel_values, bool_masked_pos=None, **kwargs):
|
||||||
pixel_values = pixel_values.to(self.embeddings.patch_embeddings.weight.dtype)
|
|
||||||
hidden_states = self.embeddings(pixel_values, bool_masked_pos=bool_masked_pos)
|
hidden_states = self.embeddings(pixel_values, bool_masked_pos=bool_masked_pos)
|
||||||
position_embeddings = self.rope_embeddings(pixel_values)
|
position_embeddings = self.rope_embeddings(pixel_values)
|
||||||
|
|
||||||
|
|||||||
@ -102,11 +102,18 @@ class MathExpressionNode(io.ComfyNode):
|
|||||||
f"Math Expression '{expression}' must evaluate to a numeric result, "
|
f"Math Expression '{expression}' must evaluate to a numeric result, "
|
||||||
f"got {type(result).__name__}: {result!r}"
|
f"got {type(result).__name__}: {result!r}"
|
||||||
)
|
)
|
||||||
if not math.isfinite(result):
|
try:
|
||||||
|
float_result = float(result)
|
||||||
|
except OverflowError:
|
||||||
|
raise ValueError(
|
||||||
|
f"Math Expression '{expression}' produced a result too large to "
|
||||||
|
f"represent as a float: {result}"
|
||||||
|
) from None
|
||||||
|
if not math.isfinite(float_result):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Math Expression '{expression}' produced a non-finite result: {result}"
|
f"Math Expression '{expression}' produced a non-finite result: {result}"
|
||||||
)
|
)
|
||||||
return io.NodeOutput(float(result), int(result), bool(result))
|
return io.NodeOutput(float_result, int(result), bool(result))
|
||||||
|
|
||||||
|
|
||||||
class MathExtension(ComfyExtension):
|
class MathExtension(ComfyExtension):
|
||||||
|
|||||||
@ -115,12 +115,11 @@ class TripoSplatConditioning(IO.ComfyNode):
|
|||||||
# feature1: DINOv3 token sequence (cls + registers + patches), ImageNet-normalized, with a final non-affine layer norm on top
|
# feature1: DINOv3 token sequence (cls + registers + patches), ImageNet-normalized, with a final non-affine layer norm on top
|
||||||
comfy.model_management.load_model_gpu(clip_vision.patcher)
|
comfy.model_management.load_model_gpu(clip_vision.patcher)
|
||||||
device = clip_vision.load_device
|
device = clip_vision.load_device
|
||||||
model_dtype = next(clip_vision.model.parameters()).dtype
|
|
||||||
img = image.movedim(-1, 1).to(device) # (B,3,H,W) in [0,1]
|
img = image.movedim(-1, 1).to(device) # (B,3,H,W) in [0,1]
|
||||||
mean = torch.tensor(_DINOV3_MEAN, device=device).view(1, 3, 1, 1)
|
mean = torch.tensor(_DINOV3_MEAN, device=device).view(1, 3, 1, 1)
|
||||||
std = torch.tensor(_DINOV3_STD, device=device).view(1, 3, 1, 1)
|
std = torch.tensor(_DINOV3_STD, device=device).view(1, 3, 1, 1)
|
||||||
img = (img - mean) / std
|
img = (img - mean) / std
|
||||||
seq = clip_vision.model(pixel_values=img.to(model_dtype))[0]
|
seq = clip_vision.model(pixel_values=img.float())[0]
|
||||||
feature1 = F.layer_norm(seq.float(), seq.shape[-1:]).to(comfy.model_management.intermediate_device())
|
feature1 = F.layer_norm(seq.float(), seq.shape[-1:]).to(comfy.model_management.intermediate_device())
|
||||||
|
|
||||||
# Second conditioning: the Flux2 VAE latent of the image, carried as a standard reference_latents entry
|
# Second conditioning: the Flux2 VAE latent of the image, carried as a standard reference_latents entry
|
||||||
@ -233,7 +232,9 @@ class TripoSplatSamplingPreview(IO.ComfyNode):
|
|||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
if not state["loaded"]:
|
if not state["loaded"]:
|
||||||
comfy.model_management.load_models_gpu([vae.patcher], memory_required=memory_required)
|
loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
|
||||||
|
loaded_models.append(vae.patcher)
|
||||||
|
comfy.model_management.load_models_gpu(loaded_models, memory_required=memory_required)
|
||||||
state["loaded"] = True
|
state["loaded"] = True
|
||||||
img = decode_x0_to_image(vae, x0, cfg)
|
img = decode_x0_to_image(vae, x0, cfg)
|
||||||
if state["pbar"] is None:
|
if state["pbar"] is None:
|
||||||
|
|||||||
@ -197,3 +197,10 @@ class TestMathExpressionExecute:
|
|||||||
def test_pow_huge_exponent_raises(self):
|
def test_pow_huge_exponent_raises(self):
|
||||||
with pytest.raises(ValueError, match="Exponent .* exceeds maximum"):
|
with pytest.raises(ValueError, match="Exponent .* exceeds maximum"):
|
||||||
self._exec("pow(a, b)", a=10, b=10000000)
|
self._exec("pow(a, b)", a=10, b=10000000)
|
||||||
|
|
||||||
|
def test_huge_int_result_raises_value_error(self):
|
||||||
|
# Exponent is within the allowed MAX_EXPONENT range, so the result is a
|
||||||
|
# finite Python int that is nonetheless too large to convert to float.
|
||||||
|
# This must raise a clean ValueError, not an uncaught OverflowError.
|
||||||
|
with pytest.raises(ValueError, match="too large to represent as a float"):
|
||||||
|
self._exec("2 ** 3999")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user