From e785f0d212731e7f0f4b8c1638c58ab7df6f16b7 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:35:26 -0700 Subject: [PATCH 1/4] Some cast/dtype fixes for the birefnet and dino3 models. (#14217) --- comfy/background_removal/birefnet.py | 2 +- comfy/clip_vision.py | 5 ----- comfy/image_encoders/dino3.py | 4 +--- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/comfy/background_removal/birefnet.py b/comfy/background_removal/birefnet.py index df54b2b90..78a80246e 100644 --- a/comfy/background_removal/birefnet.py +++ b/comfy/background_removal/birefnet.py @@ -105,7 +105,7 @@ class WindowAttention(nn.Module): relative_position_bias = self.relative_position_bias_table[self.relative_position_index.long().view(-1)].view( self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + relative_position_bias = comfy.ops.cast_to_input(relative_position_bias.permute(2, 0, 1).contiguous(), attn) # nH, Wh*Ww, Wh*Ww attn = attn + relative_position_bias.unsqueeze(0) if mask is not None: diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index 337575191..ce8924a11 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -2,7 +2,6 @@ from .utils import load_torch_file, transformers_convert, state_dict_prefix_repl import os import json import logging -import torch import comfy.ops import comfy.model_patcher @@ -50,10 +49,6 @@ class ClipVisionModel(): self.load_device = comfy.model_management.text_encoder_device() offload_device = comfy.model_management.text_encoder_offload_device() self.dtype = comfy.model_management.text_encoder_dtype(self.load_device) - if self.model_type == "dinov3" and self.dtype == torch.float16: - # DINOv3's activations borderline fits fp16, preferring bf16 if available for better stability #TODO: further fp16 tests in practice - if comfy.model_management.should_use_bf16(self.load_device, prioritize_performance=True): - self.dtype = torch.bfloat16 self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast) self.model.eval() diff --git a/comfy/image_encoders/dino3.py b/comfy/image_encoders/dino3.py index 9bd42a66b..014d1d29a 100644 --- a/comfy/image_encoders/dino3.py +++ b/comfy/image_encoders/dino3.py @@ -166,9 +166,8 @@ class DINOv3ViTEmbeddings(nn.Module): def forward(self, pixel_values, bool_masked_pos=None): batch_size = pixel_values.shape[0] - target_dtype = self.patch_embeddings.weight.dtype - patch_embeddings = self.patch_embeddings(pixel_values.to(dtype=target_dtype)) + patch_embeddings = self.patch_embeddings(pixel_values) patch_embeddings = patch_embeddings.flatten(2).transpose(1, 2) if bool_masked_pos is not None: @@ -244,7 +243,6 @@ class DINOv3ViTModel(nn.Module): return self.embeddings.patch_embeddings def forward(self, pixel_values, bool_masked_pos=None, **kwargs): - pixel_values = pixel_values.to(self.embeddings.patch_embeddings.weight.dtype) hidden_states = self.embeddings(pixel_values, bool_masked_pos=bool_masked_pos) position_embeddings = self.rope_embeddings(pixel_values) From 06b710aa685947f3be69da1c95216e63433f5cd1 Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 1 Jun 2026 14:35:52 -0700 Subject: [PATCH 2/4] Fix issue with triposplat preview and old offloading mode. (#14218) --- comfy_extras/nodes_triposplat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/comfy_extras/nodes_triposplat.py b/comfy_extras/nodes_triposplat.py index 021b669fd..5646d611b 100644 --- a/comfy_extras/nodes_triposplat.py +++ b/comfy_extras/nodes_triposplat.py @@ -233,7 +233,9 @@ class TripoSplatSamplingPreview(IO.ComfyNode): return try: if not state["loaded"]: - comfy.model_management.load_models_gpu([vae.patcher], memory_required=memory_required) + loaded_models = comfy.model_management.loaded_models(only_currently_used=True) + loaded_models.append(vae.patcher) + comfy.model_management.load_models_gpu(loaded_models, memory_required=memory_required) state["loaded"] = True img = decode_x0_to_image(vae, x0, cfg) if state["pbar"] is None: From 4b48535a7d66b89a4314e087e70fb7051e54eaaa Mon Sep 17 00:00:00 2001 From: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> Date: Mon, 1 Jun 2026 18:08:20 -0700 Subject: [PATCH 3/4] Do tripo dinov3 inference in fp32. (#14221) --- comfy/image_encoders/dino3.py | 7 ++++--- comfy_extras/nodes_triposplat.py | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/comfy/image_encoders/dino3.py b/comfy/image_encoders/dino3.py index 014d1d29a..ad29b06f8 100644 --- a/comfy/image_encoders/dino3.py +++ b/comfy/image_encoders/dino3.py @@ -3,6 +3,7 @@ import torch import torch.nn as nn import torch.nn.functional as F +import comfy.ops from comfy.ldm.modules.attention import optimized_attention_for_device from comfy.image_encoders.dino2 import LayerScale as DINOv3ViTLayerScale @@ -171,11 +172,11 @@ class DINOv3ViTEmbeddings(nn.Module): patch_embeddings = patch_embeddings.flatten(2).transpose(1, 2) if bool_masked_pos is not None: - mask_token = self.mask_token.to(patch_embeddings.dtype) + mask_token = comfy.ops.cast_to_input(self.mask_token, patch_embeddings) patch_embeddings = torch.where(bool_masked_pos.unsqueeze(-1), mask_token, patch_embeddings) - cls_token = self.cls_token.expand(batch_size, -1, -1).to(patch_embeddings.device) - register_tokens = self.register_tokens.expand(batch_size, -1, -1).to(patch_embeddings.device) + cls_token = comfy.ops.cast_to_input(self.cls_token.expand(batch_size, -1, -1), patch_embeddings) + register_tokens = comfy.ops.cast_to_input(self.register_tokens.expand(batch_size, -1, -1), patch_embeddings) embeddings = torch.cat([cls_token, register_tokens, patch_embeddings], dim=1) return embeddings diff --git a/comfy_extras/nodes_triposplat.py b/comfy_extras/nodes_triposplat.py index 5646d611b..1848ad31a 100644 --- a/comfy_extras/nodes_triposplat.py +++ b/comfy_extras/nodes_triposplat.py @@ -115,12 +115,11 @@ class TripoSplatConditioning(IO.ComfyNode): # feature1: DINOv3 token sequence (cls + registers + patches), ImageNet-normalized, with a final non-affine layer norm on top comfy.model_management.load_model_gpu(clip_vision.patcher) device = clip_vision.load_device - model_dtype = next(clip_vision.model.parameters()).dtype img = image.movedim(-1, 1).to(device) # (B,3,H,W) in [0,1] mean = torch.tensor(_DINOV3_MEAN, device=device).view(1, 3, 1, 1) std = torch.tensor(_DINOV3_STD, device=device).view(1, 3, 1, 1) img = (img - mean) / std - seq = clip_vision.model(pixel_values=img.to(model_dtype))[0] + seq = clip_vision.model(pixel_values=img.float())[0] feature1 = F.layer_norm(seq.float(), seq.shape[-1:]).to(comfy.model_management.intermediate_device()) # Second conditioning: the Flux2 VAE latent of the image, carried as a standard reference_latents entry From 33799c4a2ee286b5b6b8aac3c45c43245641fb47 Mon Sep 17 00:00:00 2001 From: vidigoat Date: Tue, 2 Jun 2026 06:45:04 +0530 Subject: [PATCH 4/4] Fix uncaught OverflowError in Math Expression node for large int results (#14214) --- comfy_extras/nodes_math.py | 11 +++++++++-- tests-unit/comfy_extras_test/nodes_math_test.py | 7 +++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/comfy_extras/nodes_math.py b/comfy_extras/nodes_math.py index 873ee7b51..0883c65ac 100644 --- a/comfy_extras/nodes_math.py +++ b/comfy_extras/nodes_math.py @@ -102,11 +102,18 @@ class MathExpressionNode(io.ComfyNode): f"Math Expression '{expression}' must evaluate to a numeric result, " f"got {type(result).__name__}: {result!r}" ) - if not math.isfinite(result): + try: + float_result = float(result) + except OverflowError: + raise ValueError( + f"Math Expression '{expression}' produced a result too large to " + f"represent as a float: {result}" + ) from None + if not math.isfinite(float_result): raise ValueError( f"Math Expression '{expression}' produced a non-finite result: {result}" ) - return io.NodeOutput(float(result), int(result), bool(result)) + return io.NodeOutput(float_result, int(result), bool(result)) class MathExtension(ComfyExtension): diff --git a/tests-unit/comfy_extras_test/nodes_math_test.py b/tests-unit/comfy_extras_test/nodes_math_test.py index 714e37c32..030accc5e 100644 --- a/tests-unit/comfy_extras_test/nodes_math_test.py +++ b/tests-unit/comfy_extras_test/nodes_math_test.py @@ -197,3 +197,10 @@ class TestMathExpressionExecute: def test_pow_huge_exponent_raises(self): with pytest.raises(ValueError, match="Exponent .* exceeds maximum"): self._exec("pow(a, b)", a=10, b=10000000) + + def test_huge_int_result_raises_value_error(self): + # Exponent is within the allowed MAX_EXPONENT range, so the result is a + # finite Python int that is nonetheless too large to convert to float. + # This must raise a clean ValueError, not an uncaught OverflowError. + with pytest.raises(ValueError, match="too large to represent as a float"): + self._exec("2 ** 3999")