From 35065d500aabc18898f3eca57aac38193ec44e36 Mon Sep 17 00:00:00 2001 From: kijai Date: Wed, 10 Jun 2026 10:47:30 +0300 Subject: [PATCH] Fix DINOv3 ViT-H detection shadowed by generic dino3_large check The dino3_large discriminator (layer.9.attention.o_proj.bias) also matches ViT-H checkpoints since o_proj always has bias=True, so it must be checked after the specific ViT-H (gated MLP + 32-layer) signature. --- comfy/clip_vision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index f8f5bc269..2c8b4d5d6 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -139,10 +139,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False): json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json") elif 'encoder.layer.23.layer_scale2.lambda1' in sd: json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_large.json") - elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers) - json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json") elif 'layer.0.mlp.gate_proj.weight' in sd and 'layer.31.norm1.weight' in sd: # Dinov3 ViT-H/16+ (SwiGLU gated MLP, 32 layers) json_config = comfy.image_encoders.dino3.DINOV3_VITH_CONFIG + elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers); generic o_proj.bias key, so must come after the ViT-H check + json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json") else: return None