From 35065d500aabc18898f3eca57aac38193ec44e36 Mon Sep 17 00:00:00 2001
From: kijai <kijaidesign@gmail.com>
Date: Wed, 10 Jun 2026 10:47:30 +0300
Subject: [PATCH] Fix DINOv3 ViT-H detection shadowed by generic dino3_large
 check

The dino3_large discriminator key (layer.9.attention.o_proj.bias) is also
present in ViT-H checkpoints, because o_proj always has bias=True. The
dino3_large check must therefore run after the more specific ViT-H check
(gated MLP + 32 layers).
---
 comfy/clip_vision.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py
index f8f5bc269..2c8b4d5d6 100644
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@@ -139,10 +139,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
         json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json")
     elif 'encoder.layer.23.layer_scale2.lambda1' in sd:
         json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_large.json")
-    elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers)
-        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json")
     elif 'layer.0.mlp.gate_proj.weight' in sd and 'layer.31.norm1.weight' in sd: # Dinov3 ViT-H/16+ (SwiGLU gated MLP, 32 layers)
         json_config = comfy.image_encoders.dino3.DINOV3_VITH_CONFIG
+    elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers); generic o_proj.bias key, so must come after the ViT-H check
+        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json")
     else:
         return None