Fix DINOv3 ViT-H detection shadowed by generic dino3_large check

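The dino3_large discriminator key (layer.9.attention.o_proj.bias) is also present in ViT-H checkpoints, because o_proj always has bias=True. When the dino3_large check runs first, it shadows the ViT-H branch and ViT-H checkpoints are misdetected as dino3_large. The dino3_large check must therefore come after the more specific ViT-H signature check (gated MLP + 32 layers).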
2026-07-03 21:20:49 +08:00 · 2026-06-10 10:47:30 +03:00 · 2026-06-10 10:47:30 +03:00 · 35065d500a
commit 35065d500a
parent 3af63b8961
1 changed files with 2 additions and 2 deletions
--- a/comfy/clip_vision.py
+++ b/comfy/clip_vision.py
@@ -139,10 +139,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json")
    elif 'encoder.layer.23.layer_scale2.lambda1' in sd:
        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_large.json")
-    elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers)
-        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json")
    elif 'layer.0.mlp.gate_proj.weight' in sd and 'layer.31.norm1.weight' in sd: # Dinov3 ViT-H/16+ (SwiGLU gated MLP, 32 layers)
        json_config = comfy.image_encoders.dino3.DINOV3_VITH_CONFIG
+    elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers); generic o_proj.bias key, so must come after the ViT-H check
+        json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json")
    else:
        return None