Fix DINOv3 ViT-H detection shadowed by generic dino3_large check

The dino3_large discriminator (layer.9.attention.o_proj.bias) also matches
ViT-H checkpoints since o_proj always has bias=True, so it must be checked
after the specific ViT-H (gated MLP + 32-layer) signature.
This commit is contained in:
kijai 2026-06-10 10:47:30 +03:00
parent 3af63b8961
commit 35065d500a

View File

@@ -139,10 +139,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json")
elif 'encoder.layer.23.layer_scale2.lambda1' in sd:
json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_large.json")
-elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers)
-json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json")
elif 'layer.0.mlp.gate_proj.weight' in sd and 'layer.31.norm1.weight' in sd: # Dinov3 ViT-H/16+ (SwiGLU gated MLP, 32 layers)
json_config = comfy.image_encoders.dino3.DINOV3_VITH_CONFIG
+elif 'layer.9.attention.o_proj.bias' in sd: # dinov3 large (24 layers); generic o_proj.bias key, so must come after the ViT-H check
+json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino3_large.json")
else:
return None