Check state dict key to auto enable the index_timestep_zero ref method. (#11362)

2025-12-21 03:50:50 +08:00 · 2025-12-16 14:03:17 -08:00 · 2025-12-16 14:03:17 -08:00 · ffdd53b327
commit ffdd53b327
parent 65e2103b09
2 changed files with 6 additions and 1 deletion
--- a/comfy/ldm/qwen_image/model.py
+++ b/comfy/ldm/qwen_image/model.py
@@ -363,6 +363,9 @@ class QwenImageTransformer2DModel(nn.Module):
            for _ in range(num_layers)
        ])
+        if self.default_ref_method == "index_timestep_zero":
+            self.register_buffer("__index_timestep_zero__", torch.tensor([]))
        if final_layer:
            self.norm_out = LastLayer(self.inner_dim, self.inner_dim, dtype=dtype, device=device, operations=operations)
            self.proj_out = operations.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True, dtype=dtype, device=device)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -259,7 +259,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
                dit_config["nerf_tile_size"] = 512
                dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
                dit_config["nerf_embedder_dtype"] = torch.float32
-                if "__x0__" in state_dict_keys: # x0 pred
+                if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
                    dit_config["use_x0"] = True
                else:
                    dit_config["use_x0"] = False
@@ -618,6 +618,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        dit_config["image_model"] = "qwen_image"
        dit_config["in_channels"] = state_dict['{}img_in.weight'.format(key_prefix)].shape[1]
        dit_config["num_layers"] = count_blocks(state_dict_keys, '{}transformer_blocks.'.format(key_prefix) + '{}.')
+        if "{}__index_timestep_zero__".format(key_prefix) in state_dict_keys:  # 2511
+            dit_config["default_ref_method"] = "index_timestep_zero"
        return dit_config
    if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5