Add configuration for TwinFlow-Z-Image model

2026-07-06 14:41:11 +08:00 · 2026-04-04 17:23:46 +02:00 · 2026-04-04 17:23:46 +02:00 · 4fe75dcce8
commit 4fe75dcce8
parent fec629ba3d
1 changed file with 29 additions and 1 deletion
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -46,10 +46,38 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):

    # TwinFlow-Z-Image: detect dual timestep embedder checkpoints first.
    if any(k.startswith('{}t_embedder_2.'.format(key_prefix)) for k in state_dict_keys):
-        return {
+        dit_config = {
            "image_model": "twinflow_z_image",
            "architecture": "TwinFlow_Z_Image",
+            "patch_size": 2,
+            "in_channels": 16,
+            "qk_norm": True,
+            "ffn_dim_multiplier": (8.0 / 3.0),
+            "z_image_modulation": True,
+            "time_scale": 1000.0,
+            "n_refiner_layers": 2,
        }
+
+        cap_embedder_key = '{}cap_embedder.1.weight'.format(key_prefix)
+        if cap_embedder_key in state_dict:
+            w = state_dict[cap_embedder_key]
+            dit_config["dim"] = w.shape[0]
+            dit_config["cap_feat_dim"] = w.shape[1]
+
+        dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
+
+        # Match Z-Image style defaults (TwinFlow checkpoints are 3840-dim variants).
+        dit_config["n_heads"] = 30
+        dit_config["n_kv_heads"] = 30
+        dit_config["axes_dims"] = [32, 48, 48]
+        dit_config["axes_lens"] = [1536, 512, 512]
+        dit_config["rope_theta"] = 256.0
+
+        if '{}cap_pad_token'.format(key_prefix) in state_dict_keys or '{}x_pad_token'.format(key_prefix) in state_dict_keys:
+            dit_config["pad_tokens_multiple"] = 32
+
+        return dit_config
+        
    if '{}joint_blocks.0.context_block.attn.qkv.weight'.format(key_prefix) in state_dict_keys: #mmdit model
        unet_config = {}
        unet_config["in_channels"] = state_dict['{}x_embedder.proj.weight'.format(key_prefix)].shape[1]