From 4fe75dcce812689d37e82e0f0fc0f68faf2c26a1 Mon Sep 17 00:00:00 2001 From: azazeal04 <132445160+azazeal04@users.noreply.github.com> Date: Sat, 4 Apr 2026 17:23:46 +0200 Subject: [PATCH] Add configuration for TwinFlow-Z-Image model --- comfy/model_detection.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index aed486e09..7caad2e0b 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -46,10 +46,38 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): # TwinFlow-Z-Image: detect dual timestep embedder checkpoints first. if any(k.startswith('{}t_embedder_2.'.format(key_prefix)) for k in state_dict_keys): - return { + dit_config = { "image_model": "twinflow_z_image", "architecture": "TwinFlow_Z_Image", + "patch_size": 2, + "in_channels": 16, + "qk_norm": True, + "ffn_dim_multiplier": (8.0 / 3.0), + "z_image_modulation": True, + "time_scale": 1000.0, + "n_refiner_layers": 2, } + + cap_embedder_key = '{}cap_embedder.1.weight'.format(key_prefix) + if cap_embedder_key in state_dict: + w = state_dict[cap_embedder_key] + dit_config["dim"] = w.shape[0] + dit_config["cap_feat_dim"] = w.shape[1] + + dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.') + + # Match Z-Image style defaults (TwinFlow checkpoints are 3840-dim variants). + dit_config["n_heads"] = 30 + dit_config["n_kv_heads"] = 30 + dit_config["axes_dims"] = [32, 48, 48] + dit_config["axes_lens"] = [1536, 512, 512] + dit_config["rope_theta"] = 256.0 + + if '{}cap_pad_token'.format(key_prefix) in state_dict_keys or '{}x_pad_token'.format(key_prefix) in state_dict_keys: + dit_config["pad_tokens_multiple"] = 32 + + return dit_config + if '{}joint_blocks.0.context_block.attn.qkv.weight'.format(key_prefix) in state_dict_keys: #mmdit model unet_config = {} unet_config["in_channels"] = state_dict['{}x_embedder.proj.weight'.format(key_prefix)].shape[1]