Add configuration for TwinFlow-Z-Image model

This commit is contained in:
azazeal04 2026-04-04 17:23:46 +02:00 committed by GitHub
parent fec629ba3d
commit 4fe75dcce8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -46,10 +46,38 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
# TwinFlow-Z-Image: detect dual timestep embedder checkpoints first.
if any(k.startswith('{}t_embedder_2.'.format(key_prefix)) for k in state_dict_keys):
return {
dit_config = {
"image_model": "twinflow_z_image",
"architecture": "TwinFlow_Z_Image",
"patch_size": 2,
"in_channels": 16,
"qk_norm": True,
"ffn_dim_multiplier": (8.0 / 3.0),
"z_image_modulation": True,
"time_scale": 1000.0,
"n_refiner_layers": 2,
}
cap_embedder_key = '{}cap_embedder.1.weight'.format(key_prefix)
if cap_embedder_key in state_dict:
w = state_dict[cap_embedder_key]
dit_config["dim"] = w.shape[0]
dit_config["cap_feat_dim"] = w.shape[1]
dit_config["n_layers"] = count_blocks(state_dict_keys, '{}layers.'.format(key_prefix) + '{}.')
# Match Z-Image style defaults (TwinFlow checkpoints are 3840-dim variants).
dit_config["n_heads"] = 30
dit_config["n_kv_heads"] = 30
dit_config["axes_dims"] = [32, 48, 48]
dit_config["axes_lens"] = [1536, 512, 512]
dit_config["rope_theta"] = 256.0
if '{}cap_pad_token'.format(key_prefix) in state_dict_keys or '{}x_pad_token'.format(key_prefix) in state_dict_keys:
dit_config["pad_tokens_multiple"] = 32
return dit_config
if '{}joint_blocks.0.context_block.attn.qkv.weight'.format(key_prefix) in state_dict_keys: #mmdit model
unet_config = {}
unet_config["in_channels"] = state_dict['{}x_embedder.proj.weight'.format(key_prefix)].shape[1]