mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-03-17 07:05:12 +08:00
fix: Z-Image LoRA and model loading for HuggingFace format weights
Three fixes for Z-Image Turbo support. (1) model_detection.py: add Z-Image to convert_diffusers_mmdit() so HuggingFace-format safetensors (which use to_q/to_k/to_v and all_x_embedder.2-1 key names) are detected and converted. (2) sd.py: apply the Z-Image key conversion when loading a ZImage model whose weights are in HF format (all_x_embedder.2-1 present), mapping the separate to_q/to_k/to_v weights into the combined qkv format ComfyUI expects. (3) lora.py: fix sliced LoRA patches being silently discarded. In calculate_weight(), when a LoRA targets a slice of a combined weight (e.g. to_q/to_k/to_v -> qkv), the computed output was never written back to the weight tensor; fix by calling narrow().copy_() before restoring old_weight. This bug affected any LoRA that used sliced key mappings. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
850e8b42ff
commit
7ecc3a3935
@@ -426,9 +426,11 @@ def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, ori
|
||||
if output is None:
|
||||
logging.warning("Calculate Weight Failed: {} {}".format(v.name, key))
|
||||
else:
|
||||
weight = output
|
||||
if old_weight is not None:
|
||||
old_weight.narrow(offset[0], offset[1], offset[2]).copy_(output)
|
||||
weight = old_weight
|
||||
else:
|
||||
weight = output
|
||||
continue
|
||||
|
||||
if len(v) == 1:
|
||||
|
||||
@@ -1064,6 +1064,12 @@ def convert_diffusers_mmdit(state_dict, output_prefix=""):
|
||||
num_blocks = count_blocks(state_dict, 'transformer_blocks.{}.')
|
||||
depth = state_dict["pos_embed.proj.weight"].shape[0] // 64
|
||||
sd_map = comfy.utils.mmdit_to_diffusers({"depth": depth, "num_blocks": num_blocks}, output_prefix=output_prefix)
|
||||
elif 'all_x_embedder.2-1.weight' in state_dict: #Z-Image (HuggingFace format)
|
||||
w = state_dict.get('cap_embedder.1.weight')
|
||||
hidden_size = w.shape[0] if w is not None else 3840
|
||||
n_layers = count_blocks(state_dict, 'layers.{}.')
|
||||
n_refiner = count_blocks(state_dict, 'noise_refiner.{}.')
|
||||
sd_map = comfy.utils.z_image_to_diffusers({"n_layers": n_layers, "n_refiner_layers": n_refiner, "dim": hidden_size}, output_prefix=output_prefix)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
@@ -1713,6 +1713,12 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable
|
||||
|
||||
if model_config is not None:
|
||||
new_sd = sd
|
||||
# Z-Image from HuggingFace uses diffusers-style key names that need conversion
|
||||
if model_config.unet_config.get('z_image_modulation', False) and 'all_x_embedder.2-1.weight' in new_sd:
|
||||
sd_copy = dict(new_sd)
|
||||
converted = model_detection.convert_diffusers_mmdit(sd_copy, "")
|
||||
if converted is not None:
|
||||
new_sd = {**sd_copy, **converted}
|
||||
else:
|
||||
new_sd = model_detection.convert_diffusers_mmdit(sd, "")
|
||||
if new_sd is not None: #diffusers mmdit
|
||||
|
||||
Loading…
Reference in New Issue
Block a user