Fixes from ComfyUI testing

This commit is contained in:
Yousef Rafat 2025-12-03 23:33:46 +02:00
parent 3dd39efa03
commit a09d1786e7
4 changed files with 16 additions and 18 deletions

View File

@ -596,7 +596,7 @@ class LazyMoELoader(nn.Module):
def get_checkpoint(self):
comfyui_dir = Path.home() / "ComfyUI"
checkpoint = comfyui_dir / "models" / "checkpoint" / "hunyuan_image_3.safetensors"
checkpoint = comfyui_dir / "models" / "checkpoints" / "hunyuan_image_3.safetensors"
checkpoint = checkpoint.resolve()
if not os.path.exists(checkpoint):
raise ValueError(f"Hunyuan Image 3 Checkpoint on one GPU should have the path: {checkpoint}")
@ -998,6 +998,7 @@ class HunyuanImage3ForCausalMM(nn.Module):
super().__init__()
config = kwargs
self.config = config
self.dtype = dtype
factory_kwargs = {"device": device, "dtype": dtype, "operations": operations}
self.timestep_emb = TimestepEmbedder(hidden_size=config["hidden_size"], **factory_kwargs)
@ -1065,7 +1066,7 @@ class HunyuanImage3ForCausalMM(nn.Module):
cond_exists = (joint_image[:, 0, :] != -100.0).any(dim=1).any()
height, width = x.size(2) * 16, x.size(3) * 16
gen_timestep_scatter_index = 4
gen_timestep_scatter_index = 3
def fn(string, func = self.encode_tok):
return self.model.wte(torch.tensor(func(string) if not isinstance(func, dict) else func[string], device=inputs_embeds.device))\
@ -1175,7 +1176,7 @@ class HunyuanImage3ForCausalMM(nn.Module):
hidden_states = hidden_states.to(inputs_embeds.device)
img_mask = torch.zeros(hidden_states.size(1))
img_mask[seq_len + x.size(1)+4:] = 1; img_mask[-1] = 0
img_mask[seq_len + 5:seq_len + 5 + x.size(1)] = 1
diffusion_prediction = self.ragged_final_layer(
hidden_states, img_mask, timestep, int(token_height), int(token_width), self.first_step)

View File

@ -483,7 +483,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
dit_config["text_emb_dim"] = 2048
return dit_config
if "{}layers.32.mlp.gate_and_up_proj.weight".format(key_prefix) in state_dict_keys:
if "__SKIP__{}model.layers.0.mlp.experts.0.down_proj.weight" or "{}model.layers.0.mlp.experts.0.down_proj.weight".format(key_prefix) in state_dict_keys: # Hunyaun Image 3
dit_config = {}
dit_config["image_model"] = "hunyuan_image_3"
dit_config["hidden_size"] = 4096

View File

@ -1,7 +1,7 @@
from comfy import sd1_clip
import comfy.text_encoders.llama
from .qwen_image import QwenImageTokenizer, QwenImageTEModel
from transformers import ByT5Tokenizer
from transformers import ByT5Tokenizer, AutoTokenizer
import torch
import os
import re
@ -20,8 +20,9 @@ class HunyuanImage3(sd1_clip.SDClipModel):
super().__init__(device, max_length, freeze, layer, layer_idx, textmodel_json_config, dtype, model_class, layer_norm_hidden_state, enable_attention_masks, zero_out_masked, return_projected_pooled, return_attention_masks, model_options)
class HunyuanImage3Tokenizer(sd1_clip.SDTokenizer):
def __init__(self, tokenizer_path="hunyuan_image_3", max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=..., has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, min_padding=None, tokenizer_data=..., tokenizer_args=...):
super().__init__(tokenizer_path, max_length, pad_with_end, embedding_directory, embedding_size, embedding_key, tokenizer_class, has_start_token, has_end_token, pad_to_max_length, min_length, pad_token, end_token, min_padding, tokenizer_data, tokenizer_args)
def __init__(self, tokenizer_path="hunyuan3_tokenizer", max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=4096, embedding_key='clip_l', tokenizer_class=AutoTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=False, min_length=None, pad_token=128009, end_token=None, min_padding=None):
tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), tokenizer_path)
super().__init__(tokenizer_path, max_length, pad_with_end, embedding_directory, embedding_size, embedding_key, tokenizer_class, has_start_token, has_end_token, pad_to_max_length, min_length, pad_token, end_token, min_padding)
class ByT5SmallTokenizer(sd1_clip.SDTokenizer):
def __init__(self, embedding_directory=None, tokenizer_data={}):

View File

@ -40,7 +40,6 @@ class HunyuanImage3Conditioning(io.ComfyNode):
category="conditioning/video_models",
inputs = [
io.Conditioning.Input("text_encoding_positive"),
io.Clip.Input("clip"),
io.Model.Input("model"),
io.Conditioning.Input("vae_encoding", optional=True),
io.Conditioning.Input("vit_encoding", optional=True),
@ -50,11 +49,13 @@ class HunyuanImage3Conditioning(io.ComfyNode):
)
@classmethod
def execute(cls, text_encoding, clip, model, text_encoding_negative=None, vae_encoding = None, vit_encoding = None):
encode_fn = clip.tokenizer.tokenizer.convert_tokens_to_ids
special_fn = clip.tokenizer.tokenizer.added_tokens_encoder
def execute(cls, text_encoding, model, text_encoding_negative=None, vae_encoding = None, vit_encoding = None):
model = model.diffusion_model
encode_fn = model.encode_tok
special_fn = model.special_tok
word_embed = model.wte
word_embed = clip.wte
patch_embed = model.patch_embed
t_embed = model.time_embed
@ -80,12 +81,7 @@ class HunyuanImage3Conditioning(io.ComfyNode):
vae_mask = torch.empty_like(joint_image)
ragged_tensors = torch.nested.nested_tensor([joint_image, vae_mask, text_tokens.to(joint_image.dtype)])
uncond_ragged_tensors = None
if text_encoding_negative is not None:
uncond_ragged_tensors, _ = cls.execute(vae_encoding, vit_encoding, text_encoding_negative, clip=clip, text_encoding_negative = None)
else:
uncond_ragged_tensors = torch.nested.nested_tensor([torch.zeros_like(t) for t in ragged_tensors.unbind()])
uncond_ragged_tensors = torch.nested.nested_tensor([torch.zeros_like(t) for t in ragged_tensors.unbind()])
if uncond_ragged_tensors is not None:
positive = [[ragged_tensors, {}]]