init the models in comfyui testing

Yousef Rafat 2025-12-05 16:29:41 +02:00
parent a09d1786e7
commit 95803e3537
6 changed files with 1221439 additions and 21 deletions


@@ -562,11 +562,13 @@ class MoELRUCache(nn.Module):
 def parse_layer_expert(key):
     parts = key.split(".")
-    layer = int(parts[2])
-    expert = int(parts[5])
+    layer = int(parts[3])
+    expert = int(parts[6])
     return layer, expert

 class LazyMoELoader(nn.Module):
+    dtype = None
+    operations = None
     def __init__(self, cache, config, max_workers = 16, max_concurrent_loads = 32):
         super().__init__()
         self.cache = cache
@@ -574,7 +576,7 @@ class LazyMoELoader(nn.Module):
         self._loop = cache._loop
         self.expert_key_index = self.index_safetensors()
         self._checkpoint = self.get_checkpoint()
-        self._file = safe_open(self._checkpoint, framework="pt", device="cpu", mmap=True)
+        self._file = safe_open(self._checkpoint, framework="pt", device="cpu")
         self.expert_pool = self.build_meta_experts()
         self._executor = ThreadPoolExecutor(max_workers=max_workers)
@@ -595,9 +597,11 @@ class LazyMoELoader(nn.Module):
         return pool

     def get_checkpoint(self):
-        comfyui_dir = Path.home() / "ComfyUI"
-        checkpoint = comfyui_dir / "models" / "checkpoints" / "hunyuan_image_3.safetensors"
-        checkpoint = checkpoint.resolve()
+        CURRENT_DIR = Path(__file__).resolve().parent
+        COMFY_ROOT = CURRENT_DIR.parents[2]
+        checkpoint = COMFY_ROOT / "models" / "checkpoints" / "hunyuan_image_3.safetensors"
+        checkpoint = checkpoint.resolve()
+        checkpoint = str(checkpoint)
         if not os.path.exists(checkpoint):
             raise ValueError(f"Hunyuan Image 3 Checkpoint on one GPU should have the path: {checkpoint}")
         return checkpoint
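
Note: CURRENT_DIR.parents[2] implies the module sits three directory levels below the ComfyUI root. A hedged sketch of the path arithmetic; the module location used here is an assumption, not stated in the diff:

    from pathlib import Path

    # hypothetical module location, e.g. ComfyUI/comfy/ldm/hunyuan_image_3/model.py
    f = Path("/home/user/ComfyUI/comfy/ldm/hunyuan_image_3/model.py")
    current_dir = f.resolve().parent     # .../comfy/ldm/hunyuan_image_3
    comfy_root = current_dir.parents[2]  # .../ComfyUI
    print(comfy_root / "models" / "checkpoints" / "hunyuan_image_3.safetensors")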
@@ -607,6 +611,8 @@ class LazyMoELoader(nn.Module):
         index = {}
         with safe_open(checkpoint, framework="pt", device="cpu") as f:
             for k in f.keys():
+                if "_SKIP__" in k:
+                    k = k.split("_SKIP__")[1]
                 if "experts." in k:
                     layer, expert = parse_layer_expert(k)
                     index.setdefault(layer, {}).setdefault(expert, []).append(k)
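
Note: the bump from parts[2]/parts[5] to parts[3]/parts[6], together with the "_SKIP__" stripping, suggests the checkpoint keys gained one extra leading component. A hedged illustration; the exact key layout is inferred, not stated in the diff:

    def parse_layer_expert(key):
        parts = key.split(".")
        return int(parts[3]), int(parts[6])

    k = "model.model.layers.12.mlp.experts.7.up_proj.weight"  # assumed key shape
    if "_SKIP__" in k:
        k = k.split("_SKIP__")[1]
    print(parse_layer_expert(k))  # (12, 7)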
@@ -915,9 +921,9 @@ class HunyuanImage3Model(nn.Module):
         self.shared_tensor = None
         self.moe_lru = moe_lru
         self.additional_layers_set = False
+        LazyMoELoader.dtype = dtype
+        LazyMoELoader.operations = operations
         self.moe_loader = LazyMoELoader(self.moe_lru, self.config)
-        self.moe_loader.operations = operations
-        self.moe_loader.dtype = dtype

     def forward(
         self,
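
Note: moving dtype/operations from post-construction instance attributes to pre-construction class attributes matters because LazyMoELoader.__init__ already does work (indexing the safetensors, building the meta expert pool) that may read them; attributes set after the constructor returns arrive too late. A minimal stand-alone sketch; Widget is a hypothetical stand-in:

    class Widget:
        dtype = None
        def __init__(self):
            print("dtype at init:", self.dtype)

    w1 = Widget()        # dtype at init: None  (set afterwards -> too late)
    w1.dtype = "bf16"

    Widget.dtype = "bf16"
    w2 = Widget()        # dtype at init: bf16  (visible during __init__)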


@@ -1337,20 +1337,20 @@ class HunyuanImage3(supported_models_base.BASE):
         "image_model": "hunyuan_image_3",
     }

     latent_format = latent_formats.HunyuanImage3
+    text_encoder_key_prefix = ["text_encoders."]

     def get_model(self, state_dict, prefix="", device=None):
-        self.wte_sd = state_dict["model.model.wte"]
+        self.wte_sd = state_dict["model.model.wte.weight"]
+        state_dict.pop("model.model.wte", None)
         model = model_base.HunyuanImage3(self, device = device)
+        state_dict["text_encoders.transformer.wte.weight"] = self.wte_sd
         temp_tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImage3Tokenizer()
         model.encode_tok = temp_tokenizer.tokenizer.convert_tokens_to_ids
         model.special_tok = temp_tokenizer.tokenizer.added_tokens_encoder
         return model

     def clip_target(self, state_dict={}):
-        clip = comfy.text_encoders.hunyuan_image.HunyuanImage3
-        clip.embed_wte = self.wte_sd
         return supported_models_base.ClipTarget(comfy.text_encoders.hunyuan_image.HunyuanImage3Tokenizer, comfy.text_encoders.hunyuan_image.HunyuanImage3)

 class HunyuanImage21(HunyuanVideo):
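
Note: rather than smuggling the embedding table through a class attribute (the removed clip.embed_wte), get_model now re-keys the weights under text_encoder_key_prefix so they travel the ordinary state-dict route into the text encoder. A hedged sketch of that routing; only the key names come from the diff, the prefix-slicing loop is assumed:

    import torch

    state_dict = {"model.model.wte.weight": torch.empty(133120, 4096, device="meta")}

    # re-key the embedding table under the text-encoder prefix
    wte_sd = state_dict.pop("model.model.wte.weight")
    state_dict["text_encoders.transformer.wte.weight"] = wte_sd

    # a prefix-based loader would then slice out the text-encoder weights
    prefix = "text_encoders."
    te_sd = {k[len(prefix):]: v for k, v in state_dict.items() if k.startswith(prefix)}
    print(list(te_sd))  # ['transformer.wte.weight']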


@@ -0,0 +1,23 @@
+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
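
Note: this special_tokens_map.json follows the standard Hugging Face layout, so AutoTokenizer.from_pretrained picks it up when HunyuanImage3Tokenizer points at the hunyuan3_tokenizer directory. A quick check; the local path, and that the remaining tokenizer files (presumably the suppressed oversized diffs below) sit alongside it, are assumptions:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("hunyuan3_tokenizer")
    print(tok.bos_token, tok.eos_token, tok.pad_token)
    # <|startoftext|> <|endoftext|> <pad>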

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -7,22 +7,24 @@ import os
 import re

 class HunyuanImage3TextEncoder(torch.nn.Module):
-    embed_wte = None
-    def __init__(self):
+    def __init__(self, config, dtype=None, device=None, operations=None):
         super().__init__()
-        self.wte = torch.nn.Embedding(133120, 4096, padding_idx = 128009)
-        self.wte.data = self.embed_wte
+        self.wte = operations.Embedding(133120, 4096, padding_idx = 128009, device=device, dtype=dtype)
+        self.num_layers = 1
+
+    def get_input_embeddings(self):
+        return self.wte

     def forward(self, x):
-        out = self.wte(x)
-        return out, torch.empty_like(out)
+        return x, torch.empty_like(x)

 class HunyuanImage3(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", max_length=77, freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=HunyuanImage3TextEncoder, layer_norm_hidden_state=True, enable_attention_masks=False, zero_out_masked=False, return_projected_pooled=False, return_attention_masks=False, model_options={}):
-        super().__init__(device, max_length, freeze, layer, layer_idx, textmodel_json_config, dtype, model_class, layer_norm_hidden_state, enable_attention_masks, zero_out_masked, return_projected_pooled, return_attention_masks, model_options)
+        self.dtypes = [torch.bfloat16, torch.float32]
+        super().__init__(device=device, max_length=max_length, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, model_class=model_class, layer_norm_hidden_state=layer_norm_hidden_state, enable_attention_masks=enable_attention_masks, zero_out_masked=zero_out_masked, return_projected_pooled=return_projected_pooled, return_attention_masks=return_attention_masks, model_options=model_options, special_tokens={"pad": 128009})

 class HunyuanImage3Tokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path="hunyuan3_tokenizer", max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=4096, embedding_key='clip_l', tokenizer_class=AutoTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=False, min_length=None, pad_token=128009, end_token=None, min_padding=None):
+    def __init__(self, tokenizer_path="hunyuan3_tokenizer", max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=4096, embedding_key='clip_l', tokenizer_class=AutoTokenizer, pad_to_max_length=False, min_length=None, pad_token=128009, tokenizer_data = {}):
         tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), tokenizer_path)
-        super().__init__(tokenizer_path, max_length, pad_with_end, embedding_directory, embedding_size, embedding_key, tokenizer_class, has_start_token, has_end_token, pad_to_max_length, min_length, pad_token, end_token, min_padding)
+        super().__init__(tokenizer_path, max_length, pad_with_end, embedding_directory, embedding_size, embedding_key, tokenizer_class, pad_to_max_length, min_length, pad_token)

 class ByT5SmallTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None, tokenizer_data={}):
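
Note: with embed_wte gone, the encoder builds its embedding through the injected operations namespace (so the re-keyed state dict can populate it), and forward() becomes a pass-through over already-embedded input. A hedged sketch of the new contract, using torch.nn as a stand-in for ComfyUI's operations classes and device="meta" to avoid allocating the full 133120x4096 table:

    import torch

    enc = HunyuanImage3TextEncoder(config=None, dtype=torch.bfloat16,
                                   device="meta", operations=torch.nn)
    ids = torch.zeros(1, 3, dtype=torch.long, device="meta")
    emb = enc.get_input_embeddings()(ids)  # (1, 3, 4096) token embeddings
    out, extra = enc(emb)                  # forward() just passes x through
    assert out.shape == emb.shape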