mirror of https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-23 21:00:16 +08:00

init the models in comfyui testing

parent a09d1786e7
commit 95803e3537
@@ -562,11 +562,13 @@ class MoELRUCache(nn.Module):
 
 def parse_layer_expert(key):
     parts = key.split(".")
-    layer = int(parts[2])
-    expert = int(parts[5])
+    layer = int(parts[3])
+    expert = int(parts[6])
     return layer, expert
 
 
 class LazyMoELoader(nn.Module):
+    dtype = None
+    operations = None
     def __init__(self, cache, config, max_workers = 16, max_concurrent_loads = 32):
         super().__init__()
         self.cache = cache
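Note on the index shift: moving from parts[2]/parts[5] to parts[3]/parts[6] is consistent with the checkpoint keys gaining one extra leading component (later in this diff the embedding key is "model.model.wte.weight"). A minimal sketch with a hypothetical expert key:

    # hypothetical key with the extra "model." prefix; the exact layout is
    # an assumption based on the "model.model.wte.weight" key seen below
    key = "model.model.layers.3.mlp.experts.6.up_proj.weight"
    parts = key.split(".")   # ["model", "model", "layers", "3", "mlp", "experts", "6", ...]
    layer = int(parts[3])    # 3
    expert = int(parts[6])   # 6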
@@ -574,7 +576,7 @@ class LazyMoELoader(nn.Module):
         self._loop = cache._loop
         self.expert_key_index = self.index_safetensors()
         self._checkpoint = self.get_checkpoint()
-        self._file = safe_open(self._checkpoint, framework="pt", device="cpu", mmap=True)
+        self._file = safe_open(self._checkpoint, framework="pt", device="cpu")
         self.expert_pool = self.build_meta_experts()
 
         self._executor = ThreadPoolExecutor(max_workers=max_workers)
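Note: as far as the published safetensors API goes, safe_open takes (filename, framework, device) and has no mmap flag; the file is memory-mapped and tensors materialize only on access, so dropping the keyword does not change behavior. A minimal sketch of the lazy-read pattern:

    from safetensors import safe_open

    # the file stays mapped; only the requested tensor is materialized
    with safe_open("hunyuan_image_3.safetensors", framework="pt", device="cpu") as f:
        name = next(iter(f.keys()))
        tensor = f.get_tensor(name)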
@@ -595,9 +597,11 @@ class LazyMoELoader(nn.Module):
         return pool
 
     def get_checkpoint(self):
-        comfyui_dir = Path.home() / "ComfyUI"
-        checkpoint = comfyui_dir / "models" / "checkpoints" / "hunyuan_image_3.safetensors"
-        checkpoint = checkpoint.resolve()
+        CURRENT_DIR = Path(__file__).resolve().parent
+        COMFY_ROOT = CURRENT_DIR.parents[2]
+        checkpoint = COMFY_ROOT / "models" / "checkpoints" / "hunyuan_image_3.safetensors"
+        checkpoint = str(checkpoint)
         if not os.path.exists(checkpoint):
             raise ValueError(f"Hunyuan Image 3 Checkpoint on one GPU should have the path: {checkpoint}")
         return checkpoint
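Note: anchoring on Path(__file__) instead of Path.home() / "ComfyUI" finds the checkpoint wherever the repo was cloned, not only in a home-directory install. parents[2] climbs three directories from the module's folder, which assumes the module sits three levels below the repo root. A sketch with a hypothetical layout:

    from pathlib import Path

    # hypothetical path; assumes a layout like <root>/comfy/ldm/hunyuan3/loader.py
    p = Path("/opt/ComfyUI/comfy/ldm/hunyuan3/loader.py")
    current_dir = p.parent           # /opt/ComfyUI/comfy/ldm/hunyuan3
    print(current_dir.parents[0])    # /opt/ComfyUI/comfy/ldm
    print(current_dir.parents[2])    # /opt/ComfyUI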
@@ -607,6 +611,8 @@ class LazyMoELoader(nn.Module):
         index = {}
         with safe_open(checkpoint, framework="pt", device="cpu") as f:
             for k in f.keys():
+                if "_SKIP__" in k:
+                    k = k.split("_SKIP__")[1]
                 if "experts." in k:
                     layer, expert = parse_layer_expert(k)
                     index.setdefault(layer, {}).setdefault(expert, []).append(k)
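Note: keys carrying a "_SKIP__" marker are normalized before the expert check; split("_SKIP__")[1] keeps only the portion after the marker, so parse_layer_expert sees a plain dotted key. Illustrative only, since the prefix before the marker is not shown in this diff:

    # hypothetical prefixed key
    k = "offload_SKIP__model.model.layers.3.mlp.experts.6.up_proj.weight"
    if "_SKIP__" in k:
        k = k.split("_SKIP__")[1]
    # k == "model.model.layers.3.mlp.experts.6.up_proj.weight"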
@@ -915,9 +921,9 @@ class HunyuanImage3Model(nn.Module):
         self.shared_tensor = None
         self.moe_lru = moe_lru
         self.additional_layers_set = False
+        LazyMoELoader.dtype = dtype
+        LazyMoELoader.operations = operations
         self.moe_loader = LazyMoELoader(self.moe_lru, self.config)
-        self.moe_loader.operations = operations
-        self.moe_loader.dtype = dtype
 
     def forward(
         self,
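Note: the old code assigned dtype and operations on the instance after construction, which is too late for anything __init__ does with them (build_meta_experts, for example, runs inside __init__). Setting them on the class first makes them visible during construction. The ordering problem, reduced to a sketch:

    LazyMoELoader.dtype = dtype            # visible inside __init__
    LazyMoELoader.operations = operations
    loader = LazyMoELoader(cache, config)  # __init__ can now read cls.dtype

    # versus the old pattern, where __init__ had already finished:
    # loader = LazyMoELoader(cache, config)
    # loader.dtype = dtype                 # too late for build_meta_experts()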
@@ -1337,20 +1337,20 @@ class HunyuanImage3(supported_models_base.BASE):
         "image_model": "hunyuan_image_3",
     }
     latent_format = latent_formats.HunyuanImage3
+    text_encoder_key_prefix = ["text_encoders."]
+
     def get_model(self, state_dict, prefix="", device=None):
-        self.wte_sd = state_dict["model.model.wte"]
-        state_dict.pop("model.model.wte", None)
+        self.wte_sd = state_dict["model.model.wte.weight"]
         model = model_base.HunyuanImage3(self, device = device)
+        state_dict["text_encoders.transformer.wte.weight"] = self.wte_sd
 
         temp_tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImage3Tokenizer()
         model.encode_tok = temp_tokenizer.tokenizer.convert_tokens_to_ids
         model.special_tok = temp_tokenizer.tokenizer.added_tokens_encoder
 
         return model
 
     def clip_target(self, state_dict={}):
-        clip = comfy.text_encoders.hunyuan_image.HunyuanImage3
-        clip.embed_wte = self.wte_sd
         return supported_models_base.ClipTarget(comfy.text_encoders.hunyuan_image.HunyuanImage3Tokenizer, comfy.text_encoders.hunyuan_image.HunyuanImage3)
 
 class HunyuanImage21(HunyuanVideo):
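Note: rather than stashing the embedding table on the text-encoder class (a mutable class attribute shared across instances), the weight is re-keyed into the text-encoder namespace so the ordinary state-dict loading path restores it; text_encoder_key_prefix tells the loader which keys belong to that path. A minimal sketch of the re-keying, with key names taken from the diff:

    wte = state_dict["model.model.wte.weight"]
    state_dict["text_encoders.transformer.wte.weight"] = wte
    # keys under the "text_encoders." prefix are routed to the text
    # encoder's own load path instead of being wired up by hand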
@@ -0,0 +1,23 @@
+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
comfy/text_encoders/hunyuan3_tokenizer/tokenizer.json: new file (1204368), diff suppressed because it is too large
comfy/text_encoders/hunyuan3_tokenizer/tokenizer_config.json: new file (17019), diff suppressed because it is too large
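Note: the JSON above has the shape of a Hugging Face special_tokens_map.json (the filename itself is not shown on this page, so that is an assumption). Together with the suppressed tokenizer.json and tokenizer_config.json it forms a directory AutoTokenizer can load directly:

    from transformers import AutoTokenizer

    # assuming the directory added in this commit
    tok = AutoTokenizer.from_pretrained("comfy/text_encoders/hunyuan3_tokenizer")
    print(tok.bos_token, tok.eos_token, tok.pad_token)
    # <|startoftext|> <|endoftext|> <pad>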
@@ -7,22 +7,24 @@ import os
 import re
 
 class HunyuanImage3TextEncoder(torch.nn.Module):
-    embed_wte = None
-    def __init__(self):
+    def __init__(self, config, dtype=None, device=None, operations=None):
         super().__init__()
-        self.wte = torch.nn.Embedding(133120, 4096, padding_idx = 128009)
-        self.wte.data = self.embed_wte
+        self.wte = operations.Embedding(133120, 4096, padding_idx = 128009, device=device, dtype=dtype)
+        self.num_layers = 1
+
+    def get_input_embeddings(self):
+        return self.wte
 
     def forward(self, x):
-        out = self.wte(x)
-        return out, torch.empty_like(out)
+        return x, torch.empty_like(x)
 
 class HunyuanImage3(sd1_clip.SDClipModel):
     def __init__(self, device="cpu", max_length=77, freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=HunyuanImage3TextEncoder, layer_norm_hidden_state=True, enable_attention_masks=False, zero_out_masked=False, return_projected_pooled=False, return_attention_masks=False, model_options={}):
-        super().__init__(device, max_length, freeze, layer, layer_idx, textmodel_json_config, dtype, model_class, layer_norm_hidden_state, enable_attention_masks, zero_out_masked, return_projected_pooled, return_attention_masks, model_options)
+        self.dtypes = [torch.bfloat16, torch.float32]
+        super().__init__(device=device, max_length=max_length, freeze=freeze, layer=layer, layer_idx=layer_idx, textmodel_json_config=textmodel_json_config, dtype=dtype, model_class=model_class, layer_norm_hidden_state=layer_norm_hidden_state, enable_attention_masks=enable_attention_masks, zero_out_masked=zero_out_masked, return_projected_pooled=return_projected_pooled, return_attention_masks=return_attention_masks, model_options=model_options, special_tokens={"pad": 128009})
 
 class HunyuanImage3Tokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, tokenizer_path="hunyuan3_tokenizer", max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=4096, embedding_key='clip_l', tokenizer_class=AutoTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=False, min_length=None, pad_token=128009, end_token=None, min_padding=None):
+    def __init__(self, tokenizer_path="hunyuan3_tokenizer", max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=4096, embedding_key='clip_l', tokenizer_class=AutoTokenizer, pad_to_max_length=False, min_length=None, pad_token=128009, tokenizer_data = {}):
         tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), tokenizer_path)
-        super().__init__(tokenizer_path, max_length, pad_with_end, embedding_directory, embedding_size, embedding_key, tokenizer_class, has_start_token, has_end_token, pad_to_max_length, min_length, pad_token, end_token, min_padding)
+        super().__init__(tokenizer_path, max_length, pad_with_end, embedding_directory, embedding_size, embedding_key, tokenizer_class, pad_to_max_length, min_length, pad_token)
 
 class ByT5SmallTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None, tokenizer_data={}):
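Note: the rewritten encoder exposes get_input_embeddings() and turns forward() into a pass-through over its first output. That reads as the caller (sd1_clip.SDClipModel) resolving token ids through the embedding table itself before invoking the model; this is an interpretation of the diff, not ComfyUI's documented call sequence:

    # rough shape of the assumed contract
    embed = encoder.get_input_embeddings()     # operations.Embedding(133120, 4096)
    hidden = embed(token_ids)                  # the caller embeds the ids itself
    out, pooled_placeholder = encoder(hidden)  # forward() passes hidden through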