diff --git a/comfy/ldm/hunyuan_image_3/model.py b/comfy/ldm/hunyuan_image_3/model.py index 28a296f06..877936fc6 100644 --- a/comfy/ldm/hunyuan_image_3/model.py +++ b/comfy/ldm/hunyuan_image_3/model.py @@ -562,11 +562,13 @@ class MoELRUCache(nn.Module): def parse_layer_expert(key): parts = key.split(".") - layer = int(parts[2]) - expert = int(parts[5]) + layer = int(parts[3]) + expert = int(parts[6]) return layer, expert class LazyMoELoader(nn.Module): + dtype = None + operations = None def __init__(self, cache, config, max_workers = 16, max_concurrent_loads = 32): super().__init__() self.cache = cache @@ -574,7 +576,7 @@ class LazyMoELoader(nn.Module): self._loop = cache._loop self.expert_key_index = self.index_safetensors() self._checkpoint = self.get_checkpoint() - self._file = safe_open(self._checkpoint, framework="pt", device="cpu", mmap=True) + self._file = safe_open(self._checkpoint, framework="pt", device="cpu") self.expert_pool = self.build_meta_experts() self._executor = ThreadPoolExecutor(max_workers=max_workers) @@ -595,9 +597,11 @@ class LazyMoELoader(nn.Module): return pool def get_checkpoint(self): - comfyui_dir = Path.home() / "ComfyUI" - checkpoint = comfyui_dir / "models" / "checkpoints" / "hunyuan_image_3.safetensors" - checkpoint = checkpoint.resolve() + CURRENT_DIR = Path(__file__).resolve().parent + COMFY_ROOT = CURRENT_DIR.parents[2] + + checkpoint = COMFY_ROOT / "models" / "checkpoints" / "hunyuan_image_3.safetensors" + checkpoint = str(checkpoint) if not os.path.exists(checkpoint): raise ValueError(f"Hunyuan Image 3 Checkpoint on one GPU should have the path: {checkpoint}") return checkpoint @@ -607,6 +611,8 @@ class LazyMoELoader(nn.Module): index = {} with safe_open(checkpoint, framework="pt", device="cpu") as f: for k in f.keys(): + if "_SKIP__" in k: + k = k.split("_SKIP__")[1] if "experts." in k: layer, expert = parse_layer_expert(k) index.setdefault(layer, {}).setdefault(expert, []).append(k) @@ -915,9 +921,9 @@ class HunyuanImage3Model(nn.Module): self.shared_tensor = None self.moe_lru = moe_lru self.additional_layers_set = False + LazyMoELoader.dtype = dtype + LazyMoELoader.operations = operations self.moe_loader = LazyMoELoader(self.moe_lru, self.config) - self.moe_loader.operations = operations - self.moe_loader.dtype = dtype def forward( self, diff --git a/comfy/supported_models.py b/comfy/supported_models.py index c9bd19363..aabb67998 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -1337,20 +1337,20 @@ class HunyuanImage3(supported_models_base.BASE): "image_model": "hunyuan_image_3", } latent_format = latent_formats.HunyuanImage3 + text_encoder_key_prefix = ["text_encoders."] def get_model(self, state_dict, prefix="", device=None): - self.wte_sd = state_dict["model.model.wte"] - state_dict.pop("model.model.wte", None) + self.wte_sd = state_dict["model.model.wte.weight"] model = model_base.HunyuanImage3(self, device = device) + state_dict["text_encoders.transformer.wte.weight"] = self.wte_sd temp_tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImage3Tokenizer() model.encode_tok = temp_tokenizer.tokenizer.convert_tokens_to_ids model.special_tok = temp_tokenizer.tokenizer.added_tokens_encoder return model + def clip_target(self, state_dict={}): - clip = comfy.text_encoders.hunyuan_image.HunyuanImage3 - clip.embed_wte = self.wte_sd return supported_models_base.ClipTarget(comfy.text_encoders.hunyuan_image.HunyuanImage3Tokenizer, comfy.text_encoders.hunyuan_image.HunyuanImage3) class HunyuanImage21(HunyuanVideo): diff --git a/comfy/text_encoders/hunyuan3_tokenizer/special_tokens_map.json b/comfy/text_encoders/hunyuan3_tokenizer/special_tokens_map.json new file mode 100644 index 000000000..63c0a4d2f --- /dev/null +++ b/comfy/text_encoders/hunyuan3_tokenizer/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/comfy/text_encoders/hunyuan3_tokenizer/tokenizer.json b/comfy/text_encoders/hunyuan3_tokenizer/tokenizer.json new file mode 100644 index 000000000..b7b031391 --- /dev/null +++ b/comfy/text_encoders/hunyuan3_tokenizer/tokenizer.json @@ -0,0 +1,1204368 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 127957, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127958, + "content": "<|startoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127959, + "content": "<|bos|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127960, + "content": "<|eos|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127961, + "content": "<|pad|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127962, + "content": "<|extra_0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127963, + "content": "<|extra_1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127964, + "content": "<|extra_2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127965, + "content": "<|extra_3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127966, + "content": "<|extra_4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127967, + "content": "<|extra_5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127968, + "content": "<|extra_6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127969, + "content": "<|extra_7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127970, + "content": "<|extra_8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127971, + "content": "<|extra_9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127972, + "content": "<|extra_10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127973, + "content": "<|extra_11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127974, + "content": "<|extra_12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127975, + "content": "<|extra_13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127976, + "content": "<|extra_14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127977, + "content": "<|extra_15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127978, + "content": "<|extra_16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127979, + "content": "<|extra_17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127980, + "content": "<|extra_18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127981, + "content": "<|extra_19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127982, + "content": "<|extra_20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127983, + "content": "<|extra_21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127984, + "content": "<|extra_22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127985, + "content": "<|extra_23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127986, + "content": "<|extra_24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127987, + "content": "<|extra_25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127988, + "content": "<|extra_26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127989, + "content": "<|extra_27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127990, + "content": "<|extra_28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127991, + "content": "<|extra_29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127992, + "content": "<|extra_30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127993, + "content": "<|extra_31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127994, + "content": "<|extra_32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127995, + "content": "<|extra_33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127996, + "content": "<|extra_34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127997, + "content": "<|extra_35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127998, + "content": "<|extra_36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127999, + "content": "<|extra_37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128000, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128001, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128002, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128003, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128004, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128005, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128006, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128007, + "content": "