Fix mistral 3 tokenizer code failing on latest transformers version and other breakage. (#12095)
* Fix mistral 3 tokenizer code failing on latest transformers version.
* Add requests to the requirements
commit 2129e7d278
parent 7ee77ff038
@@ -466,7 +466,7 @@ def load_embed(embedding_name, embedding_directory, embedding_size, embed_key=No
     return embed_out
 
 class SDTokenizer:
-    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, min_padding=None, pad_left=False, disable_weights=False, tokenizer_data={}, tokenizer_args={}):
+    def __init__(self, tokenizer_path=None, max_length=77, pad_with_end=True, embedding_directory=None, embedding_size=768, embedding_key='clip_l', tokenizer_class=CLIPTokenizer, has_start_token=True, has_end_token=True, pad_to_max_length=True, min_length=None, pad_token=None, end_token=None, start_token=None, min_padding=None, pad_left=False, disable_weights=False, tokenizer_data={}, tokenizer_args={}):
         if tokenizer_path is None:
             tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_tokenizer")
         self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path, **tokenizer_args)
@@ -479,8 +479,15 @@ class SDTokenizer:
         empty = self.tokenizer('')["input_ids"]
         self.tokenizer_adds_end_token = has_end_token
         if has_start_token:
-            self.tokens_start = 1
-            self.start_token = empty[0]
+            if len(empty) > 0:
+                self.tokens_start = 1
+                self.start_token = empty[0]
+            else:
+                self.tokens_start = 0
+                self.start_token = start_token
+                if start_token is None:
+                    logging.warning("WARNING: There's something wrong with your tokenizers.'")
+
             if end_token is not None:
                 self.end_token = end_token
             else:
@@ -488,7 +495,7 @@ class SDTokenizer:
                     self.end_token = empty[1]
         else:
             self.tokens_start = 0
-            self.start_token = None
+            self.start_token = start_token
             if end_token is not None:
                 self.end_token = end_token
             else:
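The SDTokenizer changes above boil down to one guard: on newer transformers versions the Mistral tokenizer apparently returns an empty list for self.tokenizer('')["input_ids"], so the old empty[0] lookup for the start (BOS) id crashed; the new start_token argument lets callers supply that id directly. Below is a minimal sketch of that fallback, separate from the diff, with illustrative names (resolve_start_token and explicit_start_token are not ComfyUI identifiers):

import logging

def resolve_start_token(tokenizer, explicit_start_token=None):
    # Encoding an empty string normally yields only the special tokens
    # (BOS, and possibly EOS) that the tokenizer inserts on its own.
    empty = tokenizer('')["input_ids"]
    if len(empty) > 0:
        # The old behaviour still works: the first special token is the BOS id.
        return empty[0]
    # Newer tokenizer builds may add no special tokens here, so fall back
    # to an explicitly supplied id instead of indexing an empty list.
    if explicit_start_token is None:
        logging.warning("tokenizer added no special tokens and no start_token was given")
    return explicit_start_token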
@@ -118,7 +118,7 @@ class MistralTokenizerClass:
 class Mistral3Tokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None, tokenizer_data={}):
         self.tekken_data = tokenizer_data.get("tekken_model", None)
-        super().__init__("", pad_with_end=False, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data)
+        super().__init__("", pad_with_end=False, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, start_token=1, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data)
 
     def state_dict(self):
         return {"tekken_model": self.tekken_data}
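With this hunk, Mistral3Tokenizer pins start_token=1 (presumably its BOS id) so it no longer depends on what the wrapped tokenizer emits for an empty prompt. If you want to see which behaviour your installed transformers produces, a quick check along these lines works; this is an assumed workflow, not from this repo, and the checkpoint id is only an example of a tokenizer you may already have cached:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("openai-community/gpt2")  # example checkpoint only
print(tok("")["input_ids"])  # an empty list means code relying on empty[0] would fail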
@@ -22,6 +22,7 @@ alembic
 SQLAlchemy
 av>=14.2.0
 comfy-kitchen>=0.2.7
+requests
 
 #non essential dependencies:
 kornia>=0.7.1