Move CogVideoXT5Tokenizer out of CogVideoX_T2V.clip_target into its own module.

The tokenizer subclass (T5XXLTokenizer with min_length=226) was previously
defined inside clip_target; this patch defines it in the new file
comfy/text_encoders/cogvideo.py and makes clip_target reference it from there.

diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index c96168b84..33b268ac1 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -27,6 +27,7 @@ import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
 import comfy.text_encoders.longcat_image
 import comfy.text_encoders.ernie
+import comfy.text_encoders.cogvideo
 
 from . import supported_models_base
 from . import latent_formats
@@ -1811,11 +1812,7 @@ class CogVideoX_T2V(supported_models_base.BASE):
         return out
 
     def clip_target(self, state_dict={}):
-        class CogVideoXT5Tokenizer(comfy.text_encoders.sd3_clip.T5XXLTokenizer):
-            def __init__(self, embedding_directory=None, tokenizer_data={}):
-                super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, min_length=226)
-
-        return supported_models_base.ClipTarget(CogVideoXT5Tokenizer, comfy.text_encoders.sd3_clip.T5XXLModel)
+        return supported_models_base.ClipTarget(comfy.text_encoders.cogvideo.CogVideoXT5Tokenizer, comfy.text_encoders.sd3_clip.T5XXLModel)
 
 class CogVideoX_I2V(CogVideoX_T2V):
     unet_config = {
diff --git a/comfy/text_encoders/cogvideo.py b/comfy/text_encoders/cogvideo.py
new file mode 100644
index 000000000..f1e8e3f5d
--- /dev/null
+++ b/comfy/text_encoders/cogvideo.py
@@ -0,0 +1,6 @@
+import comfy.text_encoders.sd3_clip
+
+
+class CogVideoXT5Tokenizer(comfy.text_encoders.sd3_clip.T5XXLTokenizer):
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, min_length=226)