diff --git a/comfy/ldm/models/autoencoder.py b/comfy/ldm/models/autoencoder.py
index bd698621c..fb6576ec7 100644
--- a/comfy/ldm/models/autoencoder.py
+++ b/comfy/ldm/models/autoencoder.py
@@ -53,6 +53,8 @@ class AutoencoderKL(torch.nn.Module):
         if path.lower().endswith(".safetensors"):
             import safetensors.torch
             sd = safetensors.torch.load_file(path, device="cpu")
+        elif path.lower().endswith(".pth") or path.lower().endswith(".pt"):
+            sd = torch.load(path, map_location="cpu")
         else:
             sd = torch.load(path, map_location="cpu")["state_dict"]
         keys = list(sd.keys())
diff --git a/comfy/sd.py b/comfy/sd.py
index 2d7ff5ab0..bfca15f88 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -767,15 +767,23 @@ def load_style_model(ckpt_path):
     return StyleModel(model)
 
 
-def load_clip(ckpt_path, embedding_directory=None):
-    clip_data = utils.load_torch_file(ckpt_path)
-    config = {}
-    if "text_model.encoder.layers.22.mlp.fc1.weight" in clip_data:
-        config['target'] = 'ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder'
+def load_clip(ckpt_path, version=None, embedding_directory=None):
+    if version is not None:
+        assert version in ("openai/clip-vit-large-patch14",)
+        config = {}
+        if version == "openai/clip-vit-large-patch14":
+            config['target'] = 'ldm.modules.encoders.modules.FrozenCLIPEmbedder'
+            config["params"] = {"textmodel_path": version}
+        clip = CLIP(config=config, embedding_directory=embedding_directory)
     else:
-        config['target'] = 'ldm.modules.encoders.modules.FrozenCLIPEmbedder'
-    clip = CLIP(config=config, embedding_directory=embedding_directory)
-    clip.load_from_state_dict(clip_data)
+        clip_data = utils.load_torch_file(ckpt_path)
+        config = {}
+        if "text_model.encoder.layers.22.mlp.fc1.weight" in clip_data:
+            config['target'] = 'ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder'
+        else:
+            config['target'] = 'ldm.modules.encoders.modules.FrozenCLIPEmbedder'
+        clip = CLIP(config=config, embedding_directory=embedding_directory)
+        clip.load_from_state_dict(clip_data)
     return clip
 
 def load_checkpoint(config_path, ckpt_path, output_vae=True, output_clip=True, embedding_directory=None):
diff --git a/nodes.py b/nodes.py
index ffbba9f94..b1af0871b 100644
--- a/nodes.py
+++ b/nodes.py
@@ -373,6 +373,20 @@ class CLIPLoader:
         clip = comfy.sd.load_clip(ckpt_path=clip_path, embedding_directory=folder_paths.get_folder_paths("embeddings"))
         return (clip,)
 
+class CLIPVersionLoader:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "clip_version": (["openai/clip-vit-large-patch14"], ),
+                              }}
+    RETURN_TYPES = ("CLIP",)
+    FUNCTION = "load_clip"
+
+    CATEGORY = "loaders"
+
+    def load_clip(self, clip_version):
+        clip = comfy.sd.load_clip(ckpt_path=None, version=clip_version, embedding_directory=folder_paths.get_folder_paths("embeddings"))
+        return (clip,)
+
 class CLIPVisionLoader:
     @classmethod
     def INPUT_TYPES(s):
@@ -1065,6 +1079,7 @@ NODE_CLASS_MAPPINGS = {
     "LatentCrop": LatentCrop,
     "LoraLoader": LoraLoader,
     "CLIPLoader": CLIPLoader,
+    "CLIPVersionLoader": CLIPVersionLoader,
     "CLIPVisionEncode": CLIPVisionEncode,
     "StyleModelApply": StyleModelApply,
     "unCLIPConditioning": unCLIPConditioning,
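
Why the new .pth/.pt branch in autoencoder.py skips the ["state_dict"] lookup: those
files are treated as raw state dicts, while the fallback branch expects a Lightning-style
checkpoint that nests the weights under a "state_dict" key. A minimal sketch of the two
file shapes (filenames hypothetical, not part of the diff):

import torch
import torch.nn as nn

model = nn.Linear(4, 4)

# Raw state dict -- what the new .pth/.pt branch expects:
torch.save(model.state_dict(), "vae_raw.pth")
sd = torch.load("vae_raw.pth", map_location="cpu")

# Lightning-style checkpoint -- what the existing fallback branch expects:
torch.save({"state_dict": model.state_dict()}, "vae_full.ckpt")
sd = torch.load("vae_full.ckpt", map_location="cpu")["state_dict"]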
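
A minimal usage sketch for the reworked comfy.sd.load_clip, assuming the diff is applied
inside a ComfyUI checkout where comfy.sd and folder_paths import cleanly; the checkpoint
path in the second call is hypothetical:

import comfy.sd
import folder_paths

embeddings = folder_paths.get_folder_paths("embeddings")

# New path: build CLIP from the whitelisted Hugging Face model id;
# ckpt_path is ignored when version is given.
clip = comfy.sd.load_clip(ckpt_path=None,
                          version="openai/clip-vit-large-patch14",
                          embedding_directory=embeddings)

# Old path, unchanged: detect the architecture from a local checkpoint's keys.
clip = comfy.sd.load_clip(ckpt_path="models/clip/sd1-clip.safetensors",
                          embedding_directory=embeddings)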
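
And the new node exercised directly, bypassing the graph executor; in normal use ComfyUI
instantiates it through NODE_CLASS_MAPPINGS. A sketch, assuming the top-level nodes
module is importable:

from nodes import CLIPVersionLoader

loader = CLIPVersionLoader()
# load_clip returns a 1-tuple, matching RETURN_TYPES = ("CLIP",).
(clip,) = loader.load_clip(clip_version="openai/clip-vit-large-patch14")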