Fix torch_dtype issues and add missing known-model download support to DualCLIPLoader

2026-03-03 08:17:33 +08:00 · 2024-08-20 23:00:12 -07:00 · 2024-08-20 23:00:12 -07:00 · 3e54f9da36
commit 3e54f9da36
parent 540c43fae7
7 changed files with 278 additions and 26 deletions
--- a/comfy/model_downloader.py
+++ b/comfy/model_downloader.py
@ -424,10 +424,10 @@ KNOWN_UNET_MODELS: Final[KnownDownloadables] = KnownDownloadables([

 KNOWN_CLIP_MODELS: Final[KnownDownloadables] = KnownDownloadables([
    # todo: is this correct?
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/t5xxl_fp16.safetensors", save_with_filename="t5xxl_fp16.safetensors"),
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/t5xxl_fp8_e4m3fn.safetensors", save_with_filename="t5xxl_fp8_e4m3fn.safetensors"),
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_g.safetensors", save_with_filename="clip_g.safetensors"),
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_l.safetensors", save_with_filename="clip_l.safetensors"),
+    HuggingFile("comfyanonymous/flux_text_encoders", "t5xxl_fp16.safetensors"),
+    HuggingFile("comfyanonymous/flux_text_encoders", "t5xxl_fp8_e4m3fn.safetensors"),
+    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_g.safetensors"),
+    HuggingFile("comfyanonymous/flux_text_encoders", "clip_l.safetensors", save_with_filename="clip_l.safetensors"),
    # uses names from https://comfyanonymous.github.io/ComfyUI_examples/audio/
    HuggingFile("google-t5/t5-base", "model.safetensors", save_with_filename="t5_base.safetensors"),
 ], folder_name="clip")
--- a/comfy/nodes/base_nodes.py
+++ b/comfy/nodes/base_nodes.py
@ -643,7 +643,7 @@ class LoraLoaderModelOnly(LoraLoader):
    @classmethod
    def INPUT_TYPES(s):
        return {"required": { "model": ("MODEL",),
-                              "lora_name": (folder_paths.get_filename_list("loras"), ),
+                              "lora_name": (get_filename_list_with_downloadable("loras"), ),
                              "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01}),
                              }}
    RETURN_TYPES = ("MODEL",)
@ -901,8 +901,8 @@ class CLIPLoader:
 class DualCLIPLoader:
    @classmethod
    def INPUT_TYPES(s):
-        return {"required": { "clip_name1": (folder_paths.get_filename_list("clip"),), "clip_name2": (
-        folder_paths.get_filename_list("clip"),),
+        return {"required": { "clip_name1": (get_filename_list_with_downloadable("clip"),), "clip_name2": (
+            get_filename_list_with_downloadable("clip"),),
                              "type": (["sdxl", "sd3", "flux"], ),
                             }}
    RETURN_TYPES = ("CLIP",)
@ -911,8 +911,8 @@ class DualCLIPLoader:
    CATEGORY = "advanced/loaders"

    def load_clip(self, clip_name1, clip_name2, type):
-        clip_path1 = folder_paths.get_full_path("clip", clip_name1)
-        clip_path2 = folder_paths.get_full_path("clip", clip_name2)
+        clip_path1 = get_or_download("clip", clip_name1)
+        clip_path2 = get_or_download("clip", clip_name2)
        if type == "sdxl":
            clip_type = sd.CLIPType.STABLE_DIFFUSION
        elif type == "sd3":
@ -958,7 +958,7 @@ class CLIPVisionEncode:
 class StyleModelLoader:
    @classmethod
    def INPUT_TYPES(s):
-        return {"required": { "style_model_name": (folder_paths.get_filename_list("style_models"),)}}
+        return {"required": { "style_model_name": (get_filename_list_with_downloadable("style_models"),)}}

    RETURN_TYPES = ("STYLE_MODEL",)
    FUNCTION = "load_style_model"
@ -966,7 +966,7 @@ class StyleModelLoader:
    CATEGORY = "loaders"

    def load_style_model(self, style_model_name):
-        style_model_path = folder_paths.get_full_path("style_models", style_model_name)
+        style_model_path = get_or_download("style_models", style_model_name)
        style_model = sd.load_style_model(style_model_path)
        return (style_model,)

--- a/comfy_extras/nodes/nodes_language.py
+++ b/comfy_extras/nodes/nodes_language.py
@ -294,6 +294,8 @@ class TransformersLoader(CustomNode):
                        raise exc_info
                    else:
                        logging.warning(f"tried to import transformers model {ckpt_name} but got exception when trying additional import args {props}", exc_info=exc_info)
+                finally:
+                    torch.set_default_dtype(torch.float32)

            for i, props in enumerate(kwargs_to_try):
                try:
@ -311,6 +313,8 @@ class TransformersLoader(CustomNode):
                except Exception as exc_info:
                    if i == len(kwargs_to_try) - 1:
                        raise exc_info
+                finally:
+                    torch.set_default_dtype(torch.float32)

        if model_management.xformers_enabled() and hasattr(model, "enable_xformers_memory_efficient_attention"):
            model.enable_xformers_memory_efficient_attention()
--- a/comfy_extras/nodes/nodes_open_api.py
+++ b/comfy_extras/nodes/nodes_open_api.py
@ -562,7 +562,7 @@ class SaveImagesResponse(CustomNode):

        exif_inst: ExifContainer
        for batch_number, (image, uri, metadata_uri, local_uri, exif_inst) in enumerate(zip(images, uris, metadata_uris, local_uris, exif)):
-            image_as_numpy_array: np.ndarray = 255. * image.cpu().numpy()
+            image_as_numpy_array: np.ndarray = 255. * image.float().cpu().numpy()
            image_as_numpy_array = np.ascontiguousarray(np.clip(image_as_numpy_array, 0, 255).astype(np.uint8))
            image_as_pil: PIL.Image = Image.fromarray(image_as_numpy_array)

--- a/comfy_extras/nodes/nodes_post_processing.py
+++ b/comfy_extras/nodes/nodes_post_processing.py
@ -161,7 +161,7 @@ class Quantize:
        result.add_(tiled_matrix[:result.shape[0],:result.shape[1]]).clamp_(0, 255)
        result = result.to(dtype=torch.uint8)

-        im = Image.fromarray(result.cpu().numpy())
+        im = Image.fromarray(result.float().cpu().numpy())
        im = im.quantize(palette=pal_im, dither=Image.Dither.NONE)
        return im

--- a/tests/inference/workflows/flux-0.json
+++ b/tests/inference/workflows/flux-0.json
@ -0,0 +1,175 @@
+{
+  "1": {
+    "inputs": {
+      "noise": [
+        "2",
+        0
+      ],
+      "guider": [
+        "3",
+        0
+      ],
+      "sampler": [
+        "6",
+        0
+      ],
+      "sigmas": [
+        "7",
+        0
+      ],
+      "latent_image": [
+        "9",
+        0
+      ]
+    },
+    "class_type": "SamplerCustomAdvanced",
+    "_meta": {
+      "title": "SamplerCustomAdvanced"
+    }
+  },
+  "2": {
+    "inputs": {
+      "noise_seed": 1038979118098399
+    },
+    "class_type": "RandomNoise",
+    "_meta": {
+      "title": "RandomNoise"
+    }
+  },
+  "3": {
+    "inputs": {
+      "model": [
+        "12",
+        0
+      ],
+      "conditioning": [
+        "4",
+        0
+      ]
+    },
+    "class_type": "BasicGuider",
+    "_meta": {
+      "title": "BasicGuider"
+    }
+  },
+  "4": {
+    "inputs": {
+      "guidance": 3,
+      "conditioning": [
+        "13",
+        0
+      ]
+    },
+    "class_type": "FluxGuidance",
+    "_meta": {
+      "title": "FluxGuidance"
+    }
+  },
+  "6": {
+    "inputs": {
+      "sampler_name": "euler"
+    },
+    "class_type": "KSamplerSelect",
+    "_meta": {
+      "title": "KSamplerSelect"
+    }
+  },
+  "7": {
+    "inputs": {
+      "scheduler": "ddim_uniform",
+      "steps": 1,
+      "denoise": 1,
+      "model": [
+        "12",
+        0
+      ]
+    },
+    "class_type": "BasicScheduler",
+    "_meta": {
+      "title": "BasicScheduler"
+    }
+  },
+  "9": {
+    "inputs": {
+      "width": 1344,
+      "height": 768,
+      "batch_size": 1
+    },
+    "class_type": "EmptySD3LatentImage",
+    "_meta": {
+      "title": "EmptySD3LatentImage"
+    }
+  },
+  "10": {
+    "inputs": {
+      "samples": [
+        "1",
+        0
+      ],
+      "vae": [
+        "11",
+        0
+      ]
+    },
+    "class_type": "VAEDecode",
+    "_meta": {
+      "title": "VAE Decode"
+    }
+  },
+  "11": {
+    "inputs": {
+      "vae_name": "ae.safetensors"
+    },
+    "class_type": "VAELoader",
+    "_meta": {
+      "title": "Load VAE"
+    }
+  },
+  "12": {
+    "inputs": {
+      "unet_name": "flux1-dev.safetensors",
+      "weight_dtype": "default"
+    },
+    "class_type": "UNETLoader",
+    "_meta": {
+      "title": "Load Diffusion Model"
+    }
+  },
+  "13": {
+    "inputs": {
+      "text": "A plastic Barbie doll is walking along Sunset Boulevard. Here is a list of essential elements of it:\n\nArt Deco and Streamline Moderne buildings from the 1920s and 1930s.\nThe Sunset Tower Hotel: A striking Art Deco landmark with a pale pink facade and stepped design.\nChateau Marmont: A Gothic-style castle-like hotel with white stucco walls and red tile roof.\nNumerous billboards and large advertisements, often for upcoming films or TV shows.\nPalm trees lining portions of the street",
+      "clip": [
+        "15",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "CLIP Text Encode (Prompt)"
+    }
+  },
+  "15": {
+    "inputs": {
+      "clip_name1": "clip_l.safetensors",
+      "clip_name2": "t5xxl_fp16.safetensors",
+      "type": "flux"
+    },
+    "class_type": "DualCLIPLoader",
+    "_meta": {
+      "title": "DualCLIPLoader"
+    }
+  },
+  "16": {
+    "inputs": {
+      "filename_prefix": "ComfyUI",
+      "images": [
+        "10",
+        0
+      ]
+    },
+    "class_type": "SaveImage",
+    "_meta": {
+      "title": "Save Image"
+    }
+  }
+}
--- a/tests/inference/workflows/sdxl-union-controlnet-1.json
+++ b/tests/inference/workflows/sdxl-union-controlnet-1.json
@ -160,19 +160,6 @@
      "title": "VAE Decode"
    }
  },
-  "15": {
-    "inputs": {
-      "filename_prefix": "ComfyUI",
-      "images": [
-        "14",
-        0
-      ]
-    },
-    "class_type": "SaveImage",
-    "_meta": {
-      "title": "Save Image"
-    }
-  },
  "17": {
    "inputs": {
      "value": "https://upload.wikimedia.org/wikipedia/commons/5/5a/Gibson_Girl.png",
@ -298,5 +285,91 @@
    "_meta": {
      "title": "SetUnionControlNetType"
    }
+  },
+  "29": {
+    "inputs": {
+      "pil_save_format": "png",
+      "name": "",
+      "title": "",
+      "description": "",
+      "__required": true,
+      "images": [
+        "14",
+        0
+      ],
+      "uris": [
+        "30",
+        0
+      ],
+      "exif": [
+        "32",
+        0
+      ]
+    },
+    "class_type": "SaveImagesResponse",
+    "_meta": {
+      "title": "SaveImagesResponse"
+    }
+  },
+  "30": {
+    "inputs": {
+      "prefix": "ComfyUI_",
+      "suffix": "_.png",
+      "images": [
+        "14",
+        0
+      ]
+    },
+    "class_type": "LegacyOutputURIs",
+    "_meta": {
+      "title": "LegacyOutputURIs"
+    }
+  },
+  "31": {
+    "inputs": {
+      "CreationDate": "",
+      "Title": "Test Title",
+      "Description": "Test Description",
+      "Artist": "Artist",
+      "ImageNumber": "",
+      "Rating": "",
+      "UserComment": "Test Comment",
+      "images": [
+        "14",
+        0
+      ]
+    },
+    "class_type": "ImageExif",
+    "_meta": {
+      "title": "ImageExif"
+    }
+  },
+  "32": {
+    "inputs": {
+      "value0": [
+        "31",
+        0
+      ],
+      "value1": [
+        "33",
+        0
+      ]
+    },
+    "class_type": "ImageExifMerge",
+    "_meta": {
+      "title": "ImageExifMerge"
+    }
+  },
+  "33": {
+    "inputs": {
+      "images": [
+        "14",
+        0
+      ]
+    },
+    "class_type": "ImageExifCreationDateAndBatchNumber",
+    "_meta": {
+      "title": "ImageExifCreationDateAndBatchNumber"
+    }
  }
 }