From 3e54f9da364d97119af93385a590f7b5e380232f Mon Sep 17 00:00:00 2001
From: doctorpangloss <@hiddenswitch.com>
Date: Tue, 20 Aug 2024 23:00:12 -0700
Subject: [PATCH] Fix torch_dtype issues, missing DualCLIPLoader known model
 support

---
 comfy/model_downloader.py                   |   8 +-
 comfy/nodes/base_nodes.py                   |  14 +-
 comfy_extras/nodes/nodes_language.py        |   4 +
 comfy_extras/nodes/nodes_open_api.py        |   2 +-
 comfy_extras/nodes/nodes_post_processing.py |   2 +-
 tests/inference/workflows/flux-0.json       | 175 ++++++++++++++++++
 .../workflows/sdxl-union-controlnet-1.json  |  99 ++++++++--
 7 files changed, 278 insertions(+), 26 deletions(-)
 create mode 100644 tests/inference/workflows/flux-0.json

diff --git a/comfy/model_downloader.py b/comfy/model_downloader.py
index c20c83246..2ab57ab31 100644
--- a/comfy/model_downloader.py
+++ b/comfy/model_downloader.py
@@ -424,10 +424,10 @@ KNOWN_UNET_MODELS: Final[KnownDownloadables] = KnownDownloadables([
 
 KNOWN_CLIP_MODELS: Final[KnownDownloadables] = KnownDownloadables([
     # todo: is this correct?
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/t5xxl_fp16.safetensors", save_with_filename="t5xxl_fp16.safetensors"),
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/t5xxl_fp8_e4m3fn.safetensors", save_with_filename="t5xxl_fp8_e4m3fn.safetensors"),
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_g.safetensors", save_with_filename="clip_g.safetensors"),
-    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_l.safetensors", save_with_filename="clip_l.safetensors"),
+    HuggingFile("comfyanonymous/flux_text_encoders", "t5xxl_fp16.safetensors"),
+    HuggingFile("comfyanonymous/flux_text_encoders", "t5xxl_fp8_e4m3fn.safetensors"),
+    HuggingFile("stabilityai/stable-diffusion-3-medium", "text_encoders/clip_g.safetensors"),
+    HuggingFile("comfyanonymous/flux_text_encoders", "clip_l.safetensors", save_with_filename="clip_l.safetensors"),
     # uses names from https://comfyanonymous.github.io/ComfyUI_examples/audio/
     HuggingFile("google-t5/t5-base", "model.safetensors", save_with_filename="t5_base.safetensors"),
 ], folder_name="clip")
diff --git a/comfy/nodes/base_nodes.py b/comfy/nodes/base_nodes.py
index 0e7a67feb..da2209133 100644
--- a/comfy/nodes/base_nodes.py
+++ b/comfy/nodes/base_nodes.py
@@ -643,7 +643,7 @@ class LoraLoaderModelOnly(LoraLoader):
     @classmethod
     def INPUT_TYPES(s):
         return {"required": { "model": ("MODEL",),
-                              "lora_name": (folder_paths.get_filename_list("loras"), ),
+                              "lora_name": (get_filename_list_with_downloadable("loras"), ),
                               "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01}),
                               }}
     RETURN_TYPES = ("MODEL",)
@@ -901,8 +901,8 @@ class CLIPLoader:
 class DualCLIPLoader:
     @classmethod
     def INPUT_TYPES(s):
-        return {"required": { "clip_name1": (folder_paths.get_filename_list("clip"),), "clip_name2": (
-            folder_paths.get_filename_list("clip"),),
+        return {"required": { "clip_name1": (get_filename_list_with_downloadable("clip"),), "clip_name2": (
+            get_filename_list_with_downloadable("clip"),),
                               "type": (["sdxl", "sd3", "flux"], ),
                               }}
     RETURN_TYPES = ("CLIP",)
@@ -911,8 +911,8 @@ class DualCLIPLoader:
     CATEGORY = "advanced/loaders"
 
     def load_clip(self, clip_name1, clip_name2, type):
-        clip_path1 = folder_paths.get_full_path("clip", clip_name1)
-        clip_path2 = folder_paths.get_full_path("clip", clip_name2)
+        clip_path1 = get_or_download("clip", clip_name1)
+        clip_path2 = get_or_download("clip", clip_name2)
         if type == "sdxl":
             clip_type = sd.CLIPType.STABLE_DIFFUSION
         elif type == "sd3":
@@ -958,7 +958,7 @@ class CLIPVisionEncode:
 class StyleModelLoader:
     @classmethod
     def INPUT_TYPES(s):
-        return {"required": { "style_model_name": (folder_paths.get_filename_list("style_models"),)}}
+        return {"required": { "style_model_name": (get_filename_list_with_downloadable("style_models"),)}}
 
     RETURN_TYPES = ("STYLE_MODEL",)
     FUNCTION = "load_style_model"
@@ -966,7 +966,7 @@ class StyleModelLoader:
     CATEGORY = "loaders"
 
     def load_style_model(self, style_model_name):
-        style_model_path = folder_paths.get_full_path("style_models", style_model_name)
+        style_model_path = get_or_download("style_models", style_model_name)
         style_model = sd.load_style_model(style_model_path)
         return (style_model,)
 
diff --git a/comfy_extras/nodes/nodes_language.py b/comfy_extras/nodes/nodes_language.py
index 39bc8dab5..0bf00b6f0 100644
--- a/comfy_extras/nodes/nodes_language.py
+++ b/comfy_extras/nodes/nodes_language.py
@@ -294,6 +294,8 @@ class TransformersLoader(CustomNode):
                     raise exc_info
                 else:
                     logging.warning(f"tried to import transformers model {ckpt_name} but got exception when trying additional import args {props}", exc_info=exc_info)
+            finally:
+                torch.set_default_dtype(torch.float32)
 
         for i, props in enumerate(kwargs_to_try):
             try:
@@ -311,6 +313,8 @@ class TransformersLoader(CustomNode):
             except Exception as exc_info:
                 if i == len(kwargs_to_try) - 1:
                     raise exc_info
+            finally:
+                torch.set_default_dtype(torch.float32)
 
         if model_management.xformers_enabled() and hasattr(model, "enable_xformers_memory_efficient_attention"):
             model.enable_xformers_memory_efficient_attention()
diff --git a/comfy_extras/nodes/nodes_open_api.py b/comfy_extras/nodes/nodes_open_api.py
index 7d71b8648..9b7b7d711 100644
--- a/comfy_extras/nodes/nodes_open_api.py
+++ b/comfy_extras/nodes/nodes_open_api.py
@@ -562,7 +562,7 @@ class SaveImagesResponse(CustomNode):
 
         exif_inst: ExifContainer
         for batch_number, (image, uri, metadata_uri, local_uri, exif_inst) in enumerate(zip(images, uris, metadata_uris, local_uris, exif)):
-            image_as_numpy_array: np.ndarray = 255. * image.cpu().numpy()
+            image_as_numpy_array: np.ndarray = 255. * image.float().cpu().numpy()
             image_as_numpy_array = np.ascontiguousarray(np.clip(image_as_numpy_array, 0, 255).astype(np.uint8))
             image_as_pil: PIL.Image = Image.fromarray(image_as_numpy_array)
 
diff --git a/comfy_extras/nodes/nodes_post_processing.py b/comfy_extras/nodes/nodes_post_processing.py
index 19b04e747..215ade3d2 100644
--- a/comfy_extras/nodes/nodes_post_processing.py
+++ b/comfy_extras/nodes/nodes_post_processing.py
@@ -161,7 +161,7 @@ class Quantize:
         result.add_(tiled_matrix[:result.shape[0],:result.shape[1]]).clamp_(0, 255)
         result = result.to(dtype=torch.uint8)
 
-        im = Image.fromarray(result.cpu().numpy())
+        im = Image.fromarray(result.float().cpu().numpy())
         im = im.quantize(palette=pal_im, dither=Image.Dither.NONE)
         return im
 
diff --git a/tests/inference/workflows/flux-0.json b/tests/inference/workflows/flux-0.json
new file mode 100644
index 000000000..494e5294f
--- /dev/null
+++ b/tests/inference/workflows/flux-0.json
@@ -0,0 +1,175 @@
+{
+  "1": {
+    "inputs": {
+      "noise": [
+        "2",
+        0
+      ],
+      "guider": [
+        "3",
+        0
+      ],
+      "sampler": [
+        "6",
+        0
+      ],
+      "sigmas": [
+        "7",
+        0
+      ],
+      "latent_image": [
+        "9",
+        0
+      ]
+    },
+    "class_type": "SamplerCustomAdvanced",
+    "_meta": {
+      "title": "SamplerCustomAdvanced"
+    }
+  },
+  "2": {
+    "inputs": {
+      "noise_seed": 1038979118098399
+    },
+    "class_type": "RandomNoise",
+    "_meta": {
+      "title": "RandomNoise"
+    }
+  },
+  "3": {
+    "inputs": {
+      "model": [
+        "12",
+        0
+      ],
+      "conditioning": [
+        "4",
+        0
+      ]
+    },
+    "class_type": "BasicGuider",
+    "_meta": {
+      "title": "BasicGuider"
+    }
+  },
+  "4": {
+    "inputs": {
+      "guidance": 3,
+      "conditioning": [
+        "13",
+        0
+      ]
+    },
+    "class_type": "FluxGuidance",
+    "_meta": {
+      "title": "FluxGuidance"
+    }
+  },
+  "6": {
+    "inputs": {
+      "sampler_name": "euler"
+    },
+    "class_type": "KSamplerSelect",
+    "_meta": {
+      "title": "KSamplerSelect"
+    }
+  },
+  "7": {
+    "inputs": {
+      "scheduler": "ddim_uniform",
+      "steps": 1,
+      "denoise": 1,
+      "model": [
+        "12",
+        0
+      ]
+    },
+    "class_type": "BasicScheduler",
+    "_meta": {
+      "title": "BasicScheduler"
+    }
+  },
+  "9": {
+    "inputs": {
+      "width": 1344,
+      "height": 768,
+      "batch_size": 1
+    },
+    "class_type": "EmptySD3LatentImage",
+    "_meta": {
+      "title": "EmptySD3LatentImage"
+    }
+  },
+  "10": {
+    "inputs": {
+      "samples": [
+        "1",
+        0
+      ],
+      "vae": [
+        "11",
+        0
+      ]
+    },
+    "class_type": "VAEDecode",
+    "_meta": {
+      "title": "VAE Decode"
+    }
+  },
+  "11": {
+    "inputs": {
+      "vae_name": "ae.safetensors"
+    },
+    "class_type": "VAELoader",
+    "_meta": {
+      "title": "Load VAE"
+    }
+  },
+  "12": {
+    "inputs": {
+      "unet_name": "flux1-dev.safetensors",
+      "weight_dtype": "default"
+    },
+    "class_type": "UNETLoader",
+    "_meta": {
+      "title": "Load Diffusion Model"
+    }
+  },
+  "13": {
+    "inputs": {
+      "text": "A plastic Barbie doll is walking along Sunset Boulevard. Here is a list of essential elements of it:\n\nArt Deco and Streamline Moderne buildings from the 1920s and 1930s.\nThe Sunset Tower Hotel: A striking Art Deco landmark with a pale pink facade and stepped design.\nChateau Marmont: A Gothic-style castle-like hotel with white stucco walls and red tile roof.\nNumerous billboards and large advertisements, often for upcoming films or TV shows.\nPalm trees lining portions of the street",
+      "clip": [
+        "15",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "CLIP Text Encode (Prompt)"
+    }
+  },
+  "15": {
+    "inputs": {
+      "clip_name1": "clip_l.safetensors",
+      "clip_name2": "t5xxl_fp16.safetensors",
+      "type": "flux"
+    },
+    "class_type": "DualCLIPLoader",
+    "_meta": {
+      "title": "DualCLIPLoader"
+    }
+  },
+  "16": {
+    "inputs": {
+      "filename_prefix": "ComfyUI",
+      "images": [
+        "10",
+        0
+      ]
+    },
+    "class_type": "SaveImage",
+    "_meta": {
+      "title": "Save Image"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/inference/workflows/sdxl-union-controlnet-1.json b/tests/inference/workflows/sdxl-union-controlnet-1.json
index 93db48c1b..3d1705862 100644
--- a/tests/inference/workflows/sdxl-union-controlnet-1.json
+++ b/tests/inference/workflows/sdxl-union-controlnet-1.json
@@ -160,19 +160,6 @@
       "title": "VAE Decode"
     }
   },
-  "15": {
-    "inputs": {
-      "filename_prefix": "ComfyUI",
-      "images": [
-        "14",
-        0
-      ]
-    },
-    "class_type": "SaveImage",
-    "_meta": {
-      "title": "Save Image"
-    }
-  },
   "17": {
     "inputs": {
       "value": "https://upload.wikimedia.org/wikipedia/commons/5/5a/Gibson_Girl.png",
@@ -298,5 +285,91 @@
     "_meta": {
       "title": "SetUnionControlNetType"
     }
+  },
+  "29": {
+    "inputs": {
+      "pil_save_format": "png",
+      "name": "",
+      "title": "",
+      "description": "",
+      "__required": true,
+      "images": [
+        "14",
+        0
+      ],
+      "uris": [
+        "30",
+        0
+      ],
+      "exif": [
+        "32",
+        0
+      ]
+    },
+    "class_type": "SaveImagesResponse",
+    "_meta": {
+      "title": "SaveImagesResponse"
+    }
+  },
+  "30": {
+    "inputs": {
+      "prefix": "ComfyUI_",
+      "suffix": "_.png",
+      "images": [
+        "14",
+        0
+      ]
+    },
+    "class_type": "LegacyOutputURIs",
+    "_meta": {
+      "title": "LegacyOutputURIs"
+    }
+  },
+  "31": {
+    "inputs": {
+      "CreationDate": "",
+      "Title": "Test Title",
+      "Description": "Test Description",
+      "Artist": "Artist",
+      "ImageNumber": "",
+      "Rating": "",
+      "UserComment": "Test Comment",
+      "images": [
+        "14",
+        0
+      ]
+    },
+    "class_type": "ImageExif",
+    "_meta": {
+      "title": "ImageExif"
+    }
+  },
+  "32": {
+    "inputs": {
+      "value0": [
+        "31",
+        0
+      ],
+      "value1": [
+        "33",
+        0
+      ]
+    },
+    "class_type": "ImageExifMerge",
+    "_meta": {
+      "title": "ImageExifMerge"
+    }
+  },
+  "33": {
+    "inputs": {
+      "images": [
+        "14",
+        0
+      ]
+    },
+    "class_type": "ImageExifCreationDateAndBatchNumber",
+    "_meta": {
+      "title": "ImageExifCreationDateAndBatchNumber"
+    }
   }
 }
\ No newline at end of file