From dfa36e68552c2d115bbcbec5f8a45eb36fbd5814 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Mar 2025 13:31:40 -0500 Subject: [PATCH 01/77] Fix some things breaking when embeddings fail to apply. --- comfy/sd1_clip.py | 1 + 1 file changed, 1 insertion(+) diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index 22adcbac9..be21ec18d 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -228,6 +228,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): if pad_extra > 0: padd_embed = self.transformer.get_input_embeddings()(torch.tensor([[self.special_tokens["pad"]] * pad_extra], device=device, dtype=torch.long), out_dtype=torch.float32) tokens_embed = torch.cat([tokens_embed, padd_embed], dim=1) + attention_mask = attention_mask + [0] * pad_extra embeds_out.append(tokens_embed) attention_masks.append(attention_mask) From a13125840c47c2342fa80aec8fdaee8626dff135 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 6 Mar 2025 13:53:48 -0500 Subject: [PATCH 02/77] ComfyUI version v0.3.24 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index ac257abf8..a68a65323 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.23" +__version__ = "0.3.24" diff --git a/pyproject.toml b/pyproject.toml index 824887a94..4c11c71bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.23" +version = "0.3.24" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From 1650cda030daa32c9d12a5d92c02663bd076b071 Mon Sep 17 00:00:00 2001 From: "Dr.Lt.Data" <128333288+ltdrdata@users.noreply.github.com> Date: Fri, 7 Mar 2025 05:23:23 +0900 Subject: [PATCH 03/77] Fixed: Incorrect guide message for missing frontend. (#7105) `{sys.executable} -m pip` -> `{sys.executable} -s -m pip` https://github.com/comfyanonymous/ComfyUI/pull/7047#issuecomment-2697876793 --- app/frontend_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index e4d589209..9feb1e965 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -23,7 +23,7 @@ try: except ImportError: # TODO: Remove the check after roll out of 0.3.16 req_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt')) - logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. Please install the updated requirements.txt file by running:\n{sys.executable} -m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem\n********** ERROR **********\n") + logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. 
Please install the updated requirements.txt file by running:\n{sys.executable} -s -m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem\n********** ERROR **********\n") exit(-1) From e62d72e8caaac32474a30096f426bc16b2fce679 Mon Sep 17 00:00:00 2001 From: JettHu <35261585+JettHu@users.noreply.github.com> Date: Fri, 7 Mar 2025 04:24:04 +0800 Subject: [PATCH 04/77] Typo in node_typing.py (#7092) --- comfy/comfy_types/node_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index fe130567d..4967de716 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -114,7 +114,7 @@ class InputTypeOptions(TypedDict): # default: bool label_on: str """The label to use in the UI when the bool is True (``BOOLEAN``)""" - label_on: str + label_off: str """The label to use in the UI when the bool is False (``BOOLEAN``)""" # class InputTypeString(InputTypeOptions): # default: str From e1474150de36b5b6477ce42c2a2801577ad42fff Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 04:37:58 -0500 Subject: [PATCH 05/77] Support fp8_scaled diffusion models that don't use fp8 matrix mult. --- comfy/model_base.py | 2 +- comfy/model_detection.py | 4 ++++ comfy/ops.py | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/comfy/model_base.py b/comfy/model_base.py index a304c58bd..2fa1ee911 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -108,7 +108,7 @@ class BaseModel(torch.nn.Module): if not unet_config.get("disable_unet_model_creation", False): if model_config.custom_operations is None: - fp8 = model_config.optimizations.get("fp8", model_config.scaled_fp8 is not None) + fp8 = model_config.optimizations.get("fp8", False) operations = comfy.ops.pick_operations(unet_config.get("dtype", None), self.manual_cast_dtype, fp8_optimizations=fp8, scaled_fp8=model_config.scaled_fp8) else: operations = model_config.custom_operations diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 1aef549f4..403da5855 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -471,6 +471,10 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_base_if_no_match=Fal model_config.scaled_fp8 = scaled_fp8_weight.dtype if model_config.scaled_fp8 == torch.float32: model_config.scaled_fp8 = torch.float8_e4m3fn + if scaled_fp8_weight.nelement() == 2: + model_config.optimizations["fp8"] = False + else: + model_config.optimizations["fp8"] = True return model_config diff --git a/comfy/ops.py b/comfy/ops.py index 358c6ec60..3303c6fcd 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -17,6 +17,7 @@ """ import torch +import logging import comfy.model_management from comfy.cli_args import args, PerformanceFeature import comfy.float @@ -308,6 +309,7 @@ class fp8_ops(manual_cast): return torch.nn.functional.linear(input, weight, bias) def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None): + logging.info("Using scaled fp8: fp8 matrix mult: {}, scale input: {}".format(fp8_matrix_mult, scale_input)) class scaled_fp8_op(manual_cast): class Linear(manual_cast.Linear): def __init__(self, *args, **kwargs): @@ -358,7 +360,7 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None def 
pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None): fp8_compute = comfy.model_management.supports_fp8_compute(load_device) if scaled_fp8 is not None: - return scaled_fp8_ops(fp8_matrix_mult=fp8_compute, scale_input=True, override_dtype=scaled_fp8) + return scaled_fp8_ops(fp8_matrix_mult=fp8_compute and fp8_optimizations, scale_input=True, override_dtype=scaled_fp8) if ( fp8_compute and From 70e15fd743e85554f907cef164703fce1715cd7d Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 04:49:20 -0500 Subject: [PATCH 06/77] No need for scale_input when fp8 matrix mult is disabled. --- comfy/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/ops.py b/comfy/ops.py index 3303c6fcd..ced461011 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -360,7 +360,7 @@ def scaled_fp8_ops(fp8_matrix_mult=False, scale_input=False, override_dtype=None def pick_operations(weight_dtype, compute_dtype, load_device=None, disable_fast_fp8=False, fp8_optimizations=False, scaled_fp8=None): fp8_compute = comfy.model_management.supports_fp8_compute(load_device) if scaled_fp8 is not None: - return scaled_fp8_ops(fp8_matrix_mult=fp8_compute and fp8_optimizations, scale_input=True, override_dtype=scaled_fp8) + return scaled_fp8_ops(fp8_matrix_mult=fp8_compute and fp8_optimizations, scale_input=fp8_optimizations, override_dtype=scaled_fp8) if ( fp8_compute and From 11b1f27cb17938bbb2f723f8d71ac78bb9f2e40f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 04:52:36 -0500 Subject: [PATCH 07/77] Set WAN default compute dtype to fp16. --- comfy/supported_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index 7157a15f2..b4d7bfe20 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -931,7 +931,7 @@ class WAN21_T2V(supported_models_base.BASE): memory_usage_factor = 1.0 - supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + supported_inference_dtypes = [torch.float16, torch.bfloat16, torch.float32] vae_key_prefix = ["vae."] text_encoder_key_prefix = ["text_encoders."] From 4ab1875283ce985e77be7ffb4b499db11d937f73 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 07:45:40 -0500 Subject: [PATCH 08/77] Add .bat file to nightly package to run with fp16 accumulation. --- .../run_nvidia_gpu_fast_fp16_accumulation.bat | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat diff --git a/.ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat b/.ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat new file mode 100644 index 000000000..38f06ecb2 --- /dev/null +++ b/.ci/windows_nightly_base_files/run_nvidia_gpu_fast_fp16_accumulation.bat @@ -0,0 +1,2 @@ +.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation +pause From 5dbd25096513838785143c493b94e6c518e71c0b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 07:57:59 -0500 Subject: [PATCH 09/77] Update nightly instructions in readme. 
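
For reference, a quick way to confirm which PyTorch build an environment
actually picked up after following the updated instructions (an illustrative
snippet, not part of this change):

    import torch

    print(torch.__version__)          # e.g. "2.6.0+cu126", or a ".dev" tag for nightly
    print(torch.version.cuda)         # CUDA version the wheel was built against, e.g. "12.8"
    print(torch.cuda.is_available())  # False usually means a CPU-only wheel was installed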
--- .github/workflows/windows_release_nightly_pytorch.yml | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml index f90488705..cea9aae17 100644 --- a/.github/workflows/windows_release_nightly_pytorch.yml +++ b/.github/workflows/windows_release_nightly_pytorch.yml @@ -7,7 +7,7 @@ on: description: 'cuda version' required: true type: string - default: "126" + default: "128" python_minor: description: 'python minor version' @@ -19,7 +19,7 @@ on: description: 'python patch version' required: true type: string - default: "1" + default: "2" # push: # branches: # - master diff --git a/README.md b/README.md index 9190dd493..a807ea9d6 100644 --- a/README.md +++ b/README.md @@ -215,9 +215,9 @@ Nvidia users should install stable pytorch using this command: ```pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu126``` -This is the command to install pytorch nightly instead which might have performance improvements: +This is the command to install pytorch nightly instead which supports the new blackwell 50xx series GPUs and might have performance improvements. -```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126``` +```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128``` #### Troubleshooting From d60fe0af4ae3056edb8d05c585e06c5cb36bbbed Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 08:30:01 -0500 Subject: [PATCH 10/77] Reduce size of nightly package. --- .github/workflows/windows_release_nightly_pytorch.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml index cea9aae17..49a9fd8bc 100644 --- a/.github/workflows/windows_release_nightly_pytorch.yml +++ b/.github/workflows/windows_release_nightly_pytorch.yml @@ -34,7 +34,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 + fetch-depth: 30 persist-credentials: false - uses: actions/setup-python@v5 with: @@ -56,7 +56,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + #cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable_nightly_pytorch mv python_embeded ComfyUI_windows_portable_nightly_pytorch @@ -74,7 +74,7 @@ jobs: pause" > ./update/update_comfyui_and_python_dependencies.bat cd .. - "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=8 -mfb=64 -md=32m -ms=on -mf=BCJ2 ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI_windows_portable_nightly_pytorch + "C:\Program Files\7-Zip\7z.exe" a -t7z -m0=lzma2 -mx=9 -mfb=128 -md=512m -ms=on -mf=BCJ2 ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI_windows_portable_nightly_pytorch mv ComfyUI_windows_portable_nightly_pytorch.7z ComfyUI/ComfyUI_windows_portable_nvidia_or_cpu_nightly_pytorch.7z cd ComfyUI_windows_portable_nightly_pytorch From ebbb9201637a3bfdf96399396f636d8513dc7aa4 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 14:56:09 -0500 Subject: [PATCH 11/77] Add back taesd to nightly package. 
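
The restored step below copies the TAESD preview-decoder checkpoints into the
bundled model folder. A rough Python equivalent of that cp command, shown only
to illustrate what the workflow step does:

    import glob
    import shutil

    # mirror of: cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/
    for checkpoint in glob.glob("taesd/*.pth"):
        shutil.copy(checkpoint, "ComfyUI_copy/models/vae_approx/")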
--- .github/workflows/windows_release_nightly_pytorch.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows_release_nightly_pytorch.yml b/.github/workflows/windows_release_nightly_pytorch.yml index 49a9fd8bc..24599249a 100644 --- a/.github/workflows/windows_release_nightly_pytorch.yml +++ b/.github/workflows/windows_release_nightly_pytorch.yml @@ -56,7 +56,7 @@ jobs: cd .. git clone --depth 1 https://github.com/comfyanonymous/taesd - #cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ + cp taesd/*.pth ./ComfyUI_copy/models/vae_approx/ mkdir ComfyUI_windows_portable_nightly_pytorch mv python_embeded ComfyUI_windows_portable_nightly_pytorch From 84cc9cb5287a6b0345b681174a8e85bd3ca41515 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 7 Mar 2025 19:02:13 -0500 Subject: [PATCH 12/77] Update frontend to 1.11.8 (#7119) * Update frontend to 1.11.7 * Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4ad5f3b8a..e1316ccff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.10.17 +comfyui-frontend-package==1.11.8 torch torchsde torchvision From c3d9cc4592310d22f414c93a7840b541f3a7b497 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 19:53:07 -0500 Subject: [PATCH 13/77] Print the frontend version in the log. --- app/frontend_management.py | 6 ++++++ main.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/app/frontend_management.py b/app/frontend_management.py index 9feb1e965..94293af1e 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -27,6 +27,12 @@ except ImportError: exit(-1) +try: + frontend_version = tuple(map(int, comfyui_frontend_package.__version__.split("."))) +except: + frontend_version = (0,) + pass + REQUEST_TIMEOUT = 10 # seconds diff --git a/main.py b/main.py index f6510c90a..57fa397e6 100644 --- a/main.py +++ b/main.py @@ -139,6 +139,7 @@ from server import BinaryEventTypes import nodes import comfy.model_management import comfyui_version +import app.frontend_management def cuda_malloc_warning(): @@ -295,6 +296,8 @@ def start_comfyui(asyncio_loop=None): if __name__ == "__main__": # Running directly, just start ComfyUI. logging.info("ComfyUI version: {}".format(comfyui_version.__version__)) + logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, app.frontend_management.frontend_version)))) + event_loop, _, start_all_func = start_comfyui() try: event_loop.run_until_complete(start_all_func()) From be4e760648e0234f9202b9cbe7dcfb3bd307acb9 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 7 Mar 2025 19:56:11 -0500 Subject: [PATCH 14/77] Add an image_interleave option to the Hunyuan image to video encode node. See the tooltip for what it does. --- comfy/text_encoders/hunyuan_video.py | 28 +++++++++++++++++----------- comfy_extras/nodes_hunyuan.py | 5 +++-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/comfy/text_encoders/hunyuan_video.py b/comfy/text_encoders/hunyuan_video.py index 1d814aadd..dbb259e54 100644 --- a/comfy/text_encoders/hunyuan_video.py +++ b/comfy/text_encoders/hunyuan_video.py @@ -42,7 +42,7 @@ class HunyuanVideoTokenizer: self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. 
Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>""" # 95 tokens self.llama = LLAMA3Tokenizer(embedding_directory=embedding_directory, min_length=1) - def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, image_embeds=None, **kwargs): + def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, image_embeds=None, image_interleave=1, **kwargs): out = {} out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids) @@ -56,7 +56,7 @@ class HunyuanVideoTokenizer: for i in range(len(r)): if r[i][0] == 128257: if image_embeds is not None and embed_count < image_embeds.shape[0]: - r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image"},) + r[i][1:] + r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image", "image_interleave": image_interleave},) + r[i][1:] embed_count += 1 out["llama"] = llama_text_tokens return out @@ -92,10 +92,10 @@ class HunyuanVideoClipModel(torch.nn.Module): llama_out, llama_pooled, llama_extra_out = self.llama.encode_token_weights(token_weight_pairs_llama) template_end = 0 - image_start = None - image_end = None + extra_template_end = 0 extra_sizes = 0 user_end = 9999999999999 + images = [] tok_pairs = token_weight_pairs_llama[0] for i, v in enumerate(tok_pairs): @@ -112,22 +112,28 @@ class HunyuanVideoClipModel(torch.nn.Module): else: if elem.get("original_type") == "image": elem_size = elem.get("data").shape[0] - if image_start is None: + if template_end > 0: + if user_end == -1: + extra_template_end += elem_size - 1 + else: image_start = i + extra_sizes image_end = i + elem_size + extra_sizes - extra_sizes += elem_size - 1 + images.append((image_start, image_end, elem.get("image_interleave", 1))) + extra_sizes += elem_size - 1 if llama_out.shape[1] > (template_end + 2): if tok_pairs[template_end + 1][0] == 271: template_end += 2 - llama_output = llama_out[:, template_end + extra_sizes:user_end + extra_sizes] - llama_extra_out["attention_mask"] = llama_extra_out["attention_mask"][:, template_end + extra_sizes:user_end + extra_sizes] + llama_output = llama_out[:, template_end + extra_sizes:user_end + extra_sizes + extra_template_end] + llama_extra_out["attention_mask"] = llama_extra_out["attention_mask"][:, template_end + extra_sizes:user_end + extra_sizes + extra_template_end] if llama_extra_out["attention_mask"].sum() == torch.numel(llama_extra_out["attention_mask"]): llama_extra_out.pop("attention_mask") # attention mask is useless if no masked elements - if image_start is not None: - image_output = llama_out[:, image_start: image_end] - llama_output = torch.cat([image_output[:, ::2], llama_output], dim=1) + if len(images) > 0: + out = [] + for i in images: + out.append(llama_out[:, i[0]: i[1]: i[2]]) + llama_output = torch.cat(out + [llama_output], dim=1) l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l) return llama_output, l_pooled, llama_extra_out diff --git a/comfy_extras/nodes_hunyuan.py b/comfy_extras/nodes_hunyuan.py index 4f700bbe6..56aef9b01 100644 --- a/comfy_extras/nodes_hunyuan.py +++ b/comfy_extras/nodes_hunyuan.py @@ -57,14 +57,15 @@ class TextEncodeHunyuanVideo_ImageToVideo: "clip": ("CLIP", ), "clip_vision_output": ("CLIP_VISION_OUTPUT", ), "prompt": ("STRING", {"multiline": True, 
"dynamicPrompts": True}), + "image_interleave": ("INT", {"default": 2, "min": 1, "max": 512, "tooltip": "How much the image influences things vs the text prompt. Higher number means more influence from the text prompt."}), }} RETURN_TYPES = ("CONDITIONING",) FUNCTION = "encode" CATEGORY = "advanced/conditioning" - def encode(self, clip, clip_vision_output, prompt): - tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected) + def encode(self, clip, clip_vision_output, prompt, image_interleave): + tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave) return (clip.encode_from_tokens_scheduled(tokens), ) From 29832b3b61591633d8f312f7df727c1bb8b4d9e4 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 8 Mar 2025 03:51:36 -0500 Subject: [PATCH 15/77] Warn if frontend package is older than the one in requirements.txt --- app/frontend_management.py | 10 ++++++++-- main.py | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index 94293af1e..308f71da6 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -18,12 +18,18 @@ from typing_extensions import NotRequired from comfy.cli_args import DEFAULT_VERSION_STRING +def frontend_install_warning_message(): + req_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt')) + extra = "" + if sys.flags.no_user_site: + extra = "-s " + return f"Please install the updated requirements.txt file by running:\n{sys.executable} {extra}-m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem" + try: import comfyui_frontend_package except ImportError: # TODO: Remove the check after roll out of 0.3.16 - req_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt')) - logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. Please install the updated requirements.txt file by running:\n{sys.executable} -s -m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem\n********** ERROR **********\n") + logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. 
{frontend_install_warning_message()}\n********** ERROR **********\n") exit(-1) diff --git a/main.py b/main.py index 57fa397e6..6fa1cfb0f 100644 --- a/main.py +++ b/main.py @@ -293,14 +293,29 @@ def start_comfyui(asyncio_loop=None): return asyncio_loop, prompt_server, start_all +def warn_frontend_version(frontend_version): + try: + required_frontend = (0,) + req_path = os.path.join(os.path.dirname(__file__), 'requirements.txt') + with open(req_path, 'r') as f: + required_frontend = tuple(map(int, f.readline().split('=')[-1].split('.'))) + if frontend_version < required_frontend: + logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), app.frontend_management.frontend_install_warning_message())) + except: + pass + + if __name__ == "__main__": # Running directly, just start ComfyUI. logging.info("ComfyUI version: {}".format(comfyui_version.__version__)) - logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, app.frontend_management.frontend_version)))) + frontend_version = app.frontend_management.frontend_version + logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, frontend_version)))) event_loop, _, start_all_func = start_comfyui() try: - event_loop.run_until_complete(start_all_func()) + x = start_all_func() + warn_frontend_version(frontend_version) + event_loop.run_until_complete(x) except KeyboardInterrupt: logging.info("\nStopped server") From 0952569493f0f57a59a4a8aaad439949d9d4ef2e Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 8 Mar 2025 20:24:04 -0500 Subject: [PATCH 16/77] Fix stable cascade VAE on some lowvram machines. 
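
Plain torch.nn layers bypass ComfyUI's operation wrappers, which can leave
their weights in the wrong dtype or on the wrong device when the lowvram path
is active. The diff below switches them to the comfy.ops.disable_weight_init
variants. A minimal sketch of the pattern, assuming a toy block rather than
the actual stage_a code:

    import torch.nn as nn
    import comfy.ops

    ops = comfy.ops.disable_weight_init

    class ToyBlock(nn.Module):
        def __init__(self, c):
            super().__init__()
            # was nn.Conv2d; the ops variant skips default weight init and lets
            # ComfyUI's model management decide where the weights live
            self.conv = ops.Conv2d(c, c, kernel_size=3, padding=1)

        def forward(self, x):
            return self.conv(x)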
--- comfy/ldm/cascade/stage_a.py | 28 ++++++++++++++++------------ comfy/ldm/cascade/stage_c_coder.py | 25 ++++++++++++++----------- comfy/model_management.py | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/comfy/ldm/cascade/stage_a.py b/comfy/ldm/cascade/stage_a.py index ca8867eaf..145e6e69a 100644 --- a/comfy/ldm/cascade/stage_a.py +++ b/comfy/ldm/cascade/stage_a.py @@ -19,6 +19,10 @@ import torch from torch import nn from torch.autograd import Function +import comfy.ops + +ops = comfy.ops.disable_weight_init + class vector_quantize(Function): @staticmethod @@ -121,15 +125,15 @@ class ResBlock(nn.Module): self.norm1 = nn.LayerNorm(c, elementwise_affine=False, eps=1e-6) self.depthwise = nn.Sequential( nn.ReplicationPad2d(1), - nn.Conv2d(c, c, kernel_size=3, groups=c) + ops.Conv2d(c, c, kernel_size=3, groups=c) ) # channelwise self.norm2 = nn.LayerNorm(c, elementwise_affine=False, eps=1e-6) self.channelwise = nn.Sequential( - nn.Linear(c, c_hidden), + ops.Linear(c, c_hidden), nn.GELU(), - nn.Linear(c_hidden, c), + ops.Linear(c_hidden, c), ) self.gammas = nn.Parameter(torch.zeros(6), requires_grad=True) @@ -171,16 +175,16 @@ class StageA(nn.Module): # Encoder blocks self.in_block = nn.Sequential( nn.PixelUnshuffle(2), - nn.Conv2d(3 * 4, c_levels[0], kernel_size=1) + ops.Conv2d(3 * 4, c_levels[0], kernel_size=1) ) down_blocks = [] for i in range(levels): if i > 0: - down_blocks.append(nn.Conv2d(c_levels[i - 1], c_levels[i], kernel_size=4, stride=2, padding=1)) + down_blocks.append(ops.Conv2d(c_levels[i - 1], c_levels[i], kernel_size=4, stride=2, padding=1)) block = ResBlock(c_levels[i], c_levels[i] * 4) down_blocks.append(block) down_blocks.append(nn.Sequential( - nn.Conv2d(c_levels[-1], c_latent, kernel_size=1, bias=False), + ops.Conv2d(c_levels[-1], c_latent, kernel_size=1, bias=False), nn.BatchNorm2d(c_latent), # then normalize them to have mean 0 and std 1 )) self.down_blocks = nn.Sequential(*down_blocks) @@ -191,7 +195,7 @@ class StageA(nn.Module): # Decoder blocks up_blocks = [nn.Sequential( - nn.Conv2d(c_latent, c_levels[-1], kernel_size=1) + ops.Conv2d(c_latent, c_levels[-1], kernel_size=1) )] for i in range(levels): for j in range(bottleneck_blocks if i == 0 else 1): @@ -199,11 +203,11 @@ class StageA(nn.Module): up_blocks.append(block) if i < levels - 1: up_blocks.append( - nn.ConvTranspose2d(c_levels[levels - 1 - i], c_levels[levels - 2 - i], kernel_size=4, stride=2, + ops.ConvTranspose2d(c_levels[levels - 1 - i], c_levels[levels - 2 - i], kernel_size=4, stride=2, padding=1)) self.up_blocks = nn.Sequential(*up_blocks) self.out_block = nn.Sequential( - nn.Conv2d(c_levels[0], 3 * 4, kernel_size=1), + ops.Conv2d(c_levels[0], 3 * 4, kernel_size=1), nn.PixelShuffle(2), ) @@ -232,17 +236,17 @@ class Discriminator(nn.Module): super().__init__() d = max(depth - 3, 3) layers = [ - nn.utils.spectral_norm(nn.Conv2d(c_in, c_hidden // (2 ** d), kernel_size=3, stride=2, padding=1)), + nn.utils.spectral_norm(ops.Conv2d(c_in, c_hidden // (2 ** d), kernel_size=3, stride=2, padding=1)), nn.LeakyReLU(0.2), ] for i in range(depth - 1): c_in = c_hidden // (2 ** max((d - i), 0)) c_out = c_hidden // (2 ** max((d - 1 - i), 0)) - layers.append(nn.utils.spectral_norm(nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1))) + layers.append(nn.utils.spectral_norm(ops.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1))) layers.append(nn.InstanceNorm2d(c_out)) layers.append(nn.LeakyReLU(0.2)) self.encoder = nn.Sequential(*layers) - self.shuffle = nn.Conv2d((c_hidden + c_cond) if 
c_cond > 0 else c_hidden, 1, kernel_size=1) + self.shuffle = ops.Conv2d((c_hidden + c_cond) if c_cond > 0 else c_hidden, 1, kernel_size=1) self.logits = nn.Sigmoid() def forward(self, x, cond=None): diff --git a/comfy/ldm/cascade/stage_c_coder.py b/comfy/ldm/cascade/stage_c_coder.py index 0cb7c49fc..b467a70a8 100644 --- a/comfy/ldm/cascade/stage_c_coder.py +++ b/comfy/ldm/cascade/stage_c_coder.py @@ -19,6 +19,9 @@ import torch import torchvision from torch import nn +import comfy.ops + +ops = comfy.ops.disable_weight_init # EfficientNet class EfficientNetEncoder(nn.Module): @@ -26,7 +29,7 @@ class EfficientNetEncoder(nn.Module): super().__init__() self.backbone = torchvision.models.efficientnet_v2_s().features.eval() self.mapper = nn.Sequential( - nn.Conv2d(1280, c_latent, kernel_size=1, bias=False), + ops.Conv2d(1280, c_latent, kernel_size=1, bias=False), nn.BatchNorm2d(c_latent, affine=False), # then normalize them to have mean 0 and std 1 ) self.mean = nn.Parameter(torch.tensor([0.485, 0.456, 0.406])) @@ -34,7 +37,7 @@ class EfficientNetEncoder(nn.Module): def forward(self, x): x = x * 0.5 + 0.5 - x = (x - self.mean.view([3,1,1])) / self.std.view([3,1,1]) + x = (x - self.mean.view([3,1,1]).to(device=x.device, dtype=x.dtype)) / self.std.view([3,1,1]).to(device=x.device, dtype=x.dtype) o = self.mapper(self.backbone(x)) return o @@ -44,39 +47,39 @@ class Previewer(nn.Module): def __init__(self, c_in=16, c_hidden=512, c_out=3): super().__init__() self.blocks = nn.Sequential( - nn.Conv2d(c_in, c_hidden, kernel_size=1), # 16 channels to 512 channels + ops.Conv2d(c_in, c_hidden, kernel_size=1), # 16 channels to 512 channels nn.GELU(), nn.BatchNorm2d(c_hidden), - nn.Conv2d(c_hidden, c_hidden, kernel_size=3, padding=1), + ops.Conv2d(c_hidden, c_hidden, kernel_size=3, padding=1), nn.GELU(), nn.BatchNorm2d(c_hidden), - nn.ConvTranspose2d(c_hidden, c_hidden // 2, kernel_size=2, stride=2), # 16 -> 32 + ops.ConvTranspose2d(c_hidden, c_hidden // 2, kernel_size=2, stride=2), # 16 -> 32 nn.GELU(), nn.BatchNorm2d(c_hidden // 2), - nn.Conv2d(c_hidden // 2, c_hidden // 2, kernel_size=3, padding=1), + ops.Conv2d(c_hidden // 2, c_hidden // 2, kernel_size=3, padding=1), nn.GELU(), nn.BatchNorm2d(c_hidden // 2), - nn.ConvTranspose2d(c_hidden // 2, c_hidden // 4, kernel_size=2, stride=2), # 32 -> 64 + ops.ConvTranspose2d(c_hidden // 2, c_hidden // 4, kernel_size=2, stride=2), # 32 -> 64 nn.GELU(), nn.BatchNorm2d(c_hidden // 4), - nn.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1), + ops.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1), nn.GELU(), nn.BatchNorm2d(c_hidden // 4), - nn.ConvTranspose2d(c_hidden // 4, c_hidden // 4, kernel_size=2, stride=2), # 64 -> 128 + ops.ConvTranspose2d(c_hidden // 4, c_hidden // 4, kernel_size=2, stride=2), # 64 -> 128 nn.GELU(), nn.BatchNorm2d(c_hidden // 4), - nn.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1), + ops.Conv2d(c_hidden // 4, c_hidden // 4, kernel_size=3, padding=1), nn.GELU(), nn.BatchNorm2d(c_hidden // 4), - nn.Conv2d(c_hidden // 4, c_out, kernel_size=1), + ops.Conv2d(c_hidden // 4, c_out, kernel_size=1), ) def forward(self, x): diff --git a/comfy/model_management.py b/comfy/model_management.py index bc90e3dff..3a4c93e30 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -581,7 +581,7 @@ def load_models_gpu(models, memory_required=0, force_patch_weights=False, minimu loaded_memory = loaded_model.model_loaded_memory() current_free_mem = get_free_memory(torch_dev) + loaded_memory - 
lowvram_model_memory = max(64 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory())) + lowvram_model_memory = max(128 * 1024 * 1024, (current_free_mem - minimum_memory_required), min(current_free_mem * MIN_WEIGHT_MEMORY_RATIO, current_free_mem - minimum_inference_memory())) lowvram_model_memory = max(0.1, lowvram_model_memory - loaded_memory) if vram_set_state == VRAMState.NO_VRAM: From 7395b0c0d1ae8ed8867b78135ddc5436deaeaaa4 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 8 Mar 2025 20:25:14 -0500 Subject: [PATCH 17/77] Support new hunyuan video i2v model. Use the new "v2 (replace)" guidance type in HunyuanImageToVideo and set image_interleave to 4 on the "Text Encode Hunyuan Video" node. --- comfy/ldm/flux/layers.py | 47 ++++++++++++++++++++++---------- comfy/ldm/hunyuan_video/model.py | 21 ++++++++++---- comfy/model_base.py | 11 ++++++++ comfy_extras/nodes_hunyuan.py | 17 +++++++++--- 4 files changed, 72 insertions(+), 24 deletions(-) diff --git a/comfy/ldm/flux/layers.py b/comfy/ldm/flux/layers.py index 59a62e0df..1b3e9f313 100644 --- a/comfy/ldm/flux/layers.py +++ b/comfy/ldm/flux/layers.py @@ -105,7 +105,9 @@ class Modulation(nn.Module): self.lin = operations.Linear(dim, self.multiplier * dim, bias=True, dtype=dtype, device=device) def forward(self, vec: Tensor) -> tuple: - out = self.lin(nn.functional.silu(vec))[:, None, :].chunk(self.multiplier, dim=-1) + if vec.ndim == 2: + vec = vec[:, None, :] + out = self.lin(nn.functional.silu(vec)).chunk(self.multiplier, dim=-1) return ( ModulationOut(*out[:3]), @@ -113,6 +115,20 @@ class Modulation(nn.Module): ) +def apply_mod(tensor, m_mult, m_add=None, modulation_dims=None): + if modulation_dims is None: + if m_add is not None: + return tensor * m_mult + m_add + else: + return tensor * m_mult + else: + for d in modulation_dims: + tensor[:, d[0]:d[1]] *= m_mult[:, d[2]] + if m_add is not None: + tensor[:, d[0]:d[1]] += m_add[:, d[2]] + return tensor + + class DoubleStreamBlock(nn.Module): def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, dtype=None, device=None, operations=None): super().__init__() @@ -143,20 +159,20 @@ class DoubleStreamBlock(nn.Module): ) self.flipped_img_txt = flipped_img_txt - def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None): + def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims=None): img_mod1, img_mod2 = self.img_mod(vec) txt_mod1, txt_mod2 = self.txt_mod(vec) # prepare image for attention img_modulated = self.img_norm1(img) - img_modulated = (1 + img_mod1.scale) * img_modulated + img_mod1.shift + img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims) img_qkv = self.img_attn.qkv(img_modulated) img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) img_q, img_k = self.img_attn.norm(img_q, img_k, img_v) # prepare txt for attention txt_modulated = self.txt_norm1(txt) - txt_modulated = (1 + txt_mod1.scale) * txt_modulated + txt_mod1.shift + txt_modulated = apply_mod(txt_modulated, (1 + txt_mod1.scale), txt_mod1.shift, modulation_dims) txt_qkv = self.txt_attn.qkv(txt_modulated) txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v) @@ -179,12 
+195,12 @@ class DoubleStreamBlock(nn.Module): txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:] # calculate the img bloks - img = img + img_mod1.gate * self.img_attn.proj(img_attn) - img = img + img_mod2.gate * self.img_mlp((1 + img_mod2.scale) * self.img_norm2(img) + img_mod2.shift) + img = img + apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims) + img = img + apply_mod(self.img_mlp(apply_mod(self.img_norm2(img), (1 + img_mod2.scale), img_mod2.shift, modulation_dims)), img_mod2.gate, None, modulation_dims) # calculate the txt bloks - txt += txt_mod1.gate * self.txt_attn.proj(txt_attn) - txt += txt_mod2.gate * self.txt_mlp((1 + txt_mod2.scale) * self.txt_norm2(txt) + txt_mod2.shift) + txt += apply_mod(self.txt_attn.proj(txt_attn), txt_mod1.gate, None, modulation_dims) + txt += apply_mod(self.txt_mlp(apply_mod(self.txt_norm2(txt), (1 + txt_mod2.scale), txt_mod2.shift, modulation_dims)), txt_mod2.gate, None, modulation_dims) if txt.dtype == torch.float16: txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504) @@ -228,9 +244,9 @@ class SingleStreamBlock(nn.Module): self.mlp_act = nn.GELU(approximate="tanh") self.modulation = Modulation(hidden_size, double=False, dtype=dtype, device=device, operations=operations) - def forward(self, x: Tensor, vec: Tensor, pe: Tensor, attn_mask=None) -> Tensor: + def forward(self, x: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims=None) -> Tensor: mod, _ = self.modulation(vec) - qkv, mlp = torch.split(self.linear1((1 + mod.scale) * self.pre_norm(x) + mod.shift), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1) + qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim], dim=-1) q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) q, k = self.norm(q, k, v) @@ -239,7 +255,7 @@ class SingleStreamBlock(nn.Module): attn = attention(q, k, v, pe=pe, mask=attn_mask) # compute activation in mlp stream, cat again and run second linear layer output = self.linear2(torch.cat((attn, self.mlp_act(mlp)), 2)) - x += mod.gate * output + x += apply_mod(output, mod.gate, None, modulation_dims) if x.dtype == torch.float16: x = torch.nan_to_num(x, nan=0.0, posinf=65504, neginf=-65504) return x @@ -252,8 +268,11 @@ class LastLayer(nn.Module): self.linear = operations.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True, dtype=dtype, device=device) self.adaLN_modulation = nn.Sequential(nn.SiLU(), operations.Linear(hidden_size, 2 * hidden_size, bias=True, dtype=dtype, device=device)) - def forward(self, x: Tensor, vec: Tensor) -> Tensor: - shift, scale = self.adaLN_modulation(vec).chunk(2, dim=1) - x = (1 + scale[:, None, :]) * self.norm_final(x) + shift[:, None, :] + def forward(self, x: Tensor, vec: Tensor, modulation_dims=None) -> Tensor: + if vec.ndim == 2: + vec = vec[:, None, :] + + shift, scale = self.adaLN_modulation(vec).chunk(2, dim=-1) + x = apply_mod(self.norm_final(x), (1 + scale), shift, modulation_dims) x = self.linear(x) return x diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py index f3f445843..001e302b5 100644 --- a/comfy/ldm/hunyuan_video/model.py +++ b/comfy/ldm/hunyuan_video/model.py @@ -227,6 +227,7 @@ class HunyuanVideo(nn.Module): timesteps: Tensor, y: Tensor, guidance: Tensor = None, + guiding_frame_index=None, control=None, transformer_options={}, ) -> Tensor: @@ -237,7 +238,15 @@ class 
HunyuanVideo(nn.Module): img = self.img_in(img) vec = self.time_in(timestep_embedding(timesteps, 256, time_factor=1.0).to(img.dtype)) - vec = vec + self.vector_in(y[:, :self.params.vec_in_dim]) + if guiding_frame_index is not None: + token_replace_vec = self.time_in(timestep_embedding(guiding_frame_index, 256, time_factor=1.0)) + vec_ = self.vector_in(y[:, :self.params.vec_in_dim]) + vec = torch.cat([(vec_ + token_replace_vec).unsqueeze(1), (vec_ + vec).unsqueeze(1)], dim=1) + frame_tokens = (initial_shape[-1] // self.patch_size[-1]) * (initial_shape[-2] // self.patch_size[-2]) + modulation_dims = [(0, frame_tokens, 0), (frame_tokens, None, 1)] + else: + vec = vec + self.vector_in(y[:, :self.params.vec_in_dim]) + modulation_dims = None if self.params.guidance_embed: if guidance is not None: @@ -271,7 +280,7 @@ class HunyuanVideo(nn.Module): txt = out["txt"] img = out["img"] else: - img, txt = block(img=img, txt=txt, vec=vec, pe=pe, attn_mask=attn_mask) + img, txt = block(img=img, txt=txt, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims=modulation_dims) if control is not None: # Controlnet control_i = control.get("input") @@ -292,7 +301,7 @@ class HunyuanVideo(nn.Module): out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe, "attention_mask": attn_mask}, {"original_block": block_wrap}) img = out["img"] else: - img = block(img, vec=vec, pe=pe, attn_mask=attn_mask) + img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims=modulation_dims) if control is not None: # Controlnet control_o = control.get("output") @@ -303,7 +312,7 @@ class HunyuanVideo(nn.Module): img = img[:, : img_len] - img = self.final_layer(img, vec) # (N, T, patch_size ** 2 * out_channels) + img = self.final_layer(img, vec, modulation_dims=modulation_dims) # (N, T, patch_size ** 2 * out_channels) shape = initial_shape[-3:] for i in range(len(shape)): @@ -313,7 +322,7 @@ class HunyuanVideo(nn.Module): img = img.reshape(initial_shape[0], self.out_channels, initial_shape[2], initial_shape[3], initial_shape[4]) return img - def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, control=None, transformer_options={}, **kwargs): + def forward(self, x, timestep, context, y, guidance=None, attention_mask=None, guiding_frame_index=None, control=None, transformer_options={}, **kwargs): bs, c, t, h, w = x.shape patch_size = self.patch_size t_len = ((t + (patch_size[0] // 2)) // patch_size[0]) @@ -325,5 +334,5 @@ class HunyuanVideo(nn.Module): img_ids[:, :, :, 2] = img_ids[:, :, :, 2] + torch.linspace(0, w_len - 1, steps=w_len, device=x.device, dtype=x.dtype).reshape(1, 1, -1) img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs) txt_ids = torch.zeros((bs, context.shape[1], 3), device=x.device, dtype=x.dtype) - out = self.forward_orig(x, img_ids, context, txt_ids, attention_mask, timestep, y, guidance, control, transformer_options) + out = self.forward_orig(x, img_ids, context, txt_ids, attention_mask, timestep, y, guidance, guiding_frame_index, control, transformer_options) return out diff --git a/comfy/model_base.py b/comfy/model_base.py index 2fa1ee911..bf4ebefa1 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -898,20 +898,31 @@ class HunyuanVideo(BaseModel): guidance = kwargs.get("guidance", 6.0) if guidance is not None: out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance])) + + guiding_frame_index = kwargs.get("guiding_frame_index", None) + if guiding_frame_index is not None: + out['guiding_frame_index'] = 
comfy.conds.CONDRegular(torch.FloatTensor([guiding_frame_index])) + return out + def scale_latent_inpaint(self, latent_image, **kwargs): + return latent_image class HunyuanVideoI2V(HunyuanVideo): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device) self.concat_keys = ("concat_image", "mask_inverted") + def scale_latent_inpaint(self, latent_image, **kwargs): + return super().scale_latent_inpaint(latent_image=latent_image, **kwargs) class HunyuanVideoSkyreelsI2V(HunyuanVideo): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device) self.concat_keys = ("concat_image",) + def scale_latent_inpaint(self, latent_image, **kwargs): + return super().scale_latent_inpaint(latent_image=latent_image, **kwargs) class CosmosVideo(BaseModel): def __init__(self, model_config, model_type=ModelType.EDM, image_to_video=False, device=None): diff --git a/comfy_extras/nodes_hunyuan.py b/comfy_extras/nodes_hunyuan.py index 56aef9b01..504010ad0 100644 --- a/comfy_extras/nodes_hunyuan.py +++ b/comfy_extras/nodes_hunyuan.py @@ -68,7 +68,6 @@ class TextEncodeHunyuanVideo_ImageToVideo: tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave) return (clip.encode_from_tokens_scheduled(tokens), ) - class HunyuanImageToVideo: @classmethod def INPUT_TYPES(s): @@ -78,6 +77,7 @@ class HunyuanImageToVideo: "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}), "length": ("INT", {"default": 53, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}), "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}), + "guidance_type": (["v1 (concat)", "v2 (replace)"], ) }, "optional": {"start_image": ("IMAGE", ), }} @@ -88,8 +88,10 @@ class HunyuanImageToVideo: CATEGORY = "conditioning/video_models" - def encode(self, positive, vae, width, height, length, batch_size, start_image=None): + def encode(self, positive, vae, width, height, length, batch_size, guidance_type, start_image=None): latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device()) + out_latent = {} + if start_image is not None: start_image = comfy.utils.common_upscale(start_image[:length, :, :, :3].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1) @@ -97,13 +99,20 @@ class HunyuanImageToVideo: mask = torch.ones((1, 1, latent.shape[2], concat_latent_image.shape[-2], concat_latent_image.shape[-1]), device=start_image.device, dtype=start_image.dtype) mask[:, :, :((start_image.shape[0] - 1) // 4) + 1] = 0.0 - positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": concat_latent_image, "concat_mask": mask}) + if guidance_type == "v1 (concat)": + cond = {"concat_latent_image": concat_latent_image, "concat_mask": mask} + else: + cond = {'guiding_frame_index': 0} + latent[:, :, :concat_latent_image.shape[2]] = concat_latent_image + out_latent["noise_mask"] = mask + + positive = node_helpers.conditioning_set_values(positive, cond) - out_latent = {} out_latent["samples"] = latent return (positive, out_latent) + NODE_CLASS_MAPPINGS = { "CLIPTextEncodeHunyuanDiT": CLIPTextEncodeHunyuanDiT, "TextEncodeHunyuanVideo_ImageToVideo": TextEncodeHunyuanVideo_ImageToVideo, From 2bc4b5968f7fbf0b6e65f2465b064c6af48f965a Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 9 Mar 
2025 03:30:20 -0400 Subject: [PATCH 18/77] ComfyUI version v0.3.25 --- comfyui_version.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/comfyui_version.py b/comfyui_version.py index a68a65323..9cf4c13fa 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.24" +__version__ = "0.3.25" diff --git a/pyproject.toml b/pyproject.toml index 4c11c71bb..3b53d1492 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.24" +version = "0.3.25" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From 528d1b35638ad4a5d08b8584f7bacb19afe785cc Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Sun, 9 Mar 2025 03:26:31 -0500 Subject: [PATCH 19/77] When cached_hook_patches contain weights for hooks, only use hook_backup for unused keys (#7067) --- comfy/model_patcher.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index 8a1f8fb63..e291158ce 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -1089,7 +1089,6 @@ class ModelPatcher: def patch_hooks(self, hooks: comfy.hooks.HookGroup): with self.use_ejected(): - self.unpatch_hooks() if hooks is not None: model_sd_keys = list(self.model_state_dict().keys()) memory_counter = None @@ -1100,12 +1099,16 @@ class ModelPatcher: # if have cached weights for hooks, use it cached_weights = self.cached_hook_patches.get(hooks, None) if cached_weights is not None: + model_sd_keys_set = set(model_sd_keys) for key in cached_weights: if key not in model_sd_keys: logging.warning(f"Cached hook could not patch. 
Key does not exist in model: {key}") continue self.patch_cached_hook_weights(cached_weights=cached_weights, key=key, memory_counter=memory_counter) + model_sd_keys_set.remove(key) + self.unpatch_hooks(model_sd_keys_set) else: + self.unpatch_hooks() relevant_patches = self.get_combined_hook_patches(hooks=hooks) original_weights = None if len(relevant_patches) > 0: @@ -1116,6 +1119,8 @@ class ModelPatcher: continue self.patch_hook_weight_to_device(hooks=hooks, combined_patches=relevant_patches, key=key, original_weights=original_weights, memory_counter=memory_counter) + else: + self.unpatch_hooks() self.current_hooks = hooks def patch_cached_hook_weights(self, cached_weights: dict, key: str, memory_counter: MemoryCounter): @@ -1172,17 +1177,23 @@ class ModelPatcher: del out_weight del weight - def unpatch_hooks(self) -> None: + def unpatch_hooks(self, whitelist_keys_set: set[str]=None) -> None: with self.use_ejected(): if len(self.hook_backup) == 0: self.current_hooks = None return keys = list(self.hook_backup.keys()) - for k in keys: - comfy.utils.copy_to_param(self.model, k, self.hook_backup[k][0].to(device=self.hook_backup[k][1])) + if whitelist_keys_set: + for k in keys: + if k in whitelist_keys_set: + comfy.utils.copy_to_param(self.model, k, self.hook_backup[k][0].to(device=self.hook_backup[k][1])) + self.hook_backup.pop(k) + else: + for k in keys: + comfy.utils.copy_to_param(self.model, k, self.hook_backup[k][0].to(device=self.hook_backup[k][1])) - self.hook_backup.clear() - self.current_hooks = None + self.hook_backup.clear() + self.current_hooks = None def clean_hooks(self): self.unpatch_hooks() From 9aac21f894a122ddb8d825c57ad61c0db5e630db Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 9 Mar 2025 04:59:15 -0400 Subject: [PATCH 20/77] Fix issues with new hunyuan img2vid model and bumb version to v0.3.26 --- comfy/ldm/flux/layers.py | 14 +++++++------- comfy/ldm/hunyuan_video/model.py | 12 +++++++----- comfyui_version.py | 2 +- pyproject.toml | 2 +- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/comfy/ldm/flux/layers.py b/comfy/ldm/flux/layers.py index 1b3e9f313..76af967e6 100644 --- a/comfy/ldm/flux/layers.py +++ b/comfy/ldm/flux/layers.py @@ -159,20 +159,20 @@ class DoubleStreamBlock(nn.Module): ) self.flipped_img_txt = flipped_img_txt - def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims=None): + def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None): img_mod1, img_mod2 = self.img_mod(vec) txt_mod1, txt_mod2 = self.txt_mod(vec) # prepare image for attention img_modulated = self.img_norm1(img) - img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims) + img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims_img) img_qkv = self.img_attn.qkv(img_modulated) img_q, img_k, img_v = img_qkv.view(img_qkv.shape[0], img_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) img_q, img_k = self.img_attn.norm(img_q, img_k, img_v) # prepare txt for attention txt_modulated = self.txt_norm1(txt) - txt_modulated = apply_mod(txt_modulated, (1 + txt_mod1.scale), txt_mod1.shift, modulation_dims) + txt_modulated = apply_mod(txt_modulated, (1 + txt_mod1.scale), txt_mod1.shift, modulation_dims_txt) txt_qkv = self.txt_attn.qkv(txt_modulated) txt_q, txt_k, txt_v = txt_qkv.view(txt_qkv.shape[0], txt_qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) txt_q, 
txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v) @@ -195,12 +195,12 @@ class DoubleStreamBlock(nn.Module): txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:] # calculate the img bloks - img = img + apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims) - img = img + apply_mod(self.img_mlp(apply_mod(self.img_norm2(img), (1 + img_mod2.scale), img_mod2.shift, modulation_dims)), img_mod2.gate, None, modulation_dims) + img = img + apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img) + img = img + apply_mod(self.img_mlp(apply_mod(self.img_norm2(img), (1 + img_mod2.scale), img_mod2.shift, modulation_dims_img)), img_mod2.gate, None, modulation_dims_img) # calculate the txt bloks - txt += apply_mod(self.txt_attn.proj(txt_attn), txt_mod1.gate, None, modulation_dims) - txt += apply_mod(self.txt_mlp(apply_mod(self.txt_norm2(txt), (1 + txt_mod2.scale), txt_mod2.shift, modulation_dims)), txt_mod2.gate, None, modulation_dims) + txt += apply_mod(self.txt_attn.proj(txt_attn), txt_mod1.gate, None, modulation_dims_txt) + txt += apply_mod(self.txt_mlp(apply_mod(self.txt_norm2(txt), (1 + txt_mod2.scale), txt_mod2.shift, modulation_dims_txt)), txt_mod2.gate, None, modulation_dims_txt) if txt.dtype == torch.float16: txt = torch.nan_to_num(txt, nan=0.0, posinf=65504, neginf=-65504) diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py index 001e302b5..72af3d5bb 100644 --- a/comfy/ldm/hunyuan_video/model.py +++ b/comfy/ldm/hunyuan_video/model.py @@ -244,9 +244,11 @@ class HunyuanVideo(nn.Module): vec = torch.cat([(vec_ + token_replace_vec).unsqueeze(1), (vec_ + vec).unsqueeze(1)], dim=1) frame_tokens = (initial_shape[-1] // self.patch_size[-1]) * (initial_shape[-2] // self.patch_size[-2]) modulation_dims = [(0, frame_tokens, 0), (frame_tokens, None, 1)] + modulation_dims_txt = [(0, None, 1)] else: vec = vec + self.vector_in(y[:, :self.params.vec_in_dim]) modulation_dims = None + modulation_dims_txt = None if self.params.guidance_embed: if guidance is not None: @@ -273,14 +275,14 @@ class HunyuanVideo(nn.Module): if ("double_block", i) in blocks_replace: def block_wrap(args): out = {} - out["img"], out["txt"] = block(img=args["img"], txt=args["txt"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"]) + out["img"], out["txt"] = block(img=args["img"], txt=args["txt"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"], modulation_dims_img=args["modulation_dims_img"], modulation_dims_txt=args["modulation_dims_txt"]) return out - out = blocks_replace[("double_block", i)]({"img": img, "txt": txt, "vec": vec, "pe": pe, "attention_mask": attn_mask}, {"original_block": block_wrap}) + out = blocks_replace[("double_block", i)]({"img": img, "txt": txt, "vec": vec, "pe": pe, "attention_mask": attn_mask, 'modulation_dims_img': modulation_dims, 'modulation_dims_txt': modulation_dims_txt}, {"original_block": block_wrap}) txt = out["txt"] img = out["img"] else: - img, txt = block(img=img, txt=txt, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims=modulation_dims) + img, txt = block(img=img, txt=txt, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims_img=modulation_dims, modulation_dims_txt=modulation_dims_txt) if control is not None: # Controlnet control_i = control.get("input") @@ -295,10 +297,10 @@ class HunyuanVideo(nn.Module): if ("single_block", i) in blocks_replace: def block_wrap(args): out = {} - out["img"] = block(args["img"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"]) + 
out["img"] = block(args["img"], vec=args["vec"], pe=args["pe"], attn_mask=args["attention_mask"], modulation_dims=args["modulation_dims"]) return out - out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe, "attention_mask": attn_mask}, {"original_block": block_wrap}) + out = blocks_replace[("single_block", i)]({"img": img, "vec": vec, "pe": pe, "attention_mask": attn_mask, 'modulation_dims': modulation_dims}, {"original_block": block_wrap}) img = out["img"] else: img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, modulation_dims=modulation_dims) diff --git a/comfyui_version.py b/comfyui_version.py index 9cf4c13fa..b5e6fbead 100644 --- a/comfyui_version.py +++ b/comfyui_version.py @@ -1,3 +1,3 @@ # This file is automatically generated by the build process when version is # updated in pyproject.toml. -__version__ = "0.3.25" +__version__ = "0.3.26" diff --git a/pyproject.toml b/pyproject.toml index 3b53d1492..f13fed8dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ComfyUI" -version = "0.3.25" +version = "0.3.26" readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.9" From a73410aafa573940ebaba9a9a908476a538a8981 Mon Sep 17 00:00:00 2001 From: bymyself Date: Sun, 9 Mar 2025 03:46:08 -0700 Subject: [PATCH 21/77] remove overrides --- nodes.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/nodes.py b/nodes.py index bbf49915c..e43c29295 100644 --- a/nodes.py +++ b/nodes.py @@ -1785,14 +1785,7 @@ class LoadImageOutput(LoadImage): DESCRIPTION = "Load an image from the output folder. When the refresh button is clicked, the node will update the image list and automatically select the first image, allowing for easy iteration." EXPERIMENTAL = True - FUNCTION = "load_image_output" - - def load_image_output(self, image): - return self.load_image(f"{image} [output]") - - @classmethod - def VALIDATE_INPUTS(s, image): - return True + FUNCTION = "load_image" class ImageScale: From e1da98a14a21f5d4af31935832437b55e81d2399 Mon Sep 17 00:00:00 2001 From: Terry Jia Date: Sun, 9 Mar 2025 14:07:09 -0400 Subject: [PATCH 22/77] remove unused params (#6931) --- comfy_extras/nodes_load_3d.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py index 53a66b95a..8b43cf218 100644 --- a/comfy_extras/nodes_load_3d.py +++ b/comfy_extras/nodes_load_3d.py @@ -19,8 +19,6 @@ class Load3D(): "image": ("LOAD_3D", {}), "width": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), - "material": (["original", "normal", "wireframe", "depth"],), - "up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],), }} RETURN_TYPES = ("IMAGE", "MASK", "STRING") @@ -55,8 +53,6 @@ class Load3DAnimation(): "image": ("LOAD_3D_ANIMATION", {}), "width": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), - "material": (["original", "normal", "wireframe", "depth"],), - "up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],), }} RETURN_TYPES = ("IMAGE", "MASK", "STRING") @@ -82,8 +78,6 @@ class Preview3D(): def INPUT_TYPES(s): return {"required": { "model_file": ("STRING", {"default": "", "multiline": False}), - "material": (["original", "normal", "wireframe", "depth"],), - "up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],), }} OUTPUT_NODE = True @@ -102,8 +96,6 @@ class 
Preview3DAnimation(): def INPUT_TYPES(s): return {"required": { "model_file": ("STRING", {"default": "", "multiline": False}), - "material": (["original", "normal", "wireframe", "depth"],), - "up_direction": (["original", "-x", "+x", "-y", "+y", "-z", "+z"],), }} OUTPUT_NODE = True From 6f8e766509c0c44ae2e04a79ab05a06e4467b51b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 10 Mar 2025 03:33:17 -0400 Subject: [PATCH 23/77] Prevent custom nodes from accidentally overwriting global modules. --- nodes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nodes.py b/nodes.py index bbf49915c..43697a24d 100644 --- a/nodes.py +++ b/nodes.py @@ -2129,10 +2129,12 @@ def get_module_name(module_path: str) -> str: def load_custom_node(module_path: str, ignore=set(), module_parent="custom_nodes") -> bool: - module_name = os.path.basename(module_path) if os.path.isfile(module_path): sp = os.path.splitext(module_path) module_name = sp[0] + elif os.path.isdir(module_path): + module_name = module_path + try: logging.debug("Trying to load custom node {}".format(module_path)) if os.path.isfile(module_path): From 67c7184b7432105d2db52cc19fc82ccd4aa06fb3 Mon Sep 17 00:00:00 2001 From: Andrew Kvochko Date: Mon, 10 Mar 2025 10:11:48 +0200 Subject: [PATCH 24/77] ltxv: relax frame_idx divisibility for single frames. (#7146) This commit relaxes divisibility constraint for single-frame conditionings. For single frames, the index can be arbitrary, while multi-frame conditionings (>= 9 frames) must still be aligned to 8 frames. Co-authored-by: Andrew Kvochko --- comfy_extras/nodes_lt.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index b608b9407..fdc6c7c13 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -99,12 +99,13 @@ class LTXVAddGuide: "negative": ("CONDITIONING", ), "vae": ("VAE",), "latent": ("LATENT",), - "image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames." \ + "image": ("IMAGE", {"tooltip": "Image or video to condition the latent video on. Must be 8*n + 1 frames." "If the video is not 8*n + 1 frames, it will be cropped to the nearest 8*n + 1 frames."}), "frame_idx": ("INT", {"default": 0, "min": -9999, "max": 9999, - "tooltip": "Frame index to start the conditioning at. Must be divisible by 8. " \ - "If a frame is not divisible by 8, it will be rounded down to the nearest multiple of 8. " \ - "Negative values are counted from the end of the video."}), + "tooltip": "Frame index to start the conditioning at. For single-frame images or " + "videos with 1-8 frames, any frame_idx value is acceptable. For videos with 9+ " + "frames, frame_idx must be divisible by 8, otherwise it will be rounded down to " + "the nearest multiple of 8. 
Negative values are counted from the end of the video."}), "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), } } @@ -127,12 +128,13 @@ class LTXVAddGuide: t = vae.encode(encode_pixels) return encode_pixels, t - def get_latent_index(self, cond, latent_length, frame_idx, scale_factors): + def get_latent_index(self, cond, latent_length, guide_length, frame_idx, scale_factors): time_scale_factor, _, _ = scale_factors _, num_keyframes = get_keyframe_idxs(cond) latent_count = latent_length - num_keyframes - frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * 8 + 1 + frame_idx, 0) - frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8 + frame_idx = frame_idx if frame_idx >= 0 else max((latent_count - 1) * time_scale_factor + 1 + frame_idx, 0) + if guide_length > 1: + frame_idx = frame_idx // time_scale_factor * time_scale_factor # frame index must be divisible by 8 latent_idx = (frame_idx + time_scale_factor - 1) // time_scale_factor @@ -191,7 +193,7 @@ class LTXVAddGuide: _, _, latent_length, latent_height, latent_width = latent_image.shape image, t = self.encode(vae, latent_width, latent_height, image, scale_factors) - frame_idx, latent_idx = self.get_latent_index(positive, latent_length, frame_idx, scale_factors) + frame_idx, latent_idx = self.get_latent_index(positive, latent_length, len(image), frame_idx, scale_factors) assert latent_idx + t.shape[2] <= latent_length, "Conditioning frames exceed the length of the latent sequence." num_prefix_frames = min(self._num_prefix_frames, t.shape[2]) From 35e2dcf5d710f258f40f107f70f24a4cd58ba223 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 10 Mar 2025 06:14:43 -0400 Subject: [PATCH 25/77] Hack to fix broken manager. --- nodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nodes.py b/nodes.py index 43697a24d..4608a0d36 100644 --- a/nodes.py +++ b/nodes.py @@ -2134,6 +2134,8 @@ def load_custom_node(module_path: str, ignore=set(), module_parent="custom_nodes module_name = sp[0] elif os.path.isdir(module_path): module_name = module_path + if module_path.endswith("comfyui-manager"): #TODO: remove this eventually + module_name = get_module_name(module_path) try: logging.debug("Trying to load custom node {}".format(module_path)) From b779349b55e79aff81a98b752f5cb486c71812db Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 10 Mar 2025 06:30:17 -0400 Subject: [PATCH 26/77] Temporarily revert fix to give time for people to update their nodes. 
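Context for this revert: the module-name change above exists because registering a custom node package in sys.modules under its bare folder basename lets it shadow an identically named global module for every later import; switching the key to the full path prevents that, but it also changed the names that already-installed nodes (notably comfyui-manager) observe, hence the hack above and this temporary rollback. A minimal sketch of the shadowing problem, using a hypothetical custom-node folder named "utils" (folder name and path are illustrative, not from the repo):

    import importlib.util
    import sys

    # Load a custom node package and register it under its bare basename.
    spec = importlib.util.spec_from_file_location("utils", "./custom_nodes/utils/__init__.py")
    module = importlib.util.module_from_spec(spec)
    sys.modules["utils"] = module  # the name "utils" is now taken process-wide
    spec.loader.exec_module(module)

    import utils  # resolves to the custom node package, not any real top-level "utils" module

Registering under the full path makes the sys.modules key unique, at the cost of changing the module name that existing custom nodes see.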
--- nodes.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/nodes.py b/nodes.py index 4608a0d36..bbf49915c 100644 --- a/nodes.py +++ b/nodes.py @@ -2129,14 +2129,10 @@ def get_module_name(module_path: str) -> str: def load_custom_node(module_path: str, ignore=set(), module_parent="custom_nodes") -> bool: + module_name = os.path.basename(module_path) if os.path.isfile(module_path): sp = os.path.splitext(module_path) module_name = sp[0] - elif os.path.isdir(module_path): - module_name = module_path - if module_path.endswith("comfyui-manager"): #TODO: remove this eventually - module_name = get_module_name(module_path) - try: logging.debug("Trying to load custom node {}".format(module_path)) if os.path.isfile(module_path): From 1f138dd382bd4fe40c46a1fd1954dfbf0ddae924 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Mon, 10 Mar 2025 15:07:44 -0400 Subject: [PATCH 27/77] Only check frontend package if using default frontend --- app/frontend_management.py | 51 +++++++++++++++++++++++++++----------- main.py | 15 ----------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index 308f71da6..f5a0358e6 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -17,27 +17,38 @@ from typing_extensions import NotRequired from comfy.cli_args import DEFAULT_VERSION_STRING +# The path to the requirements.txt file +req_path = Path(__file__).parents[1] / "requirements.txt" def frontend_install_warning_message(): - req_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt')) + """The warning message to display when the frontend version is not up to date.""" + extra = "" if sys.flags.no_user_site: extra = "-s " return f"Please install the updated requirements.txt file by running:\n{sys.executable} {extra}-m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem" -try: - import comfyui_frontend_package -except ImportError: - # TODO: Remove the check after roll out of 0.3.16 - logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. 
{frontend_install_warning_message()}\n********** ERROR **********\n") - exit(-1) +def check_frontend_version(): + """Check if the frontend version is up to date.""" + + def parse_version(version: str) -> tuple[int, int, int]: + return tuple(map(int, version.split("."))) + + try: + import comfyui_frontend_package + + frontend_version = parse_version(comfyui_frontend_package.__version__) + required_frontend = parse_version((0,)) + with open(req_path, 'r', encoding='utf-8') as f: + required_frontend = parse_version(f.readline().split('=')[-1]) + if frontend_version < required_frontend: + logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message())) + else: + logging.info("ComfyUI frontend version: {}".format(comfyui_frontend_package.__version__)) + except Exception as e: + logging.error(f"Failed to check frontend version: {e}") -try: - frontend_version = tuple(map(int, comfyui_frontend_package.__version__.split("."))) -except: - frontend_version = (0,) - pass REQUEST_TIMEOUT = 10 # seconds @@ -133,9 +144,17 @@ def download_release_asset_zip(release: Release, destination_path: str) -> None: class FrontendManager: - DEFAULT_FRONTEND_PATH = str(importlib.resources.files(comfyui_frontend_package) / "static") CUSTOM_FRONTENDS_ROOT = str(Path(__file__).parents[1] / "web_custom_versions") + @classmethod + def default_frontend_path(cls) -> str: + try: + import comfyui_frontend_package + return str(importlib.resources.files(comfyui_frontend_package) / "static") + except ImportError: + logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. {frontend_install_warning_message()}\n********** ERROR **********\n") + sys.exit(-1) + @classmethod def parse_version_string(cls, value: str) -> tuple[str, str, str]: """ @@ -172,7 +191,8 @@ class FrontendManager: main error source might be request timeout or invalid URL. 
""" if version_string == DEFAULT_VERSION_STRING: - return cls.DEFAULT_FRONTEND_PATH + check_frontend_version() + return cls.default_frontend_path() repo_owner, repo_name, version = cls.parse_version_string(version_string) @@ -225,4 +245,5 @@ class FrontendManager: except Exception as e: logging.error("Failed to initialize frontend: %s", e) logging.info("Falling back to the default frontend.") - return cls.DEFAULT_FRONTEND_PATH + check_frontend_version() + return cls.default_frontend_path() diff --git a/main.py b/main.py index 6fa1cfb0f..dbc15b8ba 100644 --- a/main.py +++ b/main.py @@ -293,28 +293,13 @@ def start_comfyui(asyncio_loop=None): return asyncio_loop, prompt_server, start_all -def warn_frontend_version(frontend_version): - try: - required_frontend = (0,) - req_path = os.path.join(os.path.dirname(__file__), 'requirements.txt') - with open(req_path, 'r') as f: - required_frontend = tuple(map(int, f.readline().split('=')[-1].split('.'))) - if frontend_version < required_frontend: - logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), app.frontend_management.frontend_install_warning_message())) - except: - pass - - if __name__ == "__main__": # Running directly, just start ComfyUI. logging.info("ComfyUI version: {}".format(comfyui_version.__version__)) - frontend_version = app.frontend_management.frontend_version - logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, frontend_version)))) event_loop, _, start_all_func = start_comfyui() try: x = start_all_func() - warn_frontend_version(frontend_version) event_loop.run_until_complete(x) except KeyboardInterrupt: logging.info("\nStopped server") From 6f6349b6a76fde39ab65d4952aa0aee7d2eade15 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Mon, 10 Mar 2025 15:10:40 -0400 Subject: [PATCH 28/77] nit --- main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main.py b/main.py index dbc15b8ba..c6f5c3c1e 100644 --- a/main.py +++ b/main.py @@ -139,7 +139,6 @@ from server import BinaryEventTypes import nodes import comfy.model_management import comfyui_version -import app.frontend_management def cuda_malloc_warning(): From 79460497941d090bc197898dc6e9c5e4feaf0c1d Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Mon, 10 Mar 2025 15:14:40 -0400 Subject: [PATCH 29/77] nit --- app/frontend_management.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index f5a0358e6..95df5dee4 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -39,9 +39,8 @@ def check_frontend_version(): import comfyui_frontend_package frontend_version = parse_version(comfyui_frontend_package.__version__) - required_frontend = parse_version((0,)) - with open(req_path, 'r', encoding='utf-8') as f: - required_frontend = parse_version(f.readline().split('=')[-1]) + with open(req_path, "r", encoding="utf-8") as f: + required_frontend = parse_version(f.readline().split("=")[-1]) if frontend_version < required_frontend: logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version 
{}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message())) else: From db9f2a34fc87d49abea4e5aa29a8573f5073e0ce Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Mon, 10 Mar 2025 15:19:52 -0400 Subject: [PATCH 30/77] Fix unit test --- tests-unit/app_test/frontend_manager_test.py | 57 +++++++++++++++++--- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/tests-unit/app_test/frontend_manager_test.py b/tests-unit/app_test/frontend_manager_test.py index a8df52484..7a91ad410 100644 --- a/tests-unit/app_test/frontend_manager_test.py +++ b/tests-unit/app_test/frontend_manager_test.py @@ -70,7 +70,7 @@ def test_get_release_invalid_version(mock_provider): def test_init_frontend_default(): version_string = DEFAULT_VERSION_STRING frontend_path = FrontendManager.init_frontend(version_string) - assert frontend_path == FrontendManager.DEFAULT_FRONTEND_PATH + assert frontend_path == FrontendManager.default_frontend_path() def test_init_frontend_invalid_version(): @@ -84,24 +84,29 @@ def test_init_frontend_invalid_provider(): with pytest.raises(HTTPError): FrontendManager.init_frontend_unsafe(version_string) + @pytest.fixture def mock_os_functions(): - with patch('app.frontend_management.os.makedirs') as mock_makedirs, \ - patch('app.frontend_management.os.listdir') as mock_listdir, \ - patch('app.frontend_management.os.rmdir') as mock_rmdir: + with ( + patch("app.frontend_management.os.makedirs") as mock_makedirs, + patch("app.frontend_management.os.listdir") as mock_listdir, + patch("app.frontend_management.os.rmdir") as mock_rmdir, + ): mock_listdir.return_value = [] # Simulate empty directory yield mock_makedirs, mock_listdir, mock_rmdir + @pytest.fixture def mock_download(): - with patch('app.frontend_management.download_release_asset_zip') as mock: + with patch("app.frontend_management.download_release_asset_zip") as mock: mock.side_effect = Exception("Download failed") # Simulate download failure yield mock + def test_finally_block(mock_os_functions, mock_download, mock_provider): # Arrange mock_makedirs, mock_listdir, mock_rmdir = mock_os_functions - version_string = 'test-owner/test-repo@1.0.0' + version_string = "test-owner/test-repo@1.0.0" # Act & Assert with pytest.raises(Exception): @@ -128,3 +133,43 @@ def test_parse_version_string_invalid(): version_string = "invalid" with pytest.raises(argparse.ArgumentTypeError): FrontendManager.parse_version_string(version_string) + + +def test_init_frontend_default_with_mocks(): + # Arrange + version_string = DEFAULT_VERSION_STRING + + # Act + with ( + patch("app.frontend_management.check_frontend_version") as mock_check, + patch.object( + FrontendManager, "default_frontend_path", return_value="/mocked/path" + ), + ): + frontend_path = FrontendManager.init_frontend(version_string) + + # Assert + assert frontend_path == "/mocked/path" + mock_check.assert_called_once() + + +def test_init_frontend_fallback_on_error(): + # Arrange + version_string = "test-owner/test-repo@1.0.0" + + # Act + with ( + patch.object( + FrontendManager, "init_frontend_unsafe", side_effect=Exception("Test error") + ), + patch("app.frontend_management.check_frontend_version") as mock_check, + patch.object( + FrontendManager, "default_frontend_path", return_value="/default/path" + ), + ): + frontend_path = FrontendManager.init_frontend(version_string) + + # Assert + assert frontend_path == "/default/path" + 
mock_check.assert_called_once() + From 65ea778a5e5f69ec83e59a1f08678272fb2725d3 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Mon, 10 Mar 2025 15:19:59 -0400 Subject: [PATCH 31/77] nit --- tests-unit/app_test/frontend_manager_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests-unit/app_test/frontend_manager_test.py b/tests-unit/app_test/frontend_manager_test.py index 7a91ad410..ce67df6c6 100644 --- a/tests-unit/app_test/frontend_manager_test.py +++ b/tests-unit/app_test/frontend_manager_test.py @@ -172,4 +172,3 @@ def test_init_frontend_fallback_on_error(): # Assert assert frontend_path == "/default/path" mock_check.assert_called_once() - From ca8efab79fa19bc9745b4f7346d38a49ba1b1b7c Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 10 Mar 2025 17:23:13 -0400 Subject: [PATCH 32/77] Support control loras on Wan. --- comfy/model_base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/comfy/model_base.py b/comfy/model_base.py index bf4ebefa1..976702b60 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -973,11 +973,11 @@ class WAN21(BaseModel): self.image_to_video = image_to_video def concat_cond(self, **kwargs): - if not self.image_to_video: + noise = kwargs.get("noise", None) + if self.diffusion_model.patch_embedding.weight.shape[1] == noise.shape[1]: return None image = kwargs.get("concat_latent_image", None) - noise = kwargs.get("noise", None) device = kwargs["device"] if image is None: @@ -987,6 +987,9 @@ class WAN21(BaseModel): image = self.process_latent_in(image) image = utils.resize_to_batch_size(image, noise.shape[0]) + if not self.image_to_video: + return image + mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None)) if mask is None: mask = torch.zeros_like(noise)[:, :4] From cfbe4b49ca63eae79fe4f3206d03a41b43ef275e Mon Sep 17 00:00:00 2001 From: huchenlei Date: Mon, 10 Mar 2025 20:43:59 -0400 Subject: [PATCH 33/77] Access package version --- app/frontend_management.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index 95df5dee4..4b7dfbb98 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -11,6 +11,7 @@ from dataclasses import dataclass from functools import cached_property from pathlib import Path from typing import TypedDict, Optional +from importlib.metadata import version import requests from typing_extensions import NotRequired @@ -36,15 +37,14 @@ def check_frontend_version(): return tuple(map(int, version.split("."))) try: - import comfyui_frontend_package - - frontend_version = parse_version(comfyui_frontend_package.__version__) + frontend_version_str = version("comfyui-frontend-package") + frontend_version = parse_version(frontend_version_str) with open(req_path, "r", encoding="utf-8") as f: required_frontend = parse_version(f.readline().split("=")[-1]) if frontend_version < required_frontend: logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message())) else: - logging.info("ComfyUI frontend version: {}".format(comfyui_frontend_package.__version__)) + logging.info("ComfyUI frontend version: {}".format(frontend_version_str)) except Exception as e: 
logging.error(f"Failed to check frontend version: {e}") From 2330754b0ed3e4864c8ba8165e57ea18aafa30b8 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 11 Mar 2025 15:07:00 -0400 Subject: [PATCH 34/77] Fix error saving some latents. --- nodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodes.py b/nodes.py index e43c29295..63791e208 100644 --- a/nodes.py +++ b/nodes.py @@ -489,7 +489,7 @@ class SaveLatent: file = os.path.join(full_output_folder, file) output = {} - output["latent_tensor"] = samples["samples"] + output["latent_tensor"] = samples["samples"].contiguous() output["latent_format_version_0"] = torch.tensor([]) comfy.utils.save_torch_file(output, file, metadata=metadata) From 01015bff166988c926e5ed1d03842fddc9a0f925 Mon Sep 17 00:00:00 2001 From: chaObserv <154517000+chaObserv@users.noreply.github.com> Date: Wed, 12 Mar 2025 14:42:37 +0800 Subject: [PATCH 35/77] Add er_sde sampler (#7187) --- comfy/k_diffusion/sampling.py | 56 +++++++++++++++++++++++++++++++++++ comfy/samplers.py | 2 +- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 456679989..78678abd7 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1366,3 +1366,59 @@ def sample_gradient_estimation(model, x, sigmas, extra_args=None, callback=None, x = x + d_bar * dt old_d = d return x + +@torch.no_grad() +def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, s_noise=1., noise_sampler=None, noise_scaler=None, max_stage=3): + """ + Extended Reverse-Time SDE solver (VE ER-SDE-Solver-3). Arxiv: https://arxiv.org/abs/2309.06169. + Code reference: https://github.com/QinpengCui/ER-SDE-Solver/blob/main/er_sde_solver.py. + """ + extra_args = {} if extra_args is None else extra_args + seed = extra_args.get("seed", None) + noise_sampler = default_noise_sampler(x, seed=seed) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + + def default_noise_scaler(sigma): + return sigma * ((sigma ** 0.3).exp() + 10.0) + noise_scaler = default_noise_scaler if noise_scaler is None else noise_scaler + num_integration_points = 200.0 + point_indice = torch.arange(0, num_integration_points, dtype=torch.float32, device=x.device) + + old_denoised = None + old_denoised_d = None + + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) + if callback is not None: + callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}) + stage_used = min(max_stage, i + 1) + if sigmas[i + 1] == 0: + x = denoised + elif stage_used == 1: + r = noise_scaler(sigmas[i + 1]) / noise_scaler(sigmas[i]) + x = r * x + (1 - r) * denoised + else: + r = noise_scaler(sigmas[i + 1]) / noise_scaler(sigmas[i]) + x = r * x + (1 - r) * denoised + + dt = sigmas[i + 1] - sigmas[i] + sigma_step_size = -dt / num_integration_points + sigma_pos = sigmas[i + 1] + point_indice * sigma_step_size + scaled_pos = noise_scaler(sigma_pos) + + # Stage 2 + s = torch.sum(1 / scaled_pos) * sigma_step_size + denoised_d = (denoised - old_denoised) / (sigmas[i] - sigmas[i - 1]) + x = x + (dt + s * noise_scaler(sigmas[i + 1])) * denoised_d + + if stage_used >= 3: + # Stage 3 + s_u = torch.sum((sigma_pos - sigmas[i]) / scaled_pos) * sigma_step_size + denoised_u = (denoised_d - old_denoised_d) / ((sigmas[i] - sigmas[i - 2]) / 2) + x = x + ((dt ** 2) / 2 + s_u * noise_scaler(sigmas[i + 1])) * denoised_u + old_denoised_d = denoised_d + + 
if s_noise != 0 and sigmas[i + 1] > 0: + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (sigmas[i + 1] ** 2 - sigmas[i] ** 2 * r ** 2).sqrt() + old_denoised = denoised + return x diff --git a/comfy/samplers.py b/comfy/samplers.py index 7578ac1ef..10728bd1f 100644 --- a/comfy/samplers.py +++ b/comfy/samplers.py @@ -710,7 +710,7 @@ KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_c "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu", "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm", "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp", - "gradient_estimation"] + "gradient_estimation", "er_sde"] class KSAMPLER(Sampler): def __init__(self, sampler_function, extra_options={}, inpaint_options={}): From d2a0fb6bb0da1bf481a3b2417bca2cebac4a4e03 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Wed, 12 Mar 2025 06:39:14 -0400 Subject: [PATCH 36/77] Add unwrap widget value support (#7197) * Add unwrap widget value support * nit --- execution.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/execution.py b/execution.py index 2c979205b..fcb4f6f40 100644 --- a/execution.py +++ b/execution.py @@ -634,6 +634,13 @@ def validate_inputs(prompt, item, validated): continue else: try: + # Unwraps values wrapped in __value__ key. This is used to pass + # list widget value to execution, as by default list value is + # reserved to represent the connection between nodes. + if isinstance(val, dict) and "__value__" in val: + val = val["__value__"] + inputs[x] = val + if type_input == "INT": val = int(val) inputs[x] = val From f4411250f311f1ba93b8ba57b4252e7c23f7d925 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 12 Mar 2025 07:13:40 -0400 Subject: [PATCH 37/77] Repeat frontend version warning at the end. This way someone running ComfyUI with the command line is more likely to actually see it. 
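A condensed view of the pattern this introduces: startup warnings are logged immediately and also buffered, then replayed once startup completes so they land at the bottom of the console output. Call pattern, mirroring the diff below (the warning text is abbreviated here):

    import app.logger

    # early in startup: logged now and remembered
    app.logger.log_startup_warning("Installed frontend version ... is lower than the recommended version ...")

    # ... the rest of the startup log scrolls past ...

    # called from main.py once startup finishes: repeats every buffered warning, then clears the buffer
    app.logger.print_startup_warnings()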
--- app/frontend_management.py | 3 ++- app/logger.py | 14 ++++++++++++++ main.py | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index 4b7dfbb98..b4ba994d1 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -17,6 +17,7 @@ import requests from typing_extensions import NotRequired from comfy.cli_args import DEFAULT_VERSION_STRING +import app.logger # The path to the requirements.txt file req_path = Path(__file__).parents[1] / "requirements.txt" @@ -42,7 +43,7 @@ def check_frontend_version(): with open(req_path, "r", encoding="utf-8") as f: required_frontend = parse_version(f.readline().split("=")[-1]) if frontend_version < required_frontend: - logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message())) + app.logger.log_startup_warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message())) else: logging.info("ComfyUI frontend version: {}".format(frontend_version_str)) except Exception as e: diff --git a/app/logger.py b/app/logger.py index 9e9f84ccf..3d26d98fe 100644 --- a/app/logger.py +++ b/app/logger.py @@ -82,3 +82,17 @@ def setup_logger(log_level: str = 'INFO', capacity: int = 300, use_stdout: bool logger.addHandler(stdout_handler) logger.addHandler(stream_handler) + + +STARTUP_WARNINGS = [] + + +def log_startup_warning(msg): + logging.warning(msg) + STARTUP_WARNINGS.append(msg) + + +def print_startup_warnings(): + for s in STARTUP_WARNINGS: + logging.warning(s) + STARTUP_WARNINGS.clear() diff --git a/main.py b/main.py index c6f5c3c1e..1b100fa8a 100644 --- a/main.py +++ b/main.py @@ -139,6 +139,7 @@ from server import BinaryEventTypes import nodes import comfy.model_management import comfyui_version +import app.logger def cuda_malloc_warning(): @@ -299,6 +300,7 @@ if __name__ == "__main__": event_loop, _, start_all_func = start_comfyui() try: x = start_all_func() + app.logger.print_startup_warnings() event_loop.run_until_complete(x) except KeyboardInterrupt: logging.info("\nStopped server") From 3fc688aebd9f54f8351da3a4282bd12c74e4a02e Mon Sep 17 00:00:00 2001 From: chaObserv <154517000+chaObserv@users.noreply.github.com> Date: Thu, 13 Mar 2025 05:28:59 +0800 Subject: [PATCH 38/77] Ensure the extra_args in dpmpp sde series (#7204) --- comfy/k_diffusion/sampling.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index 78678abd7..a28a30ac2 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -688,10 +688,10 @@ def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=N if len(sigmas) <= 1: return x + extra_args = {} if extra_args is None else extra_args sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() seed = extra_args.get("seed", None) noise_sampler = 
BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler - extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) sigma_fn = lambda t: t.neg().exp() t_fn = lambda sigma: sigma.log().neg() @@ -762,10 +762,10 @@ def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disabl if solver_type not in {'heun', 'midpoint'}: raise ValueError('solver_type must be \'heun\' or \'midpoint\'') + extra_args = {} if extra_args is None else extra_args seed = extra_args.get("seed", None) sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler - extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) old_denoised = None @@ -808,10 +808,10 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl if len(sigmas) <= 1: return x + extra_args = {} if extra_args is None else extra_args seed = extra_args.get("seed", None) sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=seed, cpu=True) if noise_sampler is None else noise_sampler - extra_args = {} if extra_args is None else extra_args s_in = x.new_ones([x.shape[0]]) denoised_1, denoised_2 = None, None @@ -858,7 +858,7 @@ def sample_dpmpp_3m_sde(model, x, sigmas, extra_args=None, callback=None, disabl def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None): if len(sigmas) <= 1: return x - + extra_args = {} if extra_args is None else extra_args sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_3m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler) @@ -867,7 +867,7 @@ def sample_dpmpp_3m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'): if len(sigmas) <= 1: return x - + extra_args = {} if extra_args is None else extra_args sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_2m_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, solver_type=solver_type) @@ -876,7 +876,7 @@ def sample_dpmpp_2m_sde_gpu(model, x, sigmas, extra_args=None, callback=None, di def sample_dpmpp_sde_gpu(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2): if len(sigmas) <= 1: return x - + extra_args = {} if extra_args is None else extra_args sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=extra_args.get("seed", None), cpu=False) if noise_sampler is None else noise_sampler return sample_dpmpp_sde(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, 
noise_sampler=noise_sampler, r=r) From 9b6cd9b874b7f4ff2e9770f80e84b712fa8f1661 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Wed, 12 Mar 2025 17:29:39 -0400 Subject: [PATCH 39/77] [NodeDef] Add documentation on multi_select input option (#7212) --- comfy/comfy_types/node_typing.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index 4967de716..1b71208d4 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import Literal, TypedDict +from typing_extensions import NotRequired from abc import ABC, abstractmethod from enum import Enum @@ -26,6 +27,7 @@ class IO(StrEnum): BOOLEAN = "BOOLEAN" INT = "INT" FLOAT = "FLOAT" + COMBO = "COMBO" CONDITIONING = "CONDITIONING" SAMPLER = "SAMPLER" SIGMAS = "SIGMAS" @@ -66,6 +68,7 @@ class IO(StrEnum): b = frozenset(value.split(",")) return not (b.issubset(a) or a.issubset(b)) + class RemoteInputOptions(TypedDict): route: str """The route to the remote source.""" @@ -80,6 +83,14 @@ class RemoteInputOptions(TypedDict): refresh: int """The TTL of the remote input's value in milliseconds. Specifies the interval at which the remote input's value is refreshed.""" + +class MultiSelectOptions(TypedDict): + placeholder: NotRequired[str] + """The placeholder text to display in the multi-select widget when no items are selected.""" + chip: NotRequired[bool] + """Specifies whether to use chips instead of comma separated values for the multi-select widget.""" + + class InputTypeOptions(TypedDict): """Provides type hinting for the return type of the INPUT_TYPES node function. @@ -133,9 +144,22 @@ class InputTypeOptions(TypedDict): """Specifies which folder to get preview images from if the input has the ``image_upload`` flag. """ remote: RemoteInputOptions - """Specifies the configuration for a remote input.""" + """Specifies the configuration for a remote input. + Available after ComfyUI frontend v1.9.7 + https://github.com/Comfy-Org/ComfyUI_frontend/pull/2422""" control_after_generate: bool """Specifies whether a control widget should be added to the input, adding options to automatically change the value after each prompt is queued. Currently only used for INT and COMBO types.""" + options: NotRequired[list[str | int | float]] + """COMBO type only. Specifies the selectable options for the combo widget. + Prefer: + ["COMBO", {"options": ["Option 1", "Option 2", "Option 3"]}] + Over: + [["Option 1", "Option 2", "Option 3"]] + """ + multi_select: NotRequired[MultiSelectOptions] + """COMBO type only. Specifies the configuration for a multi-select widget. 
+ Available after ComfyUI frontend v1.13.4 + https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987""" class HiddenInputTypeDict(TypedDict): From 52e566d2bcf3321cce84d3f08a3a39d55bf556cf Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Wed, 12 Mar 2025 17:30:00 -0400 Subject: [PATCH 40/77] Add codeowner for comfy/comfy_types (#7213) --- CODEOWNERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index eeec358de..72a59effe 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -19,5 +19,6 @@ /app/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata /utils/ @yoland68 @robinjhuang @huchenlei @webfiltered @pythongosssss @ltdrdata -# Extra nodes -/comfy_extras/ @yoland68 @robinjhuang @huchenlei @pythongosssss @ltdrdata @Kosinkadink +# Node developers +/comfy_extras/ @yoland68 @robinjhuang @huchenlei @pythongosssss @ltdrdata @Kosinkadink @webfiltered +/comfy/comfy_types/ @yoland68 @robinjhuang @huchenlei @pythongosssss @ltdrdata @Kosinkadink @webfiltered From 299436cfed82cb6490779fcecba8a72eee5ce39f Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 13 Mar 2025 10:05:15 -0400 Subject: [PATCH 41/77] Print mac version. --- comfy/model_management.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 3a4c93e30..1bb6156d3 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -186,12 +186,21 @@ def get_total_memory(dev=None, torch_total_too=False): else: return mem_total +def mac_version(): + try: + return tuple(int(n) for n in platform.mac_ver()[0].split(".")) + except: + return None + total_vram = get_total_memory(get_torch_device()) / (1024 * 1024) total_ram = psutil.virtual_memory().total / (1024 * 1024) logging.info("Total VRAM {:0.0f} MB, total RAM {:0.0f} MB".format(total_vram, total_ram)) try: logging.info("pytorch version: {}".format(torch_version)) + mac_ver = mac_version() + if mac_ver is not None: + print("Mac Version", mac_ver) except: pass @@ -969,12 +978,6 @@ def pytorch_attention_flash_attention(): return True #if you have pytorch attention enabled on AMD it probably supports at least mem efficient attention return False -def mac_version(): - try: - return tuple(int(n) for n in platform.mac_ver()[0].split(".")) - except: - return None - def force_upcast_attention_dtype(): upcast = args.force_upcast_attention From 35504e2f931c59190d0dd1b4ab2288f1c7f0e9f8 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 13 Mar 2025 15:03:18 -0400 Subject: [PATCH 42/77] Fix. --- comfy/model_management.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/model_management.py b/comfy/model_management.py index 1bb6156d3..b6f4e2d19 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -200,7 +200,7 @@ try: logging.info("pytorch version: {}".format(torch_version)) mac_ver = mac_version() if mac_ver is not None: - print("Mac Version", mac_ver) + logging.info("Mac Version {}".format(mac_ver)) except: pass From 7aceb9f91c1c2b860c1a65ac93a64b3bad575794 Mon Sep 17 00:00:00 2001 From: FeepingCreature <540727+FeepingCreature@users.noreply.github.com> Date: Fri, 14 Mar 2025 08:22:41 +0100 Subject: [PATCH 43/77] Add --use-flash-attention flag. (#7223) * Add --use-flash-attention flag. This is useful on AMD systems, as FA builds are still 10% faster than Pytorch cross-attention. 
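Usage note: the new backend needs the flash-attn package installed first (the error message added below gives the exact pip command), after which ComfyUI is started with `python main.py --use-flash-attention`. One detail worth flagging in the diff: the transposes around flash_attn_func are there because the two backends disagree on tensor layout. torch's scaled_dot_product_attention takes (batch, heads, seq, head_dim), while flash_attn_func expects (batch, seq, heads, head_dim). A shape-only illustration, runnable without flash-attn installed:

    import torch

    b, h, s, d = 1, 8, 77, 64
    q = torch.randn(b, h, s, d)   # layout used by the SDPA fallback: (batch, heads, seq, head_dim)
    q_flash = q.transpose(1, 2)   # layout handed to flash_attn_func: (batch, seq, heads, head_dim)
    assert q_flash.shape == (b, s, h, d)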
--- comfy/cli_args.py | 1 + comfy/ldm/modules/attention.py | 60 ++++++++++++++++++++++++++++++++++ comfy/model_management.py | 3 ++ 3 files changed, 64 insertions(+) diff --git a/comfy/cli_args.py b/comfy/cli_args.py index a864205be..91c1fe705 100644 --- a/comfy/cli_args.py +++ b/comfy/cli_args.py @@ -106,6 +106,7 @@ attn_group.add_argument("--use-split-cross-attention", action="store_true", help attn_group.add_argument("--use-quad-cross-attention", action="store_true", help="Use the sub-quadratic cross attention optimization . Ignored when xformers is used.") attn_group.add_argument("--use-pytorch-cross-attention", action="store_true", help="Use the new pytorch 2.0 cross attention function.") attn_group.add_argument("--use-sage-attention", action="store_true", help="Use sage attention.") +attn_group.add_argument("--use-flash-attention", action="store_true", help="Use FlashAttention.") parser.add_argument("--disable-xformers", action="store_true", help="Disable xformers.") diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 2758f9508..3e5089a6f 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -24,6 +24,13 @@ if model_management.sage_attention_enabled(): logging.error(f"\n\nTo use the `--use-sage-attention` feature, the `sageattention` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install sageattention") exit(-1) +if model_management.flash_attention_enabled(): + try: + from flash_attn import flash_attn_func + except ModuleNotFoundError: + logging.error(f"\n\nTo use the `--use-flash-attention` feature, the `flash-attn` package must be installed first.\ncommand:\n\t{sys.executable} -m pip install flash-attn") + exit(-1) + from comfy.cli_args import args import comfy.ops ops = comfy.ops.disable_weight_init @@ -496,6 +503,56 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape= return out +@torch.library.custom_op("flash_attention::flash_attn", mutates_args=()) +def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, + dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor: + return flash_attn_func(q, k, v, dropout_p=dropout_p, causal=causal) + + +@flash_attn_wrapper.register_fake +def flash_attn_fake(q, k, v, dropout_p=0.0, causal=False): + # Output shape is the same as q + return q.new_empty(q.shape) + + +def attention_flash(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False): + if skip_reshape: + b, _, _, dim_head = q.shape + else: + b, _, dim_head = q.shape + dim_head //= heads + q, k, v = map( + lambda t: t.view(b, -1, heads, dim_head).transpose(1, 2), + (q, k, v), + ) + + if mask is not None: + # add a batch dimension if there isn't already one + if mask.ndim == 2: + mask = mask.unsqueeze(0) + # add a heads dimension if there isn't already one + if mask.ndim == 3: + mask = mask.unsqueeze(1) + + try: + assert mask is None + out = flash_attn_wrapper( + q.transpose(1, 2), + k.transpose(1, 2), + v.transpose(1, 2), + dropout_p=0.0, + causal=False, + ).transpose(1, 2) + except Exception as e: + logging.warning(f"Flash Attention failed, using default SDPA: {e}") + out = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False) + if not skip_output_reshape: + out = ( + out.transpose(1, 2).reshape(b, -1, heads * dim_head) + ) + return out + + optimized_attention = attention_basic if model_management.sage_attention_enabled(): @@ -504,6 +561,9 @@ if 
model_management.sage_attention_enabled(): elif model_management.xformers_enabled(): logging.info("Using xformers attention") optimized_attention = attention_xformers +elif model_management.flash_attention_enabled(): + logging.info("Using Flash Attention") + optimized_attention = attention_flash elif model_management.pytorch_attention_enabled(): logging.info("Using pytorch attention") optimized_attention = attention_pytorch diff --git a/comfy/model_management.py b/comfy/model_management.py index b6f4e2d19..2a9b022be 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -930,6 +930,9 @@ def cast_to_device(tensor, device, dtype, copy=False): def sage_attention_enabled(): return args.use_sage_attention +def flash_attention_enabled(): + return args.use_flash_attention + def xformers_enabled(): global directml_enabled global cpu_state From 9c98c6358be2c7896de1547490bc87c9ad7a1ecb Mon Sep 17 00:00:00 2001 From: FeepingCreature <540727+FeepingCreature@users.noreply.github.com> Date: Fri, 14 Mar 2025 14:51:26 +0100 Subject: [PATCH 44/77] Tolerate missing `@torch.library.custom_op` (#7234) This can happen on Pytorch versions older than 2.4. --- comfy/ldm/modules/attention.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 3e5089a6f..7908d1313 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -503,16 +503,23 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape= return out -@torch.library.custom_op("flash_attention::flash_attn", mutates_args=()) -def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor: - return flash_attn_func(q, k, v, dropout_p=dropout_p, causal=causal) +try: + @torch.library.custom_op("flash_attention::flash_attn", mutates_args=()) + def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, + dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor: + return flash_attn_func(q, k, v, dropout_p=dropout_p, causal=causal) -@flash_attn_wrapper.register_fake -def flash_attn_fake(q, k, v, dropout_p=0.0, causal=False): - # Output shape is the same as q - return q.new_empty(q.shape) + @flash_attn_wrapper.register_fake + def flash_attn_fake(q, k, v, dropout_p=0.0, causal=False): + # Output shape is the same as q + return q.new_empty(q.shape) +except AttributeError as error: + FLASH_ATTN_ERROR = error + + def flash_attn_wrapper(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, + dropout_p: float = 0.0, causal: bool = False) -> torch.Tensor: + assert False, f"Could not define flash_attn_wrapper: {FLASH_ATTN_ERROR}" def attention_flash(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False): From 6a0daa79b6a8ed99b6859fb1c143081eef9e7aa0 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 14 Mar 2025 10:55:19 -0400 Subject: [PATCH 45/77] Make the SkipLayerGuidanceDIT node work on WAN. 
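What the diff below wires up, in pattern form: WanModel.forward_orig now consults transformer_options["patches_replace"]["dit"], the same block-replace convention the Hunyuan model uses earlier in this series, and any entry keyed ("double_block", i) is invoked in place of block i, receiving the block inputs as a dict plus the original block as a callback. A hedged sketch of what a consumer such as SkipLayerGuidanceDIT can install (the layer index and the pass-through logic are illustrative; the key names and calling convention come from the diff):

    def slg_patch(args, extra):
        out = extra["original_block"](args)  # run the wrapped WAN block as usual
        # a real skip-layer-guidance patch would skip or rescale out["img"]
        # for selected layers instead of returning it unchanged
        return out

    transformer_options["patches_replace"]["dit"][("double_block", 3)] = slg_patch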
--- comfy/ldm/wan/model.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index e78d846b2..9966b20a1 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -384,6 +384,7 @@ class WanModel(torch.nn.Module): context, clip_fea=None, freqs=None, + transformer_options={}, ): r""" Forward pass through the diffusion model @@ -429,8 +430,18 @@ class WanModel(torch.nn.Module): freqs=freqs, context=context) - for block in self.blocks: - x = block(x, **kwargs) + patches_replace = transformer_options.get("patches_replace", {}) + blocks_replace = patches_replace.get("dit", {}) + for i, block in enumerate(self.blocks): + if ("double_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"]) + return out + out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs}, {"original_block": block_wrap}) + x = out["img"] + else: + x = block(x, e=e0, freqs=freqs, context=context) # head x = self.head(x, e) @@ -439,7 +450,7 @@ class WanModel(torch.nn.Module): x = self.unpatchify(x, grid_sizes) return x - def forward(self, x, timestep, context, clip_fea=None, **kwargs): + def forward(self, x, timestep, context, clip_fea=None, transformer_options={},**kwargs): bs, c, t, h, w = x.shape x = comfy.ldm.common_dit.pad_to_patch_size(x, self.patch_size) patch_size = self.patch_size @@ -453,7 +464,7 @@ class WanModel(torch.nn.Module): img_ids = repeat(img_ids, "t h w c -> b (t h w) c", b=bs) freqs = self.rope_embedder(img_ids).movedim(1, 2) - return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs)[:, :, :t, :h, :w] + return self.forward_orig(x, timestep, context, clip_fea=clip_fea, freqs=freqs, transformer_options=transformer_options)[:, :, :t, :h, :w] def unpatchify(self, x, grid_sizes): r""" From a2448fc52701651d183e35fbb37924b4441f7a98 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 14 Mar 2025 18:10:37 -0400 Subject: [PATCH 46/77] Remove useless code. 
--- comfy/ldm/wan/model.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/comfy/ldm/wan/model.py b/comfy/ldm/wan/model.py index 9966b20a1..9b5e5332c 100644 --- a/comfy/ldm/wan/model.py +++ b/comfy/ldm/wan/model.py @@ -424,12 +424,6 @@ class WanModel(torch.nn.Module): context_clip = self.img_emb(clip_fea) # bs x 257 x dim context = torch.concat([context_clip, context], dim=1) - # arguments - kwargs = dict( - e=e0, - freqs=freqs, - context=context) - patches_replace = transformer_options.get("patches_replace", {}) blocks_replace = patches_replace.get("dit", {}) for i, block in enumerate(self.blocks): From c624c29d6685377faa298d4151af09e433cea875 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 14 Mar 2025 18:17:26 -0400 Subject: [PATCH 47/77] Update frontend to 1.12.9 (#7236) * Update frontend to 1.12.9 * Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e1316ccff..771e53c20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.11.8 +comfyui-frontend-package==1.12.11 torch torchsde torchvision From 7ebd8087ffb9c713d308ff74f1bd14f07d569bed Mon Sep 17 00:00:00 2001 From: Christian Byrne Date: Fri, 14 Mar 2025 22:38:10 -0700 Subject: [PATCH 48/77] hotfix fe (#7244) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 771e53c20..70689bc99 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.12.11 +comfyui-frontend-package==1.12.14 torch torchsde torchvision From 3c3988df45826808210b9964dbaf85055f80e695 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 15 Mar 2025 08:26:36 -0400 Subject: [PATCH 49/77] Show a better error message if the VAE is invalid. 
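The failure this targets: a checkpoint that ships no VAE weights leaves first_stage_model as None, and decode/encode then failed deep inside the model with an opaque error. With the guard below, misuse fails fast with an actionable message, which the next patch leans on by deliberately handing out an empty VAE. A hedged repro sketch (assumes a ComfyUI checkout on the import path):

    import comfy.sd

    vae = comfy.sd.VAE(sd={})          # no VAE weights detected, first_stage_model stays None
    vae.throw_exception_if_invalid()   # now raises the descriptive RuntimeError added below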
--- comfy/sd.py | 8 ++++++++ nodes.py | 1 + 2 files changed, 9 insertions(+) diff --git a/comfy/sd.py b/comfy/sd.py index fd98585a1..51fe425aa 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -440,6 +440,10 @@ class VAE: self.patcher = comfy.model_patcher.ModelPatcher(self.first_stage_model, load_device=self.device, offload_device=offload_device) logging.info("VAE load device: {}, offload device: {}, dtype: {}".format(self.device, offload_device, self.vae_dtype)) + def throw_exception_if_invalid(self): + if self.first_stage_model is None: + raise RuntimeError("ERROR: VAE is invalid: None\n\nIf the VAE is from a checkpoint loader node your checkpoint does not contain a valid VAE.") + def vae_encode_crop_pixels(self, pixels): downscale_ratio = self.spacial_compression_encode() @@ -495,6 +499,7 @@ class VAE: return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.downscale_ratio, out_channels=self.latent_channels, downscale=True, index_formulas=self.downscale_index_formula, output_device=self.output_device) def decode(self, samples_in): + self.throw_exception_if_invalid() pixel_samples = None try: memory_used = self.memory_used_decode(samples_in.shape, self.vae_dtype) @@ -525,6 +530,7 @@ class VAE: return pixel_samples def decode_tiled(self, samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None): + self.throw_exception_if_invalid() memory_used = self.memory_used_decode(samples.shape, self.vae_dtype) #TODO: calculate mem required for tile model_management.load_models_gpu([self.patcher], memory_required=memory_used) dims = samples.ndim - 2 @@ -553,6 +559,7 @@ class VAE: return output.movedim(1, -1) def encode(self, pixel_samples): + self.throw_exception_if_invalid() pixel_samples = self.vae_encode_crop_pixels(pixel_samples) pixel_samples = pixel_samples.movedim(-1, 1) if self.latent_dim == 3 and pixel_samples.ndim < 5: @@ -585,6 +592,7 @@ class VAE: return samples def encode_tiled(self, pixel_samples, tile_x=None, tile_y=None, overlap=None, tile_t=None, overlap_t=None): + self.throw_exception_if_invalid() pixel_samples = self.vae_encode_crop_pixels(pixel_samples) dims = self.latent_dim pixel_samples = pixel_samples.movedim(-1, 1) diff --git a/nodes.py b/nodes.py index 63791e208..71d1b8dd7 100644 --- a/nodes.py +++ b/nodes.py @@ -770,6 +770,7 @@ class VAELoader: vae_path = folder_paths.get_full_path_or_raise("vae", vae_name) sd = comfy.utils.load_torch_file(vae_path) vae = comfy.sd.VAE(sd=sd) + vae.throw_exception_if_invalid() return (vae,) class ControlNetLoader: From 55a1b09ddc9f81b6406710e69df3ec2eaa4880ac Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sat, 15 Mar 2025 08:27:49 -0400 Subject: [PATCH 50/77] Allow loading diffusion model files with the "Load Checkpoint" node. 
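Mechanics of the fallback below: when model_detection finds no checkpoint config in the state dict, the file is retried as a bare diffusion model and the usual 4-tuple is still returned, with None in the CLIP and CLIPVision slots and the deliberately invalid VAE(sd={}) from the previous patch in the VAE slot. Workflows that only consume the MODEL output keep working; anything that touches the VAE hits the clear error added above. Annotated contract (names taken from the diff):

    model, clip, vae, clipvision = load_state_dict_guess_config(sd)
    # for a diffusion-model-only file after this patch:
    #   model      -> load_diffusion_model_state_dict(sd, model_options={})
    #   clip       -> None
    #   vae        -> VAE(sd={}), raises if actually used
    #   clipvision -> None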
--- comfy/sd.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/comfy/sd.py b/comfy/sd.py index 51fe425aa..3d72a04d6 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -907,7 +907,12 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c model_config = model_detection.model_config_from_unet(sd, diffusion_model_prefix, metadata=metadata) if model_config is None: - return None + logging.warning("Warning, This is not a checkpoint file, trying to load it as a diffusion model only.") + diffusion_model = load_diffusion_model_state_dict(sd, model_options={}) + if diffusion_model is None: + return None + return (diffusion_model, None, VAE(sd={}), None) # The VAE object is there to throw an exception if it's actually used' + unet_weight_dtype = list(model_config.supported_inference_dtypes) if model_config.scaled_fp8 is not None: From fd5297131f81d03966adf3f2250d4502f34a8828 Mon Sep 17 00:00:00 2001 From: chaObserv <154517000+chaObserv@users.noreply.github.com> Date: Sun, 16 Mar 2025 18:02:25 +0800 Subject: [PATCH 51/77] Guard the edge cases of noise term in er_sde (#7265) --- comfy/k_diffusion/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index a28a30ac2..5b8d8000d 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1419,6 +1419,6 @@ def sample_er_sde(model, x, sigmas, extra_args=None, callback=None, disable=None old_denoised_d = denoised_d if s_noise != 0 and sigmas[i + 1] > 0: - x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (sigmas[i + 1] ** 2 - sigmas[i] ** 2 * r ** 2).sqrt() + x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * (sigmas[i + 1] ** 2 - sigmas[i] ** 2 * r ** 2).sqrt().nan_to_num(nan=0.0) old_denoised = denoised return x From 2e24a15905122b4f310ac590265cea83aac96b15 Mon Sep 17 00:00:00 2001 From: Jedrzej Kosinski Date: Sun, 16 Mar 2025 05:02:45 -0500 Subject: [PATCH 52/77] Call unpatch_hooks at the start of ModelPatcher.partially_unload (#7253) * Call unpatch_hooks at the start of ModelPatcher.partially_unload * Only call unpatch_hooks in partially_unload if lowvram is possible --- comfy/model_patcher.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index e291158ce..b7cb12dfc 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -747,6 +747,7 @@ class ModelPatcher: def partially_unload(self, device_to, memory_to_free=0): with self.use_ejected(): + hooks_unpatched = False memory_freed = 0 patch_counter = 0 unload_list = self._load_list() @@ -770,6 +771,10 @@ class ModelPatcher: move_weight = False break + if not hooks_unpatched: + self.unpatch_hooks() + hooks_unpatched = True + if bk.inplace_update: comfy.utils.copy_to_param(self.model, key, bk.weight) else: From e8e990d6b8b5c813c87d1aeaed3e5110c7aba166 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 16 Mar 2025 06:29:12 -0400 Subject: [PATCH 53/77] Cleanup code. 
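The subject undersells the diff below: it changes numerics, not just style. q and k are now cast to the dtype of the rotary table (pe / freqs_cis) before the rotation instead of always being upcast to float32, so the precision of the RoPE application follows the table. For reference, the expression pe[..., 0] * x[..., 0] + pe[..., 1] * x[..., 1] computes the standard rotary embedding, a 2-D rotation of each (even, odd) feature pair; pe is built by rope() (outside this hunk) as a stack of per-pair 2x2 rotation matrices, so indexing its last axis selects the two matrix columns:

    x'_even = cos(t_i) * x_even - sin(t_i) * x_odd
    x'_odd  = sin(t_i) * x_even + cos(t_i) * x_odd,    with t_i = pos * theta^(-2i/d)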
--- comfy/ldm/flux/math.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/comfy/ldm/flux/math.py b/comfy/ldm/flux/math.py index 36b67931c..c0cbd2914 100644 --- a/comfy/ldm/flux/math.py +++ b/comfy/ldm/flux/math.py @@ -10,8 +10,8 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor: q_shape = q.shape k_shape = k.shape - q = q.float().reshape(*q.shape[:-1], -1, 1, 2) - k = k.float().reshape(*k.shape[:-1], -1, 1, 2) + q = q.to(dtype=pe.dtype).reshape(*q.shape[:-1], -1, 1, 2) + k = k.to(dtype=pe.dtype).reshape(*k.shape[:-1], -1, 1, 2) q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v) k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v) @@ -36,8 +36,8 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor: def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor): - xq_ = xq.float().reshape(*xq.shape[:-1], -1, 1, 2) - xk_ = xk.float().reshape(*xk.shape[:-1], -1, 1, 2) + xq_ = xq.to(dtype=freqs_cis.dtype).reshape(*xq.shape[:-1], -1, 1, 2) + xk_ = xk.to(dtype=freqs_cis.dtype).reshape(*xk.shape[:-1], -1, 1, 2) xq_out = freqs_cis[..., 0] * xq_[..., 0] + freqs_cis[..., 1] * xq_[..., 1] xk_out = freqs_cis[..., 0] * xk_[..., 0] + freqs_cis[..., 1] * xk_[..., 1] return xq_out.reshape(*xq.shape).type_as(xq), xk_out.reshape(*xk.shape).type_as(xk) From 6dc7b0bfe3cd44302444f0f34db0e62b86764482 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Mar 2025 05:53:54 -0400 Subject: [PATCH 54/77] Add support for giant dinov2 image encoder. --- comfy/clip_vision.py | 11 +- comfy/image_encoders/dino2.py | 141 ++++++++++++++++++++++++++ comfy/image_encoders/dino2_giant.json | 21 ++++ 3 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 comfy/image_encoders/dino2.py create mode 100644 comfy/image_encoders/dino2_giant.json diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index 297b3bca3..25baf5ca8 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -9,6 +9,7 @@ import comfy.model_patcher import comfy.model_management import comfy.utils import comfy.clip_model +import comfy.image_encoders.dino2 class Output: def __getitem__(self, key): @@ -34,6 +35,11 @@ def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], s image = torch.clip((255. 
* image), 0, 255).round() / 255.0 return (image - mean.view([3,1,1])) / std.view([3,1,1]) +IMAGE_ENCODERS = { + "clip_vision": comfy.clip_model.CLIPVisionModelProjection, + "dinov2": comfy.image_encoders.dino2.Dinov2Model, +} + class ClipVisionModel(): def __init__(self, json_config): with open(json_config) as f: @@ -42,10 +48,11 @@ class ClipVisionModel(): self.image_size = config.get("image_size", 224) self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073]) self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711]) + model_class = IMAGE_ENCODERS.get(config.get("model_type", "clip_vision")) self.load_device = comfy.model_management.text_encoder_device() offload_device = comfy.model_management.text_encoder_offload_device() self.dtype = comfy.model_management.text_encoder_dtype(self.load_device) - self.model = comfy.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, comfy.ops.manual_cast) + self.model = model_class(config, self.dtype, offload_device, comfy.ops.manual_cast) self.model.eval() self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) @@ -111,6 +118,8 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False): json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json") else: json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json") + elif "embeddings.patch_embeddings.projection.weight" in sd: + json_config = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "image_encoders"), "dino2_giant.json") else: return None diff --git a/comfy/image_encoders/dino2.py b/comfy/image_encoders/dino2.py new file mode 100644 index 000000000..130ed6fd7 --- /dev/null +++ b/comfy/image_encoders/dino2.py @@ -0,0 +1,141 @@ +import torch +from comfy.text_encoders.bert import BertAttention +import comfy.model_management +from comfy.ldm.modules.attention import optimized_attention_for_device + + +class Dino2AttentionOutput(torch.nn.Module): + def __init__(self, input_dim, output_dim, layer_norm_eps, dtype, device, operations): + super().__init__() + self.dense = operations.Linear(input_dim, output_dim, dtype=dtype, device=device) + + def forward(self, x): + return self.dense(x) + + +class Dino2AttentionBlock(torch.nn.Module): + def __init__(self, embed_dim, heads, layer_norm_eps, dtype, device, operations): + super().__init__() + self.attention = BertAttention(embed_dim, heads, dtype, device, operations) + self.output = Dino2AttentionOutput(embed_dim, embed_dim, layer_norm_eps, dtype, device, operations) + + def forward(self, x, mask, optimized_attention): + return self.output(self.attention(x, mask, optimized_attention)) + + +class LayerScale(torch.nn.Module): + def __init__(self, dim, dtype, device, operations): + super().__init__() + self.lambda1 = torch.nn.Parameter(torch.empty(dim, device=device, dtype=dtype)) + + def forward(self, x): + return x * comfy.model_management.cast_to_device(self.lambda1, x.device, x.dtype) + + +class SwiGLUFFN(torch.nn.Module): + def __init__(self, dim, dtype, device, operations): + super().__init__() + in_features = out_features = dim + hidden_features = int(dim * 4) + hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8 + + self.weights_in = operations.Linear(in_features, 2 * hidden_features, bias=True, device=device, dtype=dtype) + self.weights_out = operations.Linear(hidden_features, out_features, bias=True, 
device=device, dtype=dtype) + + def forward(self, x): + x = self.weights_in(x) + x1, x2 = x.chunk(2, dim=-1) + x = torch.nn.functional.silu(x1) * x2 + return self.weights_out(x) + + +class Dino2Block(torch.nn.Module): + def __init__(self, dim, num_heads, layer_norm_eps, dtype, device, operations): + super().__init__() + self.attention = Dino2AttentionBlock(dim, num_heads, layer_norm_eps, dtype, device, operations) + self.layer_scale1 = LayerScale(dim, dtype, device, operations) + self.layer_scale2 = LayerScale(dim, dtype, device, operations) + self.mlp = SwiGLUFFN(dim, dtype, device, operations) + self.norm1 = operations.LayerNorm(dim, eps=layer_norm_eps, dtype=dtype, device=device) + self.norm2 = operations.LayerNorm(dim, eps=layer_norm_eps, dtype=dtype, device=device) + + def forward(self, x, optimized_attention): + x = x + self.layer_scale1(self.attention(self.norm1(x), None, optimized_attention)) + x = x + self.layer_scale2(self.mlp(self.norm2(x))) + return x + + +class Dino2Encoder(torch.nn.Module): + def __init__(self, dim, num_heads, layer_norm_eps, num_layers, dtype, device, operations): + super().__init__() + self.layer = torch.nn.ModuleList([Dino2Block(dim, num_heads, layer_norm_eps, dtype, device, operations) for _ in range(num_layers)]) + + def forward(self, x, intermediate_output=None): + optimized_attention = optimized_attention_for_device(x.device, False, small_input=True) + + if intermediate_output is not None: + if intermediate_output < 0: + intermediate_output = len(self.layer) + intermediate_output + + intermediate = None + for i, l in enumerate(self.layer): + x = l(x, optimized_attention) + if i == intermediate_output: + intermediate = x.clone() + return x, intermediate + + +class Dino2PatchEmbeddings(torch.nn.Module): + def __init__(self, dim, num_channels=3, patch_size=14, image_size=518, dtype=None, device=None, operations=None): + super().__init__() + self.projection = operations.Conv2d( + in_channels=num_channels, + out_channels=dim, + kernel_size=patch_size, + stride=patch_size, + bias=True, + dtype=dtype, + device=device + ) + + def forward(self, pixel_values): + return self.projection(pixel_values).flatten(2).transpose(1, 2) + + +class Dino2Embeddings(torch.nn.Module): + def __init__(self, dim, dtype, device, operations): + super().__init__() + patch_size = 14 + image_size = 518 + + self.patch_embeddings = Dino2PatchEmbeddings(dim, patch_size=patch_size, image_size=image_size, dtype=dtype, device=device, operations=operations) + self.position_embeddings = torch.nn.Parameter(torch.empty(1, (image_size // patch_size) ** 2 + 1, dim, dtype=dtype, device=device)) + self.cls_token = torch.nn.Parameter(torch.empty(1, 1, dim, dtype=dtype, device=device)) + self.mask_token = torch.nn.Parameter(torch.empty(1, dim, dtype=dtype, device=device)) + + def forward(self, pixel_values): + x = self.patch_embeddings(pixel_values) + # TODO: mask_token? 
+ x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = x + comfy.model_management.cast_to_device(self.position_embeddings, x.device, x.dtype) + return x + + +class Dinov2Model(torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + num_layers = config_dict["num_hidden_layers"] + dim = config_dict["hidden_size"] + heads = config_dict["num_attention_heads"] + layer_norm_eps = config_dict["layer_norm_eps"] + + self.embeddings = Dino2Embeddings(dim, dtype, device, operations) + self.encoder = Dino2Encoder(dim, heads, layer_norm_eps, num_layers, dtype, device, operations) + self.layernorm = operations.LayerNorm(dim, eps=layer_norm_eps, dtype=dtype, device=device) + + def forward(self, pixel_values, attention_mask=None, intermediate_output=None): + x = self.embeddings(pixel_values) + x, i = self.encoder(x, intermediate_output=intermediate_output) + x = self.layernorm(x) + pooled_output = x[:, 0, :] + return x, i, pooled_output, None diff --git a/comfy/image_encoders/dino2_giant.json b/comfy/image_encoders/dino2_giant.json new file mode 100644 index 000000000..f6076a4dc --- /dev/null +++ b/comfy/image_encoders/dino2_giant.json @@ -0,0 +1,21 @@ +{ + "attention_probs_dropout_prob": 0.0, + "drop_path_rate": 0.0, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_size": 1536, + "image_size": 518, + "initializer_range": 0.02, + "layer_norm_eps": 1e-06, + "layerscale_value": 1.0, + "mlp_ratio": 4, + "model_type": "dinov2", + "num_attention_heads": 24, + "num_channels": 3, + "num_hidden_layers": 40, + "patch_size": 14, + "qkv_bias": true, + "use_swiglu_ffn": true, + "image_mean": [0.485, 0.456, 0.406], + "image_std": [0.229, 0.224, 0.225] +} From 50614f1b7933244c01d85880c41b50bbd0c4de8b Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Mon, 17 Mar 2025 13:56:11 -0400 Subject: [PATCH 55/77] Fix regression with clip vision. --- comfy/clip_vision.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comfy/clip_vision.py b/comfy/clip_vision.py index 25baf5ca8..87d32a66e 100644 --- a/comfy/clip_vision.py +++ b/comfy/clip_vision.py @@ -36,7 +36,8 @@ def clip_preprocess(image, size=224, mean=[0.48145466, 0.4578275, 0.40821073], s return (image - mean.view([3,1,1])) / std.view([3,1,1]) IMAGE_ENCODERS = { - "clip_vision": comfy.clip_model.CLIPVisionModelProjection, + "clip_vision_model": comfy.clip_model.CLIPVisionModelProjection, + "siglip_vision_model": comfy.clip_model.CLIPVisionModelProjection, "dinov2": comfy.image_encoders.dino2.Dinov2Model, } @@ -48,7 +49,7 @@ class ClipVisionModel(): self.image_size = config.get("image_size", 224) self.image_mean = config.get("image_mean", [0.48145466, 0.4578275, 0.40821073]) self.image_std = config.get("image_std", [0.26862954, 0.26130258, 0.27577711]) - model_class = IMAGE_ENCODERS.get(config.get("model_type", "clip_vision")) + model_class = IMAGE_ENCODERS.get(config.get("model_type", "clip_vision_model")) self.load_device = comfy.model_management.text_encoder_device() offload_device = comfy.model_management.text_encoder_offload_device() self.dtype = comfy.model_management.text_encoder_dtype(self.load_device) From 3b19fc76e34692d779ceffe233e0a952cbcd20ab Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Tue, 18 Mar 2025 05:09:25 -0400 Subject: [PATCH 56/77] Allow disabling pe in flux code for some other models. 
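Both attention() in comfy/ldm/flux/math.py and the Flux forward pass now treat the positional embedding as optional: when img_ids is None, no ids are concatenated, pe stays None, and the rotary step is skipped entirely. A condensed sketch of the new control flow (simplified from the module code):

    from comfy.ldm.modules.attention import optimized_attention

    def attention(q, k, v, pe=None, mask=None):
        if pe is not None:
            # Same rotary application as before, only now behind a None check.
            q_shape, k_shape = q.shape, k.shape
            q = q.to(dtype=pe.dtype).reshape(*q.shape[:-1], -1, 1, 2)
            k = k.to(dtype=pe.dtype).reshape(*k.shape[:-1], -1, 1, 2)
            q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v)
            k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v)
        return optimized_attention(q, k, v, q.shape[1], skip_reshape=True, mask=mask)

Models that reuse the flux blocks without rotary embeddings (the Hunyuan3D DiT added in a later patch is one) can simply pass pe=None.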
--- comfy/ldm/flux/math.py | 9 +++++---- comfy/ldm/flux/model.py | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/comfy/ldm/flux/math.py b/comfy/ldm/flux/math.py index c0cbd2914..3e0978176 100644 --- a/comfy/ldm/flux/math.py +++ b/comfy/ldm/flux/math.py @@ -10,10 +10,11 @@ def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor, mask=None) -> Tensor: q_shape = q.shape k_shape = k.shape - q = q.to(dtype=pe.dtype).reshape(*q.shape[:-1], -1, 1, 2) - k = k.to(dtype=pe.dtype).reshape(*k.shape[:-1], -1, 1, 2) - q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v) - k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v) + if pe is not None: + q = q.to(dtype=pe.dtype).reshape(*q.shape[:-1], -1, 1, 2) + k = k.to(dtype=pe.dtype).reshape(*k.shape[:-1], -1, 1, 2) + q = (pe[..., 0] * q[..., 0] + pe[..., 1] * q[..., 1]).reshape(*q_shape).type_as(v) + k = (pe[..., 0] * k[..., 0] + pe[..., 1] * k[..., 1]).reshape(*k_shape).type_as(v) heads = q.shape[1] x = optimized_attention(q, k, v, heads, skip_reshape=True, mask=mask) diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py index cc34f7585..ef4ba4106 100644 --- a/comfy/ldm/flux/model.py +++ b/comfy/ldm/flux/model.py @@ -115,8 +115,11 @@ class Flux(nn.Module): vec = vec + self.vector_in(y[:,:self.params.vec_in_dim]) txt = self.txt_in(txt) - ids = torch.cat((txt_ids, img_ids), dim=1) - pe = self.pe_embedder(ids) + if img_ids is not None: + ids = torch.cat((txt_ids, img_ids), dim=1) + pe = self.pe_embedder(ids) + else: + pe = None blocks_replace = patches_replace.get("dit", {}) for i, block in enumerate(self.double_blocks): From 11f1b41bab62ece770aa1d3aacc59a450e277b41 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 19 Mar 2025 16:19:50 -0400 Subject: [PATCH 57/77] Initial Hunyuan3Dv2 implementation. Supports the multiview, mini, turbo models and VAEs. 
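The new pieces compose into an image-to-3D pipeline: a CLIP vision embedding conditions a flux-style flow-matching DiT, the ShapeVAE decodes the sampled latent into a voxel occupancy grid, and the grid is turned into triangle geometry. A rough sketch of the wiring in terms of the node classes added below (illustrative only; a real workflow runs these through the graph executor with a loaded Hunyuan3D checkpoint, and clip_vision_output / sampled_latent stand in for upstream node outputs):

    positive, negative = Hunyuan3Dv2Conditioning().encode(clip_vision_output)
    latent, = EmptyLatentHunyuan3Dv2().generate(resolution=3072, batch_size=1)
    # ... run a regular KSampler over `latent` with the Hunyuan3Dv2 model ...
    voxel, = VAEDecodeHunyuan3D().decode(vae, sampled_latent, num_chunks=8000, octree_resolution=256)
    mesh, = VoxelToMeshBasic().decode(voxel, threshold=0.6)

The voxel_to_mesh helper can be sanity-checked in isolation; a solid 8x8x8 cube inside a 16^3 grid exposes 6 * 64 cell faces, two triangles each:

    import torch
    voxels = torch.zeros(16, 16, 16)
    voxels[4:12, 4:12, 4:12] = 1.0
    vertices, faces = voxel_to_mesh(voxels, threshold=0.5)
    assert faces.shape == (6 * 64 * 2, 3)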
--- comfy/latent_formats.py | 10 + comfy/ldm/hunyuan3d/model.py | 135 ++++++++ comfy/ldm/hunyuan3d/vae.py | 587 ++++++++++++++++++++++++++++++++ comfy/model_base.py | 16 + comfy/model_detection.py | 17 +- comfy/sd.py | 15 +- comfy/supported_models.py | 38 ++- comfy_extras/nodes_hunyuan3d.py | 410 ++++++++++++++++++++++ nodes.py | 1 + 9 files changed, 1225 insertions(+), 4 deletions(-) create mode 100644 comfy/ldm/hunyuan3d/model.py create mode 100644 comfy/ldm/hunyuan3d/vae.py create mode 100644 comfy_extras/nodes_hunyuan3d.py diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 622c1df54..556c39512 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -456,3 +456,13 @@ class Wan21(LatentFormat): latents_mean = self.latents_mean.to(latent.device, latent.dtype) latents_std = self.latents_std.to(latent.device, latent.dtype) return latent * latents_std / self.scale_factor + latents_mean + +class Hunyuan3Dv2(LatentFormat): + latent_channels = 64 + latent_dimensions = 1 + scale_factor = 0.9990943042622529 + +class Hunyuan3Dv2mini(LatentFormat): + latent_channels = 64 + latent_dimensions = 1 + scale_factor = 1.0188137142395404 diff --git a/comfy/ldm/hunyuan3d/model.py b/comfy/ldm/hunyuan3d/model.py new file mode 100644 index 000000000..4e18358f0 --- /dev/null +++ b/comfy/ldm/hunyuan3d/model.py @@ -0,0 +1,135 @@ +import torch +from torch import nn +from comfy.ldm.flux.layers import ( + DoubleStreamBlock, + LastLayer, + MLPEmbedder, + SingleStreamBlock, + timestep_embedding, +) + + +class Hunyuan3Dv2(nn.Module): + def __init__( + self, + in_channels=64, + context_in_dim=1536, + hidden_size=1024, + mlp_ratio=4.0, + num_heads=16, + depth=16, + depth_single_blocks=32, + qkv_bias=True, + guidance_embed=False, + image_model=None, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.dtype = dtype + + if hidden_size % num_heads != 0: + raise ValueError( + f"Hidden size {hidden_size} must be divisible by num_heads {num_heads}" + ) + + self.max_period = 1000 # While reimplementing the model I noticed that they messed up. 
This 1000 value was meant to be the time_factor but they set the max_period instead + self.latent_in = operations.Linear(in_channels, hidden_size, bias=True, dtype=dtype, device=device) + self.time_in = MLPEmbedder(in_dim=256, hidden_dim=hidden_size, dtype=dtype, device=device, operations=operations) + self.guidance_in = ( + MLPEmbedder(in_dim=256, hidden_dim=hidden_size, dtype=dtype, device=device, operations=operations) if guidance_embed else None + ) + self.cond_in = operations.Linear(context_in_dim, hidden_size, dtype=dtype, device=device) + self.double_blocks = nn.ModuleList( + [ + DoubleStreamBlock( + hidden_size, + num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + dtype=dtype, device=device, operations=operations + ) + for _ in range(depth) + ] + ) + self.single_blocks = nn.ModuleList( + [ + SingleStreamBlock( + hidden_size, + num_heads, + mlp_ratio=mlp_ratio, + dtype=dtype, device=device, operations=operations + ) + for _ in range(depth_single_blocks) + ] + ) + self.final_layer = LastLayer(hidden_size, 1, in_channels, dtype=dtype, device=device, operations=operations) + + def forward(self, x, timestep, context, guidance=None, transformer_options={}, **kwargs): + x = x.movedim(-1, -2) + timestep = 1.0 - timestep + txt = context + img = self.latent_in(x) + + vec = self.time_in(timestep_embedding(timestep, 256, self.max_period).to(dtype=img.dtype)) + if self.guidance_in is not None: + if guidance is not None: + vec = vec + self.guidance_in(timestep_embedding(guidance, 256, self.max_period).to(img.dtype)) + + txt = self.cond_in(txt) + pe = None + attn_mask = None + + patches_replace = transformer_options.get("patches_replace", {}) + blocks_replace = patches_replace.get("dit", {}) + for i, block in enumerate(self.double_blocks): + if ("double_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"], out["txt"] = block(img=args["img"], + txt=args["txt"], + vec=args["vec"], + pe=args["pe"], + attn_mask=args.get("attn_mask")) + return out + + out = blocks_replace[("double_block", i)]({"img": img, + "txt": txt, + "vec": vec, + "pe": pe, + "attn_mask": attn_mask}, + {"original_block": block_wrap}) + txt = out["txt"] + img = out["img"] + else: + img, txt = block(img=img, + txt=txt, + vec=vec, + pe=pe, + attn_mask=attn_mask) + + img = torch.cat((txt, img), 1) + + for i, block in enumerate(self.single_blocks): + if ("single_block", i) in blocks_replace: + def block_wrap(args): + out = {} + out["img"] = block(args["img"], + vec=args["vec"], + pe=args["pe"], + attn_mask=args.get("attn_mask")) + return out + + out = blocks_replace[("single_block", i)]({"img": img, + "vec": vec, + "pe": pe, + "attn_mask": attn_mask}, + {"original_block": block_wrap}) + img = out["img"] + else: + img = block(img, vec=vec, pe=pe, attn_mask=attn_mask) + + img = img[:, txt.shape[1]:, ...] + img = self.final_layer(img, vec) + return img.movedim(-2, -1) * (-1.0) diff --git a/comfy/ldm/hunyuan3d/vae.py b/comfy/ldm/hunyuan3d/vae.py new file mode 100644 index 000000000..311c9b416 --- /dev/null +++ b/comfy/ldm/hunyuan3d/vae.py @@ -0,0 +1,587 @@ +# Original: https://github.com/Tencent/Hunyuan3D-2/blob/main/hy3dgen/shapegen/models/autoencoders/model.py +# Since the header on their VAE source file was a bit confusing we asked for permission to use this code from tencent under the GPL license used in ComfyUI. 
+ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +from typing import Union, Tuple, List, Callable, Optional + +import numpy as np +from einops import repeat, rearrange +from tqdm import tqdm +import logging + +import comfy.ops +ops = comfy.ops.disable_weight_init + +def generate_dense_grid_points( + bbox_min: np.ndarray, + bbox_max: np.ndarray, + octree_resolution: int, + indexing: str = "ij", +): + length = bbox_max - bbox_min + num_cells = octree_resolution + + x = np.linspace(bbox_min[0], bbox_max[0], int(num_cells) + 1, dtype=np.float32) + y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32) + z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32) + [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing) + xyz = np.stack((xs, ys, zs), axis=-1) + grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1] + + return xyz, grid_size, length + + +class VanillaVolumeDecoder: + @torch.no_grad() + def __call__( + self, + latents: torch.FloatTensor, + geo_decoder: Callable, + bounds: Union[Tuple[float], List[float], float] = 1.01, + num_chunks: int = 10000, + octree_resolution: int = None, + enable_pbar: bool = True, + **kwargs, + ): + device = latents.device + dtype = latents.dtype + batch_size = latents.shape[0] + + # 1. generate query points + if isinstance(bounds, float): + bounds = [-bounds, -bounds, -bounds, bounds, bounds, bounds] + + bbox_min, bbox_max = np.array(bounds[0:3]), np.array(bounds[3:6]) + xyz_samples, grid_size, length = generate_dense_grid_points( + bbox_min=bbox_min, + bbox_max=bbox_max, + octree_resolution=octree_resolution, + indexing="ij" + ) + xyz_samples = torch.from_numpy(xyz_samples).to(device, dtype=dtype).contiguous().reshape(-1, 3) + + # 2. latents to 3d volume + batch_logits = [] + for start in tqdm(range(0, xyz_samples.shape[0], num_chunks), desc="Volume Decoding", + disable=not enable_pbar): + chunk_queries = xyz_samples[start: start + num_chunks, :] + chunk_queries = repeat(chunk_queries, "p c -> b p c", b=batch_size) + logits = geo_decoder(queries=chunk_queries, latents=latents) + batch_logits.append(logits) + + grid_logits = torch.cat(batch_logits, dim=1) + grid_logits = grid_logits.view((batch_size, *grid_size)).float() + + return grid_logits + + +class FourierEmbedder(nn.Module): + """The sin/cosine positional embedding. Given an input tensor `x` of shape [n_batch, ..., c_dim], it converts + each feature dimension of `x[..., i]` into: + [ + sin(x[..., i]), + sin(f_1*x[..., i]), + sin(f_2*x[..., i]), + ... + sin(f_N * x[..., i]), + cos(x[..., i]), + cos(f_1*x[..., i]), + cos(f_2*x[..., i]), + ... + cos(f_N * x[..., i]), + x[..., i] # only present if include_input is True. + ], here f_i is the frequency. + + Denote the space is [0 / num_freqs, 1 / num_freqs, 2 / num_freqs, 3 / num_freqs, ..., (num_freqs - 1) / num_freqs]. + If logspace is True, then the frequency f_i is [2^(0 / num_freqs), ..., 2^(i / num_freqs), ...]; + Otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1)]. + + Args: + num_freqs (int): the number of frequencies, default is 6; + logspace (bool): If logspace is True, then the frequency f_i is [..., 2^(i / num_freqs), ...], + otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1)]; + input_dim (int): the input dimension, default is 3; + include_input (bool): include the input tensor or not, default is True. 
+ + Attributes: + frequencies (torch.Tensor): If logspace is True, then the frequency f_i is [..., 2^(i / num_freqs), ...], + otherwise, the frequencies are linearly spaced between [1.0, 2^(num_freqs - 1); + + out_dim (int): the embedding size, if include_input is True, it is input_dim * (num_freqs * 2 + 1), + otherwise, it is input_dim * num_freqs * 2. + + """ + + def __init__(self, + num_freqs: int = 6, + logspace: bool = True, + input_dim: int = 3, + include_input: bool = True, + include_pi: bool = True) -> None: + + """The initialization""" + + super().__init__() + + if logspace: + frequencies = 2.0 ** torch.arange( + num_freqs, + dtype=torch.float32 + ) + else: + frequencies = torch.linspace( + 1.0, + 2.0 ** (num_freqs - 1), + num_freqs, + dtype=torch.float32 + ) + + if include_pi: + frequencies *= torch.pi + + self.register_buffer("frequencies", frequencies, persistent=False) + self.include_input = include_input + self.num_freqs = num_freqs + + self.out_dim = self.get_dims(input_dim) + + def get_dims(self, input_dim): + temp = 1 if self.include_input or self.num_freqs == 0 else 0 + out_dim = input_dim * (self.num_freqs * 2 + temp) + + return out_dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ Forward process. + + Args: + x: tensor of shape [..., dim] + + Returns: + embedding: an embedding of `x` of shape [..., dim * (num_freqs * 2 + temp)] + where temp is 1 if include_input is True and 0 otherwise. + """ + + if self.num_freqs > 0: + embed = (x[..., None].contiguous() * self.frequencies.to(device=x.device, dtype=x.dtype)).view(*x.shape[:-1], -1) + if self.include_input: + return torch.cat((x, embed.sin(), embed.cos()), dim=-1) + else: + return torch.cat((embed.sin(), embed.cos()), dim=-1) + else: + return x + + +class CrossAttentionProcessor: + def __call__(self, attn, q, k, v): + out = F.scaled_dot_product_attention(q, k, v) + return out + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """ + + def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def forward(self, x): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + + This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for + changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + + """ + if self.drop_prob == 0. 
or not self.training: + return x + keep_prob = 1 - self.drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and self.scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + def extra_repr(self): + return f'drop_prob={round(self.drop_prob, 3):0.3f}' + + +class MLP(nn.Module): + def __init__( + self, *, + width: int, + expand_ratio: int = 4, + output_width: int = None, + drop_path_rate: float = 0.0 + ): + super().__init__() + self.width = width + self.c_fc = ops.Linear(width, width * expand_ratio) + self.c_proj = ops.Linear(width * expand_ratio, output_width if output_width is not None else width) + self.gelu = nn.GELU() + self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity() + + def forward(self, x): + return self.drop_path(self.c_proj(self.gelu(self.c_fc(x)))) + + +class QKVMultiheadCrossAttention(nn.Module): + def __init__( + self, + *, + heads: int, + width=None, + qk_norm=False, + norm_layer=ops.LayerNorm + ): + super().__init__() + self.heads = heads + self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() + self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() + + self.attn_processor = CrossAttentionProcessor() + + def forward(self, q, kv): + _, n_ctx, _ = q.shape + bs, n_data, width = kv.shape + attn_ch = width // self.heads // 2 + q = q.view(bs, n_ctx, self.heads, -1) + kv = kv.view(bs, n_data, self.heads, -1) + k, v = torch.split(kv, attn_ch, dim=-1) + + q = self.q_norm(q) + k = self.k_norm(k) + q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v)) + out = self.attn_processor(self, q, k, v) + out = out.transpose(1, 2).reshape(bs, n_ctx, -1) + return out + + +class MultiheadCrossAttention(nn.Module): + def __init__( + self, + *, + width: int, + heads: int, + qkv_bias: bool = True, + data_width: Optional[int] = None, + norm_layer=ops.LayerNorm, + qk_norm: bool = False, + kv_cache: bool = False, + ): + super().__init__() + self.width = width + self.heads = heads + self.data_width = width if data_width is None else data_width + self.c_q = ops.Linear(width, width, bias=qkv_bias) + self.c_kv = ops.Linear(self.data_width, width * 2, bias=qkv_bias) + self.c_proj = ops.Linear(width, width) + self.attention = QKVMultiheadCrossAttention( + heads=heads, + width=width, + norm_layer=norm_layer, + qk_norm=qk_norm + ) + self.kv_cache = kv_cache + self.data = None + + def forward(self, x, data): + x = self.c_q(x) + if self.kv_cache: + if self.data is None: + self.data = self.c_kv(data) + logging.info('Save kv cache,this should be called only once for one mesh') + data = self.data + else: + data = self.c_kv(data) + x = self.attention(x, data) + x = self.c_proj(x) + return x + + +class ResidualCrossAttentionBlock(nn.Module): + def __init__( + self, + *, + width: int, + heads: int, + mlp_expand_ratio: int = 4, + data_width: Optional[int] = None, + qkv_bias: bool = True, + norm_layer=ops.LayerNorm, + qk_norm: bool = False + ): + super().__init__() + + if data_width is None: + data_width = width + + self.attn = MultiheadCrossAttention( + width=width, + heads=heads, + data_width=data_width, + qkv_bias=qkv_bias, + norm_layer=norm_layer, + qk_norm=qk_norm + ) + self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6) + self.ln_2 = norm_layer(data_width, elementwise_affine=True, 
eps=1e-6) + self.ln_3 = norm_layer(width, elementwise_affine=True, eps=1e-6) + self.mlp = MLP(width=width, expand_ratio=mlp_expand_ratio) + + def forward(self, x: torch.Tensor, data: torch.Tensor): + x = x + self.attn(self.ln_1(x), self.ln_2(data)) + x = x + self.mlp(self.ln_3(x)) + return x + + +class QKVMultiheadAttention(nn.Module): + def __init__( + self, + *, + heads: int, + width=None, + qk_norm=False, + norm_layer=ops.LayerNorm + ): + super().__init__() + self.heads = heads + self.q_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() + self.k_norm = norm_layer(width // heads, elementwise_affine=True, eps=1e-6) if qk_norm else nn.Identity() + + def forward(self, qkv): + bs, n_ctx, width = qkv.shape + attn_ch = width // self.heads // 3 + qkv = qkv.view(bs, n_ctx, self.heads, -1) + q, k, v = torch.split(qkv, attn_ch, dim=-1) + + q = self.q_norm(q) + k = self.k_norm(k) + + q, k, v = map(lambda t: rearrange(t, 'b n h d -> b h n d', h=self.heads), (q, k, v)) + out = F.scaled_dot_product_attention(q, k, v).transpose(1, 2).reshape(bs, n_ctx, -1) + return out + + +class MultiheadAttention(nn.Module): + def __init__( + self, + *, + width: int, + heads: int, + qkv_bias: bool, + norm_layer=ops.LayerNorm, + qk_norm: bool = False, + drop_path_rate: float = 0.0 + ): + super().__init__() + self.width = width + self.heads = heads + self.c_qkv = ops.Linear(width, width * 3, bias=qkv_bias) + self.c_proj = ops.Linear(width, width) + self.attention = QKVMultiheadAttention( + heads=heads, + width=width, + norm_layer=norm_layer, + qk_norm=qk_norm + ) + self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity() + + def forward(self, x): + x = self.c_qkv(x) + x = self.attention(x) + x = self.drop_path(self.c_proj(x)) + return x + + +class ResidualAttentionBlock(nn.Module): + def __init__( + self, + *, + width: int, + heads: int, + qkv_bias: bool = True, + norm_layer=ops.LayerNorm, + qk_norm: bool = False, + drop_path_rate: float = 0.0, + ): + super().__init__() + self.attn = MultiheadAttention( + width=width, + heads=heads, + qkv_bias=qkv_bias, + norm_layer=norm_layer, + qk_norm=qk_norm, + drop_path_rate=drop_path_rate + ) + self.ln_1 = norm_layer(width, elementwise_affine=True, eps=1e-6) + self.mlp = MLP(width=width, drop_path_rate=drop_path_rate) + self.ln_2 = norm_layer(width, elementwise_affine=True, eps=1e-6) + + def forward(self, x: torch.Tensor): + x = x + self.attn(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class Transformer(nn.Module): + def __init__( + self, + *, + width: int, + layers: int, + heads: int, + qkv_bias: bool = True, + norm_layer=ops.LayerNorm, + qk_norm: bool = False, + drop_path_rate: float = 0.0 + ): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.ModuleList( + [ + ResidualAttentionBlock( + width=width, + heads=heads, + qkv_bias=qkv_bias, + norm_layer=norm_layer, + qk_norm=qk_norm, + drop_path_rate=drop_path_rate + ) + for _ in range(layers) + ] + ) + + def forward(self, x: torch.Tensor): + for block in self.resblocks: + x = block(x) + return x + + +class CrossAttentionDecoder(nn.Module): + + def __init__( + self, + *, + out_channels: int, + fourier_embedder: FourierEmbedder, + width: int, + heads: int, + mlp_expand_ratio: int = 4, + downsample_ratio: int = 1, + enable_ln_post: bool = True, + qkv_bias: bool = True, + qk_norm: bool = False, + label_type: str = "binary" + ): + super().__init__() + + self.enable_ln_post = enable_ln_post + 
self.fourier_embedder = fourier_embedder + self.downsample_ratio = downsample_ratio + self.query_proj = ops.Linear(self.fourier_embedder.out_dim, width) + if self.downsample_ratio != 1: + self.latents_proj = ops.Linear(width * downsample_ratio, width) + if self.enable_ln_post == False: + qk_norm = False + self.cross_attn_decoder = ResidualCrossAttentionBlock( + width=width, + mlp_expand_ratio=mlp_expand_ratio, + heads=heads, + qkv_bias=qkv_bias, + qk_norm=qk_norm + ) + + if self.enable_ln_post: + self.ln_post = ops.LayerNorm(width) + self.output_proj = ops.Linear(width, out_channels) + self.label_type = label_type + self.count = 0 + + def forward(self, queries=None, query_embeddings=None, latents=None): + if query_embeddings is None: + query_embeddings = self.query_proj(self.fourier_embedder(queries).to(latents.dtype)) + self.count += query_embeddings.shape[1] + if self.downsample_ratio != 1: + latents = self.latents_proj(latents) + x = self.cross_attn_decoder(query_embeddings, latents) + if self.enable_ln_post: + x = self.ln_post(x) + occ = self.output_proj(x) + return occ + + +class ShapeVAE(nn.Module): + def __init__( + self, + *, + embed_dim: int, + width: int, + heads: int, + num_decoder_layers: int, + geo_decoder_downsample_ratio: int = 1, + geo_decoder_mlp_expand_ratio: int = 4, + geo_decoder_ln_post: bool = True, + num_freqs: int = 8, + include_pi: bool = True, + qkv_bias: bool = True, + qk_norm: bool = False, + label_type: str = "binary", + drop_path_rate: float = 0.0, + scale_factor: float = 1.0, + ): + super().__init__() + self.geo_decoder_ln_post = geo_decoder_ln_post + + self.fourier_embedder = FourierEmbedder(num_freqs=num_freqs, include_pi=include_pi) + + self.post_kl = ops.Linear(embed_dim, width) + + self.transformer = Transformer( + width=width, + layers=num_decoder_layers, + heads=heads, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + drop_path_rate=drop_path_rate + ) + + self.geo_decoder = CrossAttentionDecoder( + fourier_embedder=self.fourier_embedder, + out_channels=1, + mlp_expand_ratio=geo_decoder_mlp_expand_ratio, + downsample_ratio=geo_decoder_downsample_ratio, + enable_ln_post=self.geo_decoder_ln_post, + width=width // geo_decoder_downsample_ratio, + heads=heads // geo_decoder_downsample_ratio, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + label_type=label_type, + ) + + self.volume_decoder = VanillaVolumeDecoder() + self.scale_factor = scale_factor + + def decode(self, latents, **kwargs): + latents = self.post_kl(latents.movedim(-2, -1)) + latents = self.transformer(latents) + + bounds = kwargs.get("bounds", 1.01) + num_chunks = kwargs.get("num_chunks", 8000) + octree_resolution = kwargs.get("octree_resolution", 256) + enable_pbar = kwargs.get("enable_pbar", True) + + grid_logits = self.volume_decoder(latents, self.geo_decoder, bounds=bounds, num_chunks=num_chunks, octree_resolution=octree_resolution, enable_pbar=enable_pbar) + return grid_logits + + def encode(self, x): + return None diff --git a/comfy/model_base.py b/comfy/model_base.py index 976702b60..f02406ace 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -36,6 +36,7 @@ import comfy.ldm.hunyuan_video.model import comfy.ldm.cosmos.model import comfy.ldm.lumina.model import comfy.ldm.wan.model +import comfy.ldm.hunyuan3d.model import comfy.model_management import comfy.patcher_extension @@ -1013,3 +1014,18 @@ class WAN21(BaseModel): if clip_vision_output is not None: out['clip_fea'] = comfy.conds.CONDRegular(clip_vision_output.penultimate_hidden_states) return out + +class Hunyuan3Dv2(BaseModel): + def 
__init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2) + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) + + guidance = kwargs.get("guidance", 5.0) + if guidance is not None: + out['guidance'] = comfy.conds.CONDRegular(torch.FloatTensor([guidance])) + return out diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 403da5855..f9e96ab7e 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -154,7 +154,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["guidance_embed"] = len(guidance_keys) > 0 return dit_config - if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys: #Flux + if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and '{}img_in.weight'.format(key_prefix) in state_dict_keys: #Flux dit_config = {} dit_config["image_model"] = "flux" dit_config["in_channels"] = 16 @@ -323,6 +323,21 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["model_type"] = "t2v" return dit_config + if '{}latent_in.weight'.format(key_prefix) in state_dict_keys: # Hunyuan 3D + in_shape = state_dict['{}latent_in.weight'.format(key_prefix)].shape + dit_config = {} + dit_config["image_model"] = "hunyuan3d2" + dit_config["in_channels"] = in_shape[1] + dit_config["context_in_dim"] = state_dict['{}cond_in.weight'.format(key_prefix)].shape[1] + dit_config["hidden_size"] = in_shape[0] + dit_config["mlp_ratio"] = 4.0 + dit_config["num_heads"] = 16 + dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.') + dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.') + dit_config["qkv_bias"] = True + dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys + return dit_config + if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys: return None diff --git a/comfy/sd.py b/comfy/sd.py index 3d72a04d6..4160fa893 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -14,6 +14,7 @@ import comfy.ldm.genmo.vae.model import comfy.ldm.lightricks.vae.causal_video_autoencoder import comfy.ldm.cosmos.vae import comfy.ldm.wan.vae +import comfy.ldm.hunyuan3d.vae import yaml import math @@ -412,6 +413,16 @@ class VAE: self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32] self.memory_used_encode = lambda shape, dtype: 6000 * shape[3] * shape[4] * model_management.dtype_size(dtype) self.memory_used_decode = lambda shape, dtype: 7000 * shape[3] * shape[4] * (8 * 8) * model_management.dtype_size(dtype) + elif "geo_decoder.cross_attn_decoder.ln_1.bias" in sd: + self.latent_dim = 1 + ln_post = "geo_decoder.ln_post.weight" in sd + inner_size = sd["geo_decoder.output_proj.weight"].shape[1] + downsample_ratio = sd["post_kl.weight"].shape[0] // inner_size + mlp_expand = sd["geo_decoder.cross_attn_decoder.mlp.c_fc.weight"].shape[0] // inner_size + self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype) + self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * 2048) * model_management.dtype_size(dtype) + ddconfig = {"embed_dim": 64, "num_freqs": 8, "include_pi": False, "heads": 16, "width": 
1024, "num_decoder_layers": 16, "qkv_bias": False, "qk_norm": True, "geo_decoder_mlp_expand_ratio": mlp_expand, "geo_decoder_downsample_ratio": downsample_ratio, "geo_decoder_ln_post": ln_post} + self.first_stage_model = comfy.ldm.hunyuan3d.vae.ShapeVAE(**ddconfig) else: logging.warning("WARNING: No VAE weights detected, VAE not initalized.") self.first_stage_model = None @@ -498,7 +509,7 @@ class VAE: encode_fn = lambda a: self.first_stage_model.encode((self.process_input(a)).to(self.vae_dtype).to(self.device)).float() return comfy.utils.tiled_scale_multidim(samples, encode_fn, tile=(tile_t, tile_x, tile_y), overlap=overlap, upscale_amount=self.downscale_ratio, out_channels=self.latent_channels, downscale=True, index_formulas=self.downscale_index_formula, output_device=self.output_device) - def decode(self, samples_in): + def decode(self, samples_in, vae_options={}): self.throw_exception_if_invalid() pixel_samples = None try: @@ -510,7 +521,7 @@ class VAE: for x in range(0, samples_in.shape[0], batch_number): samples = samples_in[x:x+batch_number].to(self.vae_dtype).to(self.device) - out = self.process_output(self.first_stage_model.decode(samples).to(self.output_device).float()) + out = self.process_output(self.first_stage_model.decode(samples, **vae_options).to(self.output_device).float()) if pixel_samples is None: pixel_samples = torch.empty((samples_in.shape[0],) + tuple(out.shape[1:]), device=self.output_device) pixel_samples[x:x+batch_number] = out diff --git a/comfy/supported_models.py b/comfy/supported_models.py index b4d7bfe20..b5c3194cf 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -959,6 +959,42 @@ class WAN21_I2V(WAN21_T2V): out = model_base.WAN21(self, image_to_video=True, device=device) return out -models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V] +class Hunyuan3Dv2(supported_models_base.BASE): + unet_config = { + "image_model": "hunyuan3d2", + } + + unet_extra_config = {} + + sampling_settings = { + "multiplier": 1.0, + "shift": 1.0, + } + + clip_vision_prefix = "conditioner.main_image_encoder.model." 
+ vae_key_prefix = ["vae."] + + latent_format = latent_formats.Hunyuan3Dv2 + + def process_unet_state_dict_for_saving(self, state_dict): + replace_prefix = {"": "model."} + return utils.state_dict_prefix_replace(state_dict, replace_prefix) + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.Hunyuan3Dv2(self, device=device) + return out + + def clip_target(self, state_dict={}): + return None + +class Hunyuan3Dv2mini(Hunyuan3Dv2): + unet_config = { + "image_model": "hunyuan3d2", + "depth": 8, + } + + latent_format = latent_formats.Hunyuan3Dv2mini + +models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, Hunyuan3Dv2mini, Hunyuan3Dv2] models += [SVD_img2vid] diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py new file mode 100644 index 000000000..6abcde1f6 --- /dev/null +++ b/comfy_extras/nodes_hunyuan3d.py @@ -0,0 +1,410 @@ +import torch +import os +import json +import struct +import numpy as np +from comfy.ldm.modules.diffusionmodules.mmdit import get_1d_sincos_pos_embed_from_grid_torch +import folder_paths +import comfy.model_management +from comfy.cli_args import args + + +class EmptyLatentHunyuan3Dv2: + @classmethod + def INPUT_TYPES(s): + return {"required": {"resolution": ("INT", {"default": 3072, "min": 1, "max": 8192}), + "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096, "tooltip": "The number of latent images in the batch."}), + }} + RETURN_TYPES = ("LATENT",) + FUNCTION = "generate" + + CATEGORY = "latent/3d" + + def generate(self, resolution, batch_size): + latent = torch.zeros([batch_size, 64, resolution], device=comfy.model_management.intermediate_device()) + return ({"samples": latent, "type": "hunyuan3dv2"}, ) + + +class Hunyuan3Dv2Conditioning: + @classmethod + def INPUT_TYPES(s): + return {"required": {"clip_vision_output": ("CLIP_VISION_OUTPUT",), + }} + + RETURN_TYPES = ("CONDITIONING", "CONDITIONING") + RETURN_NAMES = ("positive", "negative") + + FUNCTION = "encode" + + CATEGORY = "conditioning/video_models" + + def encode(self, clip_vision_output): + embeds = clip_vision_output.last_hidden_state + positive = [[embeds, {}]] + negative = [[torch.zeros_like(embeds), {}]] + return (positive, negative) + + +class Hunyuan3Dv2ConditioningMultiView: + @classmethod + def INPUT_TYPES(s): + return {"required": {}, + "optional": {"front": ("CLIP_VISION_OUTPUT",), + "left": ("CLIP_VISION_OUTPUT",), + "back": ("CLIP_VISION_OUTPUT",), + "right": ("CLIP_VISION_OUTPUT",), }} + + RETURN_TYPES = ("CONDITIONING", "CONDITIONING") + RETURN_NAMES = ("positive", "negative") + + FUNCTION = "encode" + + CATEGORY = "conditioning/video_models" + + def encode(self, front=None, left=None, back=None, right=None): + all_embeds = [front, left, back, right] + out = [] + pos_embeds = None + for i, e in enumerate(all_embeds): + if e is not None: + if pos_embeds is None: + pos_embeds = get_1d_sincos_pos_embed_from_grid_torch(e.last_hidden_state.shape[-1], torch.arange(4)) + out.append(e.last_hidden_state + pos_embeds[i].reshape(1, 1, -1)) + + embeds = torch.cat(out, dim=1) + positive = [[embeds, {}]] + negative = 
[[torch.zeros_like(embeds), {}]] + return (positive, negative) + + +class VOXEL: + def __init__(self, data): + self.data = data + + +class VAEDecodeHunyuan3D: + @classmethod + def INPUT_TYPES(s): + return {"required": {"samples": ("LATENT", ), + "vae": ("VAE", ), + "num_chunks": ("INT", {"default": 8000, "min": 1000, "max": 500000}), + "octree_resolution": ("INT", {"default": 256, "min": 16, "max": 512}), + }} + RETURN_TYPES = ("VOXEL",) + FUNCTION = "decode" + + CATEGORY = "latent/3d" + + def decode(self, vae, samples, num_chunks, octree_resolution): + voxels = VOXEL(vae.decode(samples["samples"], vae_options={"num_chunks": num_chunks, "octree_resolution": octree_resolution})) + return (voxels, ) + + +def voxel_to_mesh(voxels, threshold=0.5, device=None): + if device is None: + device = torch.device("cpu") + voxels = voxels.to(device) + + binary = (voxels > threshold).float() + padded = torch.nn.functional.pad(binary, (1, 1, 1, 1, 1, 1), 'constant', 0) + + D, H, W = binary.shape + + neighbors = torch.tensor([ + [0, 0, 1], + [0, 0, -1], + [0, 1, 0], + [0, -1, 0], + [1, 0, 0], + [-1, 0, 0] + ], device=device) + + z, y, x = torch.meshgrid( + torch.arange(D, device=device), + torch.arange(H, device=device), + torch.arange(W, device=device), + indexing='ij' + ) + voxel_indices = torch.stack([z.flatten(), y.flatten(), x.flatten()], dim=1) + + solid_mask = binary.flatten() > 0 + solid_indices = voxel_indices[solid_mask] + + corner_offsets = [ + torch.tensor([ + [0, 0, 1], [0, 1, 1], [1, 1, 1], [1, 0, 1] + ], device=device), + torch.tensor([ + [0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0] + ], device=device), + torch.tensor([ + [0, 1, 0], [1, 1, 0], [1, 1, 1], [0, 1, 1] + ], device=device), + torch.tensor([ + [0, 0, 0], [0, 0, 1], [1, 0, 1], [1, 0, 0] + ], device=device), + torch.tensor([ + [1, 0, 1], [1, 1, 1], [1, 1, 0], [1, 0, 0] + ], device=device), + torch.tensor([ + [0, 1, 0], [0, 1, 1], [0, 0, 1], [0, 0, 0] + ], device=device) + ] + + all_vertices = [] + all_indices = [] + + vertex_count = 0 + + for face_idx, offset in enumerate(neighbors): + neighbor_indices = solid_indices + offset + + padded_indices = neighbor_indices + 1 + + is_exposed = padded[ + padded_indices[:, 0], + padded_indices[:, 1], + padded_indices[:, 2] + ] == 0 + + if not is_exposed.any(): + continue + + exposed_indices = solid_indices[is_exposed] + + corners = corner_offsets[face_idx].unsqueeze(0) + + face_vertices = exposed_indices.unsqueeze(1) + corners + + all_vertices.append(face_vertices.reshape(-1, 3)) + + num_faces = exposed_indices.shape[0] + face_indices = torch.arange( + vertex_count, + vertex_count + 4 * num_faces, + device=device + ).reshape(-1, 4) + + all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 2], face_indices[:, 1]], dim=1)) + all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 3], face_indices[:, 2]], dim=1)) + + vertex_count += 4 * num_faces + + vertices = torch.cat(all_vertices, dim=0) + faces = torch.cat(all_indices, dim=0) + + v_min = 0 + v_max = max(voxels.shape) + + vertices = vertices - (v_min + v_max) / 2 + + scale = (v_max - v_min) / 2 + if scale > 0: + vertices = vertices / scale + + return vertices, faces + + +class MESH: + def __init__(self, vertices, faces): + self.vertices = vertices + self.faces = faces + + +class VoxelToMeshBasic: + @classmethod + def INPUT_TYPES(s): + return {"required": {"voxel": ("VOXEL", ), + "threshold": ("FLOAT", {"default": 0.6, "min": -1.0, "max": 1.0, "step": 0.01}), + }} + RETURN_TYPES = ("MESH",) + FUNCTION = "decode" + + 
CATEGORY = "3d" + + def decode(self, voxel, threshold): + vertices = [] + faces = [] + for x in voxel.data: + v, f = voxel_to_mesh(x, threshold=threshold, device=None) + vertices.append(v) + faces.append(f) + + return (MESH(torch.stack(vertices), torch.stack(faces)), ) + + +def save_glb(vertices, faces, filepath, metadata=None): + """ + Save PyTorch tensor vertices and faces as a GLB file without external dependencies. + + Parameters: + vertices: torch.Tensor of shape (N, 3) - The vertex coordinates + faces: torch.Tensor of shape (M, 4) or (M, 3) - The face indices (quad or triangle faces) + filepath: str - Output filepath (should end with .glb) + """ + + # Convert tensors to numpy arrays + vertices_np = vertices.cpu().numpy().astype(np.float32) + faces_np = faces.cpu().numpy().astype(np.uint32) + + vertices_buffer = vertices_np.tobytes() + indices_buffer = faces_np.tobytes() + + def pad_to_4_bytes(buffer): + padding_length = (4 - (len(buffer) % 4)) % 4 + return buffer + b'\x00' * padding_length + + vertices_buffer_padded = pad_to_4_bytes(vertices_buffer) + indices_buffer_padded = pad_to_4_bytes(indices_buffer) + + buffer_data = vertices_buffer_padded + indices_buffer_padded + + vertices_byte_length = len(vertices_buffer) + vertices_byte_offset = 0 + indices_byte_length = len(indices_buffer) + indices_byte_offset = len(vertices_buffer_padded) + + gltf = { + "asset": {"version": "2.0", "generator": "ComfyUI"}, + "buffers": [ + { + "byteLength": len(buffer_data) + } + ], + "bufferViews": [ + { + "buffer": 0, + "byteOffset": vertices_byte_offset, + "byteLength": vertices_byte_length, + "target": 34962 # ARRAY_BUFFER + }, + { + "buffer": 0, + "byteOffset": indices_byte_offset, + "byteLength": indices_byte_length, + "target": 34963 # ELEMENT_ARRAY_BUFFER + } + ], + "accessors": [ + { + "bufferView": 0, + "byteOffset": 0, + "componentType": 5126, # FLOAT + "count": len(vertices_np), + "type": "VEC3", + "max": vertices_np.max(axis=0).tolist(), + "min": vertices_np.min(axis=0).tolist() + }, + { + "bufferView": 1, + "byteOffset": 0, + "componentType": 5125, # UNSIGNED_INT + "count": faces_np.size, + "type": "SCALAR" + } + ], + "meshes": [ + { + "primitives": [ + { + "attributes": { + "POSITION": 0 + }, + "indices": 1, + "mode": 4 # TRIANGLES + } + ] + } + ], + "nodes": [ + { + "mesh": 0 + } + ], + "scenes": [ + { + "nodes": [0] + } + ], + "scene": 0 + } + + if metadata is not None: + gltf["asset"]["extras"] = metadata + + # Convert the JSON to bytes + gltf_json = json.dumps(gltf).encode('utf8') + + def pad_json_to_4_bytes(buffer): + padding_length = (4 - (len(buffer) % 4)) % 4 + return buffer + b' ' * padding_length + + gltf_json_padded = pad_json_to_4_bytes(gltf_json) + + # Create the GLB header + # Magic glTF + glb_header = struct.pack('<4sII', b'glTF', 2, 12 + 8 + len(gltf_json_padded) + 8 + len(buffer_data)) + + # Create JSON chunk header (chunk type 0) + json_chunk_header = struct.pack('<II', len(gltf_json_padded), 0x4E4F534A) # "JSON" + + # Create BIN chunk header (chunk type 1) + bin_chunk_header = struct.pack('<II', len(buffer_data), 0x004E4942) # "BIN" + + # Write the GLB file + with open(filepath, 'wb') as f: + f.write(glb_header) + f.write(json_chunk_header) + f.write(gltf_json_padded) + f.write(bin_chunk_header) + f.write(buffer_data) From ... Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Wed, 19 Mar 2025 19:55:24 -0400 Subject: [PATCH 58/77] Fix orientation of hunyuan 3d model.
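The decoder output now swaps its last two axes, so the mesh extraction compensates by mirroring the vertices (torch.fliplr on the (N, 3) vertex array) and reversing the triangle winding: a mirror flips the handedness of each face and reversing the index order flips it back, keeping face normals pointing outward. A small illustration of why the two changes cancel (a single-axis mirror stands in for the column flip):

    import torch

    def face_normal(a, b, c):
        return torch.linalg.cross(b - a, c - a)

    a = torch.tensor([0., 0., 0.])
    b = torch.tensor([1., 0., 0.])
    c = torch.tensor([0., 1., 0.])
    m = torch.tensor([-1., 1., 1.])  # mirror across the yz plane

    n = face_normal(a, b, c)                       # tensor([0., 0., 1.])
    n_mirrored = face_normal(a * m, b * m, c * m)  # tensor([0., 0., -1.]): mirroring flipped it
    n_fixed = face_normal(a * m, c * m, b * m)     # tensor([0., 0., 1.]): reversed winding restores it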
--- comfy/ldm/hunyuan3d/vae.py | 2 +- comfy_extras/nodes_hunyuan3d.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/comfy/ldm/hunyuan3d/vae.py b/comfy/ldm/hunyuan3d/vae.py index 311c9b416..5eb2c6548 100644 --- a/comfy/ldm/hunyuan3d/vae.py +++ b/comfy/ldm/hunyuan3d/vae.py @@ -581,7 +581,7 @@ class ShapeVAE(nn.Module): enable_pbar = kwargs.get("enable_pbar", True) grid_logits = self.volume_decoder(latents, self.geo_decoder, bounds=bounds, num_chunks=num_chunks, octree_resolution=octree_resolution, enable_pbar=enable_pbar) - return grid_logits + return grid_logits.movedim(-2, -1) def encode(self, x): return None diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index 6abcde1f6..ac2cff3a9 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -185,8 +185,8 @@ def voxel_to_mesh(voxels, threshold=0.5, device=None): device=device ).reshape(-1, 4) - all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 2], face_indices[:, 1]], dim=1)) - all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 3], face_indices[:, 2]], dim=1)) + all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 1], face_indices[:, 2]], dim=1)) + all_indices.append(torch.stack([face_indices[:, 0], face_indices[:, 2], face_indices[:, 3]], dim=1)) vertex_count += 4 * num_faces @@ -202,6 +202,7 @@ def voxel_to_mesh(voxels, threshold=0.5, device=None): if scale > 0: vertices = vertices / scale + vertices = torch.fliplr(vertices) return vertices, faces From 3872b43d4ba44ca93eae305298a6474efafa3eb7 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Thu, 20 Mar 2025 04:52:31 -0400 Subject: [PATCH 59/77] A few fixes for the hunyuan3d models. --- comfy/sd.py | 5 +++-- comfy/supported_models.py | 2 ++ comfy_extras/nodes_hunyuan3d.py | 8 ++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/comfy/sd.py b/comfy/sd.py index 4160fa893..d096f496c 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -419,10 +419,11 @@ class VAE: inner_size = sd["geo_decoder.output_proj.weight"].shape[1] downsample_ratio = sd["post_kl.weight"].shape[0] // inner_size mlp_expand = sd["geo_decoder.cross_attn_decoder.mlp.c_fc.weight"].shape[0] // inner_size - self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype) - self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * 2048) * model_management.dtype_size(dtype) + self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype) # TODO + self.memory_used_decode = lambda shape, dtype: (1024 * 1024 * 1024 * 2.0) * model_management.dtype_size(dtype) # TODO ddconfig = {"embed_dim": 64, "num_freqs": 8, "include_pi": False, "heads": 16, "width": 1024, "num_decoder_layers": 16, "qkv_bias": False, "qk_norm": True, "geo_decoder_mlp_expand_ratio": mlp_expand, "geo_decoder_downsample_ratio": downsample_ratio, "geo_decoder_ln_post": ln_post} self.first_stage_model = comfy.ldm.hunyuan3d.vae.ShapeVAE(**ddconfig) + self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32] else: logging.warning("WARNING: No VAE weights detected, VAE not initalized.") self.first_stage_model = None diff --git a/comfy/supported_models.py b/comfy/supported_models.py index b5c3194cf..be3aede60 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -971,6 +971,8 @@ class Hunyuan3Dv2(supported_models_base.BASE): "shift": 1.0, } + memory_usage_factor = 3.5 + clip_vision_prefix = 
"conditioner.main_image_encoder.model." vae_key_prefix = ["vae."] diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index ac2cff3a9..1ca7c2fe6 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -190,8 +190,12 @@ def voxel_to_mesh(voxels, threshold=0.5, device=None): vertex_count += 4 * num_faces - vertices = torch.cat(all_vertices, dim=0) - faces = torch.cat(all_indices, dim=0) + if len(all_vertices) > 0: + vertices = torch.cat(all_vertices, dim=0) + faces = torch.cat(all_indices, dim=0) + else: + vertices = torch.zeros((1, 3)) + faces = torch.zeros((1, 3)) v_min = 0 v_max = max(voxels.shape) From 8b9ce4ed18c24db2b7195b8d33932e516fcb3d85 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 21 Mar 2025 00:17:36 -0400 Subject: [PATCH 60/77] Update frontend to 1.13 (#7331) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 70689bc99..ceec006d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.12.14 +comfyui-frontend-package==1.13.9 torch torchsde torchvision From a4a956dbbdcd9b3072d748f826394dd3223a094b Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 21 Mar 2025 01:47:18 -0400 Subject: [PATCH 61/77] Add backend primitive nodes (#7328) * Add backend primitive nodes * Add control after generate to int primitive --- comfy_extras/nodes_primitive.py | 79 +++++++++++++++++++++++++++++++++ nodes.py | 1 + 2 files changed, 80 insertions(+) create mode 100644 comfy_extras/nodes_primitive.py diff --git a/comfy_extras/nodes_primitive.py b/comfy_extras/nodes_primitive.py new file mode 100644 index 000000000..b770104fb --- /dev/null +++ b/comfy_extras/nodes_primitive.py @@ -0,0 +1,79 @@ +# Primitive nodes that are evaluated at backend. 
+from __future__ import annotations + +from comfy.comfy_types.node_typing import ComfyNodeABC, InputTypeDict, IO + + +class String(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": {"value": (IO.STRING, {})}, + } + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "execute" + CATEGORY = "utils/primitive" + + def execute(self, value: str) -> tuple[str]: + return (value,) + + +class Int(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": {"value": (IO.INT, {"control_after_generate": True})}, + } + + RETURN_TYPES = (IO.INT,) + FUNCTION = "execute" + CATEGORY = "utils/primitive" + + def execute(self, value: int) -> tuple[int]: + return (value,) + + +class Float(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": {"value": (IO.FLOAT, {})}, + } + + RETURN_TYPES = (IO.FLOAT,) + FUNCTION = "execute" + CATEGORY = "utils/primitive" + + def execute(self, value: float) -> tuple[float]: + return (value,) + + +class Boolean(ComfyNodeABC): + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": {"value": (IO.BOOLEAN, {})}, + } + + RETURN_TYPES = (IO.BOOLEAN,) + FUNCTION = "execute" + CATEGORY = "utils/primitive" + + def execute(self, value: bool) -> tuple[bool]: + return (value,) + + +NODE_CLASS_MAPPINGS = { + "PrimitiveString": String, + "PrimitiveInt": Int, + "PrimitiveFloat": Float, + "PrimitiveBoolean": Boolean, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PrimitiveString": "String", + "PrimitiveInt": "Int", + "PrimitiveFloat": "Float", + "PrimitiveBoolean": "Boolean", +} diff --git a/nodes.py b/nodes.py index f89c328e9..a9c931dfa 100644 --- a/nodes.py +++ b/nodes.py @@ -2265,6 +2265,7 @@ def init_builtin_extra_nodes(): "nodes_lumina2.py", "nodes_wan.py", "nodes_hunyuan3d.py", + "nodes_primitive.py", ] import_failed = [] From 095610717000bffd477a7e72988d1fb2299afacb Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 21 Mar 2025 06:32:20 -0400 Subject: [PATCH 62/77] Nodes to convert images to YUV and back. Can be used to convert an image to black and white. 
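ImageRGBToYUV splits a ComfyUI IMAGE (batch, height, width, channel) into luma and chroma planes, each expanded back to three channels so they remain viewable as ordinary images, and ImageYUVToRGB averages each input back down to a single channel before recombining. The black and white conversion mentioned above falls out of the round trip: keep Y and feed neutral chroma back in. A sketch of that use (assuming kornia's ycbcr convention, where 0.5 is the neutral chroma value):

    import torch

    image = torch.rand(1, 64, 64, 3)   # ComfyUI IMAGE layout: BHWC in 0..1
    y, u, v = ImageRGBToYUV().execute(image)
    flat = torch.full_like(u, 0.5)     # neutral chroma plane
    bw_image, = ImageYUVToRGB().execute(y, flat, flat)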
--- comfy_extras/nodes_morphology.py | 38 ++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/comfy_extras/nodes_morphology.py b/comfy_extras/nodes_morphology.py index b1372b8ce..075b26c40 100644 --- a/comfy_extras/nodes_morphology.py +++ b/comfy_extras/nodes_morphology.py @@ -2,6 +2,7 @@ import torch import comfy.model_management from kornia.morphology import dilation, erosion, opening, closing, gradient, top_hat, bottom_hat +import kornia.color class Morphology: @@ -40,8 +41,45 @@ class Morphology: img_out = output.to(comfy.model_management.intermediate_device()).movedim(1, -1) return (img_out,) + +class ImageRGBToYUV: + @classmethod + def INPUT_TYPES(s): + return {"required": { "image": ("IMAGE",), + }} + + RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE") + RETURN_NAMES = ("Y", "U", "V") + FUNCTION = "execute" + + CATEGORY = "image/batch" + + def execute(self, image): + out = kornia.color.rgb_to_ycbcr(image.movedim(-1, 1)).movedim(1, -1) + return (out[..., 0:1].expand_as(image), out[..., 1:2].expand_as(image), out[..., 2:3].expand_as(image)) + +class ImageYUVToRGB: + @classmethod + def INPUT_TYPES(s): + return {"required": {"Y": ("IMAGE",), + "U": ("IMAGE",), + "V": ("IMAGE",), + }} + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "execute" + + CATEGORY = "image/batch" + + def execute(self, Y, U, V): + image = torch.cat([torch.mean(Y, dim=-1, keepdim=True), torch.mean(U, dim=-1, keepdim=True), torch.mean(V, dim=-1, keepdim=True)], dim=-1) + out = kornia.color.ycbcr_to_rgb(image.movedim(-1, 1)).movedim(1, -1) + return (out,) + NODE_CLASS_MAPPINGS = { "Morphology": Morphology, + "ImageRGBToYUV": ImageRGBToYUV, + "ImageYUVToRGB": ImageYUVToRGB, } NODE_DISPLAY_NAME_MAPPINGS = { From 0cf227469929f74ae5ae887f3f7fa7e490e5e9d0 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 21 Mar 2025 13:50:09 -0400 Subject: [PATCH 63/77] Update frontend to 1.14 (#7343) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ceec006d2..c78d3c228 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -comfyui-frontend-package==1.13.9 +comfyui-frontend-package==1.14.5 torch torchsde torchvision From 83e839a89be1dc6db0923bea45ff9eae43a8ea01 Mon Sep 17 00:00:00 2001 From: thot experiment <94414189+thot-experiment@users.noreply.github.com> Date: Fri, 21 Mar 2025 11:04:15 -0700 Subject: [PATCH 64/77] Native LotusD Implementation (#7125) * draft pass at a native comfy implementation of Lotus-D depth and normal est * fix model_sampling kludges * fix ruff --------- Co-authored-by: comfyanonymous <121283862+comfyanonymous@users.noreply.github.com> --- comfy/model_base.py | 14 ++++++++++++++ comfy/model_detection.py | 7 ++++++- comfy/supported_models.py | 18 ++++++++++++++++- comfy_extras/nodes_lotus.py | 29 ++++++++++++++++++++++++++++ comfy_extras/nodes_model_advanced.py | 8 +++++++- nodes.py | 1 + 6 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 comfy_extras/nodes_lotus.py diff --git a/comfy/model_base.py b/comfy/model_base.py index f02406ace..2fb4b1453 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -140,6 +140,7 @@ class BaseModel(torch.nn.Module): def _apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs): sigma = t xc = self.model_sampling.calculate_input(sigma, x) + if c_concat is not None: xc = torch.cat([xc] + [c_concat], dim=1) @@ -601,6 +602,19 @@ class SDXL_instructpix2pix(IP2P, SDXL): else: self.process_ip2p_image_in = 
lambda image: image #diffusers ip2p +class Lotus(BaseModel): + def extra_conds(self, **kwargs): + out = {} + cross_attn = kwargs.get("cross_attn", None) + out['c_crossattn'] = comfy.conds.CONDCrossAttn(cross_attn) + device = kwargs["device"] + task_emb = torch.tensor([1, 0]).float().to(device) + task_emb = torch.cat([torch.sin(task_emb), torch.cos(task_emb)]).unsqueeze(0) + out['y'] = comfy.conds.CONDRegular(task_emb) + return out + + def __init__(self, model_config, model_type=ModelType.EPS, device=None): + super().__init__(model_config, model_type, device=device) class StableCascade_C(BaseModel): def __init__(self, model_config, model_type=ModelType.STABLE_CASCADE, device=None): diff --git a/comfy/model_detection.py b/comfy/model_detection.py index f9e96ab7e..4217f5831 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -682,8 +682,13 @@ def unet_config_from_diffusers_unet(state_dict, dtype=None): 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], 'use_temporal_attention': False, 'use_temporal_resblock': False} + LotusD = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': 4, + 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0], + 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_heads': 8, + 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + 'use_temporal_attention': False, 'use_temporal_resblock': False} - supported_models = [SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mid_cnet, SDXL_small_cnet, SDXL_diffusers_inpaint, SSD_1B, Segmind_Vega, KOALA_700M, KOALA_1B, SD09_XS, SD_XS, SDXL_diffusers_ip2p, SD15_diffusers_inpaint] + supported_models = [LotusD, SDXL, SDXL_refiner, SD21, SD15, SD21_uncliph, SD21_unclipl, SDXL_mid_cnet, SDXL_small_cnet, SDXL_diffusers_inpaint, SSD_1B, Segmind_Vega, KOALA_700M, KOALA_1B, SD09_XS, SD_XS, SDXL_diffusers_ip2p, SD15_diffusers_inpaint] for unet_config in supported_models: matches = True diff --git a/comfy/supported_models.py b/comfy/supported_models.py index be3aede60..fad00d35b 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -506,6 +506,22 @@ class SDXL_instructpix2pix(SDXL): def get_model(self, state_dict, prefix="", device=None): return model_base.SDXL_instructpix2pix(self, model_type=self.model_type(state_dict, prefix), device=device) +class LotusD(SD20): + unet_config = { + "model_channels": 320, + "use_linear_in_transformer": True, + "use_temporal_attention": False, + "adm_in_channels": 4, + "in_channels": 4, + } + + unet_extra_config = { + "num_classes": 'sequential' + } + + def get_model(self, state_dict, prefix="", device=None): + return model_base.Lotus(self, device=device) + class SD3(supported_models_base.BASE): unet_config = { "in_channels": 16, @@ -997,6 +1013,6 @@ class Hunyuan3Dv2mini(Hunyuan3Dv2): latent_format = latent_formats.Hunyuan3Dv2mini -models = [Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, 
WAN21_I2V, Hunyuan3Dv2mini, Hunyuan3Dv2] +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, Hunyuan3Dv2mini, Hunyuan3Dv2] models += [SVD_img2vid] diff --git a/comfy_extras/nodes_lotus.py b/comfy_extras/nodes_lotus.py new file mode 100644 index 000000000..739dbdd3d --- /dev/null +++ b/comfy_extras/nodes_lotus.py @@ -0,0 +1,29 @@ +import torch +import comfy.model_management as mm + +class LotusConditioning: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + }, + } + + RETURN_TYPES = ("CONDITIONING",) + RETURN_NAMES = ("conditioning",) + FUNCTION = "conditioning" + CATEGORY = "conditioning/lotus" + + def conditioning(self): + device = mm.get_torch_device() + #lotus uses a frozen encoder and null conditioning, i'm just inlining the results of that operation since it doesn't change + #and getting parity with the reference implementation would otherwise require inference and 800mb of tensors + prompt_embeds = torch.tensor([[[-0.3134765625, -0.447509765625, -0.00823974609375, -0.22802734375, 0.1785888671875, -0.2342529296875, -0.2188720703125, -0.0089111328125, -0.31396484375, 0.196533203125, -0.055877685546875, -0.3828125, -0.0965576171875, 0.0073394775390625, -0.284423828125, 0.07470703125, -0.086181640625, -0.211181640625, 0.0599365234375, 0.10693359375, 0.0007929801940917969, -0.78076171875, -0.382568359375, -0.1851806640625, -0.140625, -0.0936279296875, -0.1229248046875, -0.152099609375, -0.203857421875, -0.2349853515625, -0.2437744140625, -0.10858154296875, -0.08990478515625, 0.08892822265625, -0.2391357421875, -0.1611328125, -0.427978515625, -0.1336669921875, -0.27685546875, -0.1781005859375, -0.3857421875, 0.251953125, -0.055999755859375, -0.0712890625, -0.00130462646484375, 0.033477783203125, -0.26416015625, 0.07171630859375, -0.0090789794921875, -0.2025146484375, -0.2763671875, -0.09869384765625, -0.45751953125, -0.23095703125, 0.004528045654296875, -0.369140625, -0.366943359375, -0.205322265625, -0.1505126953125, -0.45166015625, -0.2059326171875, 0.0168609619140625, -0.305419921875, -0.150634765625, 0.02685546875, -0.609375, -0.019012451171875, 0.050445556640625, -0.0084381103515625, -0.31005859375, -0.184326171875, -0.15185546875, 0.06732177734375, 0.150390625, -0.10919189453125, -0.08837890625, -0.50537109375, -0.389892578125, -0.0294342041015625, -0.10491943359375, -0.187255859375, -0.43212890625, -0.328125, -1.060546875, 0.011871337890625, 0.04730224609375, -0.09521484375, -0.07452392578125, -0.29296875, -0.109130859375, -0.250244140625, -0.3828125, -0.171875, -0.03399658203125, -0.15478515625, -0.1861572265625, -0.2398681640625, 0.1053466796875, -0.22314453125, -0.1932373046875, -0.18798828125, -0.430419921875, -0.05364990234375, -0.474609375, -0.261474609375, -0.1077880859375, -0.439208984375, 0.08966064453125, -0.185302734375, -0.338134765625, -0.297119140625, -0.298583984375, -0.175537109375, -0.373291015625, -0.1397705078125, -0.260498046875, -0.383544921875, -0.09979248046875, -0.319580078125, -0.06884765625, -0.4365234375, -0.183837890625, -0.393310546875, -0.002277374267578125, 0.11236572265625, -0.260498046875, -0.2242431640625, 
-0.19384765625, -0.51123046875, 0.03216552734375, -0.048004150390625, -0.279052734375, -0.2978515625, -0.255615234375, 0.115478515625, -4.08984375, -0.1668701171875, -0.278076171875, -0.5712890625, -0.1385498046875, -0.244384765625, -0.41455078125, -0.244140625, -0.0677490234375, -0.141357421875, -0.11590576171875, -0.1439208984375, -0.0185394287109375, -2.490234375, -0.1549072265625, -0.2305908203125, -0.3828125, -0.1173095703125, -0.08258056640625, -0.1719970703125, -0.325439453125, -0.292724609375, -0.08154296875, -0.412353515625, -0.3115234375, -0.00832366943359375, 0.00489044189453125, -0.2236328125, -0.151123046875, -0.457275390625, -0.135009765625, -0.163330078125, -0.0819091796875, 0.06689453125, 0.0209197998046875, -0.11907958984375, -0.10369873046875, -0.2998046875, -0.478759765625, -0.07940673828125, -0.01517486572265625, -0.3017578125, -0.343994140625, -0.258544921875, -0.44775390625, -0.392822265625, -0.0255584716796875, -0.2998046875, 0.10833740234375, -0.271728515625, -0.36181640625, -0.255859375, -0.2056884765625, -0.055450439453125, 0.060516357421875, -0.45751953125, -0.2322998046875, -0.1737060546875, -0.40576171875, -0.2286376953125, -0.053070068359375, -0.0283660888671875, -0.1898193359375, -4.291534423828125e-05, -0.6591796875, -0.1717529296875, -0.479736328125, -0.1400146484375, -0.40771484375, 0.154296875, 0.003101348876953125, 0.00661468505859375, -0.2073974609375, -0.493408203125, 2.171875, -0.45361328125, -0.283935546875, -0.302001953125, -0.25146484375, -0.207275390625, -0.1524658203125, -0.72998046875, -0.08203125, 0.053192138671875, -0.2685546875, 0.1834716796875, -0.270263671875, -0.091552734375, -0.08319091796875, -0.1297607421875, -0.453857421875, 0.0687255859375, 0.0268096923828125, -0.16552734375, -0.4208984375, -0.1552734375, -0.057373046875, -0.300537109375, -0.04541015625, -0.486083984375, -0.2205810546875, -0.39013671875, 0.007488250732421875, -0.005329132080078125, -0.09759521484375, -0.1448974609375, -0.21923828125, -0.429443359375, -0.40087890625, -0.19384765625, -0.064453125, -0.0306243896484375, -0.045806884765625, -0.056793212890625, 0.119384765625, -0.2073974609375, -0.356201171875, -0.168212890625, -0.291748046875, -0.289794921875, -0.205322265625, -0.419677734375, -0.478271484375, -0.2037353515625, -0.368408203125, -0.186279296875, -0.427734375, -0.1756591796875, 0.07501220703125, -0.2457275390625, -0.03692626953125, 0.003997802734375, -5.7578125, -0.01052093505859375, -0.2305908203125, -0.2252197265625, -0.197509765625, -0.1566162109375, -0.1668701171875, -0.383056640625, -0.05413818359375, 0.12188720703125, -0.369873046875, -0.0184478759765625, -0.150146484375, -0.51123046875, -0.45947265625, -0.1561279296875, 0.060455322265625, 0.043487548828125, -0.1370849609375, -0.069091796875, -0.285888671875, -0.44482421875, -0.2374267578125, -0.2191162109375, -0.434814453125, -0.0360107421875, 0.1298828125, 0.0217742919921875, -0.51220703125, -0.13525390625, -0.09381103515625, -0.276611328125, -0.171875, -0.17138671875, -0.4443359375, -0.2178955078125, -0.269775390625, -0.38623046875, -0.31591796875, -0.42333984375, -0.280029296875, -0.255615234375, -0.17041015625, 0.06268310546875, -0.1878662109375, -0.00677490234375, -0.23583984375, -0.08795166015625, -0.2232666015625, -0.1719970703125, -0.484130859375, -0.328857421875, 0.04669189453125, -0.0419921875, -0.11114501953125, 0.02313232421875, -0.0033130645751953125, -0.6005859375, 0.09051513671875, -0.1884765625, -0.262939453125, -0.375732421875, -0.525390625, -0.1170654296875, -0.3779296875, 
-0.242919921875, -0.419921875, 0.0665283203125, -0.343017578125, 0.06658935546875, -0.346435546875, -0.1363525390625, -0.2000732421875, -0.3837890625, 0.028167724609375, 0.043853759765625, -0.0171051025390625, -0.477294921875, -0.107421875, -0.129150390625, -0.319580078125, -0.32177734375, -0.4951171875, -0.010589599609375, -0.1778564453125, -0.40234375, -0.0810546875, 0.03314208984375, -0.13720703125, -0.31591796875, -0.048248291015625, -0.274658203125, -0.0689697265625, -0.027130126953125, -0.0953369140625, 0.146728515625, -0.38671875, -0.025390625, -0.42333984375, -0.41748046875, -0.379638671875, -0.1978759765625, -0.533203125, -0.33544921875, 0.0694580078125, -0.322998046875, -0.1876220703125, 0.0094451904296875, 0.1839599609375, -0.254150390625, -0.30078125, -0.09228515625, -0.0885009765625, 0.12371826171875, 0.1500244140625, -0.12152099609375, -0.29833984375, 0.03924560546875, -0.1470947265625, -0.1610107421875, -0.2049560546875, -0.01708984375, -0.2470703125, -0.1522216796875, -0.25830078125, 0.10870361328125, -0.302490234375, -0.2376708984375, -0.360107421875, -0.443359375, -0.0784912109375, -0.63623046875, -0.0980224609375, -0.332275390625, -0.1749267578125, -0.30859375, -0.1968994140625, -0.250244140625, -0.447021484375, -0.18408203125, -0.006908416748046875, -0.2044677734375, -0.2548828125, -0.369140625, -0.11328125, -0.1103515625, -0.27783203125, -0.325439453125, 0.01381683349609375, 0.036773681640625, -0.1458740234375, -0.34619140625, -0.232177734375, -0.0562744140625, -0.4482421875, -0.21875, -0.0855712890625, -0.276123046875, -0.1544189453125, -0.223388671875, -0.259521484375, 0.0865478515625, -0.0038013458251953125, -0.340087890625, -0.076171875, -0.25341796875, -0.0007548332214355469, -0.060455322265625, -0.352294921875, 0.035736083984375, -0.2181396484375, -0.2318115234375, -0.1707763671875, 0.018646240234375, 0.093505859375, -0.197021484375, 0.033477783203125, -0.035247802734375, 0.0440673828125, -0.2056884765625, -0.040924072265625, -0.05865478515625, 0.056884765625, -0.08807373046875, -0.10845947265625, 0.09564208984375, -0.10888671875, -0.332275390625, -0.1119384765625, -0.115478515625, 13.0234375, 0.0030040740966796875, -0.53662109375, -0.1856689453125, -0.068115234375, -0.143798828125, -0.177978515625, -0.32666015625, -0.353515625, -0.1563720703125, -0.3203125, 0.0085906982421875, -0.1043701171875, -0.365478515625, -0.303466796875, -0.34326171875, -0.410888671875, -0.03790283203125, -0.11419677734375, -0.2939453125, 0.074462890625, -0.21826171875, 0.0242767333984375, -0.226318359375, -0.353515625, -0.177734375, -0.169189453125, -0.2423095703125, -0.12115478515625, -0.07843017578125, -0.341064453125, -0.2117919921875, -0.505859375, -0.544921875, -0.3935546875, -0.10772705078125, -0.2054443359375, -0.136474609375, -0.1796875, -0.396240234375, -0.1971435546875, -0.68408203125, -0.032684326171875, -0.03863525390625, -0.0709228515625, -0.1005859375, -0.156005859375, -0.3837890625, -0.319580078125, 0.11102294921875, -0.394287109375, 0.0799560546875, -0.50341796875, -0.1572265625, 0.004131317138671875, -0.12286376953125, -0.2347412109375, -0.29150390625, -0.10321044921875, -0.286376953125, 0.018798828125, -0.152099609375, -0.321044921875, 0.0191650390625, -0.11376953125, -0.54736328125, 0.15869140625, -0.257568359375, -0.2490234375, -0.3115234375, -0.09765625, -0.350830078125, -0.36376953125, -0.0771484375, -0.2298583984375, -0.30615234375, -0.052154541015625, -0.12091064453125, -0.40283203125, -0.1649169921875, 0.0206451416015625, -0.312744140625, -0.10308837890625, 
-0.50341796875, -0.1754150390625, -0.2003173828125, -0.173583984375, -0.204833984375, -0.1876220703125, -0.12176513671875, -0.06201171875, -0.03485107421875, -0.20068359375, -0.21484375, -0.246337890625, -0.006587982177734375, -0.09674072265625, -0.4658203125, -0.3994140625, -0.2210693359375, -0.09588623046875, -0.126220703125, -0.09222412109375, -0.145751953125, -0.217529296875, -0.289306640625, -0.28271484375, -0.1787109375, -0.169189453125, -0.359375, -0.21826171875, -0.043792724609375, -0.205322265625, -0.2900390625, -0.055419921875, -0.1490478515625, -0.340576171875, -0.045928955078125, -0.30517578125, -0.51123046875, -0.1046142578125, -0.349853515625, -0.10882568359375, -0.16748046875, -0.267333984375, -0.122314453125, -0.0985107421875, -0.3076171875, -0.1766357421875, -0.251708984375, 0.1964111328125, -0.2220458984375, -0.2349853515625, -0.035980224609375, -0.1749267578125, -0.237060546875, -0.480224609375, -0.240234375, -0.09539794921875, -0.2481689453125, -0.389404296875, -0.1748046875, -0.370849609375, -0.010650634765625, -0.147705078125, -0.0035457611083984375, -0.32568359375, -0.29931640625, -0.1395263671875, -0.28173828125, -0.09820556640625, -0.0176239013671875, -0.05926513671875, -0.0755615234375, -0.1746826171875, -0.283203125, -0.1617431640625, -0.4404296875, 0.046234130859375, -0.183837890625, -0.052032470703125, -0.24658203125, -0.11224365234375, -0.100830078125, -0.162841796875, -0.29736328125, -0.396484375, 0.11798095703125, -0.006496429443359375, -0.32568359375, -0.347900390625, -0.04595947265625, -0.09637451171875, -0.344970703125, -0.01166534423828125, -0.346435546875, -0.2861328125, -0.1845703125, -0.276611328125, -0.01312255859375, -0.395263671875, -0.50927734375, -0.1114501953125, -0.1861572265625, -0.2158203125, -0.1812744140625, 0.055419921875, -0.294189453125, 0.06500244140625, -0.1444091796875, -0.06365966796875, -0.18408203125, -0.0091705322265625, -0.1640625, -0.1856689453125, 0.090087890625, 0.024566650390625, -0.0195159912109375, -0.5546875, -0.301025390625, -0.438232421875, -0.072021484375, 0.030517578125, -0.1490478515625, 0.04888916015625, -0.23681640625, -0.1553955078125, -0.018096923828125, -0.229736328125, -0.2919921875, -0.355712890625, -0.285400390625, -0.1756591796875, -0.08355712890625, -0.416259765625, 0.022674560546875, -0.417236328125, 0.410400390625, -0.249755859375, 0.015625, -0.033599853515625, -0.040313720703125, -0.51708984375, -0.0518798828125, -0.08843994140625, -0.2022705078125, -0.3740234375, -0.285888671875, -0.176025390625, -0.292724609375, -0.369140625, -0.08367919921875, -0.356689453125, -0.38623046875, 0.06549072265625, 0.1669921875, -0.2099609375, -0.007434844970703125, 0.12890625, -0.0040740966796875, -0.2174072265625, -0.025115966796875, -0.2364501953125, -0.1695556640625, -0.0469970703125, -0.03924560546875, -0.36181640625, -0.047515869140625, -0.3154296875, -0.275634765625, -0.25634765625, -0.061920166015625, -0.12164306640625, -0.47314453125, -0.10784912109375, -0.74755859375, -0.13232421875, -0.32421875, -0.04998779296875, -0.286376953125, 0.10345458984375, -0.1710205078125, -0.388916015625, 0.12744140625, -0.3359375, -0.302490234375, -0.238525390625, -0.1455078125, -0.15869140625, -0.2427978515625, -0.0355224609375, -0.11944580078125, -0.31298828125, 0.11456298828125, -0.287841796875, -0.5439453125, -0.3076171875, -0.08642578125, -0.2408447265625, -0.283447265625, -0.428466796875, -0.085693359375, -0.1683349609375, 0.255126953125, 0.07635498046875, -0.38623046875, -0.2025146484375, -0.1331787109375, -0.10821533203125, 
-0.49951171875, 0.09130859375, -0.19677734375, -0.01904296875, -0.151123046875, -0.344482421875, -0.316650390625, -0.03900146484375, 0.1397705078125, 0.1334228515625, -0.037200927734375, -0.01861572265625, -0.1351318359375, -0.07037353515625, -0.380615234375, -0.34033203125, -0.06903076171875, 0.219970703125, 0.0132598876953125, -0.15869140625, -0.6376953125, 0.158935546875, -0.5283203125, -0.2320556640625, -0.185791015625, -0.2132568359375, -0.436767578125, -0.430908203125, -0.1763916015625, -0.0007672309875488281, -0.424072265625, -0.06719970703125, -0.347900390625, -0.14453125, -0.3056640625, -0.36474609375, -0.35986328125, -0.46240234375, -0.446044921875, -0.1905517578125, -0.1114501953125, -0.42919921875, -0.0643310546875, -0.3662109375, -0.4296875, -0.10968017578125, -0.2998046875, -0.1756591796875, -0.4052734375, -0.0841064453125, -0.252197265625, -0.047393798828125, 0.00434112548828125, -0.10040283203125, -0.271484375, -0.185302734375, -0.1910400390625, 0.10260009765625, 0.01393890380859375, -0.03350830078125, -0.33935546875, -0.329345703125, 0.0574951171875, -0.18896484375, -0.17724609375, -0.42919921875, -0.26708984375, -0.4189453125, -0.149169921875, -0.265625, -0.198974609375, -0.1722412109375, 0.1563720703125, -0.20947265625, -0.267822265625, -0.06353759765625, -0.365478515625, -0.340087890625, -0.3095703125, -0.320068359375, -0.0880126953125, -0.353759765625, -0.0005812644958496094, -0.1617431640625, -0.1866455078125, -0.201416015625, -0.181396484375, -0.2349853515625, -0.384765625, -0.5244140625, 0.01227569580078125, -0.21337890625, -0.30810546875, -0.17578125, -0.3037109375, -0.52978515625, -0.1561279296875, -0.296142578125, 0.057342529296875, -0.369384765625, -0.107666015625, -0.338623046875, -0.2060546875, -0.0213775634765625, -0.394775390625, -0.219482421875, -0.125732421875, -0.03997802734375, -0.42431640625, -0.134521484375, -0.2418212890625, -0.10504150390625, 0.1552734375, 0.1126708984375, -0.1427001953125, -0.133544921875, -0.111083984375, -0.375732421875, -0.2783203125, -0.036834716796875, -0.11053466796875, 0.2471923828125, -0.2529296875, -0.56494140625, -0.374755859375, -0.326416015625, 0.2137451171875, -0.09454345703125, -0.337158203125, -0.3359375, -0.34375, -0.0999755859375, -0.388671875, 0.0103302001953125, 0.14990234375, -0.2041015625, -0.39501953125, -0.39013671875, -0.1258544921875, 0.1453857421875, -0.250732421875, -0.06732177734375, -0.10638427734375, -0.032379150390625, -0.35888671875, -0.098876953125, -0.172607421875, 0.05126953125, -0.1956787109375, -0.183837890625, -0.37060546875, 0.1556396484375, -0.34375, -0.28662109375, -0.06982421875, -0.302490234375, -0.281005859375, -0.1640625, -0.5302734375, -0.1368408203125, -0.1268310546875, -0.35302734375, -0.1473388671875, -0.45556640625, -0.35986328125, -0.273681640625, -0.2249755859375, -0.1893310546875, 0.09356689453125, -0.248291015625, -0.197998046875, -0.3525390625, -0.30126953125, -0.228271484375, -0.2421875, -0.0906982421875, 0.227783203125, -0.296875, -0.009796142578125, -0.2939453125, -0.1021728515625, -0.215576171875, -0.267822265625, -0.052642822265625, 0.203369140625, -0.1417236328125, 0.18505859375, 0.12347412109375, -0.0972900390625, -0.54052734375, -0.430419921875, -0.0906982421875, -0.5419921875, -0.22900390625, -0.0625, -0.12152099609375, -0.495849609375, -0.206787109375, -0.025848388671875, 0.039031982421875, -0.453857421875, -0.318359375, -0.426025390625, -0.3701171875, -0.2169189453125, 0.0845947265625, -0.045654296875, 0.11090087890625, 0.0012454986572265625, 0.2066650390625, 
-0.046356201171875, -0.2337646484375, -0.295654296875, 0.057891845703125, -0.1639404296875, -0.0535888671875, -0.2607421875, -0.1488037109375, -0.16015625, -0.54345703125, -0.2305908203125, -0.55029296875, -0.178955078125, -0.222412109375, -0.0711669921875, -0.12298583984375, -0.119140625, -0.253662109375, -0.33984375, -0.11322021484375, -0.10723876953125, -0.205078125, -0.360595703125, 0.085205078125, -0.252197265625, -0.365966796875, -0.26953125, 0.2000732421875, -0.50634765625, 0.05706787109375, -0.3115234375, 0.0242919921875, -0.1689453125, -0.2401123046875, -0.3759765625, -0.2125244140625, 0.076416015625, -0.489013671875, -0.11749267578125, -0.55908203125, -0.313232421875, -0.572265625, -0.1387939453125, -0.037078857421875, -0.385498046875, 0.0323486328125, -0.39404296875, -0.05072021484375, -0.10430908203125, -0.10919189453125, -0.28759765625, -0.37451171875, -0.016937255859375, -0.2200927734375, -0.296875, -0.0286712646484375, -0.213134765625, 0.052001953125, -0.052337646484375, -0.253662109375, 0.07269287109375, -0.2498779296875, -0.150146484375, -0.09930419921875, -0.343505859375, 0.254150390625, -0.032440185546875, -0.296142578125], [1.4111328125, 0.00757598876953125, -0.428955078125, 0.089599609375, 0.0227813720703125, -0.0350341796875, -1.0986328125, 0.194091796875, 2.115234375, -0.75439453125, 0.269287109375, -0.73486328125, -1.1025390625, -0.050262451171875, -0.5830078125, 0.0268707275390625, -0.603515625, -0.6025390625, -1.1689453125, 0.25048828125, -0.4189453125, -0.5517578125, -0.30322265625, 0.7724609375, 0.931640625, -0.1422119140625, 2.27734375, -0.56591796875, 1.013671875, -0.9638671875, -0.66796875, -0.8125, 1.3740234375, -1.060546875, -1.029296875, -1.6796875, 0.62890625, 0.49365234375, 0.671875, 0.99755859375, -1.0185546875, -0.047027587890625, -0.374267578125, 0.2354736328125, 1.4970703125, -1.5673828125, 0.448974609375, 0.2078857421875, -1.060546875, -0.171875, -0.6201171875, -0.1607666015625, 0.7548828125, -0.58935546875, -0.2052001953125, 0.060791015625, 0.200439453125, 3.154296875, -3.87890625, 2.03515625, 1.126953125, 0.1640625, -1.8447265625, 0.002620697021484375, 0.7998046875, -0.337158203125, 0.47216796875, -0.5849609375, 0.9970703125, 0.3935546875, 1.22265625, -1.5048828125, -0.65673828125, 1.1474609375, -1.73046875, -1.8701171875, 1.529296875, -0.6787109375, -1.4453125, 1.556640625, -0.327392578125, 2.986328125, -0.146240234375, -2.83984375, 0.303466796875, -0.71728515625, -0.09698486328125, -0.2423095703125, 0.6767578125, -2.197265625, -0.86279296875, -0.53857421875, -1.2236328125, 1.669921875, -1.1689453125, -0.291259765625, -0.54736328125, -0.036346435546875, 1.041015625, -1.7265625, -0.6064453125, -0.1634521484375, 0.2381591796875, 0.65087890625, -1.169921875, 1.9208984375, 0.5634765625, 0.37841796875, 0.798828125, -1.021484375, -0.4091796875, 2.275390625, -0.302734375, -1.7783203125, 1.0458984375, 1.478515625, 0.708984375, -1.541015625, -0.0006041526794433594, 1.1884765625, 2.041015625, 0.560546875, -0.1131591796875, 1.0341796875, 0.06121826171875, 2.6796875, -0.53369140625, -1.2490234375, -0.7333984375, -1.017578125, -1.0078125, 1.3212890625, -0.47607421875, -1.4189453125, 0.54052734375, -0.796875, -0.73095703125, -1.412109375, -0.94873046875, -2.2734375, -1.1220703125, -1.3837890625, -0.5087890625, -1.0380859375, -0.93603515625, -0.58349609375, -1.0703125, -1.10546875, -2.60546875, 0.062225341796875, 0.38232421875, -0.411376953125, -0.369140625, -0.9833984375, -0.7294921875, -0.181396484375, -0.47216796875, -0.56884765625, -0.11041259765625, 
-2.673828125, 0.27783203125, -0.857421875, 0.9296875, 1.9580078125, 0.1385498046875, -1.91796875, -1.529296875, 0.53857421875, 0.509765625, -0.90380859375, -0.0947265625, -2.083984375, 0.9228515625, -0.28564453125, -0.80859375, -0.093505859375, -0.6015625, -1.255859375, 0.6533203125, 0.327880859375, -0.07598876953125, -0.22705078125, -0.30078125, -0.5185546875, -1.6044921875, 1.5927734375, 1.416015625, -0.91796875, -0.276611328125, -0.75830078125, -1.1689453125, -1.7421875, 1.0546875, -0.26513671875, -0.03314208984375, 0.278076171875, -1.337890625, 0.055023193359375, 0.10546875, -1.064453125, 1.048828125, -1.4052734375, -1.1240234375, -0.51416015625, -1.05859375, -1.7265625, -1.1328125, 0.43310546875, -2.576171875, -2.140625, -0.79345703125, 0.50146484375, 1.96484375, 0.98583984375, 0.337646484375, -0.77978515625, 0.85498046875, -0.65185546875, -0.484375, 2.708984375, 0.55810546875, -0.147216796875, -0.5537109375, -0.75439453125, -1.736328125, 1.1259765625, -1.095703125, -0.2587890625, 2.978515625, 0.335205078125, 0.357666015625, -0.09356689453125, 0.295654296875, -0.23779296875, 1.5751953125, 0.10400390625, 1.7001953125, -0.72900390625, -1.466796875, -0.2012939453125, 0.634765625, -0.1556396484375, -2.01171875, 0.32666015625, 0.047454833984375, -0.1671142578125, -0.78369140625, -0.994140625, 0.7802734375, -0.1429443359375, -0.115234375, 0.53271484375, -0.96142578125, -0.064208984375, 1.396484375, 1.654296875, -1.6015625, -0.77392578125, 0.276123046875, -0.42236328125, 0.8642578125, 0.533203125, 0.397216796875, -1.21484375, 0.392578125, -0.501953125, -0.231689453125, 1.474609375, 1.6669921875, 1.8662109375, -1.2998046875, 0.223876953125, -0.51318359375, -0.437744140625, -1.16796875, -0.7724609375, 1.6826171875, 0.62255859375, 2.189453125, -0.599609375, -0.65576171875, -1.1005859375, -0.45263671875, -0.292236328125, 2.58203125, -1.3779296875, 0.23486328125, -1.708984375, -1.4111328125, -0.5078125, -0.8525390625, -0.90771484375, 0.861328125, -2.22265625, -1.380859375, 0.7275390625, 0.85595703125, -0.77978515625, 2.044921875, -0.430908203125, 0.78857421875, -1.21484375, -0.09130859375, 0.5146484375, -1.92578125, -0.1396484375, 0.289306640625, 0.60498046875, 0.93896484375, -0.09295654296875, -0.45751953125, -0.986328125, -0.66259765625, 1.48046875, 0.274169921875, -0.267333984375, -1.3017578125, -1.3623046875, -1.982421875, -0.86083984375, -0.41259765625, -0.2939453125, -1.91015625, 1.6826171875, 0.437255859375, 1.0029296875, 0.376220703125, -0.010467529296875, -0.82861328125, -0.513671875, -3.134765625, 1.0205078125, -1.26171875, -1.009765625, 1.0869140625, -0.95703125, 0.0103759765625, 1.642578125, 0.78564453125, 1.029296875, 0.496826171875, 1.2880859375, 0.5234375, 0.05322265625, -0.206787109375, -0.79443359375, -1.1669921875, 0.049530029296875, -0.27978515625, 0.0237884521484375, -0.74169921875, -1.068359375, 0.86083984375, 1.1787109375, 0.91064453125, -0.453857421875, -1.822265625, -0.9228515625, -0.50048828125, 0.359130859375, 0.802734375, -1.3564453125, -0.322509765625, -1.1123046875, -1.0390625, -0.52685546875, -1.291015625, -0.343017578125, -1.2109375, -0.19091796875, 2.146484375, -0.04315185546875, -0.3701171875, -2.044921875, -0.429931640625, -0.56103515625, -0.166015625, -0.4658203125, -2.29296875, -1.078125, -1.0927734375, -0.1033935546875, -0.56103515625, -0.05743408203125, -1.986328125, -0.513671875, 0.70361328125, -2.484375, -1.3037109375, -1.6650390625, 0.4814453125, -0.84912109375, -2.697265625, -0.197998046875, 0.0869140625, -0.172607421875, -1.326171875, -1.197265625, 
1.23828125, -0.38720703125, -0.075927734375, 0.02569580078125, -1.2119140625, 0.09027099609375, -2.12890625, -1.640625, -0.1524658203125, 0.2373046875, 1.37109375, 2.248046875, 1.4619140625, 0.3134765625, 0.50244140625, -0.1383056640625, -1.2705078125, 0.7353515625, 0.65771484375, -0.431396484375, -1.341796875, 0.10089111328125, 0.208984375, -0.0099945068359375, 0.83203125, 1.314453125, -0.422607421875, -1.58984375, -0.6044921875, 0.23681640625, -1.60546875, -0.61083984375, -1.5615234375, 1.62890625, -0.6728515625, -0.68212890625, -0.5224609375, -0.9150390625, -0.468994140625, 0.268310546875, 0.287353515625, -0.025543212890625, 0.443603515625, 1.62109375, -1.08984375, -0.5556640625, 1.03515625, -0.31298828125, -0.041778564453125, 0.260986328125, 0.34716796875, -2.326171875, 0.228271484375, -0.85107421875, -2.255859375, 0.3486328125, -0.25830078125, -0.3671875, -0.796875, -1.115234375, 1.8369140625, -0.19775390625, -1.236328125, -0.0447998046875, 0.69921875, 1.37890625, 1.11328125, 0.0928955078125, 0.6318359375, -0.62353515625, 0.55859375, -0.286865234375, 1.5361328125, -0.391357421875, -0.052215576171875, -1.12890625, 0.55517578125, -0.28515625, -0.3603515625, 0.68896484375, 0.67626953125, 0.003070831298828125, 1.2236328125, 0.1597900390625, -1.3076171875, 0.99951171875, -2.5078125, -1.2119140625, 0.1749267578125, -1.1865234375, -1.234375, -0.1180419921875, -1.751953125, 0.033050537109375, 0.234130859375, -3.107421875, -1.0380859375, 0.61181640625, -0.87548828125, 0.3154296875, -1.103515625, 0.261474609375, -1.130859375, -0.7470703125, -0.43408203125, 1.3828125, -0.41259765625, -1.7587890625, 0.765625, 0.004852294921875, 0.135498046875, -0.76953125, -0.1314697265625, 0.400390625, 1.43359375, 0.07135009765625, 0.0645751953125, -0.5869140625, -0.5810546875, -0.2900390625, -1.3037109375, 0.1287841796875, -0.27490234375, 0.59228515625, 2.333984375, -0.54541015625, -0.556640625, 0.447265625, -0.806640625, 0.09149169921875, -0.70654296875, -0.357177734375, -1.099609375, -0.5576171875, -0.44189453125, 0.400390625, -0.666015625, -1.4619140625, 0.728515625, -1.5986328125, 0.153076171875, -0.126708984375, -2.83984375, -1.84375, -0.2469482421875, 0.677734375, 0.43701171875, 3.298828125, 1.1591796875, -0.7158203125, -0.8251953125, 0.451171875, -2.376953125, -0.58642578125, -0.86767578125, 0.0789794921875, 0.1351318359375, -0.325439453125, 0.484375, 1.166015625, -0.1610107421875, -0.15234375, -0.54638671875, -0.806640625, 0.285400390625, 0.1661376953125, -0.50146484375, -1.0478515625, 1.5751953125, 0.0313720703125, 0.2396240234375, -0.6572265625, -0.1258544921875, -1.060546875, 1.3076171875, -0.301513671875, -1.2412109375, 0.6376953125, -1.5693359375, 0.354248046875, 0.2427978515625, -0.392333984375, 0.61962890625, -0.58837890625, -1.71484375, -0.2098388671875, -0.828125, 0.330810546875, 0.16357421875, -0.2259521484375, 0.0972900390625, -0.451416015625, 1.79296875, -1.673828125, -1.58203125, -2.099609375, -0.487548828125, -0.87060546875, 0.62646484375, -1.470703125, -0.1558837890625, 0.4609375, 1.3369140625, 0.2322998046875, 0.1632080078125, 0.65966796875, 1.0810546875, 0.1041259765625, 0.63232421875, -0.32421875, -1.04296875, -1.046875, -1.3720703125, -0.8486328125, 0.1290283203125, 0.137939453125, 0.1549072265625, -1.0908203125, 0.0167694091796875, -0.31689453125, 1.390625, 0.07269287109375, 1.0390625, 1.1162109375, -0.455810546875, -0.06689453125, -0.053741455078125, 0.5048828125, -0.8408203125, -1.19921875, 0.87841796875, 0.7421875, 0.2030029296875, 0.109619140625, -0.59912109375, -1.337890625, 
-0.74169921875, -0.64453125, -1.326171875, 0.21044921875, -1.3583984375, -1.685546875, -0.472900390625, -0.270263671875, 0.99365234375, -0.96240234375, 1.1279296875, -0.45947265625, -0.45654296875, -0.99169921875, -3.515625, -1.9853515625, 0.73681640625, 0.92333984375, -0.56201171875, -1.4453125, -2.078125, 0.94189453125, -1.333984375, 0.0982666015625, 0.60693359375, 0.367431640625, 3.015625, -1.1357421875, -1.5634765625, 0.90234375, -0.1783447265625, 0.1802978515625, -0.317138671875, -0.513671875, 1.2353515625, -0.033203125, 1.4482421875, 1.0087890625, 0.9248046875, 0.10418701171875, 0.7626953125, -1.3798828125, 0.276123046875, 0.55224609375, 1.1005859375, -0.62158203125, -0.806640625, 0.65087890625, 0.270263671875, -0.339111328125, -0.9384765625, -0.09381103515625, -0.7216796875, 1.37890625, -0.398193359375, -0.3095703125, -1.4912109375, 0.96630859375, 0.43798828125, 0.62255859375, 0.0213470458984375, 0.235595703125, -1.2958984375, 0.0157318115234375, -0.810546875, 1.9736328125, -0.2462158203125, 0.720703125, 0.822265625, -0.755859375, -0.658203125, 0.344482421875, -2.892578125, -0.282470703125, 1.2529296875, -0.294189453125, 0.6748046875, -0.80859375, 0.9287109375, 1.27734375, -1.71875, -0.166015625, 0.47412109375, -0.41259765625, -1.3681640625, -0.978515625, -0.77978515625, -1.044921875, -0.90380859375, -0.08184814453125, -0.86181640625, -0.10772705078125, -0.299560546875, -0.4306640625, -0.47119140625, 0.95703125, 1.107421875, 0.91796875, 0.76025390625, 0.7392578125, -0.09161376953125, -0.7392578125, 0.9716796875, -0.395751953125, -0.75390625, -0.164306640625, -0.087646484375, 0.028564453125, -0.91943359375, -0.66796875, 2.486328125, 0.427734375, 0.626953125, 0.474853515625, 0.0926513671875, 0.830078125, -0.6923828125, 0.7841796875, -0.89208984375, -2.482421875, 0.034912109375, -1.3447265625, -0.475341796875, -0.286376953125, -0.732421875, 0.190673828125, -0.491455078125, -3.091796875, -1.2783203125, -0.66015625, -0.1507568359375, 0.042236328125, -1.025390625, 0.12744140625, -1.984375, -0.393798828125, -1.25, -1.140625, 1.77734375, 0.2457275390625, -0.8017578125, 0.7763671875, -0.387939453125, -0.3662109375, 1.1572265625, 0.123291015625, -0.07135009765625, 1.412109375, -0.685546875, -3.078125, 0.031524658203125, -0.70458984375, 0.78759765625, 0.433837890625, -1.861328125, -1.33203125, 2.119140625, -1.3544921875, -0.6591796875, -1.4970703125, 0.40625, -2.078125, -1.30859375, 0.050262451171875, -0.60107421875, 1.0078125, 0.05657958984375, -0.96826171875, 0.0264892578125, 0.159912109375, 0.84033203125, -1.1494140625, -0.0433349609375, -0.2034912109375, 1.09765625, -1.142578125, -0.283203125, -0.427978515625, 1.0927734375, -0.67529296875, -0.61572265625, 2.517578125, 0.84130859375, 1.8662109375, 0.1748046875, -0.407958984375, -0.029449462890625, -0.27587890625, -0.958984375, -0.10028076171875, 1.248046875, -0.0792236328125, -0.45556640625, 0.7685546875, 1.5556640625, -1.8759765625, -0.131591796875, -1.3583984375, 0.7890625, 0.80810546875, -1.0322265625, -0.53076171875, -0.1484375, -1.7841796875, -1.2470703125, 0.17138671875, -0.04864501953125, -0.80322265625, -0.0933837890625, 0.984375, 0.7001953125, 0.5380859375, 0.2022705078125, -1.1865234375, 0.5439453125, 1.1318359375, 0.79931640625, 0.32666015625, -1.26171875, 0.457763671875, 1.1591796875, -0.34423828125, 0.65771484375, 0.216552734375, 1.19140625, -0.2744140625, -0.020416259765625, -0.86376953125, 0.93017578125, 1.0556640625, 0.69873046875, -0.15087890625, -0.33056640625, 0.8505859375, 0.06890869140625, 0.359375, -0.262939453125, 
0.12493896484375, 0.017059326171875, -0.98974609375, 0.5107421875, 0.2408447265625, 0.615234375, -0.62890625, 0.86962890625, -0.07427978515625, 0.85595703125, 0.300537109375, -1.072265625, -1.6064453125, -0.353515625, -0.484130859375, -0.6044921875, -0.455810546875, 0.95849609375, 1.3671875, 0.544921875, 0.560546875, 0.34521484375, -0.6513671875, -0.410400390625, -0.2021484375, -0.1656494140625, 0.073486328125, 0.84716796875, -1.7998046875, -1.0126953125, -0.1324462890625, 0.95849609375, -0.669921875, -0.79052734375, -2.193359375, -0.42529296875, -1.7275390625, -1.04296875, 0.716796875, -0.4423828125, -1.193359375, 0.61572265625, -1.5224609375, 0.62890625, -0.705078125, 0.677734375, -0.213134765625, -1.6748046875, -1.087890625, -0.65185546875, -1.1337890625, 2.314453125, -0.352783203125, -0.27001953125, -2.01953125, -1.2685546875, 0.308837890625, -0.280517578125, -1.3798828125, -1.595703125, 0.642578125, 1.693359375, -0.82470703125, -1.255859375, 0.57373046875, 1.5859375, 1.068359375, -0.876953125, 0.370849609375, 1.220703125, 0.59765625, 0.007602691650390625, 0.09326171875, -0.9521484375, -0.024932861328125, -0.94775390625, -0.299560546875, -0.002536773681640625, 1.41796875, -0.06903076171875, -1.5927734375, 0.353515625, 3.63671875, -0.765625, -1.1142578125, 0.4287109375, -0.86865234375, -0.9267578125, -0.21826171875, -1.10546875, 0.29296875, -0.225830078125, 0.5400390625, -0.45556640625, -0.68701171875, -0.79150390625, -1.0810546875, 0.25439453125, -1.2998046875, -0.494140625, -0.1510009765625, 1.5615234375, -0.4248046875, -0.486572265625, 0.45458984375, 0.047637939453125, -0.11639404296875, 0.057403564453125, 0.130126953125, -0.10125732421875, -0.56201171875, 1.4765625, -1.7451171875, 1.34765625, -0.45703125, 0.873046875, -0.056121826171875, -0.8876953125, -0.986328125, 1.5654296875, 0.49853515625, 0.55859375, -0.2198486328125, 0.62548828125, 0.2734375, -0.63671875, -0.41259765625, -1.2705078125, 0.0665283203125, 1.3369140625, 0.90283203125, -0.77685546875, -1.5, -1.8525390625, -1.314453125, -0.86767578125, -0.331787109375, 0.1590576171875, 0.94775390625, -0.1771240234375, 1.638671875, -2.17578125, 0.58740234375, 0.424560546875, -0.3466796875, 0.642578125, 0.473388671875, 0.96435546875, 1.38671875, -0.91357421875, 1.0361328125, -0.67333984375, 1.5009765625]]]).to(device) + + cond = [[prompt_embeds, {}]] + + return (cond,) + +NODE_CLASS_MAPPINGS = { + "LotusConditioning" : LotusConditioning, +} diff --git a/comfy_extras/nodes_model_advanced.py b/comfy_extras/nodes_model_advanced.py index ceac5654b..2b805c1ee 100644 --- a/comfy_extras/nodes_model_advanced.py +++ b/comfy_extras/nodes_model_advanced.py @@ -24,6 +24,10 @@ class X0(comfy.model_sampling.EPS): def calculate_denoised(self, sigma, model_output, model_input): return model_output +class Lotus(X0): + def calculate_input(self, sigma, noise): + return noise + class ModelSamplingDiscreteDistilled(comfy.model_sampling.ModelSamplingDiscrete): original_timesteps = 50 @@ -56,7 +60,7 @@ class ModelSamplingDiscrete: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "sampling": (["eps", "v_prediction", "lcm", "x0"],), + "sampling": (["eps", "v_prediction", "lcm", "x0", "lotus"],), "zsnr": ("BOOLEAN", {"default": False}), }} @@ -78,6 +82,8 @@ class ModelSamplingDiscrete: sampling_base = ModelSamplingDiscreteDistilled elif sampling == "x0": sampling_type = X0 + elif sampling == "lotus": + sampling_type = Lotus class ModelSamplingAdvanced(sampling_base, sampling_type): pass diff --git a/nodes.py b/nodes.py index 
a9c931dfa..27ef743b3 100644 --- a/nodes.py +++ b/nodes.py @@ -2264,6 +2264,7 @@ def init_builtin_extra_nodes(): "nodes_video.py", "nodes_lumina2.py", "nodes_wan.py", + "nodes_lotus.py", "nodes_hunyuan3d.py", "nodes_primitive.py", ] From d9fa9d307ff49d3bad50b623306118d483a387fd Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Fri, 21 Mar 2025 14:19:37 -0400 Subject: [PATCH 65/77] Automatically set the right sampling type for lotus. --- comfy/model_base.py | 5 ++++- comfy/model_sampling.py | 9 +++++++++ comfy_extras/nodes_model_advanced.py | 16 ++++------------ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/comfy/model_base.py b/comfy/model_base.py index 2fb4b1453..eec70d5de 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -59,6 +59,7 @@ class ModelType(Enum): FLOW = 6 V_PREDICTION_CONTINUOUS = 7 FLUX = 8 + IMG_TO_IMG = 9 from comfy.model_sampling import EPS, V_PREDICTION, EDM, ModelSamplingDiscrete, ModelSamplingContinuousEDM, StableCascadeSampling, ModelSamplingContinuousV @@ -89,6 +90,8 @@ def model_sampling(model_config, model_type): elif model_type == ModelType.FLUX: c = comfy.model_sampling.CONST s = comfy.model_sampling.ModelSamplingFlux + elif model_type == ModelType.IMG_TO_IMG: + c = comfy.model_sampling.IMG_TO_IMG class ModelSampling(s, c): pass @@ -613,7 +616,7 @@ class Lotus(BaseModel): out['y'] = comfy.conds.CONDRegular(task_emb) return out - def __init__(self, model_config, model_type=ModelType.EPS, device=None): + def __init__(self, model_config, model_type=ModelType.IMG_TO_IMG, device=None): super().__init__(model_config, model_type, device=device) class StableCascade_C(BaseModel): diff --git a/comfy/model_sampling.py b/comfy/model_sampling.py index ff27b09a8..b79af1e92 100644 --- a/comfy/model_sampling.py +++ b/comfy/model_sampling.py @@ -69,6 +69,15 @@ class CONST: sigma = sigma.view(sigma.shape[:1] + (1,) * (latent.ndim - 1)) return latent / (1.0 - sigma) +class X0(EPS): + def calculate_denoised(self, sigma, model_output, model_input): + return model_output + +class IMG_TO_IMG(X0): + def calculate_input(self, sigma, noise): + return noise + + class ModelSamplingDiscrete(torch.nn.Module): def __init__(self, model_config=None, zsnr=None): super().__init__() diff --git a/comfy_extras/nodes_model_advanced.py b/comfy_extras/nodes_model_advanced.py index 2b805c1ee..71a652ffa 100644 --- a/comfy_extras/nodes_model_advanced.py +++ b/comfy_extras/nodes_model_advanced.py @@ -20,14 +20,6 @@ class LCM(comfy.model_sampling.EPS): return c_out * x0 + c_skip * model_input -class X0(comfy.model_sampling.EPS): - def calculate_denoised(self, sigma, model_output, model_input): - return model_output - -class Lotus(X0): - def calculate_input(self, sigma, noise): - return noise - class ModelSamplingDiscreteDistilled(comfy.model_sampling.ModelSamplingDiscrete): original_timesteps = 50 @@ -60,7 +52,7 @@ class ModelSamplingDiscrete: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "sampling": (["eps", "v_prediction", "lcm", "x0", "lotus"],), + "sampling": (["eps", "v_prediction", "lcm", "x0", "img_to_img"],), "zsnr": ("BOOLEAN", {"default": False}), }} @@ -81,9 +73,9 @@ class ModelSamplingDiscrete: sampling_type = LCM sampling_base = ModelSamplingDiscreteDistilled elif sampling == "x0": - sampling_type = X0 - elif sampling == "lotus": - sampling_type = Lotus + sampling_type = comfy.model_sampling.X0 + elif sampling == "img_to_img": + sampling_type = comfy.model_sampling.IMG_TO_IMG class ModelSamplingAdvanced(sampling_base, sampling_type): 
pass From 2206246055af7996ee8c6cb79346767d90da8372 Mon Sep 17 00:00:00 2001 From: Terry Jia Date: Fri, 21 Mar 2025 16:24:13 -0400 Subject: [PATCH 66/77] support output normal and lineart once (#7290) --- comfy_extras/nodes_load_3d.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py index 8b43cf218..db30030fb 100644 --- a/comfy_extras/nodes_load_3d.py +++ b/comfy_extras/nodes_load_3d.py @@ -21,8 +21,8 @@ class Load3D(): "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), }} - RETURN_TYPES = ("IMAGE", "MASK", "STRING") - RETURN_NAMES = ("image", "mask", "mesh_path") + RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE", "IMAGE") + RETURN_NAMES = ("image", "mask", "mesh_path", "normal", "lineart") FUNCTION = "process" EXPERIMENTAL = True @@ -32,12 +32,16 @@ class Load3D(): def process(self, model_file, image, **kwargs): image_path = folder_paths.get_annotated_filepath(image['image']) mask_path = folder_paths.get_annotated_filepath(image['mask']) + normal_path = folder_paths.get_annotated_filepath(image['normal']) + lineart_path = folder_paths.get_annotated_filepath(image['lineart']) load_image_node = nodes.LoadImage() output_image, ignore_mask = load_image_node.load_image(image=image_path) ignore_image, output_mask = load_image_node.load_image(image=mask_path) + normal_image, ignore_mask2 = load_image_node.load_image(image=normal_path) + lineart_image, ignore_mask3 = load_image_node.load_image(image=lineart_path) - return output_image, output_mask, model_file, + return output_image, output_mask, model_file, normal_image, lineart_image class Load3DAnimation(): @classmethod @@ -55,8 +59,8 @@ class Load3DAnimation(): "height": ("INT", {"default": 1024, "min": 1, "max": 4096, "step": 1}), }} - RETURN_TYPES = ("IMAGE", "MASK", "STRING") - RETURN_NAMES = ("image", "mask", "mesh_path") + RETURN_TYPES = ("IMAGE", "MASK", "STRING", "IMAGE") + RETURN_NAMES = ("image", "mask", "mesh_path", "normal") FUNCTION = "process" EXPERIMENTAL = True @@ -66,12 +70,14 @@ class Load3DAnimation(): def process(self, model_file, image, **kwargs): image_path = folder_paths.get_annotated_filepath(image['image']) mask_path = folder_paths.get_annotated_filepath(image['mask']) + normal_path = folder_paths.get_annotated_filepath(image['normal']) load_image_node = nodes.LoadImage() output_image, ignore_mask = load_image_node.load_image(image=image_path) ignore_image, output_mask = load_image_node.load_image(image=mask_path) + normal_image, ignore_mask2 = load_image_node.load_image(image=normal_path) - return output_image, output_mask, model_file, + return output_image, output_mask, model_file, normal_image class Preview3D(): @classmethod From ce9b084279110f78ca2faf53fb0ef05ac4aaba48 Mon Sep 17 00:00:00 2001 From: Chenlei Hu Date: Fri, 21 Mar 2025 19:08:25 -0400 Subject: [PATCH 67/77] [nit] Format error strings (#7345) --- app/frontend_management.py | 53 +++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/app/frontend_management.py b/app/frontend_management.py index b4ba994d1..c56ea86e0 100644 --- a/app/frontend_management.py +++ b/app/frontend_management.py @@ -22,13 +22,21 @@ import app.logger # The path to the requirements.txt file req_path = Path(__file__).parents[1] / "requirements.txt" + def frontend_install_warning_message(): """The warning message to display when the frontend version is not up to date.""" extra = "" if sys.flags.no_user_site: extra = "-s 
" - return f"Please install the updated requirements.txt file by running:\n{sys.executable} {extra}-m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem" + return f""" +Please install the updated requirements.txt file by running: +{sys.executable} {extra}-m pip install -r {req_path} + +This error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead. + +If you are on the portable package you can run: update\\update_comfyui.bat to solve this problem +""".strip() def check_frontend_version(): @@ -43,7 +51,17 @@ def check_frontend_version(): with open(req_path, "r", encoding="utf-8") as f: required_frontend = parse_version(f.readline().split("=")[-1]) if frontend_version < required_frontend: - app.logger.log_startup_warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), frontend_install_warning_message())) + app.logger.log_startup_warning( + f""" +________________________________________________________________________ +WARNING WARNING WARNING WARNING WARNING + +Installed frontend version {".".join(map(str, frontend_version))} is lower than the recommended version {".".join(map(str, required_frontend))}. + +{frontend_install_warning_message()} +________________________________________________________________________ +""".strip() + ) else: logging.info("ComfyUI frontend version: {}".format(frontend_version_str)) except Exception as e: @@ -150,9 +168,20 @@ class FrontendManager: def default_frontend_path(cls) -> str: try: import comfyui_frontend_package + return str(importlib.resources.files(comfyui_frontend_package) / "static") except ImportError: - logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. {frontend_install_warning_message()}\n********** ERROR **********\n") + logging.error( + f""" +********** ERROR *********** + +comfyui-frontend-package is not installed. + +{frontend_install_warning_message()} + +********** ERROR *********** +""".strip() + ) sys.exit(-1) @classmethod @@ -175,7 +204,9 @@ class FrontendManager: return match_result.group(1), match_result.group(2), match_result.group(3) @classmethod - def init_frontend_unsafe(cls, version_string: str, provider: Optional[FrontEndProvider] = None) -> str: + def init_frontend_unsafe( + cls, version_string: str, provider: Optional[FrontEndProvider] = None + ) -> str: """ Initializes the frontend for the specified version. 
@@ -197,12 +228,20 @@ class FrontendManager:
         repo_owner, repo_name, version = cls.parse_version_string(version_string)
 
         if version.startswith("v"):
-            expected_path = str(Path(cls.CUSTOM_FRONTENDS_ROOT) / f"{repo_owner}_{repo_name}" / version.lstrip("v"))
+            expected_path = str(
+                Path(cls.CUSTOM_FRONTENDS_ROOT)
+                / f"{repo_owner}_{repo_name}"
+                / version.lstrip("v")
+            )
             if os.path.exists(expected_path):
-                logging.info(f"Using existing copy of specific frontend version tag: {repo_owner}/{repo_name}@{version}")
+                logging.info(
+                    f"Using existing copy of specific frontend version tag: {repo_owner}/{repo_name}@{version}"
+                )
                 return expected_path
 
-        logging.info(f"Initializing frontend: {repo_owner}/{repo_name}@{version}, requesting version details from GitHub...")
+        logging.info(
+            f"Initializing frontend: {repo_owner}/{repo_name}@{version}, requesting version details from GitHub..."
+        )
 
         provider = provider or FrontEndProvider(repo_owner, repo_name)
         release = provider.get_release(version)

From 75c1c757d90ca891eff823893248ef8b51d31d01 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 21 Mar 2025 20:09:54 -0400
Subject: [PATCH 68/77] ComfyUI version v0.3.27

---
 comfyui_version.py | 2 +-
 pyproject.toml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfyui_version.py b/comfyui_version.py
index b5e6fbead..705622529 100644
--- a/comfyui_version.py
+++ b/comfyui_version.py
@@ -1,3 +1,3 @@
 # This file is automatically generated by the build process when version is
 # updated in pyproject.toml.
-__version__ = "0.3.26"
+__version__ = "0.3.27"
diff --git a/pyproject.toml b/pyproject.toml
index f13fed8dc..db9e776cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ComfyUI"
-version = "0.3.26"
+version = "0.3.27"
 readme = "README.md"
 license = { file = "LICENSE" }
 requires-python = ">=3.9"

From e471c726e57b3854e0dd47efe0e7c53a28703dbb Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Sat, 22 Mar 2025 15:45:56 -0400
Subject: [PATCH 69/77] Fallback to pytorch attention if sage attention fails.
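The fallback has to respect the tensor layout: in "NHD" mode sageattn
consumes tensors shaped (batch, seq_len, heads, dim_head), while
attention_pytorch with skip_reshape=True expects (batch, heads, seq_len,
dim_head), which is why the except branch transposes dims 1 and 2 before
delegating. A minimal sketch of that shape handling (illustrative values
only, assuming PyTorch):

    import torch

    b, seq_len, heads, dim_head = 2, 16, 8, 64
    q = torch.randn(b, seq_len, heads, dim_head)  # "NHD" layout handed to sageattn
    q_hnd = q.transpose(1, 2)                     # -> (b, heads, seq_len, dim_head) for the pytorch path
    assert q_hnd.shape == (b, heads, seq_len, dim_head)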
--- comfy/ldm/modules/attention.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index 7908d1313..ede506463 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -471,7 +471,7 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False): if skip_reshape: b, _, _, dim_head = q.shape - tensor_layout="HND" + tensor_layout = "HND" else: b, _, dim_head = q.shape dim_head //= heads @@ -479,7 +479,7 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape= lambda t: t.view(b, -1, heads, dim_head), (q, k, v), ) - tensor_layout="NHD" + tensor_layout = "NHD" if mask is not None: # add a batch dimension if there isn't already one @@ -489,7 +489,17 @@ def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape= if mask.ndim == 3: mask = mask.unsqueeze(1) - out = sageattn(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout) + try: + out = sageattn(q, k, v, attn_mask=mask, is_causal=False, tensor_layout=tensor_layout) + except Exception as e: + logging.error("Error running sage attention: {}, using pytorch attention instead.".format(e)) + if tensor_layout == "NHD": + q, k, v = map( + lambda t: t.transpose(1, 2), + (q, k, v), + ) + return attention_pytorch(q, k, v, heads, mask=mask, skip_reshape=True, skip_output_reshape=skip_output_reshape) + if tensor_layout == "HND": if not skip_output_reshape: out = ( From 581a9991ff641ef330a2977d5b92e682c9c3df95 Mon Sep 17 00:00:00 2001 From: comfyanonymous Date: Sun, 23 Mar 2025 08:06:36 -0400 Subject: [PATCH 70/77] Add model merging node for WAN 2.1 --- .../nodes_model_merging_model_specific.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/comfy_extras/nodes_model_merging_model_specific.py b/comfy_extras/nodes_model_merging_model_specific.py index 3e37f70d4..dc3411947 100644 --- a/comfy_extras/nodes_model_merging_model_specific.py +++ b/comfy_extras/nodes_model_merging_model_specific.py @@ -244,6 +244,30 @@ class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks): return {"required": arg_dict} +class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks): + CATEGORY = "advanced/model_merging/model_specific" + DESCRIPTION = "1.3B model has 30 blocks, 14B model has 40 blocks. Image to video model has the extra img_emb." 

From 581a9991ff641ef330a2977d5b92e682c9c3df95 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Sun, 23 Mar 2025 08:06:36 -0400
Subject: [PATCH 70/77] Add model merging node for WAN 2.1

---
 .../nodes_model_merging_model_specific.py | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/comfy_extras/nodes_model_merging_model_specific.py b/comfy_extras/nodes_model_merging_model_specific.py
index 3e37f70d4..dc3411947 100644
--- a/comfy_extras/nodes_model_merging_model_specific.py
+++ b/comfy_extras/nodes_model_merging_model_specific.py
@@ -244,6 +244,30 @@ class ModelMergeCosmos14B(comfy_extras.nodes_model_merging.ModelMergeBlocks):
 
         return {"required": arg_dict}
 
+class ModelMergeWAN2_1(comfy_extras.nodes_model_merging.ModelMergeBlocks):
+    CATEGORY = "advanced/model_merging/model_specific"
+    DESCRIPTION = "The 1.3B model has 30 blocks, the 14B model has 40 blocks. The image to video model has the extra img_emb."
+
+    @classmethod
+    def INPUT_TYPES(s):
+        arg_dict = { "model1": ("MODEL",),
+                              "model2": ("MODEL",)}
+
+        argument = ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01})
+
+        arg_dict["patch_embedding."] = argument
+        arg_dict["time_embedding."] = argument
+        arg_dict["time_projection."] = argument
+        arg_dict["text_embedding."] = argument
+        arg_dict["img_emb."] = argument
+
+        for i in range(40):
+            arg_dict["blocks.{}.".format(i)] = argument
+
+        arg_dict["head."] = argument
+
+        return {"required": arg_dict}
+
 NODE_CLASS_MAPPINGS = {
     "ModelMergeSD1": ModelMergeSD1,
     "ModelMergeSD2": ModelMergeSD1, #SD1 and SD2 have the same blocks
@@ -256,4 +280,5 @@ NODE_CLASS_MAPPINGS = {
     "ModelMergeLTXV": ModelMergeLTXV,
     "ModelMergeCosmos7B": ModelMergeCosmos7B,
     "ModelMergeCosmos14B": ModelMergeCosmos14B,
+    "ModelMergeWAN2_1": ModelMergeWAN2_1,
 }
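
Each arg_dict key becomes a 0.0-1.0 ratio widget, and the ModelMergeBlocks base class appears to match those keys as prefixes against parameter names, longest prefix winning, so "blocks.7." covers every tensor of block 7 and the sliders for blocks 30-39 are simply unused on the 1.3B model. A rough sketch of prefix-ratio merging under that assumption, following the convention (also assumed here) that ratio 1.0 takes model2's weight:

    import torch

    def merge_by_prefix(sd1, sd2, ratios, default=1.0):
        # ratio 1.0 takes model2's weight, 0.0 keeps model1's; longest matching prefix wins
        merged = {}
        for name, w1 in sd1.items():
            prefix = max((p for p in ratios if name.startswith(p)), key=len, default=None)
            r = ratios[prefix] if prefix is not None else default
            merged[name] = w1 * (1.0 - r) + sd2[name] * r
        return merged

    sd1 = {"blocks.0.attn.weight": torch.ones(2), "head.weight": torch.ones(2)}
    sd2 = {"blocks.0.attn.weight": torch.zeros(2), "head.weight": torch.zeros(2)}
    out = merge_by_prefix(sd1, sd2, {"blocks.0.": 0.25, "head.": 0.0})
    print(out["blocks.0.attn.weight"], out["head.weight"])  # 75% model1 for block 0, pure model1 head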

From eade1551bbd8678a7883d7061de73264cc279abf Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Mon, 24 Mar 2025 07:14:32 -0400
Subject: [PATCH 71/77] Add Hunyuan3D to readme.

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index a807ea9d6..a99aca0e7 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,8 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
    - [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
    - [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/)
    - [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
+- 3D Models
+   - [Hunyuan3D 2.0](https://docs.comfy.org/tutorials/3d/hunyuan3D-2)
 - [Stable Audio](https://comfyanonymous.github.io/ComfyUI_examples/audio/)
 - Asynchronous Queue system
 - Many optimizations: Only re-executes the parts of the workflow that changes between executions.

From 8edc1f44c1312d58afb6b0d817181079d39681e7 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Tue, 25 Mar 2025 05:23:49 -0400
Subject: [PATCH 72/77] Support more float8 types.

---
 comfy/model_management.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/comfy/model_management.py b/comfy/model_management.py
index 2a9b022be..f1ecfc20e 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -46,6 +46,32 @@ cpu_state = CPUState.GPU
 
 total_vram = 0
 
+def get_supported_float8_types():
+    float8_types = []
+    try:
+        float8_types.append(torch.float8_e4m3fn)
+    except:
+        pass
+    try:
+        float8_types.append(torch.float8_e4m3fnuz)
+    except:
+        pass
+    try:
+        float8_types.append(torch.float8_e5m2)
+    except:
+        pass
+    try:
+        float8_types.append(torch.float8_e5m2fnuz)
+    except:
+        pass
+    try:
+        float8_types.append(torch.float8_e8m0fnu)
+    except:
+        pass
+    return float8_types
+
+FLOAT8_TYPES = get_supported_float8_types()
+
 xpu_available = False
 torch_version = ""
 try:
@@ -701,11 +727,8 @@ def unet_dtype(device=None, model_params=0, supported_dtypes=[torch.float16, tor
             return torch.float8_e5m2
 
     fp8_dtype = None
-    try:
-        if weight_dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
-            fp8_dtype = weight_dtype
-    except:
-        pass
+    if weight_dtype in FLOAT8_TYPES:
+        fp8_dtype = weight_dtype
 
     if fp8_dtype is not None:
         if supports_fp8_compute(device): #if fp8 compute is supported the casting is most likely not expensive
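
Appending inside try/except keeps the list limited to dtypes the installed PyTorch build actually defines; torch.float8_e8m0fnu, for example, only exists in recent releases. A hedged equivalent of the same capability probe using getattr, which should yield the same list on any build:

    import torch

    # Dtype names some PyTorch builds expose; hasattr() filters out the ones this build lacks.
    _FP8_NAMES = ("float8_e4m3fn", "float8_e4m3fnuz", "float8_e5m2", "float8_e5m2fnuz", "float8_e8m0fnu")
    FLOAT8_TYPES = [getattr(torch, name) for name in _FP8_NAMES if hasattr(torch, name)]

    def is_fp8(weight_dtype):
        return weight_dtype in FLOAT8_TYPES

    print(FLOAT8_TYPES)           # whatever this torch build actually defines
    print(is_fp8(torch.float16))  # False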

From 84fdaf7b0ef4d030723bc3b350282dc6c92743f6 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 26 Mar 2025 05:08:49 -0400
Subject: [PATCH 73/77] Add CFGZeroStar node.

Works on all models that use a negative prompt but is meant for rectified flow models.
---
 comfy_extras/nodes_cfg.py | 45 +++++++++++++++++++++++++++++++++++++++
 nodes.py                  |  1 +
 2 files changed, 46 insertions(+)
 create mode 100644 comfy_extras/nodes_cfg.py

diff --git a/comfy_extras/nodes_cfg.py b/comfy_extras/nodes_cfg.py
new file mode 100644
index 000000000..1fb686644
--- /dev/null
+++ b/comfy_extras/nodes_cfg.py
@@ -0,0 +1,45 @@
+import torch
+
+# https://github.com/WeichenFan/CFG-Zero-star
+def optimized_scale(positive, negative):
+    positive_flat = positive.reshape(positive.shape[0], -1)
+    negative_flat = negative.reshape(negative.shape[0], -1)
+
+    # Calculate the dot product
+    dot_product = torch.sum(positive_flat * negative_flat, dim=1, keepdim=True)
+
+    # Squared norm of the unconditional prediction
+    squared_norm = torch.sum(negative_flat ** 2, dim=1, keepdim=True) + 1e-8
+
+    # st_star = v_cond^T * v_uncond / ||v_uncond||^2
+    st_star = dot_product / squared_norm
+
+    return st_star.reshape([positive.shape[0]] + [1] * (positive.ndim - 1))
+
+class CFGZeroStar:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"model": ("MODEL",),
+                              }}
+    RETURN_TYPES = ("MODEL",)
+    RETURN_NAMES = ("patched_model",)
+    FUNCTION = "patch"
+    CATEGORY = "advanced/guidance"
+
+    def patch(self, model):
+        m = model.clone()
+        def cfg_zero_star(args):
+            guidance_scale = args['cond_scale']
+            x = args['input']
+            cond_p = args['cond_denoised']
+            uncond_p = args['uncond_denoised']
+            out = args["denoised"]
+            alpha = optimized_scale(x - cond_p, x - uncond_p)
+
+            return out + uncond_p * (alpha - 1.0) + guidance_scale * uncond_p * (1.0 - alpha)
+        m.set_model_sampler_post_cfg_function(cfg_zero_star)
+        return (m, )
+
+NODE_CLASS_MAPPINGS = {
+    "CFGZeroStar": CFGZeroStar
+}
diff --git a/nodes.py b/nodes.py
index 27ef743b3..272c2a25e 100644
--- a/nodes.py
+++ b/nodes.py
@@ -2267,6 +2267,7 @@ def init_builtin_extra_nodes():
         "nodes_lotus.py",
         "nodes_hunyuan3d.py",
         "nodes_primitive.py",
+        "nodes_cfg.py",
     ]
 
     import_failed = []
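
With v_cond = x - cond_p and v_uncond = x - uncond_p, the returned alpha is the least-squares projection coefficient of v_cond onto v_uncond, so the post-CFG correction removes the part of the guidance update that merely re-adds the unconditional direction. A small standalone numeric check of optimized_scale (same math, trivial inputs):

    import torch

    def optimized_scale(positive, negative):
        pos = positive.reshape(positive.shape[0], -1)
        neg = negative.reshape(negative.shape[0], -1)
        st_star = (pos * neg).sum(dim=1, keepdim=True) / (neg.pow(2).sum(dim=1, keepdim=True) + 1e-8)
        return st_star.reshape([positive.shape[0]] + [1] * (positive.ndim - 1))

    v_uncond = torch.tensor([[2.0, 0.0]])
    v_cond = torch.tensor([[3.0, 4.0]])       # its projection onto (2, 0) has coefficient 6/4
    print(optimized_scale(v_cond, v_uncond))  # tensor([[1.5000]]), up to the 1e-8 stabilizer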

From 3661c833bcc41b788a7c9f0e7bc48524f8ee5f82 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 26 Mar 2025 19:54:54 -0400
Subject: [PATCH 74/77] Support the WAN 2.1 fun control models.

Use the new WanFunControlToVideo node.
---
 comfy/model_base.py       | 17 ++++++++++-----
 comfy/supported_models.py | 14 ++++++++++-
 comfy_extras/nodes_wan.py | 51 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index eec70d5de..315b5d1e3 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -992,7 +992,8 @@ class WAN21(BaseModel):
 
     def concat_cond(self, **kwargs):
         noise = kwargs.get("noise", None)
-        if self.diffusion_model.patch_embedding.weight.shape[1] == noise.shape[1]:
+        extra_channels = self.diffusion_model.patch_embedding.weight.shape[1] - noise.shape[1]
+        if extra_channels == 0:
             return None
 
         image = kwargs.get("concat_latent_image", None)
@@ -1000,12 +1001,16 @@ class WAN21(BaseModel):
 
         if image is None:
-            image = torch.zeros_like(noise)
+            shape_image = list(noise.shape)
+            shape_image[1] = extra_channels
+            image = torch.zeros(shape_image, dtype=noise.dtype, layout=noise.layout, device=noise.device)
+        else:
+            image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
+            for i in range(0, image.shape[1], 16):
+                image[:, i: i + 16] = self.process_latent_in(image[:, i: i + 16])
+            image = utils.resize_to_batch_size(image, noise.shape[0])
 
-        image = utils.common_upscale(image.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
-        image = self.process_latent_in(image)
-        image = utils.resize_to_batch_size(image, noise.shape[0])
-
-        if not self.image_to_video:
+        if not self.image_to_video or extra_channels == image.shape[1]:
             return image
diff --git a/comfy/supported_models.py b/comfy/supported_models.py
index fad00d35b..2a6a61560 100644
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -969,12 +969,24 @@ class WAN21_I2V(WAN21_T2V):
     unet_config = {
         "image_model": "wan2.1",
         "model_type": "i2v",
+        "in_dim": 36,
     }
 
     def get_model(self, state_dict, prefix="", device=None):
         out = model_base.WAN21(self, image_to_video=True, device=device)
         return out
 
+class WAN21_FunControl2V(WAN21_T2V):
+    unet_config = {
+        "image_model": "wan2.1",
+        "model_type": "i2v",
+        "in_dim": 48,
+    }
+
+    def get_model(self, state_dict, prefix="", device=None):
+        out = model_base.WAN21(self, image_to_video=False, device=device)
+        return out
+
 class Hunyuan3Dv2(supported_models_base.BASE):
     unet_config = {
         "image_model": "hunyuan3d2",
@@ -1013,6 +1025,6 @@ class Hunyuan3Dv2mini(Hunyuan3Dv2):
 
     latent_format = latent_formats.Hunyuan3Dv2mini
 
-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, Hunyuan3Dv2mini, Hunyuan3Dv2]
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, Lumina2, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, Hunyuan3Dv2mini, Hunyuan3Dv2]
 
 models += [SVD_img2vid]
diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py
index dc30eb546..428874bcc 100644
--- a/comfy_extras/nodes_wan.py
+++ b/comfy_extras/nodes_wan.py
@@ -3,6 +3,7 @@ import node_helpers
 import torch
 import comfy.model_management
 import comfy.utils
+import comfy.latent_formats
 
 
 class WanImageToVideo:
@@ -49,6 +50,56 @@ class WanImageToVideo:
 
         return (positive, negative, out_latent)
 
+class WanFunControlToVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "vae": ("VAE", ),
+                             "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
+                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                },
+                "optional": {"clip_vision_output": ("CLIP_VISION_OUTPUT", ),
+                             "start_image": ("IMAGE", ),
+                             "control_video": ("IMAGE", ),
+                }}
+
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative", "latent")
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/video_models"
+
+    def encode(self, positive, negative, vae, width, height, length, batch_size, start_image=None, clip_vision_output=None, control_video=None):
+        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        concat_latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        concat_latent = comfy.latent_formats.Wan21().process_out(concat_latent)
+        concat_latent = concat_latent.repeat(1, 2, 1, 1, 1)
+
+        if start_image is not None:
+            start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+            concat_latent_image = vae.encode(start_image[:, :, :, :3])
+            concat_latent[:,16:,:concat_latent_image.shape[2]] = concat_latent_image[:,:,:concat_latent.shape[2]]
+
+        if control_video is not None:
+            control_video = comfy.utils.common_upscale(control_video[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+            concat_latent_image = vae.encode(control_video[:, :, :, :3])
+            concat_latent[:,:16,:concat_latent_image.shape[2]] = concat_latent_image[:,:,:concat_latent.shape[2]]
+
+        positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": concat_latent})
+        negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": concat_latent})
+
+        if clip_vision_output is not None:
+            positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
+            negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
+
+        out_latent = {}
+        out_latent["samples"] = latent
+        return (positive, negative, out_latent)
+
 NODE_CLASS_MAPPINGS = {
     "WanImageToVideo": WanImageToVideo,
+    "WanFunControlToVideo": WanFunControlToVideo,
 }
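
The fun control model's in_dim of 48 decomposes as 16 noise channels plus the 32-channel concat latent built above: channels 0-15 hold the control-video latent and channels 16-31 the start-image latent. A shape-only sketch of that layout (pure tensor bookkeeping; zeros stand in for the vae.encode results):

    import torch

    batch, length, height, width = 1, 81, 480, 832
    t = ((length - 1) // 4) + 1                                  # 81 frames -> 21 latent frames

    def lat():                                                   # one 16-channel WAN latent
        return torch.zeros(batch, 16, t, height // 8, width // 8)

    control_latent, start_latent = lat(), lat()                  # stand-ins for the two vae.encode() calls
    concat = torch.cat([control_latent, start_latent], dim=1)    # (1, 32, 21, 60, 104)
    noise = lat()                                                # (1, 16, 21, 60, 104)
    model_input = torch.cat([noise, concat], dim=1)
    print(model_input.shape[1])                                  # 48, matching the fun control in_dim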

From 0a1f8869c9998bbfcfeb2e97aa96a6d3e0a2b5df Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Thu, 27 Mar 2025 11:13:27 -0400
Subject: [PATCH 75/77] Add WanFunInpaintToVideo node for the Wan fun inpaint
 models.

---
 comfy/model_base.py       |  7 +++++--
 comfy_extras/nodes_wan.py | 54 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index 315b5d1e3..8f588e2bf 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1017,11 +1017,14 @@ class WAN21(BaseModel):
         if mask is None:
             mask = torch.zeros_like(noise)[:, :4]
         else:
-            mask = 1.0 - torch.mean(mask, dim=1, keepdim=True)
+            if mask.shape[1] != 4:
+                mask = torch.mean(mask, dim=1, keepdim=True)
+            mask = 1.0 - mask
             mask = utils.common_upscale(mask.to(device), noise.shape[-1], noise.shape[-2], "bilinear", "center")
             if mask.shape[-3] < noise.shape[-3]:
                 mask = torch.nn.functional.pad(mask, (0, 0, 0, 0, 0, noise.shape[-3] - mask.shape[-3]), mode='constant', value=0)
-            mask = mask.repeat(1, 4, 1, 1, 1)
+            if mask.shape[1] == 1:
+                mask = mask.repeat(1, 4, 1, 1, 1)
             mask = utils.resize_to_batch_size(mask, noise.shape[0])
 
         return torch.cat((mask, image), dim=1)
diff --git a/comfy_extras/nodes_wan.py b/comfy_extras/nodes_wan.py
index 428874bcc..2d0f31ac8 100644
--- a/comfy_extras/nodes_wan.py
+++ b/comfy_extras/nodes_wan.py
@@ -99,7 +99,61 @@ class WanFunControlToVideo:
         out_latent["samples"] = latent
         return (positive, negative, out_latent)
 
+class WanFunInpaintToVideo:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": {"positive": ("CONDITIONING", ),
+                             "negative": ("CONDITIONING", ),
+                             "vae": ("VAE", ),
+                             "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
+                             "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
+                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                },
+                "optional": {"clip_vision_output": ("CLIP_VISION_OUTPUT", ),
+                             "start_image": ("IMAGE", ),
+                             "end_image": ("IMAGE", ),
+                }}
+
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative", "latent")
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/video_models"
+
+    def encode(self, positive, negative, vae, width, height, length, batch_size, start_image=None, end_image=None, clip_vision_output=None):
+        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
+        if start_image is not None:
+            start_image = comfy.utils.common_upscale(start_image[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+        if end_image is not None:
+            end_image = comfy.utils.common_upscale(end_image[-length:].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
+
+        image = torch.ones((length, height, width, 3)) * 0.5
+        mask = torch.ones((1, 1, latent.shape[2] * 4, latent.shape[-2], latent.shape[-1]))
+
+        if start_image is not None:
+            image[:start_image.shape[0]] = start_image
+            mask[:, :, :start_image.shape[0] + 3] = 0.0
+
+        if end_image is not None:
+            image[-end_image.shape[0]:] = end_image
+            mask[:, :, -end_image.shape[0]:] = 0.0
+
+        concat_latent_image = vae.encode(image[:, :, :, :3])
+        mask = mask.view(1, mask.shape[2] // 4, 4, mask.shape[3], mask.shape[4]).transpose(1, 2)
+        positive = node_helpers.conditioning_set_values(positive, {"concat_latent_image": concat_latent_image, "concat_mask": mask})
+        negative = node_helpers.conditioning_set_values(negative, {"concat_latent_image": concat_latent_image, "concat_mask": mask})
+
+        if clip_vision_output is not None:
+            positive = node_helpers.conditioning_set_values(positive, {"clip_vision_output": clip_vision_output})
+            negative = node_helpers.conditioning_set_values(negative, {"clip_vision_output": clip_vision_output})
+
+        out_latent = {}
+        out_latent["samples"] = latent
+        return (positive, negative, out_latent)
+
 NODE_CLASS_MAPPINGS = {
     "WanImageToVideo": WanImageToVideo,
     "WanFunControlToVideo": WanFunControlToVideo,
+    "WanFunInpaintToVideo": WanFunInpaintToVideo,
 }
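
The mask starts with one value per pixel-space frame, shape (1, 1, 4*T, H/8, W/8) for T latent frames; the final view/transpose folds the 4x temporal compression into 4 mask channels so it lines up with the 4-channel mask slot that concat_cond now accepts unchanged. A shape sketch of just that reshape, with assumed 480x832 dimensions:

    import torch

    length = 81
    t = ((length - 1) // 4) + 1          # 21 latent frames
    h, w = 480 // 8, 832 // 8            # latent spatial size for a 480x832 video

    mask = torch.ones(1, 1, t * 4, h, w) # one mask value per pixel-space frame
    mask[:, :, :1 + 3] = 0.0             # keep the start frame (plus its 3 packed neighbors)
    mask = mask.view(1, t, 4, h, w).transpose(1, 2)
    print(mask.shape)                    # (1, 4, 21, 60, 104): 4 channels, one per packed frame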

From a40fcfc2d5392a5014cd87588035ebce194cb015 Mon Sep 17 00:00:00 2001
From: Chenlei Hu
Date: Fri, 28 Mar 2025 02:27:01 -0400
Subject: [PATCH 76/77] Update frontend to 1.14.6 (#7416)

Cherry-pick the fix: https://github.com/Comfy-Org/ComfyUI_frontend/pull/3252
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index c78d3c228..806fbc751 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.14.5
+comfyui-frontend-package==1.14.6
 torch
 torchsde
 torchvision

From 2d17d8910c7d34383feaf1aaac8d08571fe42077 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 28 Mar 2025 08:40:25 -0400
Subject: [PATCH 77/77] Don't error if wan concat image has extra channels.

---
 comfy/model_base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/comfy/model_base.py b/comfy/model_base.py
index 8f588e2bf..f55cbe183 100644
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -1013,6 +1013,9 @@ class WAN21(BaseModel):
         if not self.image_to_video or extra_channels == image.shape[1]:
             return image
 
+        if image.shape[1] > (extra_channels - 4):
+            image = image[:, :(extra_channels - 4)]
+
         mask = kwargs.get("concat_mask", kwargs.get("denoise_mask", None))
         if mask is None:
             mask = torch.zeros_like(noise)[:, :4]
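
The new guard keeps mask plus image inside the patch embedding's channel budget: 4 of the extra input channels are reserved for the mask, so a concat image wider than extra_channels - 4 is truncated instead of failing in the torch.cat below it. A quick arithmetic sketch with assumed WAN i2v numbers:

    import torch

    extra_channels = 36 - 16                   # i2v model: patch_embedding in_dim 36, noise has 16
    image = torch.zeros(1, 32, 21, 60, 104)    # an over-wide concat latent coming from a workflow

    if image.shape[1] > (extra_channels - 4):  # more than 16 image channels cannot fit
        image = image[:, :(extra_channels - 4)]
    mask = torch.zeros(1, 4, 21, 60, 104)
    cond = torch.cat((mask, image), dim=1)
    print(cond.shape[1] == extra_channels)     # True: 4 mask + 16 image channels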