From 84cc9cb5287a6b0345b681174a8e85bd3ca41515 Mon Sep 17 00:00:00 2001
From: Chenlei Hu <hcl@comfy.org>
Date: Fri, 7 Mar 2025 19:02:13 -0500
Subject: [PATCH 1/4] Update frontend to 1.11.8 (#7119)

* Update frontend to 1.11.7

* Update requirements.txt
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 4ad5f3b8a..e1316ccff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-comfyui-frontend-package==1.10.17
+comfyui-frontend-package==1.11.8
 torch
 torchsde
 torchvision

From c3d9cc4592310d22f414c93a7840b541f3a7b497 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Fri, 7 Mar 2025 19:53:07 -0500
Subject: [PATCH 2/4] Print the frontend version in the log.

---
 app/frontend_management.py | 6 ++++++
 main.py                    | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/app/frontend_management.py b/app/frontend_management.py
index 9feb1e965..94293af1e 100644
--- a/app/frontend_management.py
+++ b/app/frontend_management.py
@@ -27,6 +27,12 @@ except ImportError:
     exit(-1)
 
 
+try:
+    frontend_version = tuple(map(int, comfyui_frontend_package.__version__.split(".")))
+except Exception:
+    # Missing or non-numeric __version__ (e.g. a pre-release tag): fall back to the (0,) sentinel.
+    frontend_version = (0,)
+
 REQUEST_TIMEOUT = 10  # seconds
 
 
diff --git a/main.py b/main.py
index f6510c90a..57fa397e6 100644
--- a/main.py
+++ b/main.py
@@ -139,6 +139,7 @@ from server import BinaryEventTypes
 import nodes
 import comfy.model_management
 import comfyui_version
+import app.frontend_management
 
 
 def cuda_malloc_warning():
@@ -295,6 +296,8 @@ def start_comfyui(asyncio_loop=None):
 if __name__ == "__main__":
     # Running directly, just start ComfyUI.
     logging.info("ComfyUI version: {}".format(comfyui_version.__version__))
+    logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, app.frontend_management.frontend_version))))
+
     event_loop, _, start_all_func = start_comfyui()
     try:
         event_loop.run_until_complete(start_all_func())

From be4e760648e0234f9202b9cbe7dcfb3bd307acb9 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Fri, 7 Mar 2025 19:56:11 -0500
Subject: [PATCH 3/4] Add an image_interleave option to the Hunyuan image to
 video encode node.

See the tooltip for what it does.
---
 comfy/text_encoders/hunyuan_video.py | 28 +++++++++++++++++-----------
 comfy_extras/nodes_hunyuan.py        |  5 +++--
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/comfy/text_encoders/hunyuan_video.py b/comfy/text_encoders/hunyuan_video.py
index 1d814aadd..dbb259e54 100644
--- a/comfy/text_encoders/hunyuan_video.py
+++ b/comfy/text_encoders/hunyuan_video.py
@@ -42,7 +42,7 @@ class HunyuanVideoTokenizer:
         self.llama_template = """<|start_header_id|>system<|end_header_id|>\n\nDescribe the video by detailing the following aspects: 1. The main content and theme of the video.2. The color, shape, size, texture, quantity, text, and spatial relationships of the objects.3. Actions, events, behaviors temporal relationships, physical movement changes of the objects.4. background environment, light, style and atmosphere.5. camera angles, movements, and transitions used in the video:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|>"""  # 95 tokens
         self.llama = LLAMA3Tokenizer(embedding_directory=embedding_directory, min_length=1)
 
-    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, image_embeds=None, **kwargs):
+    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, image_embeds=None, image_interleave=1, **kwargs):
         out = {}
         out["l"] = self.clip_l.tokenize_with_weights(text, return_word_ids)
 
@@ -56,7 +56,7 @@ class HunyuanVideoTokenizer:
             for i in range(len(r)):
                 if r[i][0] == 128257:
                     if image_embeds is not None and embed_count < image_embeds.shape[0]:
-                        r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image"},) + r[i][1:]
+                        r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image", "image_interleave": image_interleave},) + r[i][1:]
                         embed_count += 1
         out["llama"] = llama_text_tokens
         return out
@@ -92,10 +92,10 @@ class HunyuanVideoClipModel(torch.nn.Module):
         llama_out, llama_pooled, llama_extra_out = self.llama.encode_token_weights(token_weight_pairs_llama)
 
         template_end = 0
-        image_start = None
-        image_end = None
+        extra_template_end = 0
         extra_sizes = 0
         user_end = 9999999999999
+        images = []
 
         tok_pairs = token_weight_pairs_llama[0]
         for i, v in enumerate(tok_pairs):
@@ -112,22 +112,28 @@ class HunyuanVideoClipModel(torch.nn.Module):
                 else:
                     if elem.get("original_type") == "image":
                         elem_size = elem.get("data").shape[0]
-                        if image_start is None:
+                        if template_end > 0:
+                            if user_end == -1:
+                                extra_template_end += elem_size - 1
+                        else:
                             image_start = i + extra_sizes
                             image_end = i + elem_size + extra_sizes
-                        extra_sizes += elem_size - 1
+                            images.append((image_start, image_end, elem.get("image_interleave", 1)))
+                            extra_sizes += elem_size - 1
 
         if llama_out.shape[1] > (template_end + 2):
             if tok_pairs[template_end + 1][0] == 271:
                 template_end += 2
-        llama_output = llama_out[:, template_end + extra_sizes:user_end + extra_sizes]
-        llama_extra_out["attention_mask"] = llama_extra_out["attention_mask"][:, template_end + extra_sizes:user_end + extra_sizes]
+        llama_output = llama_out[:, template_end + extra_sizes:user_end + extra_sizes + extra_template_end]
+        llama_extra_out["attention_mask"] = llama_extra_out["attention_mask"][:, template_end + extra_sizes:user_end + extra_sizes + extra_template_end]
         if llama_extra_out["attention_mask"].sum() == torch.numel(llama_extra_out["attention_mask"]):
             llama_extra_out.pop("attention_mask")  # attention mask is useless if no masked elements
 
-        if image_start is not None:
-            image_output = llama_out[:, image_start: image_end]
-            llama_output = torch.cat([image_output[:, ::2], llama_output], dim=1)
+        if len(images) > 0:
+            out = []
+            for i in images:
+                out.append(llama_out[:, i[0]: i[1]: i[2]])
+            llama_output = torch.cat(out + [llama_output], dim=1)
 
         l_out, l_pooled = self.clip_l.encode_token_weights(token_weight_pairs_l)
         return llama_output, l_pooled, llama_extra_out
diff --git a/comfy_extras/nodes_hunyuan.py b/comfy_extras/nodes_hunyuan.py
index 4f700bbe6..56aef9b01 100644
--- a/comfy_extras/nodes_hunyuan.py
+++ b/comfy_extras/nodes_hunyuan.py
@@ -57,14 +57,15 @@ class TextEncodeHunyuanVideo_ImageToVideo:
             "clip": ("CLIP", ),
             "clip_vision_output": ("CLIP_VISION_OUTPUT", ),
             "prompt": ("STRING", {"multiline": True, "dynamicPrompts": True}),
+            "image_interleave": ("INT", {"default": 2, "min": 1, "max": 512, "tooltip": "How much the image influences things vs the text prompt. Higher number means more influence from the text prompt."}),
             }}
     RETURN_TYPES = ("CONDITIONING",)
     FUNCTION = "encode"
 
     CATEGORY = "advanced/conditioning"
 
-    def encode(self, clip, clip_vision_output, prompt):
-        tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected)
+    def encode(self, clip, clip_vision_output, prompt, image_interleave):
+        tokens = clip.tokenize(prompt, llama_template=PROMPT_TEMPLATE_ENCODE_VIDEO_I2V, image_embeds=clip_vision_output.mm_projected, image_interleave=image_interleave)
         return (clip.encode_from_tokens_scheduled(tokens), )
 
 

From 29832b3b61591633d8f312f7df727c1bb8b4d9e4 Mon Sep 17 00:00:00 2001
From: comfyanonymous <comfyanonymous@protonmail.com>
Date: Sat, 8 Mar 2025 03:51:36 -0500
Subject: [PATCH 4/4] Warn if frontend package is older than the one in
 requirements.txt

---
 app/frontend_management.py | 10 ++++++++--
 main.py                    | 19 +++++++++++++++++--
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/app/frontend_management.py b/app/frontend_management.py
index 94293af1e..308f71da6 100644
--- a/app/frontend_management.py
+++ b/app/frontend_management.py
@@ -18,12 +18,18 @@ from typing_extensions import NotRequired
 from comfy.cli_args import DEFAULT_VERSION_STRING
 
 
+def frontend_install_warning_message():
+    """Return user-facing instructions for installing requirements.txt with the running interpreter."""
+    # requirements.txt is resolved one directory above this module; "-s " is added only under no_user_site.
+    requirements = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt'))
+    isolation_flag = "-s " if sys.flags.no_user_site else ""
+    return f"Please install the updated requirements.txt file by running:\n{sys.executable} {isolation_flag}-m pip install -r {requirements}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem"
+
 try:
     import comfyui_frontend_package
 except ImportError:
     # TODO: Remove the check after roll out of 0.3.16
-    req_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt'))
-    logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. Please install the updated requirements.txt file by running:\n{sys.executable} -s -m pip install -r {req_path}\n\nThis error is happening because the ComfyUI frontend is no longer shipped as part of the main repo but as a pip package instead.\n\nIf you are on the portable package you can run: update\\update_comfyui.bat to solve this problem\n********** ERROR **********\n")
+    logging.error(f"\n\n********** ERROR ***********\n\ncomfyui-frontend-package is not installed. {frontend_install_warning_message()}\n********** ERROR **********\n")
     exit(-1)
 
 
diff --git a/main.py b/main.py
index 57fa397e6..6fa1cfb0f 100644
--- a/main.py
+++ b/main.py
@@ -293,14 +293,29 @@ def start_comfyui(asyncio_loop=None):
     return asyncio_loop, prompt_server, start_all
 
 
+def warn_frontend_version(frontend_version):
+    """Best-effort: warn when frontend_version (int tuple) is lower than the pin on line 1 of requirements.txt."""
+    try:
+        req_path = os.path.join(os.path.dirname(__file__), 'requirements.txt')
+        with open(req_path, 'r', encoding='utf-8') as f:
+            required_frontend = tuple(map(int, f.readline().split('=')[-1].split('.')))
+        if frontend_version < required_frontend:
+            logging.warning("________________________________________________________________________\nWARNING WARNING WARNING WARNING WARNING\n\nInstalled frontend version {} is lower than the recommended version {}.\n\n{}\n________________________________________________________________________".format('.'.join(map(str, frontend_version)), '.'.join(map(str, required_frontend)), app.frontend_management.frontend_install_warning_message()))
+    except Exception:
+        logging.debug("Could not compare the frontend version with requirements.txt", exc_info=True)
+
+
 if __name__ == "__main__":
     # Running directly, just start ComfyUI.
     logging.info("ComfyUI version: {}".format(comfyui_version.__version__))
-    logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, app.frontend_management.frontend_version))))
+    frontend_version = app.frontend_management.frontend_version
+    logging.info("ComfyUI frontend version: {}".format('.'.join(map(str, frontend_version))))
 
     event_loop, _, start_all_func = start_comfyui()
     try:
-        event_loop.run_until_complete(start_all_func())
+        x = start_all_func()
+        warn_frontend_version(frontend_version)
+        event_loop.run_until_complete(x)
     except KeyboardInterrupt:
         logging.info("\nStopped server")