Remove prematurely added image embedding code from the Ideogram 4 text encoder

2026-06-24 00:39:30 +08:00 · 2026-06-15 09:35:49 +02:00 · 2026-06-15 09:35:49 +02:00 · ff05eb9a78
commit ff05eb9a78
parent 213c7d8914
1 changed file with 2 additions and 17 deletions
--- a/comfy/text_encoders/ideogram4.py
+++ b/comfy/text_encoders/ideogram4.py
@@ -32,14 +32,11 @@ class Ideogram4Tokenizer(sd1_clip.SD1Tokenizer):
                         name="qwen3vl_8b", tokenizer=Qwen3VLTokenizer)
        self.llama_template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"
        self.llama_template_images = "<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
-    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], prevent_empty_text=False, **kwargs):
+    def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, prevent_empty_text=False, **kwargs):
        skip_template = False
        if text.startswith('<|im_start|>'):
            skip_template = True
        if text.startswith('<|start_header_id|>'):
            skip_template = True
        if prevent_empty_text and text == '':
            text = ' '
@@ -47,23 +44,11 @@ class Ideogram4Tokenizer(sd1_clip.SD1Tokenizer):
            llama_text = text
        else:
            if llama_template is None:
-                if len(images) > 0:
+                llama_text = self.llama_template.format(text)
-                    llama_text = self.llama_template_images.format(text)
-                else:
-                    llama_text = self.llama_template.format(text)
            else:
                llama_text = llama_template.format(text)
        tokens = super().tokenize_with_weights(llama_text, return_word_ids=return_word_ids, disable_weights=True, **kwargs)
-        key_name = next(iter(tokens))
-        embed_count = 0
-        qwen_tokens = tokens[key_name]
-        for r in qwen_tokens:
-            for i in range(len(r)):
-                if r[i][0] == 151655:
-                    if len(images) > embed_count:
-                        r[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"},) + r[i][1:]
-                        embed_count += 1
        return tokens