mirror of https://github.com/comfyanonymous/ComfyUI.git
Don't add template to qwen2.5vl when template is in prompt. (#10043)
Make the hunyuan image refiner template_end 36.
parent cd66d72b46
commit 1e098d6132
comfy/text_encoders/hunyuan_image.py
@@ -63,7 +63,13 @@ class HunyuanImageTEModel(QwenImageTEModel):
         self.byt5_small = None
 
     def encode_token_weights(self, token_weight_pairs):
-        cond, p, extra = super().encode_token_weights(token_weight_pairs)
+        tok_pairs = token_weight_pairs["qwen25_7b"][0]
+        template_end = -1
+        if tok_pairs[0][0] == 27:
+            if len(tok_pairs) > 36: # refiner prompt uses a fixed 36 template_end
+                template_end = 36
+
+        cond, p, extra = super().encode_token_weights(token_weight_pairs, template_end=template_end)
         if self.byt5_small is not None and "byt5" in token_weight_pairs:
             out = self.byt5_small.encode_token_weights(token_weight_pairs["byt5"])
             extra["conditioning_byt5small"] = out[0]
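
For context, a hedged illustration of the refiner check added above (assumptions not stated in the commit: token id 27 is '<', the first token of a pre-templated '<|im_start|>' prompt in the Qwen2.5-VL vocabulary, and the Hunyuan image refiner template spans exactly the first 36 tokens; the token pairs below are fake):

    # Fake (token_id, weight) pairs standing in for a pre-templated refiner prompt.
    tok_pairs = [(27, 1.0)] + [(0, 1.0)] * 50
    template_end = -1
    if tok_pairs[0][0] == 27:        # prompt already starts with the template
        if len(tok_pairs) > 36:      # refiner prompt uses a fixed 36 template_end
            template_end = 36        # cut the conditioning at a fixed offset
    assert template_end == 36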
comfy/text_encoders/qwen_image.py
@@ -18,6 +18,15 @@ class QwenImageTokenizer(sd1_clip.SD1Tokenizer):
         self.llama_template_images = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>{}<|im_end|>\n<|im_start|>assistant\n"
 
     def tokenize_with_weights(self, text, return_word_ids=False, llama_template=None, images=[], **kwargs):
-        if llama_template is None:
-            if len(images) > 0:
-                llama_text = self.llama_template_images.format(text)
+        skip_template = False
+        if text.startswith('<|im_start|>'):
+            skip_template = True
+        if text.startswith('<|start_header_id|>'):
+            skip_template = True
+
+        if skip_template:
+            llama_text = text
+        else:
+            if llama_template is None:
+                if len(images) > 0:
+                    llama_text = self.llama_template_images.format(text)
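
A hedged usage sketch of the new tokenizer behavior (the tokenizer instance and prompt strings are illustrative, not from the commit): a prompt that already begins with '<|im_start|>' (ChatML) or '<|start_header_id|>' (Llama-style) is now tokenized verbatim, while a plain prompt is still wrapped in the default template.

    # 'tokenizer' is assumed to be an already-constructed QwenImageTokenizer.
    plain = "a cat wearing a hat"
    templated = ("<|im_start|>user\na cat wearing a hat<|im_end|>\n"
                 "<|im_start|>assistant\n")
    tokenizer.tokenize_with_weights(plain)      # wrapped in the default llama_template
    tokenizer.tokenize_with_weights(templated)  # skip_template=True, used as-is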
@@ -47,10 +56,11 @@ class QwenImageTEModel(sd1_clip.SD1ClipModel):
     def __init__(self, device="cpu", dtype=None, model_options={}):
         super().__init__(device=device, dtype=dtype, name="qwen25_7b", clip_model=Qwen25_7BVLIModel, model_options=model_options)
 
-    def encode_token_weights(self, token_weight_pairs):
+    def encode_token_weights(self, token_weight_pairs, template_end=-1):
         out, pooled, extra = super().encode_token_weights(token_weight_pairs)
         tok_pairs = token_weight_pairs["qwen25_7b"][0]
         count_im_start = 0
-        for i, v in enumerate(tok_pairs):
-            elem = v[0]
-            if not torch.is_tensor(elem):
+        if template_end == -1:
+            for i, v in enumerate(tok_pairs):
+                elem = v[0]
+                if not torch.is_tensor(elem):
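
The template_end=-1 default keeps the old behavior (scan the token stream to find where the chat template ends) for every existing caller; only the Hunyuan refiner path above passes a fixed cutoff. A hedged call-site sketch (the model instance and token pairs are assumed, not from the commit):

    te_model.encode_token_weights(pairs)                   # template_end=-1: scan for template end
    te_model.encode_token_weights(pairs, template_end=36)  # fixed cutoff: scan skipped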