diff --git a/comfy/text_encoders/qwen3vl.py b/comfy/text_encoders/qwen3vl.py index 59c9aae6d..2082c42e7 100644 --- a/comfy/text_encoders/qwen3vl.py +++ b/comfy/text_encoders/qwen3vl.py @@ -167,7 +167,7 @@ class Qwen3VLTokenizer(sd1_clip.SD1Tokenizer): embed_count = 0 for r in tokens[key_name]: for i in range(len(r)): - if r[i][0] == 151655: # <|image_pad|> + if isinstance(r[i][0], (int, float)) and r[i][0] == 151655: # <|image_pad|> if len(images) > embed_count: r[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"},) + r[i][1:] embed_count += 1