Add seeded dynamic-prompt expansion to the text-encoding nodes.

CLIPTextEncodeSDXLRefiner, CLIPTextEncodeSDXL and GLIGENTextBoxApply each gain
a "seed" INT input and pass their prompt text through
translate_choices(text, seed) before tokenizing. A new DynamicPrompt node
returns the translated string and is registered in both node mappings.

NOTE(review): the nodes.py hunks below call translate_choices, but none of
them adds an import for it; confirm nodes.py already imports it from
comfy.parse_choice.
NOTE(review): CLIPTextEncodeSDXL passes the same seed to both text_g and
text_l; confirm that is the intended behaviour.

diff --git a/comfy_extras/nodes_clip_sdxl.py b/comfy_extras/nodes_clip_sdxl.py
index dcf8859fa..55d372e16 100644
--- a/comfy_extras/nodes_clip_sdxl.py
+++ b/comfy_extras/nodes_clip_sdxl.py
@@ -1,22 +1,26 @@
 import torch
 from nodes import MAX_RESOLUTION
+from comfy.parse_choice import translate_choices
 
 class CLIPTextEncodeSDXLRefiner:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
+            "clip": ("CLIP", ),
             "ascore": ("FLOAT", {"default": 6.0, "min": 0.0, "max": 1000.0, "step": 0.01}),
             "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
             "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "text": ("STRING", {"multiline": True}), "clip": ("CLIP", ),
+            "text": ("STRING", {"multiline": True}),
+            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
             }}
     RETURN_TYPES = ("CONDITIONING",)
     FUNCTION = "encode"
 
     CATEGORY = "advanced/conditioning"
 
-    def encode(self, clip, ascore, width, height, text):
-        tokens = clip.tokenize(text)
+    def encode(self, clip, ascore, width, height, text, seed):
+        translated_prompt_text = translate_choices(text, seed)
+        tokens = clip.tokenize(translated_prompt_text)
         cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)
         return ([[cond, {"pooled_output": pooled, "aesthetic_score": ascore, "width": width,"height": height}]], )
 
@@ -24,23 +28,27 @@ class CLIPTextEncodeSDXL:
     @classmethod
     def INPUT_TYPES(s):
         return {"required": {
+            "clip": ("CLIP", ),
             "width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
             "height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
             "crop_w": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
             "crop_h": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION}),
             "target_width": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
             "target_height": ("INT", {"default": 1024.0, "min": 0, "max": MAX_RESOLUTION}),
-            "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}), "clip": ("CLIP", ),
-            "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}), "clip": ("CLIP", ),
+            "text_g": ("STRING", {"multiline": True, "default": "CLIP_G"}),
+            "text_l": ("STRING", {"multiline": True, "default": "CLIP_L"}),
+            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
             }}
     RETURN_TYPES = ("CONDITIONING",)
     FUNCTION = "encode"
 
     CATEGORY = "advanced/conditioning"
 
-    def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l):
-        tokens = clip.tokenize(text_g)
-        tokens["l"] = clip.tokenize(text_l)["l"]
+    def encode(self, clip, width, height, crop_w, crop_h, target_width, target_height, text_g, text_l, seed):
+        translated_g = translate_choices(text_g, seed)
+        translated_l = translate_choices(text_l, seed)
+        tokens = clip.tokenize(translated_g)
+        tokens["l"] = clip.tokenize(translated_l)["l"]
         if len(tokens["l"]) != len(tokens["g"]):
             empty = clip.tokenize("")
             while len(tokens["l"]) < len(tokens["g"]):
diff --git a/nodes.py b/nodes.py
index d9df26d6f..7292a4ca1 100644
--- a/nodes.py
+++ b/nodes.py
@@ -828,6 +828,7 @@ class GLIGENTextBoxApply:
                               "clip": ("CLIP", ),
                               "gligen_textbox_model": ("GLIGEN", ),
                               "text": ("STRING", {"multiline": True}),
+                              "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                               "width": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
                               "height": ("INT", {"default": 64, "min": 8, "max": MAX_RESOLUTION, "step": 8}),
                               "x": ("INT", {"default": 0, "min": 0, "max": MAX_RESOLUTION, "step": 8}),
@@ -838,9 +839,11 @@ class GLIGENTextBoxApply:
 
     CATEGORY = "conditioning/gligen"
 
-    def append(self, conditioning_to, clip, gligen_textbox_model, text, width, height, x, y):
+    def append(self, conditioning_to, clip, gligen_textbox_model, text, seed, width, height, x, y):
         c = []
-        cond, cond_pooled = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True)
+        translated_prompt_text = translate_choices(text, seed)
+        tokens = clip.tokenize(translated_prompt_text)
+        _, cond_pooled = clip.encode_from_tokens(tokens, return_pooled=True)
         for t in conditioning_to:
             n = [t[0], t[1].copy()]
             position_params = [(cond_pooled, height // 8, width // 8, y // 8, x // 8)]
@@ -1469,6 +1472,24 @@ class ImagePadForOutpaint:
 
         return (new_image, mask)
 
+class DynamicPrompt:
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "text": ("STRING", {"multiline": True, "placeholder": "A prompt which can contain random text. Eg. This is a {good|great|spectacular} prompt."}),
+                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+            }
+        }
+
+    RETURN_TYPES = ("STRING",)
+    FUNCTION = "dynamic_prompt"
+    CATEGORY = "conditioning"
+
+    def dynamic_prompt(self, text, seed):
+        translated_prompt_text = translate_choices(text, seed)
+        return (translated_prompt_text,)
+
 
 NODE_CLASS_MAPPINGS = {
     "KSampler": KSampler,
@@ -1530,6 +1551,8 @@ NODE_CLASS_MAPPINGS = {
 
     "ConditioningZeroOut": ConditioningZeroOut,
     "ConditioningSetTimestepRange": ConditioningSetTimestepRange,
+
+    "DynamicPrompt": DynamicPrompt,
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
@@ -1583,6 +1606,8 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "ImageUpscaleWithModel": "Upscale Image (using Model)",
     "ImageInvert": "Invert Image",
     "ImagePadForOutpaint": "Pad Image for Outpainting",
+    # Prompts
+    "DynamicPrompt": "Dynamic Prompt",
     # _for_testing
     "VAEDecodeTiled": "VAE Decode (Tiled)",
     "VAEEncodeTiled": "VAE Encode (Tiled)",