Reimplement dynamic prompts in the CLIPTextEncode node.

To achieve repeatability, a random seed input is added. By this method, the original prompt text remains intact when serializing and deserializing from a PNG or history. But the random seed ensures the same choices will be made when re-translating the "choices" in the original prompt text, thus ensuring repeatability. Also, choices can now be nested.
2026-07-14 10:27:17 +08:00 · 2023-07-30 15:40:23 +10:00 · 2023-07-30 15:40:23 +10:00 · d91544b83c
commit d91544b83c
parent 2bfe6886c8
3 changed files with 285 additions and 3 deletions
--- a/comfy/parse.py
+++ b/comfy/parse.py
@ -0,0 +1,161 @@
+
+import re
+
+
+class ParseError(Exception):
+	def __init__(self, input, message):
+		self.input = input.clone() # clone the parse cursor at the point of the error
+		self.message = message
+	
+	def __str__(self):
+		return f'{self.message} {self.input.loc()}'
+
+
+class Cursor:
+	def __init__(self, text, skip_space=False, consume=True, space=r'\s+'):
+		self.text = text
+		self.pos = 0		# current text position
+		
+		self.start = 0		# last match start position before whitespace skipping
+		self.skip = 0		# last match start position after whitespace skipping
+		self.end = 0		# last match end position
+		
+		self.skip_space = skip_space
+		self.consume = consume
+		self.space = space
+	
+	def loc(self):
+		# describe the cursor position in a human-readable form, suitable for error messages
+		
+		pos = self.pos
+		text = self.text
+		endline = re.compile(r'\n|$')
+		
+		# locate the line in which the current position is located
+		line_start = 0
+		line_id = 0
+		while True:
+			# determine line end position
+			match = endline.search(text, pos=line_start)
+			more_lines = match.group() == '\n'
+			line_end = match.start()
+			
+			# we add 1 to include the newline in the positions covered (if present)
+			# <<< at the end of the string, with no newline, it still kinda works okay I think
+			if line_start <= pos < (line_end + 1):
+				# pos is within the current line
+				break
+			
+			if not more_lines:
+				# pos is, somehow, somewhere past the end of the string
+				# <<< for now, we'll just treat it as if pos was in the final line
+				break
+			
+			line_start = line_end + 1 # skip newline
+			line_id += 1
+		
+		line_size = line_end - line_start
+		
+		line_number = line_id + 1
+		# line_offset is so ambiguous - is it offset *of* the line or offset of the cursor *within* the line?  in this case, it's the latter
+		line_offset = pos - line_start
+		line_text = text[line_start:line_end] # excludes newline
+		caret_spacing = re.sub(r'[^\t]', ' ', line_text[:line_offset])
+		
+		return f'at line {line_number}, offset {line_offset}, line string {repr(line_text)}\n{line_text}\n{caret_spacing}^\n'
+	
+	def clone(self):
+		# python's immutable strings should mean the actual string data for text is not copied
+		clone = Cursor(text)
+		# pos is the main purpose of the clone
+		clone.pos = self.pos
+		# this other stuff, we're just cloning for completeness
+		clone.start = self.start
+		clone.skip = self.skip
+		clone.end = self.end
+		return clone
+	
+	def string_match(self, string):
+		'''
+		Check for an exact match between the provided string and the input.
+		Note that it's a string, not a regex.  Every character is literal.
+		And it returns a bool, not a match object.
+		'''
+		pos = self.pos
+		self.start = pos
+		self.skip = pos
+		self.end = pos
+		size = len(string)
+		if self.text[self.pos:self.pos + size] == string:
+			pos += size
+			self.pos = pos
+			self.end = pos
+			return True
+		else:
+			return False
+	
+	def match(self, regex, skip_space=None, consume=None, space=None):
+		'''
+		check if a regex matches at the cursor position
+		given a match, update the cursor to consume the matched text (by default)
+		Typical usage:
+			if input.match(r'(\d+)'):
+				# handle numbers
+				value = int(input.m.group(1))
+				# ...
+			elif input.match(r'"'):
+				# handle double-quoted strings
+				# ...
+			elif input.match(r'for'):
+				# "for" loop
+				# ...
+			elif input.match(r'\s*$'):
+				# end of input
+				break
+			else:
+				raise 
+		'''
+		if skip_space == None:
+			skip_space = self.skip_space
+		if consume == None:
+			consume = self.consume
+		if space == None:
+			space = self.space
+		
+		pos = self.pos
+		self.start = pos
+		self.skip = pos
+		self.end = pos
+		
+		if skip_space:
+			space_compile_flags = re.DOTALL
+			space = re.compile(space, space_compile_flags) # <<< todo: compile once and reuse
+			space_match = space.match(self.text, pos=pos)
+			if space_match:
+				pos = space_match.end()
+				self.skip = pos
+		
+		compile_flags = re.DOTALL
+		pattern = re.compile(regex, compile_flags)
+		match = pattern.match(self.text, pos=pos)
+		if match:
+			pos = match.end()
+			self.end = pos
+			if consume:
+				self.pos = pos
+		
+		return match
+	
+	def match_exact(self, regex, skip_space=False, consume=True):
+		# check if a regex matches at the cursor position
+		# consume the matched text (by default)
+		# skip initial whitespace (by default)
+		return self.match(regex, skip_space=skip_space, consume=consume)
+	
+	def check(self, regex, skip_space=None, consume=False):
+		# check if a regex matches at the cursor position
+		# do not consume the matched text (by default)
+		# skip initial whitespace (by default)
+		# another suitable name for this would have been "lookahead"
+		return self.match(regex, skip_space=skip_space, consume=consume)
+
--- a/comfy/parse_choice.py
+++ b/comfy/parse_choice.py
@ -0,0 +1,114 @@
+
+
+import random
+
+import comfy.parse
+from comfy.parse import ParseError
+
+class LogicError(Exception):
+	# something that shouldn't be possible occurred in the code
+	# not the user's fault
+	pass
+
+
+def translate_choices(text, seed=0):
+	'''
+	Parses the text, translating "{A|B|C}" choices into a single choice.
+	An option is chosen randomly from the available options.
+	For example: "a {green|red|blue} ball on a {wooden|metal} bench" might expand to "a red ball on a wooden bench".
+	Nesting choices is supported, so 
+	"a woman wearing a {{lavish|garish|expensive|stylish|} {red|brown|blue|} dress|{sexy|realistic|} {police|nurse|maid} uniform|{black leather|wooly|thick} coat}"
+	could expand to
+	"a woman wearing a stylish brown dress".
+	All random choices are governed by the supplied random seed value, ensuring repeatability.
+	You can use a single PrimitiveNode with an INT value, and connect that to the CLIPTextEncode node and the KSampler node in a typical Stable Diffusion 1.5 workflow, for example.
+	
+	Notes:
+		* this function must correctly support valid inputs
+		* for invalid inputs:
+			* raise an error if that's supported cleanly enough in the system
+			* otherwise, return the original input as the output, and issue a warning to stdout or stderr if that's acceptable in the system
+		* must preserve escaped metacharacters for the prompt weight parsing
+	'''
+	
+	# the user will be escaping for both this processing (using curly braces) and the weight processing (using round parentheses)
+	# from their perspective, they will need to escape literal data like this to cover both sets of processing:
+	# { -> \{
+	# } -> \}
+	# | -> \|
+	# \ -> \\
+	# ( -> \(
+	# ) -> \)
+	
+	def parse_choice(input):
+		options = []
+		while True:
+			options.append(parse_text_with_choices(input))
+			if 0: pass
+			elif m := input.match(r'\|'):
+				# loop around for another choice
+				pass
+			elif m := input.match(r'\}'):
+				break
+			else:
+				raise ParseError(input, f"Expected '|' or '}}' after choice text")
+		
+		# choose one of the options
+		text = rng.choice(options)
+		return text
+	
+	def parse_text_with_choices(input):
+		out = []
+		
+		while True:
+			if 0: pass
+			elif m := input.match(r'[\\\{]'): # a single metacharacter, \ or {
+				ch = m.group(0)
+				if 0: pass
+				elif ch == '\\':
+					if not (m := input.match(r'.')):
+						raise ParseError(input, f'Unexpected end of input after backslash')
+					ch = m.group(0)
+					if 0: pass
+					elif ch in {'\\', '(', ')'}:
+						# these are metacharacters in the weight parsing phase, so we have to handle them specially
+						# maintain the escaping for the upcoming weight parsing phase
+						out.append(f'\\{ch}')
+					elif ch in {'{', '}', '|'}:
+						# escaping a metacharacter to make it literal
+						out.append(ch) # output literal character
+					else:
+						# other characters shouldn't require escaping
+						# treat it as a normal escape regardless
+						# policy subject to change
+						out.append(ch)
+				elif ch == '{':
+					# choice
+					chosen_text = parse_choice(input)
+					out.append(chosen_text)
+				else:
+					raise LogicError(input, f"Expected metacharacter '\\' or '{{' ")
+			elif m := input.match(r'[^\\\{\}\|]+'): # 1 or more non-metacharacters
+				out.append(m.group(0))
+			else:
+				# didn't match \, { or non-metacharacters
+				# must be either |, } or end of input
+				break
+		
+		return ''.join(out)
+	
+	# init our local random number generator
+	rng = random.Random(seed)
+	
+	try:
+		input = comfy.parse.Cursor(text)
+		out = parse_text_with_choices(input)
+		if not input.match(r'$'):
+			raise ParseError(input, f'Failed to parse up to the end of the prompt text')
+		
+		return out
+	except (ParseError, LogicError) as e:
+		# alternative: re-throw the error
+		stdout.write(f'Error parsing prompt: {e}');
+		return text
+
--- a/nodes.py
+++ b/nodes.py
@ -14,6 +14,8 @@ from PIL.PngImagePlugin import PngInfo
 import numpy as np
 import safetensors.torch

+from comfy.parse_choice import translate_choices
+
 sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath(__file__)), "comfy"))


@ -44,14 +46,19 @@ MAX_RESOLUTION=8192
 class CLIPTextEncode:
    @classmethod
    def INPUT_TYPES(s):
-        return {"required": {"text": ("STRING", {"multiline": True}), "clip": ("CLIP", )}}
+        return {"required": {
+            "text": ("STRING", {"multiline": True}), 
+            "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
+            "clip": ("CLIP", )
+        }}
    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "encode"

    CATEGORY = "conditioning"

-    def encode(self, clip, text):
-        tokens = clip.tokenize(text)
+    def encode(self, clip, seed, text):
+        translated_prompt_text = translate_choices(text, seed)
+        tokens = clip.tokenize(translated_prompt_text)
        cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)
        return ([[cond, {"pooled_output": pooled}]], )