Convert indentation from tabs to spaces, to match the codebase.

This commit is contained in:
Barry Downes 2023-08-06 10:41:08 +10:00
parent ed253b31df
commit 9c505aa38c
3 changed files with 271 additions and 271 deletions

View File

@ -9,111 +9,111 @@ from comfy.parse import ParseError, ParseLogicError
from comfy.comments import strip_c_comments from comfy.comments import strip_c_comments
def translate_choices_with_c_comments(text, seed=None, strict=True, reescape=frozenset()):
    '''
    Strip C-style comments from the text, then expand its "{A|B|C}" choices.

    The text is first passed through strip_c_comments and the result is handed to
    translate; seed, strict and reescape are forwarded to translate unchanged
    (strict is also forwarded to strip_c_comments).
    Returns the translated text.
    '''
    uncommented = strip_c_comments(text, strict=strict)
    return translate(uncommented, seed=seed, strict=strict, reescape=reescape)
def get_random_seed():
    '''
    Return a random integer in the range [0, 2**64), built from 8 bytes of os.urandom.

    The byte order is passed explicitly: int.from_bytes only gained a default
    byteorder ('big') in Python 3.11, so omitting it raises TypeError on older versions.
    '''
    return int.from_bytes(os.urandom(8), 'big')
def translate(text, seed=None, strict=True, reescape=frozenset()):
    '''
    Parses the text, translating "{A|B|C}" choices into a single chosen option.
    An option is chosen randomly from the available options.
    For example: "a {green|red|blue} ball on a {wooden|metal} bench" might expand to "a red ball on a wooden bench".
    Nesting choices is supported, so
        "a woman wearing a {{lavish|garish|expensive|stylish|} {red|brown|blue|} dress|{sexy|realistic|} {police|nurse|maid} uniform|{black leather|wooly|thick} coat}"
    could expand to
        "a woman wearing a realistic police uniform".
    All random choices are governed by the supplied random seed value, ensuring repeatability.
    If seed is None, a fresh seed is obtained from get_random_seed().
    If strict is True, exceptions will be thrown if the input doesn't conform to expectations:
    ParseError for malformed input (dangling backslash, unbalanced braces, '|' outside a choice),
    ParseLogicError if the parser stops before the end of the text for any other reason.
    If strict is False, the same conditions are tolerated and parsing simply carries on.
    reescape indicates the set of metacharacters that, if escaped with a backslash in the input, should be re-escaped in the output.
    This is useful to avoid need for multi-escaping when incorporating this parser as a single phase in a multi-phase parsing operation.
    Note that while the default is a frozenset, you can pass anything that works with the "in" operator, such as a string or a set.
    Returns the translated text.
    '''
    def parse_choice(cursor):
        # called just after an opening brace: collect the '|'-separated options, then pick one
        options = []
        while True:
            options.append(parse_text_with_choices(cursor))
            if not cursor.match(r'\|'):
                # at this point, the input must be }
                # although for incorrectly-formed input, it could be end of input too
                # regardless, the correct action here is to break and return to the caller
                break
            # otherwise we consumed a '|': loop around for another option
        # choose one of the options
        return rng.choice(options)

    def parse_text_with_choices(cursor):
        # parse a run of literal text, escapes and nested choices
        # stops (without consuming) at '|', '}' or end of input
        out = []
        while True:
            if cursor.match(r'\\'):
                # \ = escape character; the next character is taken literally
                if m := cursor.match(r'.'):
                    ch = m.group(0)
                    if ch in reescape:
                        # keep the backslash so a later parsing phase still sees the escape
                        out.append('\\')
                    out.append(ch)
                else:
                    if strict:
                        raise ParseError(cursor, 'Unexpected end of input after backslash')
            elif cursor.match(r'\{'):
                # { ... | ... } choice
                # remember where the brace was, for error reporting
                openbrace = cursor.prior()
                chosen_text = parse_choice(cursor)
                if not cursor.match(r'\}'):
                    if strict:
                        raise ParseError(openbrace, "Missing matching closing brace '}' for earlier open brace '{'")
                out.append(chosen_text)
            elif m := cursor.match(r'[^\\\{\}\|]+'):
                # 1 or more non-metacharacters
                out.append(m.group(0))
            else:
                # didn't match \, { or non-metacharacters
                # must be either |, } or end of input
                break
        return ''.join(out)

    def parse_text_with_choices_outer(cursor):
        # this function and the contained loop is required to support the non-strict parsing mode
        # it catches the case where we exit parse_text_with_choices upon encountering | or },
        # and don't find ourselves within a calling instance of parse_choice
        out = []
        while True:
            out.append(parse_text_with_choices(cursor))
            if cursor.match(r'$'):
                break
            elif cursor.match(r'\|'):
                # in non-strict mode the stray '|' is consumed and dropped from the output
                if strict:
                    raise ParseError(cursor.prior(), "Encountered a choice delimiter '|' outside any choice block")
            elif cursor.match(r'\}'):
                # in non-strict mode the stray '}' is consumed and dropped from the output
                if strict:
                    raise ParseError(cursor.prior(), "Encountered a closing brace '}' without a matching open brace")
            else:
                if strict:
                    raise ParseLogicError(cursor, 'Failed to parse up to the end of the prompt text')
                break
        return ''.join(out)

    if seed is None:
        seed = get_random_seed()
    # init our local random number generator
    # (a private Random instance, so the global random state is left untouched)
    rng = random.Random(seed)
    cursor = parse.Cursor(text)
    return parse_text_with_choices_outer(cursor)

View File

@ -2,11 +2,11 @@
import re import re
def strip_c_comments(text, strict=True):
    '''
    Processes the text and strips out any C-style block "/* ... */" or line "// ..." comments found.

    A block comment may span multiple lines. An unterminated block comment is stripped
    up to the end of the text. A line comment is stripped up to, but not including,
    the newline that ends it.
    strict is accepted for signature consistency with the other parsing functions;
    it is currently unused.
    Returns the text with the comments removed.
    '''
    # from old dynamicPrompts.js: return str.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g,'');
    # [\s\S] means "any character, including newline"
    # (the former [.\n] only matched literal dots and newlines, because '.' is not a wildcard inside [...])
    return re.sub(r'/\*[\s\S]*?(?:\*/|$)|//.*', '', text)
def strip_hash_comments(text, strict=True):
    '''
    Remove every hash comment from the text and return the result.

    A comment runs from a '#' up to, but not including, the end of its line.
    strict is accepted but not used.
    '''
    hash_comment = re.compile(r'#.*')
    return hash_comment.sub('', text)

View File

@ -3,172 +3,172 @@ import re
class ParseError(Exception):
    '''
    Raised when the text being parsed is malformed.

    input is the parse cursor at the point of the error; it must provide clone() and loc().
    message is the human-readable description of the problem.
    str() of the exception is the message followed by the cursor's location description.
    '''
    def __init__(self, input, message):
        # pass the message up so that args, repr() and tracebacks carry it
        super().__init__(message)
        self.input = input.clone() # clone the parse cursor at the point of the error
        self.message = message

    def __str__(self):
        return f'{self.message} {self.input.loc()}'
class ParseLogicError(ParseError):
    '''
    A parse failure caused by the parsing code itself rather than by the user's input.

    Like a ParseError, it carries a cursor position that helps in understanding the error.
    Unlike a ParseError, it signals that something which shouldn't be possible occurred in the code.
    '''
class Cursor:
    '''
    A read position within a text, with regex matching helpers for hand-written parsers.

    text is the full text being parsed and pos is the current position within it.
    start, skip and end record the most recent match attempt (see __init__).
    skip_space, consume and space are the per-cursor defaults used by match().
    '''

    # matches the end of a line: either a newline, or the end of the text
    _ENDLINE = re.compile(r'\n|$')

    def __init__(self, text, skip_space=False, consume=True, space=r'\s+'):
        '''
        text: the text to parse
        skip_space: whether match() skips leading whitespace by default
        consume: whether match() advances the cursor past matched text by default
        space: the regex that defines "whitespace" when skipping
        '''
        self.text = text
        self.pos = 0 # current text position
        self.start = 0 # last match start position before whitespace skipping
        self.skip = 0 # last match start position after whitespace skipping
        self.end = 0 # last match end position
        self.skip_space = skip_space
        self.consume = consume
        self.space = space

    def prior(self):
        # returns a cursor pointing at the position prior to the last match
        # this cursor itself is left unchanged
        prior = self.clone()
        prior.end = prior.start
        prior.pos = prior.start
        return prior

    def loc(self):
        '''
        Describe the cursor position in a human-readable form, suitable for error messages.
        The result names the line number (1-based) and the offset within the line (0-based),
        then shows the line itself with a caret on the following line marking the position.
        '''
        pos = self.pos
        text = self.text
        # locate the line in which the current position is located
        line_start = 0
        line_id = 0
        while True:
            # determine line end position
            match = self._ENDLINE.search(text, pos=line_start)
            more_lines = match.group() == '\n'
            line_end = match.start()
            # we add 1 to include the newline in the positions covered (if present)
            # at the end of the text, with no newline, this also covers pos == len(text)
            if line_start <= pos < (line_end + 1):
                # pos is within the current line
                break
            if not more_lines:
                # pos is, somehow, somewhere outside the text
                # for now, we'll just treat it as if pos was in the final line
                break
            line_start = line_end + 1 # skip newline
            line_id += 1
        line_number = line_id + 1
        # line_offset is the offset of the cursor *within* the line, not the offset *of* the line
        line_offset = pos - line_start
        line_text = text[line_start:line_end] # excludes newline
        # replace everything except tabs with spaces, so the caret lines up under the position
        caret_spacing = re.sub(r'[^\t]', ' ', line_text[:line_offset])
        return f'at line {line_number}, offset {line_offset}, line string {repr(line_text)}\n{line_text}\n{caret_spacing}^\n'

    def clone(self):
        # returns an independent copy of this cursor
        # python's immutable strings should mean the actual string data for text is not copied
        clone = Cursor(self.text, skip_space=self.skip_space, consume=self.consume, space=self.space)
        # pos is the main purpose of the clone
        clone.pos = self.pos
        # this other stuff, we're just cloning for completeness
        clone.start = self.start
        clone.skip = self.skip
        clone.end = self.end
        return clone

    def string_match(self, string):
        '''
        Check for an exact match between the provided string and the input.
        Note that it's a string, not a regex. Every character is literal.
        And it returns a bool, not a match object.
        On a match, the cursor is advanced past the matched text.
        '''
        pos = self.pos
        self.start = pos
        self.skip = pos
        self.end = pos
        size = len(string)
        if self.text[pos:pos + size] == string:
            pos += size
            self.pos = pos
            self.end = pos
            return True
        return False

    def match(self, regex, skip_space=None, consume=None, space=None):
        r'''
        Check if a regex matches at the cursor position.
        Given a match, update the cursor to consume the matched text (by default).
        Returns the match object, or None if the regex doesn't match.

        skip_space, consume and space override the cursor's defaults for this call;
        None means "use the cursor's default".
        Both the regex and the whitespace regex are compiled with re.DOTALL.

        Typical usage:
            if m := input.match(r'(\d+)'):
                # handle numbers
                value = int(m.group(1))
                # ...
            elif input.match(r'for'):
                # "for" loop
                # ...
            elif input.match(r'\s*$'):
                # end of input
                break
            else:
                raise ParseError(input, 'Unexpected input')
        '''
        if skip_space is None:
            skip_space = self.skip_space
        if consume is None:
            consume = self.consume
        if space is None:
            space = self.space

        pos = self.pos
        self.start = pos
        self.skip = pos
        self.end = pos

        if skip_space:
            # re.compile is called on every match; the re module caches compiled patterns
            space = re.compile(space, re.DOTALL)
            space_match = space.match(self.text, pos=pos)
            if space_match:
                pos = space_match.end()
                self.skip = pos

        pattern = re.compile(regex, re.DOTALL)
        match = pattern.match(self.text, pos=pos)
        if match:
            pos = match.end()
            self.end = pos
            if consume:
                self.pos = pos
        return match

    def match_exact(self, regex, skip_space=False, consume=True):
        # check if a regex matches at the cursor position
        # consume the matched text (by default)
        # do not skip initial whitespace (by default)
        # unlike match(), the defaults here are fixed and ignore the cursor's own defaults
        return self.match(regex, skip_space=skip_space, consume=consume)

    def check(self, regex, skip_space=None, consume=False):
        # check if a regex matches at the cursor position
        # do not consume the matched text (by default)
        # whitespace skipping follows the cursor's default unless skip_space is given
        # another suitable name for this would have been "lookahead"
        return self.match(regex, skip_space=skip_space, consume=consume)