Convert indentation from tabs to spaces, to match the codebase.

This commit is contained in:
Barry Downes 2023-08-06 10:41:08 +10:00
parent ed253b31df
commit 9c505aa38c
3 changed files with 271 additions and 271 deletions

View File

@ -9,111 +9,111 @@ from comfy.parse import ParseError, ParseLogicError
from comfy.comments import strip_c_comments
def translate_choices_with_c_comments(text, seed=None, strict=True, reescape=frozenset()):
    '''
    Strip C-style comments from the text, then expand its "{A|B|C}" choices.

    This is a two-phase convenience wrapper: strip_c_comments() runs first so
    that anything inside a comment is never seen by the choice parser, then
    translate() resolves the remaining choice blocks.

    text     - the prompt text to process.
    seed     - random seed forwarded to translate(); None lets translate() pick one.
    strict   - forwarded to both phases.
    reescape - forwarded to translate(); see its documentation.

    Returns the processed text.
    '''
    text = strip_c_comments(text, strict=strict)
    text = translate(text, seed=seed, strict=strict, reescape=reescape)
    return text
def get_random_seed():
    '''
    Return a fresh random seed as a non-negative integer below 2**64.

    The value is built from 8 bytes of os.urandom().
    '''
    # byteorder is passed explicitly: it only gained a default in Python 3.11,
    # and omitting it raises TypeError on earlier versions. 'big' is that default.
    return int.from_bytes(os.urandom(8), 'big')
def translate(text, seed=None, strict=True, reescape=frozenset()):
    '''
    Parses the text, translating "{A|B|C}" choices into a single chosen option.
    An option is chosen randomly from the available options.
    For example: "a {green|red|blue} ball on a {wooden|metal} bench" might expand to "a red ball on a wooden bench".
    Nesting choices is supported, so
    "a woman wearing a {{lavish|garish|expensive|stylish|} {red|brown|blue|} dress|{sexy|realistic|} {police|nurse|maid} uniform|{black leather|wooly|thick} coat}"
    could expand to
    "a woman wearing a realistic police uniform".
    All random choices are governed by the supplied random seed value, ensuring repeatability.
    If seed is None, a seed is obtained from get_random_seed().
    If strict is True, exceptions will be thrown if the input doesn't conform to expectations.
    If strict is False, malformed input is tolerated: a stray choice delimiter or closing brace
    outside any choice block is dropped, and a missing closing brace is ignored.
    reescape indicates the set of metacharacters that, if escaped with a backslash in the input, should be re-escaped in the output.
    This is useful to avoid need for multi-escaping when incorporating this parser as a single phase in a multi-phase parsing operation.
    Note that while the default is a frozenset, you can pass anything that works with the "in" operator, such as a string or a set.

    Returns the translated text.
    Raises ParseError (strict mode only) for a trailing backslash, an unclosed open brace,
    or a choice delimiter / closing brace found outside any choice block.
    '''
    def parse_choice(cursor):
        # Parse the options of one choice block; the cursor sits just after the open brace.
        options = []
        while True:
            options.append(parse_text_with_choices(cursor))
            if not cursor.match(r'\|'):
                # at this point, the input must be }
                # although for incorrectly-formed input, it could be end of input too
                # regardless, the correct action here is to break and return to the caller
                break
            # otherwise a delimiter was consumed: loop around for another option
        # choose one of the options
        return rng.choice(options)

    def parse_text_with_choices(cursor):
        # Parse literal text, escapes and nested choice blocks until reaching
        # an unescaped | or }, or the end of input (none of which are consumed here).
        out = []
        while True:
            if cursor.match(r'\\'):
                # \ = escape character: the following character is taken literally
                if m := cursor.match(r'.'):
                    ch = m.group(0)
                    if ch in reescape:
                        out.append('\\')
                    out.append(ch)
                else:
                    if strict:
                        raise ParseError(cursor, f'Unexpected end of input after backslash')
            elif cursor.match(r'\{'):
                # { ... | ... } choice
                # remember where the brace was, so a missing-brace error can point at it
                openbrace = cursor.prior()
                chosen_text = parse_choice(cursor)
                if not cursor.match(r'\}'):
                    if strict:
                        raise ParseError(openbrace, f"Missing matching closing brace '}}' for earlier open brace '{{'")
                out.append(chosen_text)
            elif m := cursor.match(r'[^\\\{\}\|]+'):
                # 1 or more non-metacharacters
                out.append(m.group(0))
            else:
                # didn't match \, { or non-metacharacters
                # must be either |, } or end of input
                break
        return ''.join(out)

    def parse_text_with_choices_outer(cursor):
        # this function and the contained loop is required to support the non-strict parsing mode
        # it catches the case where we exit parse_text_with_choices upon encountering | or }, and don't find ourselves within a calling instance of parse_choice
        out = []
        while True:
            out.append(parse_text_with_choices(cursor))
            if cursor.match(r'$'):
                break
            elif cursor.match(r'\|'):
                if strict:
                    raise ParseError(cursor.prior(), f"Encountered a choice delimiter '|' outside any choice block")
                # non-strict: the stray delimiter has been consumed and is dropped
            elif cursor.match(r'\}'):
                if strict:
                    raise ParseError(cursor.prior(), f"Encountered a closing brace '}}' without a matching open brace")
                # non-strict: the stray brace has been consumed and is dropped
            else:
                if strict:
                    raise ParseLogicError(cursor, f'Failed to parse up to the end of the prompt text')
                break
        return ''.join(out)

    if seed is None:
        seed = get_random_seed()
    # init our local random number generator
    rng = random.Random(seed)
    cursor = parse.Cursor(text)
    return parse_text_with_choices_outer(cursor)

View File

@ -2,11 +2,11 @@
import re
def strip_c_comments(text, strict=True):
    '''
    Processes the text and strips out any C-style block "/* ... */" or line "// ..." comments found.

    Block comments may span multiple lines. A block comment that is never closed
    is stripped up to the end of the text (a single final newline, if present, is kept).
    Line comments run up to, but not including, the end of their line.

    text   - the text to process.
    strict - accepted for signature compatibility with the other parsing phases; currently unused.

    Returns the text with the comments removed.
    '''
    # from old dynamicPrompts.js: return str.replace(/\/\*[\s\S]*?\*\/|\/\/.*/g,'');
    # [\s\S] is the "any character, including newline" idiom. Inside a character
    # class '.' is a literal dot, so the earlier [.\n] only matched dots and
    # newlines and failed to strip ordinary block comments.
    return re.sub(r'/\*[\s\S]*?(?:\*/|$)|//.*', '', text)
def strip_hash_comments(text, strict=True):
    '''
    Processes the text and strips out any hash "# ... " comments found.

    Each comment runs from the '#' up to, but not including, the end of its line.

    text   - the text to process.
    strict - accepted for signature compatibility with the other parsing phases; currently unused.

    Returns the text with the comments removed.
    '''
    return re.sub(r'#.*', '', text)

View File

@ -3,172 +3,172 @@ import re
class ParseError(Exception):
    '''
    Raised when the input text being parsed is malformed.

    input   - the parse cursor at the point of the error; it is cloned, so the
              caller may keep advancing its own cursor afterwards.
    message - human-readable description of the problem.

    str() of the error is the message followed by the cursor's loc() description.
    '''

    def __init__(self, input, message):
        self.input = input.clone()  # clone the parse cursor at the point of the error
        self.message = message

    def __str__(self):
        return f'{self.message} {self.input.loc()}'
class ParseLogicError(ParseError):
    '''
    Raised when the parser itself reaches a state that should be impossible.

    Like a ParseError, in that it has an associated cursor position which will
    help in understanding the error; but unlike a ParseError, because it wasn't
    the user's fault: something that shouldn't be possible occurred in the code.
    '''
class Cursor:
    r'''
    A position within a piece of text, with regex-based matching at that position.

    text       - the text being parsed.
    skip_space - default for whether match() skips leading whitespace before matching.
    consume    - default for whether a successful match() advances the cursor.
    space      - regex describing what counts as whitespace when skipping.
    '''

    def __init__(self, text, skip_space=False, consume=True, space=r'\s+'):
        self.text = text
        self.pos = 0  # current text position
        self.start = 0  # last match start position before whitespace skipping
        self.skip = 0  # last match start position after whitespace skipping
        self.end = 0  # last match end position
        self.skip_space = skip_space
        self.consume = consume
        self.space = space

    def prior(self):
        # returns a cursor pointing at the position prior to the last match
        prior = self.clone()
        prior.end = prior.start
        prior.pos = prior.start
        return prior

    def loc(self):
        # describe the cursor position in a human-readable form, suitable for error messages
        # the result names the line number and offset, then shows the line with a caret under the position
        pos = self.pos
        text = self.text
        endline = re.compile(r'\n|$')

        # locate the line in which the current position is located
        line_start = 0
        line_id = 0
        while True:
            # determine line end position
            match = endline.search(text, pos=line_start)
            more_lines = match.group() == '\n'
            line_end = match.start()
            # we add 1 to include the newline in the positions covered (if present)
            # at the end of the string, with no newline, this also covers pos == len(text)
            if line_start <= pos < (line_end + 1):
                # pos is within the current line
                break
            if not more_lines:
                # pos is, somehow, somewhere past the end of the string
                # for now, we'll just treat it as if pos was in the final line
                break
            line_start = line_end + 1  # skip newline
            line_id += 1

        line_number = line_id + 1
        # line_offset is the offset of the cursor *within* the line, not the offset of the line itself
        line_offset = pos - line_start
        line_text = text[line_start:line_end]  # excludes newline
        # keep tabs as tabs so the caret lines up under the right column
        caret_spacing = re.sub(r'[^\t]', ' ', line_text[:line_offset])
        return f'at line {line_number}, offset {line_offset}, line string {repr(line_text)}\n{line_text}\n{caret_spacing}^\n'

    def clone(self):
        # python's immutable strings should mean the actual string data for text is not copied
        clone = Cursor(self.text, skip_space=self.skip_space, consume=self.consume, space=self.space)
        # pos is the main purpose of the clone
        clone.pos = self.pos
        # this other stuff, we're just cloning for completeness
        clone.start = self.start
        clone.skip = self.skip
        clone.end = self.end
        return clone

    def string_match(self, string):
        '''
        Check for an exact match between the provided string and the input.
        Note that it's a string, not a regex. Every character is literal.
        And it returns a bool, not a match object.
        On a match, the cursor is advanced past the matched text.
        '''
        pos = self.pos
        self.start = pos
        self.skip = pos
        self.end = pos
        size = len(string)
        if self.text[pos:pos + size] != string:
            return False
        pos += size
        self.pos = pos
        self.end = pos
        return True

    def match(self, regex, skip_space=None, consume=None, space=None):
        r'''
        check if a regex matches at the cursor position
        given a match, update the cursor to consume the matched text (by default)
        skip_space, consume and space fall back to the cursor's own settings when None
        returns the match object, or None if the regex did not match

        Typical usage:
            if m := input.match(r'(\d+)'):
                # handle numbers
                value = int(m.group(1))
                # ...
            elif input.match(r'"'):
                # handle double-quoted strings
                # ...
            elif input.match(r'for'):
                # "for" loop
                # ...
            elif input.match(r'\s*$'):
                # end of input
                break
            else:
                raise ParseError(input, 'Unexpected input')
        '''
        if skip_space is None:
            skip_space = self.skip_space
        if consume is None:
            consume = self.consume
        if space is None:
            space = self.space

        pos = self.pos
        self.start = pos
        self.skip = pos
        self.end = pos

        if skip_space:
            space_compile_flags = re.DOTALL
            space = re.compile(space, space_compile_flags)  # <<< todo: compile once and reuse
            space_match = space.match(self.text, pos=pos)
            if space_match:
                pos = space_match.end()
                self.skip = pos

        compile_flags = re.DOTALL
        pattern = re.compile(regex, compile_flags)
        match = pattern.match(self.text, pos=pos)
        if match:
            pos = match.end()
            self.end = pos
            if consume:
                self.pos = pos
        return match

    def match_exact(self, regex, skip_space=False, consume=True):
        # check if a regex matches at the cursor position
        # consume the matched text (by default)
        # do not skip initial whitespace (by default)
        return self.match(regex, skip_space=skip_space, consume=consume)

    def check(self, regex, skip_space=None, consume=False):
        # check if a regex matches at the cursor position
        # do not consume the matched text (by default)
        # whitespace skipping follows the cursor's own skip_space setting (by default)
        # another suitable name for this would have been "lookahead"
        return self.match(regex, skip_space=skip_space, consume=consume)