mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-10 00:07:33 +08:00
refactor(assets): extract cursor JSON escaping helper; size wire cap above per-field caps
Addresses review feedback on cursor.py: - Extract the inline escape chain into _apply_wire_compatible_json_escapes() with a comment pinning it to the wire format's escape set, so the parity intent is explicit rather than reading as an ad-hoc transform. - Raise MAX_ENCODED_CURSOR_LENGTH to 8192 (comfortably above the ~5.2KB worst-case the per-field caps can produce) and drop the mint-time length guard. Encoder/decoder symmetry now holds by construction: the encoder can't produce a cursor the decode path rejects, so there is no confusing user-visible 'cursor too long' failure at mint time. - Rewrite the two over-wire-cap tests to assert worst-case multibyte and escape-heavy values mint and round-trip, instead of being rejected.
This commit is contained in:
parent
546d7d2eb6
commit
f7558232fa
@ -42,7 +42,14 @@ class InvalidCursorError(ValueError):
|
||||
# MAX_CURSOR_VALUE_LENGTH is 512 to fit the `AssetReference.name` column max
|
||||
# (`String(512)`) — otherwise a long-named asset would mint a cursor the same
|
||||
# server then refuses on the next request.
|
||||
MAX_ENCODED_CURSOR_LENGTH = 1024
|
||||
#
|
||||
# MAX_ENCODED_CURSOR_LENGTH is the decode-path guard, sized comfortably above
|
||||
# the largest cursor the per-field caps can produce. Worst case is value + id
|
||||
# at their caps with every character escape-expanding to the six-byte `\uXXXX`
|
||||
# form, which is ~5.2 KB once base64url-encoded. At 8192 the encoder can never
|
||||
# mint a cursor that exceeds it, so a freshly minted cursor always decodes on
|
||||
# the next request and there is no user-visible "cursor too long" failure.
|
||||
MAX_ENCODED_CURSOR_LENGTH = 8192
|
||||
MAX_CURSOR_VALUE_LENGTH = 512
|
||||
MAX_CURSOR_ID_LENGTH = 128
|
||||
|
||||
@ -58,6 +65,27 @@ class CursorPayload:
|
||||
_VALID_ORDERS = ("asc", "desc")
|
||||
|
||||
|
||||
def _apply_wire_compatible_json_escapes(raw: str) -> str:
|
||||
"""Escape the characters the cursor wire format requires escaped.
|
||||
|
||||
The wire format escapes `<`, `>`, `&`, U+2028, and U+2029 — and nothing
|
||||
else, leaving other non-ASCII as literal UTF-8 — so a value carrying any of
|
||||
them encodes to identical bytes across every compatible implementation of
|
||||
the payload format. None of these characters appear in JSON structural
|
||||
syntax, so a global replace on the serialized output can only touch encoded
|
||||
string values. Explicit `\\uXXXX` escapes for U+2028 / U+2029 keep this
|
||||
source stable against editor / git tooling that normalizes those invisible
|
||||
separators.
|
||||
"""
|
||||
return (
|
||||
raw.replace("<", "\\u003c")
|
||||
.replace(">", "\\u003e")
|
||||
.replace("&", "\\u0026")
|
||||
.replace("\u2028", "\\u2028")
|
||||
.replace("\u2029", "\\u2029")
|
||||
)
|
||||
|
||||
|
||||
def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> str:
|
||||
"""Encode a cursor payload as a base64url (no-padding) string.
|
||||
|
||||
@ -78,29 +106,13 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") ->
|
||||
raise InvalidCursorError("value exceeds maximum length")
|
||||
payload = {"s": sort_field, "v": value, "id": id, "o": order}
|
||||
raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
|
||||
# Match the default JSON escaping of HTML-significant characters and JS
|
||||
# line/paragraph separators (U+2028 / U+2029) so an asset name carrying
|
||||
# any of them encodes to identical bytes across runtimes. None of these
|
||||
# characters appear in JSON structural syntax, so a global replace on the
|
||||
# serialized output can only touch encoded values. Use explicit \uXXXX
|
||||
# escapes for U+2028 / U+2029 so the source survives any editor / git
|
||||
# tooling that normalizes invisible separators.
|
||||
raw = (
|
||||
raw.replace("<", "\\u003c")
|
||||
.replace(">", "\\u003e")
|
||||
.replace("&", "\\u0026")
|
||||
.replace("\u2028", "\\u2028")
|
||||
.replace("\u2029", "\\u2029")
|
||||
)
|
||||
raw = _apply_wire_compatible_json_escapes(raw)
|
||||
encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
||||
# Final wire-size guard: the per-field caps above are char-counted, but the
|
||||
# wire cap applies to the base64url of the UTF-8-encoded, escape-expanded
|
||||
# payload. A value full of multibyte or HTML-significant characters (e.g.
|
||||
# 512 \u00d7 "\u00e9" or 512 \u00d7 "<") inflates well past MAX_ENCODED_CURSOR_LENGTH even
|
||||
# though it passes the char-count check. Refuse to mint a cursor the decoder
|
||||
# on the next request would reject.
|
||||
if len(encoded) > MAX_ENCODED_CURSOR_LENGTH:
|
||||
raise InvalidCursorError("encoded cursor exceeds maximum length")
|
||||
# No mint-time length guard is needed: the per-field caps above bound the
|
||||
# encoded length well below MAX_ENCODED_CURSOR_LENGTH (see its definition),
|
||||
# so the encoder can never produce a cursor the decode path would reject.
|
||||
# This keeps encoder/decoder symmetry without a user-visible failure when a
|
||||
# value happens to be multibyte- or escape-heavy.
|
||||
return encoded
|
||||
|
||||
|
||||
|
||||
@ -190,8 +190,10 @@ class TestDatetimeOverflow:
|
||||
|
||||
|
||||
class TestEncoderDecoderSymmetry:
|
||||
"""The encoder must reject inputs the decoder rejects, or the same server
|
||||
will mint a cursor it then 400s on the next request.
|
||||
"""The encoder must never mint a cursor the decoder would reject, or the
|
||||
same server would 400 on a cursor it just handed out. Per-field caps keep
|
||||
the encoded length below the wire cap, so a freshly minted cursor always
|
||||
round-trips.
|
||||
"""
|
||||
|
||||
def test_long_name_within_cap_round_trips(self):
|
||||
@ -215,20 +217,26 @@ class TestEncoderDecoderSymmetry:
|
||||
with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
|
||||
encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1")
|
||||
|
||||
def test_encoder_rejects_multibyte_value_over_wire_cap(self):
|
||||
"""A value that passes the char-count cap can still inflate past the
|
||||
wire cap once UTF-8-encoded. Asset name made of 512 × multibyte
|
||||
characters (e.g. 'é' = 2 bytes) must be rejected at encode time, not
|
||||
minted into a cursor the next request will 400."""
|
||||
with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
|
||||
encode_cursor("name", "é" * MAX_CURSOR_VALUE_LENGTH, "asset-multibyte")
|
||||
def test_multibyte_value_at_cap_round_trips(self):
|
||||
"""A value at the char-count cap made of multibyte characters
|
||||
(e.g. 'é' = 2 UTF-8 bytes) stays under the wire cap, so it mints and
|
||||
round-trips — the per-field caps, not a mint-time length check, are
|
||||
what bound cursor size."""
|
||||
value = "é" * MAX_CURSOR_VALUE_LENGTH
|
||||
encoded = encode_cursor("name", value, "asset-multibyte")
|
||||
assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.value == value
|
||||
|
||||
def test_encoder_rejects_escape_heavy_value_over_wire_cap(self):
|
||||
"""Same wire-cap concern via escape expansion: each `<` serializes to
|
||||
the six-byte sequence `\\u003c`, so 512 of them blow past the encoded
|
||||
cap even though the raw char count is within the per-field limit."""
|
||||
with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
|
||||
encode_cursor("name", "<" * MAX_CURSOR_VALUE_LENGTH, "asset-escape")
|
||||
def test_escape_heavy_value_at_cap_round_trips(self):
|
||||
"""Escape expansion is the worst case: each `<` serializes to the
|
||||
six-byte `\\u003c`. A value of 512 of them is the largest a cursor can
|
||||
get, and it still fits the wire cap, mints, and round-trips."""
|
||||
value = "<" * MAX_CURSOR_VALUE_LENGTH
|
||||
encoded = encode_cursor("name", value, "asset-escape")
|
||||
assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.value == value
|
||||
|
||||
|
||||
class TestOrderBinding:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user