mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-14 20:09:24 +08:00
refactor(assets): drop cross-runtime cursor escaping; cursors are opaque
The custom JSON escaping of <, >, &, U+2028, and U+2029 existed only to keep the encoded cursor byte-identical with the Cloud implementation of the same payload format. Cursors are opaque tokens, so byte-level compatibility across implementations is not needed — plain json.dumps output is sufficient. Remove the escaping helper and the byte-identity test fixtures that pinned the wire format; keep round-trip coverage for the affected characters.
This commit is contained in:
parent
f7558232fa
commit
9341fc6894
@ -10,10 +10,10 @@ so replaying a `desc` cursor against an `asc` request fails with
|
||||
`o` is mandatory on every payload — a cursor without it is rejected as
|
||||
malformed.
|
||||
|
||||
Encoding is base64url with no padding. JSON serialization escapes `<`,
|
||||
`>`, `&`, U+2028, and U+2029 in encoded string values so asset names
|
||||
containing those characters produce a stable, byte-identical wire form
|
||||
across any compatible implementation of the same payload format.
|
||||
Encoding is base64url with no padding. Cursors are opaque tokens: the
|
||||
payload format is internal to this server, and clients must treat a
|
||||
cursor as a black box handed back via `next_cursor`. No byte-level
|
||||
compatibility with any other implementation is required.
|
||||
|
||||
Time values are serialized as Unix microseconds (UTC) — microsecond
|
||||
precision is sufficient to round-trip the timestamps stored by the
|
||||
@ -45,10 +45,11 @@ class InvalidCursorError(ValueError):
|
||||
#
|
||||
# MAX_ENCODED_CURSOR_LENGTH is the decode-path guard, sized comfortably above
|
||||
# the largest cursor the per-field caps can produce. Worst case is value + id
|
||||
# at their caps with every character escape-expanding to the six-byte `\uXXXX`
|
||||
# form, which is ~5.2 KB once base64url-encoded. At 8192 the encoder can never
|
||||
# mint a cursor that exceeds it, so a freshly minted cursor always decodes on
|
||||
# the next request and there is no user-visible "cursor too long" failure.
|
||||
# at their caps with every character JSON-escaping to the six-byte `\uXXXX`
|
||||
# form (control characters), which is ~5.2 KB once base64url-encoded. At 8192
|
||||
# the encoder can never mint a cursor that exceeds it, so a freshly minted
|
||||
# cursor always decodes on the next request and there is no user-visible
|
||||
# "cursor too long" failure.
|
||||
MAX_ENCODED_CURSOR_LENGTH = 8192
|
||||
MAX_CURSOR_VALUE_LENGTH = 512
|
||||
MAX_CURSOR_ID_LENGTH = 128
|
||||
@ -65,27 +66,6 @@ class CursorPayload:
|
||||
_VALID_ORDERS = ("asc", "desc")
|
||||
|
||||
|
||||
def _apply_wire_compatible_json_escapes(raw: str) -> str:
|
||||
"""Escape the characters the cursor wire format requires escaped.
|
||||
|
||||
The wire format escapes `<`, `>`, `&`, U+2028, and U+2029 — and nothing
|
||||
else, leaving other non-ASCII as literal UTF-8 — so a value carrying any of
|
||||
them encodes to identical bytes across every compatible implementation of
|
||||
the payload format. None of these characters appear in JSON structural
|
||||
syntax, so a global replace on the serialized output can only touch encoded
|
||||
string values. Explicit `\\uXXXX` escapes for U+2028 / U+2029 keep this
|
||||
source stable against editor / git tooling that normalizes those invisible
|
||||
separators.
|
||||
"""
|
||||
return (
|
||||
raw.replace("<", "\\u003c")
|
||||
.replace(">", "\\u003e")
|
||||
.replace("&", "\\u0026")
|
||||
.replace("\u2028", "\\u2028")
|
||||
.replace("\u2029", "\\u2029")
|
||||
)
|
||||
|
||||
|
||||
def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> str:
|
||||
"""Encode a cursor payload as a base64url (no-padding) string.
|
||||
|
||||
@ -106,14 +86,10 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") ->
|
||||
raise InvalidCursorError("value exceeds maximum length")
|
||||
payload = {"s": sort_field, "v": value, "id": id, "o": order}
|
||||
raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
|
||||
raw = _apply_wire_compatible_json_escapes(raw)
|
||||
encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
||||
# No mint-time length guard is needed: the per-field caps above bound the
|
||||
# encoded length well below MAX_ENCODED_CURSOR_LENGTH (see its definition),
|
||||
# so the encoder can never produce a cursor the decode path would reject.
|
||||
# This keeps encoder/decoder symmetry without a user-visible failure when a
|
||||
# value happens to be multibyte- or escape-heavy.
|
||||
return encoded
|
||||
return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
||||
|
||||
|
||||
def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
"""Tests for app.assets.services.cursor.
|
||||
|
||||
The byte-identity fixtures below pin the wire format so a parallel
|
||||
implementation in another runtime can mint exchange-compatible cursors
|
||||
for the same payload. Drift here would break frontend pagination against
|
||||
any compatible backend.
|
||||
Cursors are opaque tokens internal to this server — these tests cover
|
||||
round-tripping, validation, and length caps, not any particular wire
|
||||
byte layout.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@ -37,6 +36,8 @@ class TestRoundTrip:
|
||||
("size", "1024", "asset-123"),
|
||||
("name", "my-asset.png", "asset-abc"),
|
||||
("name", "résumé.txt", "asset-uni"),
|
||||
("name", "foo<&>bar.png", "asset-html"),
|
||||
("name", 'quo"te\\back\nnewline.png', "asset-esc"),
|
||||
],
|
||||
)
|
||||
def test_encode_decode(self, sort_field, value, id):
|
||||
@ -229,10 +230,11 @@ class TestEncoderDecoderSymmetry:
|
||||
assert payload.value == value
|
||||
|
||||
def test_escape_heavy_value_at_cap_round_trips(self):
|
||||
"""Escape expansion is the worst case: each `<` serializes to the
|
||||
six-byte `\\u003c`. A value of 512 of them is the largest a cursor can
|
||||
get, and it still fits the wire cap, mints, and round-trips."""
|
||||
value = "<" * MAX_CURSOR_VALUE_LENGTH
|
||||
"""JSON escape expansion is the worst case: each control character
|
||||
serializes to the six-byte `\\uXXXX` form. A value of 512 of them is
|
||||
the largest a cursor can get, and it still fits the wire cap, mints,
|
||||
and round-trips."""
|
||||
value = "\x01" * MAX_CURSOR_VALUE_LENGTH
|
||||
encoded = encode_cursor("name", value, "asset-escape")
|
||||
assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
@ -274,89 +276,3 @@ class TestOrderBinding:
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="missing or non-string o"):
|
||||
decode_cursor(encoded, ALLOWED, expected_order="desc")
|
||||
|
||||
|
||||
class TestHtmlSignificantCharEscaping:
|
||||
"""An asset name containing `<`, `>`, `&`, U+2028, or U+2029 must encode
|
||||
to the same escaped wire bytes as any compatible implementation of the
|
||||
same payload format. Drift here breaks cross-runtime byte-identity for
|
||||
those characters.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value, escaped_substring",
|
||||
[
|
||||
("foo<bar>.png", "\\u003c"), # `<` escaped
|
||||
("foo<bar>.png", "\\u003e"), # `>` escaped
|
||||
("foo&bar.png", "\\u0026"),
|
||||
("foo\u2028bar.png", "\\u2028"), # JS line separator
|
||||
("foo\u2029bar.png", "\\u2029"), # JS paragraph separator
|
||||
],
|
||||
)
|
||||
def test_html_significant_chars_escaped(self, value, escaped_substring):
|
||||
encoded = encode_cursor("name", value, "id-1")
|
||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
||||
assert escaped_substring in decoded_bytes.decode("ascii"), (
|
||||
f"Expected {escaped_substring!r} in serialized payload, got: {decoded_bytes!r}"
|
||||
)
|
||||
|
||||
def test_value_round_trips_through_escape(self):
|
||||
"""Encoding then decoding a value with `<>&` should yield the original
|
||||
string — the escape only affects the wire form, not the decoded value."""
|
||||
original = "foo<&>bar.png"
|
||||
encoded = encode_cursor("name", original, "id-1")
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.value == original
|
||||
|
||||
|
||||
class TestByteIdentityFixtures:
|
||||
"""Pin the wire format so it doesn't drift silently.
|
||||
|
||||
These fixtures assert exact byte equality of the encoded JSON payload —
|
||||
a change in key order, escape choice, separator whitespace, or anything
|
||||
else that shifts a byte fails the test loudly rather than diverging
|
||||
silently from any external consumer of the same payload format.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sort_field, value, id, order, expected_payload",
|
||||
[
|
||||
(
|
||||
"created_at",
|
||||
"1716200000000000",
|
||||
"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7",
|
||||
"desc",
|
||||
'{"s":"created_at","v":"1716200000000000","id":"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7","o":"desc"}',
|
||||
),
|
||||
(
|
||||
"size",
|
||||
"1024",
|
||||
"asset-123",
|
||||
"asc",
|
||||
'{"s":"size","v":"1024","id":"asset-123","o":"asc"}',
|
||||
),
|
||||
(
|
||||
"name",
|
||||
"my-asset.png",
|
||||
"asset-abc",
|
||||
"desc",
|
||||
'{"s":"name","v":"my-asset.png","id":"asset-abc","o":"desc"}',
|
||||
),
|
||||
(
|
||||
"name",
|
||||
"foo<bar>&baz.png",
|
||||
"asset-html",
|
||||
"desc",
|
||||
# `<`, `>`, `&` escape to \u003c, \u003e, \u0026 in the value.
|
||||
'{"s":"name","v":"foo\\u003cbar\\u003e\\u0026baz.png","id":"asset-html","o":"desc"}',
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_encoded_payload_shape_pinned(self, sort_field, value, id, order, expected_payload):
|
||||
encoded = encode_cursor(sort_field, value, id, order=order)
|
||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
||||
assert decoded_bytes.decode("utf-8") == expected_payload, (
|
||||
f"wire format drifted for sort={sort_field!r}, value={value!r}:\n"
|
||||
f" expected: {expected_payload!r}\n"
|
||||
f" actual: {decoded_bytes.decode('utf-8')!r}"
|
||||
)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user