mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-15 04:19:43 +08:00
refactor(assets): drop cross-runtime cursor escaping; cursors are opaque
The custom JSON escaping of <, >, &, U+2028, and U+2029 existed only to keep the encoded cursor byte-identical with the Cloud implementation of the same payload format. Cursors are opaque tokens, so byte-level compatibility across implementations is not needed — plain json.dumps output is sufficient. Remove the escaping helper and the byte-identity test fixtures that pinned the wire format; keep round-trip coverage for the affected characters.
This commit is contained in:
parent
f7558232fa
commit
9341fc6894
@ -10,10 +10,10 @@ so replaying a `desc` cursor against an `asc` request fails with
|
|||||||
`o` is mandatory on every payload — a cursor without it is rejected as
|
`o` is mandatory on every payload — a cursor without it is rejected as
|
||||||
malformed.
|
malformed.
|
||||||
|
|
||||||
Encoding is base64url with no padding. JSON serialization escapes `<`,
|
Encoding is base64url with no padding. Cursors are opaque tokens: the
|
||||||
`>`, `&`, U+2028, and U+2029 in encoded string values so asset names
|
payload format is internal to this server, and clients must treat a
|
||||||
containing those characters produce a stable, byte-identical wire form
|
cursor as a black box handed back via `next_cursor`. No byte-level
|
||||||
across any compatible implementation of the same payload format.
|
compatibility with any other implementation is required.
|
||||||
|
|
||||||
Time values are serialized as Unix microseconds (UTC) — microsecond
|
Time values are serialized as Unix microseconds (UTC) — microsecond
|
||||||
precision is sufficient to round-trip the timestamps stored by the
|
precision is sufficient to round-trip the timestamps stored by the
|
||||||
@ -45,10 +45,11 @@ class InvalidCursorError(ValueError):
|
|||||||
#
|
#
|
||||||
# MAX_ENCODED_CURSOR_LENGTH is the decode-path guard, sized comfortably above
|
# MAX_ENCODED_CURSOR_LENGTH is the decode-path guard, sized comfortably above
|
||||||
# the largest cursor the per-field caps can produce. Worst case is value + id
|
# the largest cursor the per-field caps can produce. Worst case is value + id
|
||||||
# at their caps with every character escape-expanding to the six-byte `\uXXXX`
|
# at their caps with every character JSON-escaping to the six-byte `\uXXXX`
|
||||||
# form, which is ~5.2 KB once base64url-encoded. At 8192 the encoder can never
|
# form (control characters), which is ~5.2 KB once base64url-encoded. At 8192
|
||||||
# mint a cursor that exceeds it, so a freshly minted cursor always decodes on
|
# the encoder can never mint a cursor that exceeds it, so a freshly minted
|
||||||
# the next request and there is no user-visible "cursor too long" failure.
|
# cursor always decodes on the next request and there is no user-visible
|
||||||
|
# "cursor too long" failure.
|
||||||
MAX_ENCODED_CURSOR_LENGTH = 8192
|
MAX_ENCODED_CURSOR_LENGTH = 8192
|
||||||
MAX_CURSOR_VALUE_LENGTH = 512
|
MAX_CURSOR_VALUE_LENGTH = 512
|
||||||
MAX_CURSOR_ID_LENGTH = 128
|
MAX_CURSOR_ID_LENGTH = 128
|
||||||
@ -65,27 +66,6 @@ class CursorPayload:
|
|||||||
_VALID_ORDERS = ("asc", "desc")
|
_VALID_ORDERS = ("asc", "desc")
|
||||||
|
|
||||||
|
|
||||||
def _apply_wire_compatible_json_escapes(raw: str) -> str:
|
|
||||||
"""Escape the characters the cursor wire format requires escaped.
|
|
||||||
|
|
||||||
The wire format escapes `<`, `>`, `&`, U+2028, and U+2029 — and nothing
|
|
||||||
else, leaving other non-ASCII as literal UTF-8 — so a value carrying any of
|
|
||||||
them encodes to identical bytes across every compatible implementation of
|
|
||||||
the payload format. None of these characters appear in JSON structural
|
|
||||||
syntax, so a global replace on the serialized output can only touch encoded
|
|
||||||
string values. Explicit `\\uXXXX` escapes for U+2028 / U+2029 keep this
|
|
||||||
source stable against editor / git tooling that normalizes those invisible
|
|
||||||
separators.
|
|
||||||
"""
|
|
||||||
return (
|
|
||||||
raw.replace("<", "\\u003c")
|
|
||||||
.replace(">", "\\u003e")
|
|
||||||
.replace("&", "\\u0026")
|
|
||||||
.replace("\u2028", "\\u2028")
|
|
||||||
.replace("\u2029", "\\u2029")
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> str:
|
def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> str:
|
||||||
"""Encode a cursor payload as a base64url (no-padding) string.
|
"""Encode a cursor payload as a base64url (no-padding) string.
|
||||||
|
|
||||||
@ -106,14 +86,10 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") ->
|
|||||||
raise InvalidCursorError("value exceeds maximum length")
|
raise InvalidCursorError("value exceeds maximum length")
|
||||||
payload = {"s": sort_field, "v": value, "id": id, "o": order}
|
payload = {"s": sort_field, "v": value, "id": id, "o": order}
|
||||||
raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
|
raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
|
||||||
raw = _apply_wire_compatible_json_escapes(raw)
|
|
||||||
encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
|
||||||
# No mint-time length guard is needed: the per-field caps above bound the
|
# No mint-time length guard is needed: the per-field caps above bound the
|
||||||
# encoded length well below MAX_ENCODED_CURSOR_LENGTH (see its definition),
|
# encoded length well below MAX_ENCODED_CURSOR_LENGTH (see its definition),
|
||||||
# so the encoder can never produce a cursor the decode path would reject.
|
# so the encoder can never produce a cursor the decode path would reject.
|
||||||
# This keeps encoder/decoder symmetry without a user-visible failure when a
|
return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
||||||
# value happens to be multibyte- or escape-heavy.
|
|
||||||
return encoded
|
|
||||||
|
|
||||||
|
|
||||||
def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:
|
def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:
|
||||||
|
|||||||
@ -1,9 +1,8 @@
|
|||||||
"""Tests for app.assets.services.cursor.
|
"""Tests for app.assets.services.cursor.
|
||||||
|
|
||||||
The byte-identity fixtures below pin the wire format so a parallel
|
Cursors are opaque tokens internal to this server — these tests cover
|
||||||
implementation in another runtime can mint exchange-compatible cursors
|
round-tripping, validation, and length caps, not any particular wire
|
||||||
for the same payload. Drift here would break frontend pagination against
|
byte layout.
|
||||||
any compatible backend.
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@ -37,6 +36,8 @@ class TestRoundTrip:
|
|||||||
("size", "1024", "asset-123"),
|
("size", "1024", "asset-123"),
|
||||||
("name", "my-asset.png", "asset-abc"),
|
("name", "my-asset.png", "asset-abc"),
|
||||||
("name", "résumé.txt", "asset-uni"),
|
("name", "résumé.txt", "asset-uni"),
|
||||||
|
("name", "foo<&>bar.png", "asset-html"),
|
||||||
|
("name", 'quo"te\\back\nnewline.png', "asset-esc"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_encode_decode(self, sort_field, value, id):
|
def test_encode_decode(self, sort_field, value, id):
|
||||||
@ -229,10 +230,11 @@ class TestEncoderDecoderSymmetry:
|
|||||||
assert payload.value == value
|
assert payload.value == value
|
||||||
|
|
||||||
def test_escape_heavy_value_at_cap_round_trips(self):
|
def test_escape_heavy_value_at_cap_round_trips(self):
|
||||||
"""Escape expansion is the worst case: each `<` serializes to the
|
"""JSON escape expansion is the worst case: each control character
|
||||||
six-byte `\\u003c`. A value of 512 of them is the largest a cursor can
|
serializes to the six-byte `\\uXXXX` form. A value of 512 of them is
|
||||||
get, and it still fits the wire cap, mints, and round-trips."""
|
the largest a cursor can get, and it still fits the wire cap, mints,
|
||||||
value = "<" * MAX_CURSOR_VALUE_LENGTH
|
and round-trips."""
|
||||||
|
value = "\x01" * MAX_CURSOR_VALUE_LENGTH
|
||||||
encoded = encode_cursor("name", value, "asset-escape")
|
encoded = encode_cursor("name", value, "asset-escape")
|
||||||
assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH
|
assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH
|
||||||
payload = decode_cursor(encoded, ALLOWED)
|
payload = decode_cursor(encoded, ALLOWED)
|
||||||
@ -274,89 +276,3 @@ class TestOrderBinding:
|
|||||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||||
with pytest.raises(InvalidCursorError, match="missing or non-string o"):
|
with pytest.raises(InvalidCursorError, match="missing or non-string o"):
|
||||||
decode_cursor(encoded, ALLOWED, expected_order="desc")
|
decode_cursor(encoded, ALLOWED, expected_order="desc")
|
||||||
|
|
||||||
|
|
||||||
class TestHtmlSignificantCharEscaping:
|
|
||||||
"""An asset name containing `<`, `>`, `&`, U+2028, or U+2029 must encode
|
|
||||||
to the same escaped wire bytes as any compatible implementation of the
|
|
||||||
same payload format. Drift here breaks cross-runtime byte-identity for
|
|
||||||
those characters.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"value, escaped_substring",
|
|
||||||
[
|
|
||||||
("foo<bar>.png", "\\u003c"), # `<` escaped
|
|
||||||
("foo<bar>.png", "\\u003e"), # `>` escaped
|
|
||||||
("foo&bar.png", "\\u0026"),
|
|
||||||
("foo\u2028bar.png", "\\u2028"), # JS line separator
|
|
||||||
("foo\u2029bar.png", "\\u2029"), # JS paragraph separator
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_html_significant_chars_escaped(self, value, escaped_substring):
|
|
||||||
encoded = encode_cursor("name", value, "id-1")
|
|
||||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
|
||||||
assert escaped_substring in decoded_bytes.decode("ascii"), (
|
|
||||||
f"Expected {escaped_substring!r} in serialized payload, got: {decoded_bytes!r}"
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_value_round_trips_through_escape(self):
|
|
||||||
"""Encoding then decoding a value with `<>&` should yield the original
|
|
||||||
string — the escape only affects the wire form, not the decoded value."""
|
|
||||||
original = "foo<&>bar.png"
|
|
||||||
encoded = encode_cursor("name", original, "id-1")
|
|
||||||
payload = decode_cursor(encoded, ALLOWED)
|
|
||||||
assert payload.value == original
|
|
||||||
|
|
||||||
|
|
||||||
class TestByteIdentityFixtures:
|
|
||||||
"""Pin the wire format so it doesn't drift silently.
|
|
||||||
|
|
||||||
These fixtures assert exact byte equality of the encoded JSON payload —
|
|
||||||
a change in key order, escape choice, separator whitespace, or anything
|
|
||||||
else that shifts a byte fails the test loudly rather than diverging
|
|
||||||
silently from any external consumer of the same payload format.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"sort_field, value, id, order, expected_payload",
|
|
||||||
[
|
|
||||||
(
|
|
||||||
"created_at",
|
|
||||||
"1716200000000000",
|
|
||||||
"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7",
|
|
||||||
"desc",
|
|
||||||
'{"s":"created_at","v":"1716200000000000","id":"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7","o":"desc"}',
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"size",
|
|
||||||
"1024",
|
|
||||||
"asset-123",
|
|
||||||
"asc",
|
|
||||||
'{"s":"size","v":"1024","id":"asset-123","o":"asc"}',
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"name",
|
|
||||||
"my-asset.png",
|
|
||||||
"asset-abc",
|
|
||||||
"desc",
|
|
||||||
'{"s":"name","v":"my-asset.png","id":"asset-abc","o":"desc"}',
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"name",
|
|
||||||
"foo<bar>&baz.png",
|
|
||||||
"asset-html",
|
|
||||||
"desc",
|
|
||||||
# `<`, `>`, `&` escape to \u003c, \u003e, \u0026 in the value.
|
|
||||||
'{"s":"name","v":"foo\\u003cbar\\u003e\\u0026baz.png","id":"asset-html","o":"desc"}',
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_encoded_payload_shape_pinned(self, sort_field, value, id, order, expected_payload):
|
|
||||||
encoded = encode_cursor(sort_field, value, id, order=order)
|
|
||||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
|
||||||
assert decoded_bytes.decode("utf-8") == expected_payload, (
|
|
||||||
f"wire format drifted for sort={sort_field!r}, value={value!r}:\n"
|
|
||||||
f" expected: {expected_payload!r}\n"
|
|
||||||
f" actual: {decoded_bytes.decode('utf-8')!r}"
|
|
||||||
)
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user