mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-30 19:07:25 +08:00
fix(assets): address ultrareview findings on cursor pagination
Six fact-checked findings from the multi-model review pass:
- Encoder/decoder length asymmetry: encode_cursor now rejects empty id,
oversized id (>128), oversized value (>512), and invalid order tokens
symmetrically with decode_cursor. Prevents the same server from minting
a cursor it then 400s on the next request (e.g. a filesystem-scanned
asset name >512 chars). The bad-order path now raises InvalidCursorError
(still subclasses ValueError) so route-layer handling stays uniform.
- Raw U+2028/U+2029 in cursor.py source: ripgrep treated those lines as
line-terminators, confirming the bytes were the actual separators. Any
editor save / autoformat / git tooling that normalizes invisibles would
silently break the encoder. Replaced with explicit
/
Python escape sequences.
- set(seen) == set(names) hid ordering regressions: a cursor walk that
dropped a row at a page boundary or returned duplicates could pass.
Reworked the assertion to (1) reject duplicates, (2) require full
coverage, and (3) assert strict positional order for size sort, the
only field with a clock-independent ordering.
- Flaky time.sleep(0.05) between inserts: Windows CI clock resolution is
~15ms, so back-to-back inserts under load could collide and exercise
the tiebreaker instead of the documented path. Removed the sleep and
let the strengthened assertion above carry coverage / no-duplicates,
with size sort carrying strict order.
- Cursor error envelope diverged from the rest of routes.py: cursor 400s
emitted {error: {code, message}} while every other 400 in the file
emits {error: {code, message, details}} via _build_error_response.
Switched to _build_error_response and added the details field to the
AssetsApiError schema in openapi.yaml.
- "Byte-identity fixtures" only checked substring containment, defeating
the test class's stated purpose of pinning the wire format. Switched
to exact-bytes equality against an inline expected payload string per
fixture, so any whitespace / key-order / escape drift fails loudly.
Also dropped Go / json.Marshal references from docstrings — the byte
format is the contract, not the runtime that mints it.
This commit is contained in:
parent
cc62f2a9e8
commit
37764dc40c
@ -224,10 +224,7 @@ async def list_assets_route(request: web.Request) -> web.Response:
|
||||
after=q.after,
|
||||
)
|
||||
except InvalidCursorError as e:
|
||||
return web.json_response(
|
||||
{"error": {"code": "INVALID_CURSOR", "message": str(e)}},
|
||||
status=400,
|
||||
)
|
||||
return _build_error_response(400, "INVALID_CURSOR", str(e))
|
||||
|
||||
summaries = [_build_asset_response(item) for item in result.items]
|
||||
|
||||
|
||||
@ -11,14 +11,13 @@ so replaying a `desc` cursor against an `asc` request fails with
|
||||
malformed.
|
||||
|
||||
Encoding is base64url with no padding. JSON serialization escapes `<`,
|
||||
`>`, `&`, U+2028, and U+2029 to match Go's default `json.Marshal`
|
||||
behavior so asset names containing those characters produce
|
||||
byte-identical cursors across compatible Go implementations.
|
||||
`>`, `&`, U+2028, and U+2029 in encoded string values so asset names
|
||||
containing those characters produce a stable, byte-identical wire form
|
||||
across any compatible implementation of the same payload format.
|
||||
|
||||
Time values are serialized as Unix microseconds (UTC) — microsecond
|
||||
precision matches PostgreSQL's `timestamp` type, so a cursor minted from
|
||||
a stored timestamp compares back exactly without rounding rows in the
|
||||
same millisecond bucket.
|
||||
precision is sufficient to round-trip the timestamps stored by the
|
||||
database without rounding rows in the same millisecond bucket.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@ -67,20 +66,31 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") ->
|
||||
``INVALID_CURSOR`` rather than silently walking the wrong direction.
|
||||
"""
|
||||
if order not in _VALID_ORDERS:
|
||||
raise ValueError(f"order must be one of {_VALID_ORDERS}, got {order!r}")
|
||||
raise InvalidCursorError(f"order must be one of {_VALID_ORDERS}, got {order!r}")
|
||||
# Symmetric input validation: the encoder must reject anything the
|
||||
# decoder rejects, or the same server will mint cursors it then 400s on
|
||||
# the next request.
|
||||
if not id:
|
||||
raise InvalidCursorError("id must be non-empty")
|
||||
if len(id) > MAX_CURSOR_ID_LENGTH:
|
||||
raise InvalidCursorError("id exceeds maximum length")
|
||||
if len(value) > MAX_CURSOR_VALUE_LENGTH:
|
||||
raise InvalidCursorError("value exceeds maximum length")
|
||||
payload = {"s": sort_field, "v": value, "id": id, "o": order}
|
||||
raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
|
||||
# Go's default `json.Marshal` escapes these characters in string values; we
|
||||
# do the same so an asset name containing one of them produces byte-
|
||||
# identical cursors across runtimes. None of these characters appear in
|
||||
# JSON structural syntax, so a global replace on the serialized output is
|
||||
# safe — it can only touch characters from the encoded values.
|
||||
# Match the default JSON escaping of HTML-significant characters and JS
|
||||
# line/paragraph separators (U+2028 / U+2029) so an asset name carrying
|
||||
# any of them encodes to identical bytes across runtimes. None of these
|
||||
# characters appear in JSON structural syntax, so a global replace on the
|
||||
# serialized output can only touch encoded values. Use explicit \uXXXX
|
||||
# escapes for U+2028 / U+2029 so the source survives any editor / git
|
||||
# tooling that normalizes invisible separators.
|
||||
raw = (
|
||||
raw.replace("<", "\\u003c")
|
||||
.replace(">", "\\u003e")
|
||||
.replace("&", "\\u0026")
|
||||
.replace("
", "\\u2028")
|
||||
.replace("
", "\\u2029")
|
||||
.replace("\u2028", "\\u2028")
|
||||
.replace("\u2029", "\\u2029")
|
||||
)
|
||||
return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
||||
|
||||
|
||||
@ -6789,6 +6789,7 @@ components:
|
||||
required:
|
||||
- code
|
||||
- message
|
||||
- details
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
@ -6800,6 +6801,13 @@ components:
|
||||
enum: [INVALID_CURSOR, INVALID_QUERY]
|
||||
message:
|
||||
type: string
|
||||
details:
|
||||
type: object
|
||||
description: |
|
||||
Free-form, code-specific context. `INVALID_QUERY` populates this
|
||||
with an `errors` array of Pydantic validation entries;
|
||||
`INVALID_CURSOR` returns an empty object.
|
||||
additionalProperties: true
|
||||
|
||||
TagInfo:
|
||||
type: object
|
||||
|
||||
@ -1,9 +1,9 @@
|
||||
"""Tests for app.assets.services.cursor.
|
||||
|
||||
The byte-identity fixtures below pin the wire format so a parallel Go
|
||||
implementation can mint exchange-compatible cursors for the same triple
|
||||
(asset name, value, id). Drift here would break frontend pagination
|
||||
against any compatible backend.
|
||||
The byte-identity fixtures below pin the wire format so a parallel
|
||||
implementation in another runtime can mint exchange-compatible cursors
|
||||
for the same payload. Drift here would break frontend pagination against
|
||||
any compatible backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@ -115,17 +115,26 @@ class TestInvalidInputs:
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_empty_id(self):
|
||||
encoded = encode_cursor("created_at", "1", "")
|
||||
# Encoder rejects empty id symmetrically with the decoder, so build the
|
||||
# payload manually to exercise the decoder's missing-id branch.
|
||||
raw = b'{"s":"created_at","v":"1","id":"","o":"desc"}'
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="missing id"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_oversized_id(self):
|
||||
encoded = encode_cursor("created_at", "1", "a" * (MAX_CURSOR_ID_LENGTH + 1))
|
||||
# Encoder enforces the cap symmetrically; hand-build to exercise decode.
|
||||
big_id = "a" * (MAX_CURSOR_ID_LENGTH + 1)
|
||||
raw = ('{"s":"created_at","v":"1","id":"' + big_id + '","o":"desc"}').encode("ascii")
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="id exceeds maximum length"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_oversized_value(self):
|
||||
encoded = encode_cursor("created_at", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1")
|
||||
# Encoder enforces the cap symmetrically; hand-build to exercise decode.
|
||||
big_v = "v" * (MAX_CURSOR_VALUE_LENGTH + 1)
|
||||
raw = ('{"s":"created_at","v":"' + big_v + '","id":"id-1","o":"desc"}').encode("ascii")
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
@ -194,6 +203,18 @@ class TestEncoderDecoderSymmetry:
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.value == long_name
|
||||
|
||||
def test_encoder_rejects_empty_id(self):
|
||||
with pytest.raises(InvalidCursorError, match="id must be non-empty"):
|
||||
encode_cursor("created_at", "1", "")
|
||||
|
||||
def test_encoder_rejects_oversized_id(self):
|
||||
with pytest.raises(InvalidCursorError, match="id exceeds maximum length"):
|
||||
encode_cursor("created_at", "1", "a" * (MAX_CURSOR_ID_LENGTH + 1))
|
||||
|
||||
def test_encoder_rejects_oversized_value(self):
|
||||
with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
|
||||
encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1")
|
||||
|
||||
|
||||
class TestOrderBinding:
|
||||
def test_order_baked_into_payload(self):
|
||||
@ -232,11 +253,11 @@ class TestOrderBinding:
|
||||
decode_cursor(encoded, ALLOWED, expected_order="desc")
|
||||
|
||||
|
||||
class TestGoCompatJsonEscaping:
|
||||
class TestHtmlSignificantCharEscaping:
|
||||
"""An asset name containing `<`, `>`, `&`, U+2028, or U+2029 must encode
|
||||
to the same bytes Go's default `json.Marshal` would produce. The fixtures
|
||||
below are generated by Go's encoder; any drift here means cross-runtime
|
||||
byte-identity for HTML-significant characters is broken.
|
||||
to the same escaped wire bytes as any compatible implementation of the
|
||||
same payload format. Drift here breaks cross-runtime byte-identity for
|
||||
those characters.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -268,27 +289,51 @@ class TestGoCompatJsonEscaping:
|
||||
class TestByteIdentityFixtures:
|
||||
"""Pin the wire format so it doesn't drift silently.
|
||||
|
||||
These fixtures hold the exact base64url-encoded bytes a given payload
|
||||
produces. Any change to the encoding (key order, escaping, ordering of
|
||||
structural fields) will fail these tests loudly rather than diverge
|
||||
silently from external consumers.
|
||||
These fixtures assert exact byte equality of the encoded JSON payload —
|
||||
a change in key order, escape choice, separator whitespace, or anything
|
||||
else that shifts a byte fails the test loudly rather than diverging
|
||||
silently from any external consumer of the same payload format.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sort_field, value, id, order",
|
||||
"sort_field, value, id, order, expected_payload",
|
||||
[
|
||||
("created_at", "1716200000000000", "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7", "desc"),
|
||||
("size", "1024", "asset-123", "asc"),
|
||||
("name", "my-asset.png", "asset-abc", "desc"),
|
||||
("name", "foo<bar>&baz.png", "asset-html", "desc"), # exercises Go-compat escaping
|
||||
(
|
||||
"created_at",
|
||||
"1716200000000000",
|
||||
"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7",
|
||||
"desc",
|
||||
'{"s":"created_at","v":"1716200000000000","id":"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7","o":"desc"}',
|
||||
),
|
||||
(
|
||||
"size",
|
||||
"1024",
|
||||
"asset-123",
|
||||
"asc",
|
||||
'{"s":"size","v":"1024","id":"asset-123","o":"asc"}',
|
||||
),
|
||||
(
|
||||
"name",
|
||||
"my-asset.png",
|
||||
"asset-abc",
|
||||
"desc",
|
||||
'{"s":"name","v":"my-asset.png","id":"asset-abc","o":"desc"}',
|
||||
),
|
||||
(
|
||||
"name",
|
||||
"foo<bar>&baz.png",
|
||||
"asset-html",
|
||||
"desc",
|
||||
# `<`, `>`, `&` escape to <, >, & in the value.
|
||||
'{"s":"name","v":"foo\\u003cbar\\u003e\\u0026baz.png","id":"asset-html","o":"desc"}',
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_encoded_payload_shape_pinned(self, sort_field, value, id, order):
|
||||
def test_encoded_payload_shape_pinned(self, sort_field, value, id, order, expected_payload):
|
||||
encoded = encode_cursor(sort_field, value, id, order=order)
|
||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
||||
decoded_str = decoded_bytes.decode("ascii")
|
||||
# Keys appear in the documented order; binding `o` is present.
|
||||
for needle in (f'"s":"{sort_field}"', f'"id":"{id}"', f'"o":"{order}"'):
|
||||
assert needle in decoded_str, (
|
||||
f"Expected {needle!r} in payload {decoded_str!r}"
|
||||
)
|
||||
assert decoded_bytes.decode("utf-8") == expected_payload, (
|
||||
f"wire format drifted for sort={sort_field!r}, value={value!r}:\n"
|
||||
f" expected: {expected_payload!r}\n"
|
||||
f" actual: {decoded_bytes.decode('utf-8')!r}"
|
||||
)
|
||||
|
||||
@ -4,8 +4,6 @@ These tests exercise the handler/service/query path end-to-end;
|
||||
cursor-encoding-level tests live in
|
||||
tests-unit/assets_test/services/test_cursor.py.
|
||||
"""
|
||||
import time
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
@ -201,12 +199,16 @@ def test_cursor_walks_for_non_name_sorts(sort_field, http: requests.Session, api
|
||||
Without this, the `created_at` / `updated_at` (time-encoded micros) and
|
||||
`size` (int-encoded) cursor paths go entirely unexercised end-to-end.
|
||||
"""
|
||||
# Stagger create_time slightly so created_at / updated_at sort is well-defined.
|
||||
# Sizes increase strictly by index, so `size desc` has a deterministic
|
||||
# expected order. Time-based sorts (created_at / updated_at) can tie when
|
||||
# rows are inserted faster than the DB's timestamp resolution; for those
|
||||
# we check coverage and no-duplicates and let the keyset tiebreaker do
|
||||
# the rest, instead of sleeping between inserts and asserting an order
|
||||
# that depends on clock granularity.
|
||||
names = []
|
||||
for i in range(4):
|
||||
n = f"cursor_{sort_field}_{i:02d}.safetensors"
|
||||
asset_factory(n, ["models", "checkpoints", "unit-tests", f"cursor-{sort_field}"], {}, make_asset_bytes(n, size=2048 + i))
|
||||
time.sleep(0.05) # ensure distinct timestamps
|
||||
names.append(n)
|
||||
|
||||
params = {
|
||||
@ -232,7 +234,20 @@ def test_cursor_walks_for_non_name_sorts(sort_field, http: requests.Session, api
|
||||
break
|
||||
assert pages < 10, "guard against runaway cursor loop"
|
||||
|
||||
assert set(seen) == set(names), f"missing items for sort={sort_field}: expected {set(names)}, got {set(seen)}"
|
||||
# No duplicates: a faulty keyset boundary that returns the same row across
|
||||
# two pages must fail this check.
|
||||
assert len(seen) == len(set(seen)), (
|
||||
f"cursor walk repeated rows for sort={sort_field}: {seen}"
|
||||
)
|
||||
# Full coverage: every seeded asset reached exactly once.
|
||||
assert set(seen) == set(names), (
|
||||
f"missing items for sort={sort_field}: expected {set(names)}, got {set(seen)}"
|
||||
)
|
||||
# Strict order check for the only field with a clock-independent ordering.
|
||||
if sort_field == "size":
|
||||
assert seen == list(reversed(names)), (
|
||||
f"size cursor walked out of order: got {seen}, expected {list(reversed(names))}"
|
||||
)
|
||||
|
||||
|
||||
def test_cursor_order_mismatch_returns_400(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user