fix(assets): cap cursors by encoded wire size, not just char count
Some checks failed
Python Linting / Run Ruff (push) Has been cancelled
Python Linting / Run Pylint (push) Has been cancelled
Build package / Build Test (3.10) (push) Has been cancelled
Build package / Build Test (3.11) (push) Has been cancelled
Build package / Build Test (3.12) (push) Has been cancelled
Build package / Build Test (3.13) (push) Has been cancelled
Build package / Build Test (3.14) (push) Has been cancelled

Char-count guards on value/id can still let multibyte or escape-heavy
inputs blow past MAX_ENCODED_CURSOR_LENGTH once UTF-8 + escape expansion
+ base64url runs. A 512-character name of 'é' (2 bytes UTF-8) or '<'
(serializes to the 6-byte '<' escape) passes the char check, mints
a ~1500-byte cursor, then 400s when handed back on the next request.

Compute the final encoded form and reject it before returning if it
exceeds the wire cap. Adds regression tests for both inflation paths.
This commit is contained in:
Matt Miller 2026-05-21 14:41:17 -07:00
parent 37764dc40c
commit 2015bbb54a
2 changed files with 25 additions and 1 deletions

View File

@ -92,7 +92,16 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") ->
.replace("\u2028", "\\u2028") .replace("\u2028", "\\u2028")
.replace("\u2029", "\\u2029") .replace("\u2029", "\\u2029")
) )
return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii") encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
# Final wire-size guard: the per-field caps above are char-counted, but the
# wire cap applies to the base64url of the UTF-8-encoded, escape-expanded
# payload. A value full of multibyte or HTML-significant characters (e.g.
# 512 \u00d7 "\u00e9" or 512 \u00d7 "<") inflates well past MAX_ENCODED_CURSOR_LENGTH even
# though it passes the char-count check. Refuse to mint a cursor the decoder
# on the next request would reject.
if len(encoded) > MAX_ENCODED_CURSOR_LENGTH:
raise InvalidCursorError("encoded cursor exceeds maximum length")
return encoded
def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str: def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:

View File

@ -215,6 +215,21 @@ class TestEncoderDecoderSymmetry:
with pytest.raises(InvalidCursorError, match="value exceeds maximum length"): with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1") encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1")
def test_encoder_rejects_multibyte_value_over_wire_cap(self):
"""A value that passes the char-count cap can still inflate past the
wire cap once UTF-8-encoded. Asset name made of 512 × multibyte
characters (e.g. 'é' = 2 bytes) must be rejected at encode time, not
minted into a cursor the next request will 400."""
with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
encode_cursor("name", "é" * MAX_CURSOR_VALUE_LENGTH, "asset-multibyte")
def test_encoder_rejects_escape_heavy_value_over_wire_cap(self):
"""Same wire-cap concern via escape expansion: each `<` serializes to
the six-byte sequence `\\u003c`, so 512 of them blow past the encoded
cap even though the raw char count is within the per-field limit."""
with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
encode_cursor("name", "<" * MAX_CURSOR_VALUE_LENGTH, "asset-escape")
class TestOrderBinding: class TestOrderBinding:
def test_order_baked_into_payload(self): def test_order_baked_into_payload(self):