From 2015bbb54a118bccdfb1c94d77072eea49ce58a3 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Thu, 21 May 2026 14:41:17 -0700 Subject: [PATCH] fix(assets): cap cursors by encoded wire size, not just char count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Char-count guards on value/id can still let multibyte or escape-heavy inputs blow past MAX_ENCODED_CURSOR_LENGTH once UTF-8 + escape expansion + base64url runs. A 512-character name of 'é' (2 bytes UTF-8) or '<' (serializes to the 6-byte '\u003c' escape) passes the char check, mints a ~1500-byte cursor, then 400s when handed back on the next request. Compute the final encoded form and reject it before returning if it exceeds the wire cap. Adds regression tests for both inflation paths. --- app/assets/services/cursor.py | 11 ++++++++++- tests-unit/assets_test/services/test_cursor.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/app/assets/services/cursor.py b/app/assets/services/cursor.py index 1bd8af15a..41ad99573 100644 --- a/app/assets/services/cursor.py +++ b/app/assets/services/cursor.py @@ -92,7 +92,16 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> .replace("\u2028", "\\u2028") .replace("\u2029", "\\u2029") ) - return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii") + encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii") + # Final wire-size guard: the per-field caps above are char-counted, but the + # wire cap applies to the base64url of the UTF-8-encoded, escape-expanded + # payload. A value full of multibyte or HTML-significant characters (e.g. + # 512 \u00d7 "\u00e9" or 512 \u00d7 "<") inflates well past MAX_ENCODED_CURSOR_LENGTH even + # though it passes the char-count check. Refuse to mint a cursor the decoder + # on the next request would reject. 
+ if len(encoded) > MAX_ENCODED_CURSOR_LENGTH: + raise InvalidCursorError("encoded cursor exceeds maximum length") + return encoded def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str: diff --git a/tests-unit/assets_test/services/test_cursor.py b/tests-unit/assets_test/services/test_cursor.py index f015b9708..db114ad47 100644 --- a/tests-unit/assets_test/services/test_cursor.py +++ b/tests-unit/assets_test/services/test_cursor.py @@ -215,6 +215,21 @@ class TestEncoderDecoderSymmetry: with pytest.raises(InvalidCursorError, match="value exceeds maximum length"): encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1") + def test_encoder_rejects_multibyte_value_over_wire_cap(self): + """A value that passes the char-count cap can still inflate past the + wire cap once UTF-8-encoded. Asset name made of 512 × multibyte + characters (e.g. 'é' = 2 bytes) must be rejected at encode time, not + minted into a cursor the next request will 400.""" + with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"): + encode_cursor("name", "é" * MAX_CURSOR_VALUE_LENGTH, "asset-multibyte") + + def test_encoder_rejects_escape_heavy_value_over_wire_cap(self): + """Same wire-cap concern via escape expansion: each `<` serializes to + the six-byte sequence `\\u003c`, so 512 of them blow past the encoded + cap even though the raw char count is within the per-field limit.""" + with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"): + encode_cursor("name", "<" * MAX_CURSOR_VALUE_LENGTH, "asset-escape") + class TestOrderBinding: def test_order_baked_into_payload(self):