diff --git a/app/assets/services/cursor.py b/app/assets/services/cursor.py index 1bd8af15a..41ad99573 100644 --- a/app/assets/services/cursor.py +++ b/app/assets/services/cursor.py @@ -92,7 +92,16 @@ def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> .replace("\u2028", "\\u2028") .replace("\u2029", "\\u2029") ) - return base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii") + encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii") + # Final wire-size guard: the per-field caps above are char-counted, but the + # wire cap applies to the base64url of the UTF-8-encoded, escape-expanded + # payload. A value full of multibyte or HTML-significant characters (e.g. + # 512 × "é" or 512 × "<") inflates well past MAX_ENCODED_CURSOR_LENGTH even + # though it passes the char-count check. Refuse to mint a cursor the decoder + # on the next request would reject. + if len(encoded) > MAX_ENCODED_CURSOR_LENGTH: + raise InvalidCursorError("encoded cursor exceeds maximum length") + return encoded def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str: diff --git a/tests-unit/assets_test/services/test_cursor.py b/tests-unit/assets_test/services/test_cursor.py index f015b9708..db114ad47 100644 --- a/tests-unit/assets_test/services/test_cursor.py +++ b/tests-unit/assets_test/services/test_cursor.py @@ -215,6 +215,21 @@ class TestEncoderDecoderSymmetry: with pytest.raises(InvalidCursorError, match="value exceeds maximum length"): encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1") + def test_encoder_rejects_multibyte_value_over_wire_cap(self): + """A value that passes the char-count cap can still inflate past the + wire cap once UTF-8-encoded. Asset name made of 512 × multibyte + characters (e.g. 
'é' = 2 bytes) must be rejected at encode time, not + minted into a cursor the next request will 400.""" + with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"): + encode_cursor("name", "é" * MAX_CURSOR_VALUE_LENGTH, "asset-multibyte") + + def test_encoder_rejects_escape_heavy_value_over_wire_cap(self): + """Same wire-cap concern via escape expansion: each `<` serializes to + the six-byte sequence `\\u003c`, so 512 of them blow past the encoded + cap even though the raw char count is within the per-field limit.""" + with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"): + encode_cursor("name", "<" * MAX_CURSOR_VALUE_LENGTH, "asset-escape") + class TestOrderBinding: def test_order_baked_into_payload(self):