"""Tests for app.assets.services.cursor. The byte-identity fixtures below pin the wire format so a parallel implementation in another runtime can mint exchange-compatible cursors for the same payload. Drift here would break frontend pagination against any compatible backend. """ from __future__ import annotations import base64 from datetime import datetime, timedelta, timezone import pytest from app.assets.services.cursor import ( MAX_CURSOR_ID_LENGTH, MAX_CURSOR_VALUE_LENGTH, MAX_ENCODED_CURSOR_LENGTH, CursorPayload, InvalidCursorError, decode_cursor, decode_cursor_int, decode_cursor_time, encode_cursor, encode_cursor_from_time, ) ALLOWED = ("created_at", "updated_at", "name", "size") class TestRoundTrip: @pytest.mark.parametrize( "sort_field, value, id", [ ("created_at", "1716200000000000", "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7"), ("size", "1024", "asset-123"), ("name", "my-asset.png", "asset-abc"), ("name", "résumé.txt", "asset-uni"), ], ) def test_encode_decode(self, sort_field, value, id): encoded = encode_cursor(sort_field, value, id) assert encoded != "" payload = decode_cursor(encoded, ALLOWED) assert payload.sort_field == sort_field assert payload.value == value assert payload.id == id class TestTimeCursor: def test_microsecond_precision_preserved(self): # Pick a time with non-zero microseconds — encoding at ms would lose the µs. ts = datetime(2024, 5, 20, 12, 53, 20, 123456, tzinfo=timezone.utc) encoded = encode_cursor_from_time("created_at", ts, "id-1") payload = decode_cursor(encoded, ALLOWED) # Value must be a microsecond integer string, not a millisecond one. assert payload.value == "1716209600123456" decoded = decode_cursor_time(payload) assert decoded == ts def test_decode_returns_utc(self): payload = CursorPayload(sort_field="created_at", value="1716200000123456", id="id-1", order="desc") decoded = decode_cursor_time(payload) assert decoded.tzinfo == timezone.utc def test_naive_datetime_rejected_on_encode(self): naive = datetime(2024, 5, 20, 12, 0, 0) with pytest.raises(ValueError): encode_cursor_from_time("created_at", naive, "id-1") def test_non_integer_value_rejected_on_decode(self): with pytest.raises(InvalidCursorError): decode_cursor_time(CursorPayload("created_at", "not-a-number", "id-1", "desc")) def test_none_payload_rejected(self): with pytest.raises(InvalidCursorError): decode_cursor_time(None) def test_non_utc_aware_normalized(self): # Same instant, different timezone — must encode to the same micros. utc_ts = datetime(2024, 5, 20, 12, 0, 0, tzinfo=timezone.utc) offset_ts = utc_ts.astimezone(timezone(timedelta(hours=-5))) assert encode_cursor_from_time("created_at", utc_ts, "x") == encode_cursor_from_time( "created_at", offset_ts, "x" ) class TestIntCursor: def test_decode_int(self): assert decode_cursor_int(CursorPayload("size", "1024", "id-1", "desc")) == 1024 def test_decode_int_rejects_non_int(self): with pytest.raises(InvalidCursorError): decode_cursor_int(CursorPayload("size", "abc", "id-1", "desc")) def test_decode_int_rejects_none(self): with pytest.raises(InvalidCursorError): decode_cursor_int(None) class TestInvalidInputs: def test_oversized_cursor(self): oversized = "a" * (MAX_ENCODED_CURSOR_LENGTH + 1) with pytest.raises(InvalidCursorError, match="maximum length"): decode_cursor(oversized, ALLOWED) def test_not_base64(self): with pytest.raises(InvalidCursorError): decode_cursor("not base64!!!", ALLOWED) def test_not_json(self): encoded = base64.urlsafe_b64encode(b"definitely not json").rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError): decode_cursor(encoded, ALLOWED) def test_empty_id(self): # Encoder rejects empty id symmetrically with the decoder, so build the # payload manually to exercise the decoder's missing-id branch. raw = b'{"s":"created_at","v":"1","id":"","o":"desc"}' encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError, match="missing id"): decode_cursor(encoded, ALLOWED) def test_oversized_id(self): # Encoder enforces the cap symmetrically; hand-build to exercise decode. big_id = "a" * (MAX_CURSOR_ID_LENGTH + 1) raw = ('{"s":"created_at","v":"1","id":"' + big_id + '","o":"desc"}').encode("ascii") encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError, match="id exceeds maximum length"): decode_cursor(encoded, ALLOWED) def test_oversized_value(self): # Encoder enforces the cap symmetrically; hand-build to exercise decode. big_v = "v" * (MAX_CURSOR_VALUE_LENGTH + 1) raw = ('{"s":"created_at","v":"' + big_v + '","id":"id-1","o":"desc"}').encode("ascii") encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError, match="value exceeds maximum length"): decode_cursor(encoded, ALLOWED) def test_unsupported_sort_field(self): encoded = encode_cursor("execution_time", "1", "id-1") with pytest.raises(InvalidCursorError, match="unsupported sort field"): decode_cursor(encoded, ALLOWED) def test_no_allowed_fields_rejects_everything(self): encoded = encode_cursor("created_at", "1", "id-1") with pytest.raises(InvalidCursorError): decode_cursor(encoded, ()) def test_non_dict_payload_rejected(self): encoded = base64.urlsafe_b64encode(b'["array","not","dict"]').rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError, match="expected object"): decode_cursor(encoded, ALLOWED) class TestEncodeAtCapsFits: def test_max_field_lengths_fit_wire_cap(self): # Worst-case payload: value and id at their per-field caps, with a long # sort field name. The encoded cursor must fit within MAX_ENCODED_CURSOR_LENGTH # so the wire cap cannot reject a cursor the encoder mints at the per-field caps. value = "v" * MAX_CURSOR_VALUE_LENGTH id = "i" * MAX_CURSOR_ID_LENGTH sort_field = "very_long_sort_field_name" encoded = encode_cursor(sort_field, value, id) assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH payload = decode_cursor(encoded, (sort_field,)) assert payload.value == value assert payload.id == id class TestDatetimeOverflow: """Crafted cursors with extreme micros must map to InvalidCursorError, not OverflowError/OSError leaking as 500. """ @pytest.mark.parametrize( "micros_str", [ "999999999999999999999", # 10^21 µs — past datetime.MAX_YEAR by ~14 orders "-999999999999999999999", # symmetric negative — pre-epoch overflow ], ) def test_out_of_range_micros_rejected(self, micros_str): encoded = encode_cursor("created_at", micros_str, "asset-x") payload = decode_cursor(encoded, ALLOWED) with pytest.raises(InvalidCursorError): decode_cursor_time(payload) class TestEncoderDecoderSymmetry: """The encoder must reject inputs the decoder rejects, or the same server will mint a cursor it then 400s on the next request. """ def test_long_name_within_cap_round_trips(self): """Assets allow names up to 512 chars (`String(512)`); the cursor encoder must round-trip a value at that cap so a freshly minted cursor never fails decode on the next request.""" long_name = "n" * MAX_CURSOR_VALUE_LENGTH encoded = encode_cursor("name", long_name, "asset-x") payload = decode_cursor(encoded, ALLOWED) assert payload.value == long_name def test_encoder_rejects_empty_id(self): with pytest.raises(InvalidCursorError, match="id must be non-empty"): encode_cursor("created_at", "1", "") def test_encoder_rejects_oversized_id(self): with pytest.raises(InvalidCursorError, match="id exceeds maximum length"): encode_cursor("created_at", "1", "a" * (MAX_CURSOR_ID_LENGTH + 1)) def test_encoder_rejects_oversized_value(self): with pytest.raises(InvalidCursorError, match="value exceeds maximum length"): encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1") class TestOrderBinding: def test_order_baked_into_payload(self): encoded = encode_cursor("created_at", "1", "id-1", order="asc") payload = decode_cursor(encoded, ALLOWED) assert payload.order == "asc" def test_mismatched_order_rejected(self): encoded = encode_cursor("created_at", "1", "id-1", order="desc") with pytest.raises(InvalidCursorError, match="does not match request order"): decode_cursor(encoded, ALLOWED, expected_order="asc") def test_matching_order_accepted(self): encoded = encode_cursor("created_at", "1", "id-1", order="desc") payload = decode_cursor(encoded, ALLOWED, expected_order="desc") assert payload.order == "desc" def test_invalid_order_token_rejected_on_encode(self): with pytest.raises(ValueError): encode_cursor("created_at", "1", "id-1", order="sideways") def test_invalid_order_token_rejected_on_decode(self): # Hand-craft a payload with an illegal `o` value. raw = b'{"s":"name","v":"x","id":"id-1","o":"sideways"}' encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError, match="unsupported order"): decode_cursor(encoded, ALLOWED) def test_cursor_without_order_rejected(self): """`o` is mandatory. A cursor minted without it is rejected as malformed rather than silently walking the keyset in whatever direction the request happens to ask for.""" raw = b'{"s":"name","v":"x","id":"id-1"}' encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") with pytest.raises(InvalidCursorError, match="missing or non-string o"): decode_cursor(encoded, ALLOWED, expected_order="desc") class TestHtmlSignificantCharEscaping: """An asset name containing `<`, `>`, `&`, U+2028, or U+2029 must encode to the same escaped wire bytes as any compatible implementation of the same payload format. Drift here breaks cross-runtime byte-identity for those characters. """ @pytest.mark.parametrize( "value, escaped_substring", [ ("foo.png", "\\u003c"), # `<` escaped ("foo.png", "\\u003e"), # `>` escaped ("foo&bar.png", "\\u0026"), ("foo
bar.png", "\\u2028"), # JS line separator ("foo
bar.png", "\\u2029"), # JS paragraph separator ], ) def test_html_significant_chars_escaped(self, value, escaped_substring): encoded = encode_cursor("name", value, "id-1") decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4)) assert escaped_substring in decoded_bytes.decode("ascii"), ( f"Expected {escaped_substring!r} in serialized payload, got: {decoded_bytes!r}" ) def test_value_round_trips_through_escape(self): """Encoding then decoding a value with `<>&` should yield the original string — the escape only affects the wire form, not the decoded value.""" original = "foo<&>bar.png" encoded = encode_cursor("name", original, "id-1") payload = decode_cursor(encoded, ALLOWED) assert payload.value == original class TestByteIdentityFixtures: """Pin the wire format so it doesn't drift silently. These fixtures assert exact byte equality of the encoded JSON payload — a change in key order, escape choice, separator whitespace, or anything else that shifts a byte fails the test loudly rather than diverging silently from any external consumer of the same payload format. """ @pytest.mark.parametrize( "sort_field, value, id, order, expected_payload", [ ( "created_at", "1716200000000000", "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7", "desc", '{"s":"created_at","v":"1716200000000000","id":"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7","o":"desc"}', ), ( "size", "1024", "asset-123", "asc", '{"s":"size","v":"1024","id":"asset-123","o":"asc"}', ), ( "name", "my-asset.png", "asset-abc", "desc", '{"s":"name","v":"my-asset.png","id":"asset-abc","o":"desc"}', ), ( "name", "foo&baz.png", "asset-html", "desc", # `<`, `>`, `&` escape to <, >, & in the value. '{"s":"name","v":"foo\\u003cbar\\u003e\\u0026baz.png","id":"asset-html","o":"desc"}', ), ], ) def test_encoded_payload_shape_pinned(self, sort_field, value, id, order, expected_payload): encoded = encode_cursor(sort_field, value, id, order=order) decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4)) assert decoded_bytes.decode("utf-8") == expected_payload, ( f"wire format drifted for sort={sort_field!r}, value={value!r}:\n" f" expected: {expected_payload!r}\n" f" actual: {decoded_bytes.decode('utf-8')!r}" )