Add cursor pagination for GET api assets (#14014)

Amp-Thread-ID: https://ampcode.com/threads/T-019e4ca5-b71a-7168-8f56-58b2325f34c3 Co-authored-by: Amp <amp@ampcode.com>
2026-05-30 19:07:25 +08:00 · 2026-05-22 10:53:01 +12:00 · 2026-05-22 10:53:01 +12:00 · 00c88a4634
commit 00c88a4634
parent 916b33c795
11 changed files with 1258 additions and 17 deletions
--- a/app/assets/api/routes.py
+++ b/app/assets/api/routes.py
@ -39,6 +39,7 @@ from app.assets.services import (
    update_asset_metadata,
    upload_from_temp_path,
 )
 from app.assets.services.cursor import InvalidCursorError
 from app.assets.services.path_utils import compute_paths_for_response
 from app.assets.services.tagging import list_tag_histogram
@ -182,7 +183,7 @@ def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResu
        user_metadata=result.ref.user_metadata or {},
        metadata=result.ref.system_metadata,
        job_id=result.ref.job_id,
-        prompt_id=result.ref.job_id,  # deprecated: mirrors job_id for cloud compat
+        prompt_id=result.ref.job_id,  # deprecated alias of job_id, kept for compatibility
        created_at=result.ref.created_at,
        updated_at=result.ref.updated_at,
        last_access_time=result.ref.last_access_time,
@ -219,24 +220,37 @@ async def list_assets_route(request: web.Request) -> web.Response:
    order_candidate = (q.order or "desc").lower()
    order = order_candidate if order_candidate in {"asc", "desc"} else "desc"
-    result = list_assets_page(
+    try:
-        owner_id=USER_MANAGER.get_request_user_id(request),
+        result = list_assets_page(
-        include_tags=q.include_tags,
+            owner_id=USER_MANAGER.get_request_user_id(request),
-        exclude_tags=q.exclude_tags,
+            include_tags=q.include_tags,
-        name_contains=q.name_contains,
+            exclude_tags=q.exclude_tags,
-        metadata_filter=q.metadata_filter,
+            name_contains=q.name_contains,
-        limit=q.limit,
+            metadata_filter=q.metadata_filter,
-        offset=q.offset,
+            limit=q.limit,
-        sort=sort,
+            offset=q.offset,
-        order=order,
+            sort=sort,
-    )
+            order=order,
            after=q.after,
        )
    except InvalidCursorError as e:
        return _build_error_response(400, "INVALID_CURSOR", str(e))
    summaries = [_build_asset_response(item) for item in result.items]
    # has_more semantics differ by mode:
    #   - cursor mode: a non-empty next_cursor means there are more results.
    #   - offset mode: derived from total - (offset + page size).
    if q.after is not None:
        has_more = result.next_cursor is not None
    else:
        has_more = (q.offset + len(summaries)) < result.total
    payload = schemas_out.AssetsList(
        assets=summaries,
        total=result.total,
-        has_more=(q.offset + len(summaries)) < result.total,
+        has_more=has_more,
        next_cursor=result.next_cursor,
    )
    return web.json_response(payload.model_dump(mode="json", exclude_none=True))
--- a/app/assets/api/schemas_in.py
+++ b/app/assets/api/schemas_in.py
@ -59,6 +59,11 @@ class ListAssetsQuery(BaseModel):
    limit: conint(ge=1, le=500) = 20
    offset: conint(ge=0) = 0
    # Opaque keyset cursor. When supplied, `offset` is ignored. Cursor pagination
    # is supported for sort values `created_at`, `updated_at`, `name`, `size`.
    # Supplying `after` together with `sort=last_access_time` returns
    # 400 INVALID_CURSOR; that sort only supports offset/limit.
    after: str | None = None
    sort: Literal["name", "created_at", "updated_at", "size", "last_access_time"] = (
        "created_at"
--- a/app/assets/api/schemas_out.py
+++ b/app/assets/api/schemas_out.py
@ -43,6 +43,8 @@ class AssetsList(BaseModel):
    assets: list[Asset]
    total: int
    has_more: bool
    # Opaque cursor for the next page. Omitted when there are no more results.
    next_cursor: str | None = None
 class TagUsage(BaseModel):
--- a/app/assets/database/queries/asset_reference.py
+++ b/app/assets/database/queries/asset_reference.py
@ -266,9 +266,18 @@ def list_references_page(
    metadata_filter: dict | None = None,
    sort: str | None = None,
    order: str | None = None,
    after_cursor_value: object | None = None,
    after_cursor_id: str | None = None,
 ) -> tuple[list[AssetReference], dict[str, list[str]], int]:
    """List references with pagination, filtering, and sorting.
    When ``after_cursor_value``/``after_cursor_id`` are supplied the query uses
    keyset pagination — ``offset`` is ignored and a WHERE clause selects rows
    strictly after the given ``(sort_col, id)`` position in the active sort
    direction. The cursor value must already be typed for the column
    (datetime for time sorts, int for size, str for name); the caller decodes
    the opaque cursor string and resolves to the typed value.
    Returns (references, tag_map, total_count).
    """
    base = (
@ -297,9 +306,31 @@ def list_references_page(
        "size": Asset.size_bytes,
    }
    sort_col = sort_map.get(sort, AssetReference.created_at)
-    sort_exp = sort_col.desc() if order == "desc" else sort_col.asc()
+    descending = order == "desc"
-    base = base.order_by(sort_exp).limit(limit).offset(offset)
+    # Keyset WHERE: (sort_col, id) strictly less-than / greater-than the cursor.
    # Equivalent to: sort_col <op> v  OR  (sort_col = v AND id <op> cursor_id).
    if after_cursor_value is not None and after_cursor_id is not None:
        if descending:
            keyset = sa.or_(
                sort_col < after_cursor_value,
                sa.and_(sort_col == after_cursor_value, AssetReference.id < after_cursor_id),
            )
        else:
            keyset = sa.or_(
                sort_col > after_cursor_value,
                sa.and_(sort_col == after_cursor_value, AssetReference.id > after_cursor_id),
            )
        base = base.where(keyset)
    # Secondary ORDER BY id (matching the primary direction) gives the keyset
    # comparison a deterministic tiebreaker on duplicate sort_col values.
    id_exp = AssetReference.id.desc() if descending else AssetReference.id.asc()
    sort_exp = sort_col.desc() if descending else sort_col.asc()
    base = base.order_by(sort_exp, id_exp).limit(limit)
    if after_cursor_id is None:
        base = base.offset(offset)
    count_stmt = (
        select(sa.func.count())
--- a/app/assets/services/asset_management.py
+++ b/app/assets/services/asset_management.py
@ -1,8 +1,19 @@
 import contextlib
 import mimetypes
 import os
 from datetime import timezone
 from typing import Sequence
 from app.assets.services.cursor import (
    CursorPayload,
    InvalidCursorError,
    decode_cursor,
    decode_cursor_int,
    decode_cursor_time,
    encode_cursor,
    encode_cursor_from_time,
 )
 from app.assets.database.models import Asset
 from app.assets.database.queries import (
@ -242,6 +253,11 @@ def get_asset_by_hash(asset_hash: str) -> AssetData | None:
        return extract_asset_data(asset)
 # Sort fields that support cursor (keyset) pagination. `last_access_time` is
 # deliberately absent from this tuple: `list_assets_page` raises
 # InvalidCursorError when `after` is combined with a sort outside it, so that
 # sort is only reachable through offset/limit.
 _CURSOR_SORT_FIELDS = ("created_at", "updated_at", "name", "size")
 def list_assets_page(
    owner_id: str = "",
    include_tags: Sequence[str] | None = None,
@ -252,7 +268,39 @@ def list_assets_page(
    offset: int = 0,
    sort: str = "created_at",
    order: str = "desc",
    after: str | None = None,
 ) -> ListAssetsResult:
    """List assets with optional cursor pagination.
    When ``after`` is supplied it overrides ``offset``. The cursor's sort field
    must match ``sort`` and be in the cursor-supported allowlist; mismatches
    raise InvalidCursorError so the handler can map to 400 INVALID_CURSOR.
    """
    cursor_value: object | None = None
    cursor_id: str | None = None
    # Mint next_cursor on every page where the sort is cursor-supported, not
    # only when the request itself arrived with a cursor. Otherwise a first
    # request (no `after`) returns next_cursor=None and the client can never
    # enter cursor mode.
    mint_cursor = sort in _CURSOR_SORT_FIELDS
    if after is not None:
        if sort not in _CURSOR_SORT_FIELDS:
            raise InvalidCursorError(
                f"cursor pagination is not supported for sort={sort!r}"
            )
        payload = decode_cursor(after, _CURSOR_SORT_FIELDS, expected_order=order)
        if payload.sort_field != sort:
            raise InvalidCursorError(
                f"cursor sort field {payload.sort_field!r} does not match request sort {sort!r}"
            )
        cursor_value, cursor_id = _resolve_cursor_value(payload), payload.id
    # Over-fetch by one row so we can distinguish "exactly `limit` rows total
    # remaining" from "more rows past this page" without a second query. Drop
    # the sentinel before returning.
    fetch_limit = limit + 1 if mint_cursor else limit
    with create_session() as session:
        refs, tag_map, total = list_references_page(
            session,
@ -261,12 +309,22 @@ def list_assets_page(
            exclude_tags=exclude_tags,
            name_contains=name_contains,
            metadata_filter=metadata_filter,
-            limit=limit,
+            limit=fetch_limit,
            offset=offset,
            sort=sort,
            order=order,
            after_cursor_value=cursor_value,
            after_cursor_id=cursor_id,
        )
        next_cursor: str | None = None
        if mint_cursor and len(refs) > limit:
            # There's at least one more row past this page — mint a cursor from
            # the last row of the page (i.e. index `limit - 1`, since we
            # over-fetched), and drop the sentinel.
            next_cursor = _encode_next_cursor(refs[limit - 1], sort, order)
            refs = refs[:limit]
        items: list[AssetSummaryData] = []
        for ref in refs:
            items.append(
@ -277,7 +335,39 @@ def list_assets_page(
                )
            )
-        return ListAssetsResult(items=items, total=total)
+        return ListAssetsResult(items=items, total=total, next_cursor=next_cursor)
 def _resolve_cursor_value(payload: CursorPayload) -> object:
    """Map a decoded cursor payload to a column-typed Python value."""
    if payload.sort_field in ("created_at", "updated_at"):
        # DB stores naive UTC; strip tzinfo so the comparison binds against a
        # `TIMESTAMP WITHOUT TIME ZONE` column without an offset shift.
        return decode_cursor_time(payload).replace(tzinfo=None)
    if payload.sort_field == "size":
        return decode_cursor_int(payload)
    return payload.value  # name, str-typed
 def _encode_next_cursor(ref, sort: str, order: str) -> str | None:
    """Mint a cursor pointing at *ref* for the given sort dimension.
    Returns None when the boundary row carries a NULL sort value (e.g. an asset
    record whose size_bytes hasn't been backfilled). Continuing pagination
    across a NULL boundary is undefined under keyset ordering — better to
    truncate cleanly here than to mint a cursor that mis-positions.
    """
    if sort == "name":
        return encode_cursor("name", ref.name, ref.id, order=order)
    if sort == "size":
        if ref.asset is None or ref.asset.size_bytes is None:
            return None
        return encode_cursor("size", str(ref.asset.size_bytes), ref.id, order=order)
    # created_at / updated_at — DB datetimes are naive UTC; attach tz before encoding.
    value = ref.created_at if sort == "created_at" else ref.updated_at
    if value is None:
        return None
    return encode_cursor_from_time(sort, value.replace(tzinfo=timezone.utc), ref.id, order=order)
 def resolve_hash_to_path(
--- a/app/assets/services/cursor.py
+++ b/app/assets/services/cursor.py
@ -0,0 +1,225 @@
 """Opaque keyset-pagination cursor for /api/assets.
 Payload JSON uses short keys to keep the encoded length small:
    {"s": <sort_field>, "v": <value>, "id": <id>, "o": <order>}
 The `o` key binds the cursor to the sort direction it was minted under,
 so replaying a `desc` cursor against an `asc` request fails with
 ``INVALID_CURSOR`` rather than silently walking the wrong direction.
 `o` is mandatory on every payload — a cursor without it is rejected as
 malformed.
 Encoding is base64url with no padding. JSON serialization escapes `<`,
 `>`, `&`, U+2028, and U+2029 in encoded string values so asset names
 containing those characters produce a stable, byte-identical wire form
 across any compatible implementation of the same payload format.
 Time values are serialized as Unix microseconds (UTC) — microsecond
 precision is sufficient to round-trip the timestamps stored by the
 database without rounding rows in the same millisecond bucket.
 """
 from __future__ import annotations
 import base64
 import json
 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Iterable, Optional
 class InvalidCursorError(ValueError):
    """A pagination cursor could not be accepted.

    Covers cursors that are malformed, too large, or that name a sort field
    the endpoint does not allow. Handlers are expected to translate this into
    a 400 response whose error code is ``INVALID_CURSOR``.
    """
 # Wire-format length caps. Cursors are user-controlled, so caps protect the
 # decode path from oversized allocations and downstream SQL predicates from
 # unbounded strings.
 #
 # MAX_CURSOR_VALUE_LENGTH is 512 to fit the `AssetReference.name` column max
 # (`String(512)`), so the per-field check alone never refuses a stored name.
 #
 # NOTE(review): the value/id caps count characters, while
 # MAX_ENCODED_CURSOR_LENGTH counts characters of the base64url form of the
 # UTF-8, escape-expanded payload. A long name made of multibyte or escaped
 # characters can therefore pass the value cap and still exceed the encoded
 # cap; `encode_cursor` raises InvalidCursorError in that case rather than
 # minting a cursor `decode_cursor` would reject.
 MAX_ENCODED_CURSOR_LENGTH = 1024
 MAX_CURSOR_VALUE_LENGTH = 512
 MAX_CURSOR_ID_LENGTH = 128
@dataclass(frozen=True)
 class CursorPayload:
    """Immutable decoded form of a cursor, as returned by ``decode_cursor``."""
    # Sort field the cursor was minted for (wire key "s").
    sort_field: str
    # Sort value of the boundary row, always carried as a string (wire key "v");
    # use decode_cursor_time / decode_cursor_int for a typed value.
    value: str
    # Id of the boundary row (wire key "id").
    id: str
    # Sort direction the cursor was minted under, "asc" or "desc" (wire key "o").
    order: str
 _VALID_ORDERS = ("asc", "desc")


 def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> str:
    """Serialize a cursor payload into its opaque base64url (unpadded) form.

    ``order`` is stored in the payload so that a later request which flips the
    `order` query parameter is refused with ``INVALID_CURSOR`` instead of
    silently paging in the wrong direction.

    Raises:
        InvalidCursorError: if ``order`` is not "asc"/"desc", ``id`` is empty
            or too long, ``value`` is too long, or the finished cursor is
            longer than ``MAX_ENCODED_CURSOR_LENGTH``.
    """
    if order not in _VALID_ORDERS:
        raise InvalidCursorError(f"order must be one of {_VALID_ORDERS}, got {order!r}")
    # Keep the encoder as strict as the decoder: anything the decoder would
    # refuse must be refused here, or the server would hand out cursors that
    # it answers with a 400 on the very next request.
    if not id:
        raise InvalidCursorError("id must be non-empty")
    if len(id) > MAX_CURSOR_ID_LENGTH:
        raise InvalidCursorError("id exceeds maximum length")
    if len(value) > MAX_CURSOR_VALUE_LENGTH:
        raise InvalidCursorError("value exceeds maximum length")

    body = json.dumps(
        {"s": sort_field, "v": value, "id": id, "o": order},
        separators=(",", ":"),
        ensure_ascii=False,
    )
    # Escape the HTML-significant characters and the JS line/paragraph
    # separators so a name containing them serializes to the same bytes in
    # every runtime. None of these characters is part of JSON's structural
    # syntax, so rewriting them across the whole serialized text can only
    # affect string contents. Code points are spelled numerically so the
    # source survives tooling that normalizes invisible separators.
    escapes = {
        ord("<"): "\\u003c",
        ord(">"): "\\u003e",
        ord("&"): "\\u0026",
        0x2028: "\\u2028",
        0x2029: "\\u2029",
    }
    body = body.translate(escapes)

    token = base64.urlsafe_b64encode(body.encode("utf-8")).decode("ascii").rstrip("=")
    # Final wire-size guard. The field caps above count characters, but the
    # wire cap applies to the base64url text of the UTF-8, escape-expanded
    # payload. A value full of multibyte or escaped characters (e.g.
    # 512 × "é" or 512 × "<") grows well past MAX_ENCODED_CURSOR_LENGTH while
    # still passing the character counts, so refuse to mint a cursor the
    # decoder would reject.
    if len(token) > MAX_ENCODED_CURSOR_LENGTH:
        raise InvalidCursorError("encoded cursor exceeds maximum length")
    return token
 def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:
    """Build a cursor whose value is *t* expressed as Unix microseconds.

    *t* must be timezone-aware; it is converted to UTC before encoding. A
    naive datetime raises ``ValueError`` so a caller cannot encode the local
    wall-clock reading of a timestamp that is stored as UTC.
    """
    if t.tzinfo is None:
        raise ValueError("encode_cursor_from_time requires an aware datetime")
    utc_moment = t.astimezone(timezone.utc)
    value = str(_datetime_to_unix_micros(utc_moment))
    return encode_cursor(sort_field, value, id, order=order)
 def decode_cursor(
    cursor: str,
    allowed_sort_fields: Iterable[str],
    expected_order: str | None = None,
 ) -> CursorPayload:
    """Parse and validate an opaque cursor string.

    Args:
        cursor: The base64url (unpadded) cursor supplied by the client.
        allowed_sort_fields: The endpoint's accepted sort fields. A cursor
            carrying a field outside this set is rejected so a cursor minted
            for one column can't be replayed against another (e.g. a
            ``created_at`` timestamp string compared against a ``name``
            column). Passing no allowed fields rejects every cursor.
        expected_order: ``"asc"``/``"desc"``; when supplied it must match the
            payload's ``o`` field. ``o`` is required on every payload, so a
            cursor missing it is rejected even when this is None.

    Returns:
        The decoded payload. ``value`` is still a string; the typed helpers
        (``decode_cursor_time`` / ``decode_cursor_int``) interpret it.

    Raises:
        InvalidCursorError: on any malformed, oversized, mistyped, or
            mismatching cursor.
    """
    if len(cursor) > MAX_ENCODED_CURSOR_LENGTH:
        raise InvalidCursorError("cursor exceeds maximum length")
    try:
        # urlsafe_b64decode requires correct padding; the encoder strips it,
        # so restore the trailing '=' pad here.
        padding = "=" * (-len(cursor) % 4)
        raw = base64.urlsafe_b64decode(cursor + padding)
    except ValueError as e:
        # binascii.Error is a ValueError subclass and non-ASCII input raises a
        # plain ValueError, so one clause covers every decode failure without
        # reaching into base64's private `binascii` attribute.
        raise InvalidCursorError(f"encoding: {e}") from e
    try:
        decoded = json.loads(raw)
    except (json.JSONDecodeError, UnicodeDecodeError, RecursionError) as e:
        # RecursionError: a payload of deeply nested brackets can exhaust the
        # interpreter's recursion budget; treat it as malformed input.
        raise InvalidCursorError(f"payload: {e}") from e
    if not isinstance(decoded, dict):
        raise InvalidCursorError("payload: expected object")
    sort_field = decoded.get("s")
    value = decoded.get("v")
    row_id = decoded.get("id")
    order = decoded.get("o")
    if not isinstance(sort_field, str) or not isinstance(value, str) or not isinstance(row_id, str):
        raise InvalidCursorError("payload: missing or non-string s/v/id")
    if row_id == "":
        raise InvalidCursorError("missing id")
    if len(row_id) > MAX_CURSOR_ID_LENGTH:
        raise InvalidCursorError("id exceeds maximum length")
    if len(value) > MAX_CURSOR_VALUE_LENGTH:
        raise InvalidCursorError("value exceeds maximum length")
    try:
        # json.loads yields lone surrogates for escapes such as "\ud800".
        # Such strings cannot be encoded as UTF-8, so reject them here rather
        # than letting a later consumer fail while encoding them.
        value.encode("utf-8")
        row_id.encode("utf-8")
    except UnicodeEncodeError as e:
        raise InvalidCursorError("payload: v/id is not valid Unicode text") from e
    if sort_field not in allowed_sort_fields:
        raise InvalidCursorError(f"unsupported sort field {sort_field!r}")
    if not isinstance(order, str):
        raise InvalidCursorError("missing or non-string o")
    if order not in _VALID_ORDERS:
        raise InvalidCursorError(f"unsupported order {order!r}")
    if expected_order is not None and order != expected_order:
        raise InvalidCursorError(
            f"cursor order {order!r} does not match request order {expected_order!r}"
        )
    return CursorPayload(sort_field=sort_field, value=value, id=row_id, order=order)
 def decode_cursor_time(payload: Optional[CursorPayload]) -> datetime:
    """Interpret a cursor value as Unix microseconds and return the UTC datetime.

    Raises:
        InvalidCursorError: if *payload* is None, the value is not an integer
            string, or the integer cannot be represented as a datetime.
    """
    if payload is None:
        raise InvalidCursorError("nil cursor payload")
    raw_value = payload.value
    try:
        micros = int(raw_value)
    except ValueError as err:
        raise InvalidCursorError(f"value is not a valid timestamp: {err}") from err
    try:
        moment = _unix_micros_to_datetime(micros)
    except (OverflowError, OSError, ValueError) as err:
        # A crafted, absurdly large or small microsecond count makes the
        # conversion raise; report it as a bad cursor (400), not a 500.
        raise InvalidCursorError(f"value is out of representable range: {err}") from err
    return moment
 def decode_cursor_int(payload: Optional[CursorPayload]) -> int:
    """Parse a cursor value as a base-10 integer within the signed 64-bit range.

    Python integers are unbounded, so a crafted value such as forty 9s parses
    fine and would only fail later, when it is bound to a database integer
    parameter. Bounding it here turns that into a 400 instead of a 500.
    NOTE(review): assumes the compared column is at most a signed 64-bit
    integer — confirm against the column definition.

    Raises:
        InvalidCursorError: if *payload* is None, the value is not an integer
            string, or the integer does not fit in 64 bits.
    """
    if payload is None:
        raise InvalidCursorError("nil cursor payload")
    try:
        number = int(payload.value)
    except ValueError as e:
        raise InvalidCursorError(f"value is not a valid integer: {e}") from e
    if not -(2**63) <= number <= 2**63 - 1:
        raise InvalidCursorError("value is out of 64-bit integer range")
    return number
 _EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
 def _datetime_to_unix_micros(t: datetime) -> int:
    """Convert an aware UTC datetime to Unix microseconds (integer math)."""
    delta = t - _EPOCH
    return (delta.days * 86_400 + delta.seconds) * 1_000_000 + delta.microseconds
 def _unix_micros_to_datetime(micros: int) -> datetime:
    """Convert Unix microseconds to a UTC datetime, preserving precision."""
    seconds, micro_remainder = divmod(micros, 1_000_000)
    return datetime.fromtimestamp(seconds, tz=timezone.utc).replace(microsecond=micro_remainder)
--- a/app/assets/services/schemas.py
+++ b/app/assets/services/schemas.py
@ -71,6 +71,7 @@ class AssetSummaryData:
 class ListAssetsResult:
    items: list[AssetSummaryData]
    total: int
    next_cursor: str | None = None
@dataclass(frozen=True)
--- a/openapi.yaml
+++ b/openapi.yaml
@ -1517,6 +1517,22 @@ paths:
          schema:
            type: integer
            default: 0
          description: |
            Offset-based pagination. Cursor pagination via `after` is preferred
            for sequential walks (stable across concurrent inserts/deletes) but
            `offset` remains fully supported for random access (jump-to-page
            UIs, "showing items X–Y of N" displays). When both are supplied,
            `after` wins and `offset` is ignored.
        - name: after
          in: query
          schema:
            type: string
          description: |
            Opaque cursor for keyset pagination. Pass the `next_cursor` value
            from a previous response to fetch the next page. Stable across
            inserts/deletes between pages. Supported with `sort` values
            `created_at`, `updated_at`, `name`, and `size`. Malformed or
            unsupported cursors return 400 with `INVALID_CURSOR`.
        - name: include_tags
          in: query
          schema:
@ -1575,6 +1591,12 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/ListAssetsResponse"
        "400":
          description: Malformed query or cursor (e.g. `INVALID_CURSOR`)
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AssetsApiError"
    post:
      operationId: createAsset
      tags: [assets]
@ -6761,6 +6783,42 @@ components:
          type: integer
        has_more:
          type: boolean
        next_cursor:
          type: string
          description: |
            Opaque cursor to fetch the next page. Pass back as the `after`
            query parameter. Omitted when there are no more results.
    AssetsApiError:
      type: object
      description: Error envelope returned by the assets API on 400 responses.
      required:
        - error
      properties:
        error:
          type: object
          required:
            - code
            - message
            - details
          properties:
            code:
              type: string
              description: |
                Machine-readable error code. `INVALID_CURSOR` is returned when the
                `after` cursor is malformed, oversized, or its sort field does
                not match the request's `sort`. `INVALID_QUERY` covers other
                Pydantic validation failures.
              enum: [INVALID_CURSOR, INVALID_QUERY]
            message:
              type: string
            details:
              type: object
              description: |
                Free-form, code-specific context. `INVALID_QUERY` populates this
                with an `errors` array of Pydantic validation entries;
                `INVALID_CURSOR` returns an empty object.
              additionalProperties: true
    TagInfo:
      type: object
--- a/tests-unit/assets_test/queries/test_asset_reference_keyset.py
+++ b/tests-unit/assets_test/queries/test_asset_reference_keyset.py
@ -0,0 +1,112 @@
 """Keyset-pagination tiebreaker tests for list_references_page.
 When multiple rows share the same primary sort value (e.g. four assets
 created in the same microsecond), the secondary `ORDER BY id` is what keeps
 keyset pagination from losing or repeating rows. This file exercises that
 branch directly against an in-memory SQLite session — engineering identical
 timestamps via HTTP is unreliable enough that we work at the query layer.
 """
 import uuid
 from datetime import datetime
 import pytest
 from sqlalchemy.orm import Session
 from app.assets.database.models import Asset, AssetReference
 from app.assets.database.queries.asset_reference import list_references_page
 def _make_ref(session: Session, created_at: datetime, name: str, owner: str = "") -> AssetReference:
    """Add a fresh Asset and one AssetReference to *session*; return the reference.

    All three timestamp columns of the reference are set to *created_at*.
    Nothing is committed here; the caller decides when to commit.
    """
    backing_asset = Asset(hash=f"blake3:{uuid.uuid4().hex}", size_bytes=1024)
    session.add(backing_asset)
    # Presumably flushed so the asset's id is assigned before it is referenced.
    session.flush()
    columns = dict(
        id=str(uuid.uuid4()),
        asset_id=backing_asset.id,
        owner_id=owner,
        name=name,
        file_path=f"/tmp/{name}",
        created_at=created_at,
        updated_at=created_at,
        last_access_time=created_at,
        is_missing=False,
    )
    reference = AssetReference(**columns)
    session.add(reference)
    return reference
@pytest.mark.parametrize("order", ["desc", "asc"])
def test_tiebreaker_walks_duplicate_sort_values(session: Session, order: str):
    """Rows sharing one created_at must be paged without loss or repetition.

    The primary sort column cannot distinguish the four rows, so the walk only
    comes out in id order if the secondary id ordering and the keyset
    predicate agree.
    """
    shared_ts = datetime(2024, 5, 20, 12, 0, 0)  # naive UTC, like the DB stores
    created = [_make_ref(session, shared_ts, f"tie_{i}.png") for i in range(4)]
    session.commit()
    expected_ids = sorted((ref.id for ref in created), reverse=(order == "desc"))

    # Walk the cursor by hand with a page size of 2: two full pages, then an
    # empty third page that ends the walk.
    page_size = 2
    walked: list[str] = []
    cursor_value, cursor_id = None, None
    for _ in range(4):  # generous bound; three calls are expected
        rows, _tag_map, _total = list_references_page(
            session,
            limit=page_size,
            sort="created_at",
            order=order,
            after_cursor_value=cursor_value,
            after_cursor_id=cursor_id,
        )
        if not rows:
            break
        walked += [row.id for row in rows]
        # The last row's (created_at, id) pair positions the next page.
        boundary = rows[-1]
        cursor_value, cursor_id = boundary.created_at, boundary.id
        if len(rows) < page_size:
            break

    assert walked == expected_ids, (
        f"keyset tiebreaker failed for order={order}: expected {expected_ids}, got {walked}"
    )
 def test_tiebreaker_no_duplicates_under_mixed_collisions(session: Session):
    """Some rows share a timestamp, some don't. The cursor must still walk
    every row exactly once regardless of where ties sit relative to a
    page boundary."""
    t1 = datetime(2024, 5, 20, 12, 0, 0)
    t2 = datetime(2024, 5, 20, 12, 0, 1)
    layout = [t1, t1, t1, t2, t2]  # three rows at t1, two at t2
    refs = [_make_ref(session, ts, f"mix_{i}.png") for i, ts in enumerate(layout)]
    session.commit()
    all_ids = {r.id for r in refs}
    # Ids seen so far; a repeat means the keyset predicate re-served a row.
    seen_set: set[str] = set()
    after_value = None
    after_id = None
    # Five rows at page size 2 need three calls; six is a generous upper bound
    # so a broken cursor cannot loop forever.
    for _ in range(6):
        page, _, _ = list_references_page(
            session,
            limit=2,
            sort="created_at",
            order="desc",
            after_cursor_value=after_value,
            after_cursor_id=after_id,
        )
        if not page:
            break
        for p in page:
            assert p.id not in seen_set, f"duplicate row {p.id} appeared in cursor walk"
            seen_set.add(p.id)
        # Continue from the last row's (created_at, id) position.
        last = page[-1]
        after_value, after_id = last.created_at, last.id
        if len(page) < 2:
            break
    assert seen_set == all_ids, f"missing rows: expected {all_ids}, got {seen_set}"
--- a/tests-unit/assets_test/services/test_cursor.py
+++ b/tests-unit/assets_test/services/test_cursor.py
@ -0,0 +1,354 @@
 """Tests for app.assets.services.cursor.
 The byte-identity fixtures below pin the wire format so a parallel
 implementation in another runtime can mint exchange-compatible cursors
 for the same payload. Drift here would break frontend pagination against
 any compatible backend.
 """
 from __future__ import annotations
 import base64
 from datetime import datetime, timedelta, timezone
 import pytest
 from app.assets.services.cursor import (
    MAX_CURSOR_ID_LENGTH,
    MAX_CURSOR_VALUE_LENGTH,
    MAX_ENCODED_CURSOR_LENGTH,
    CursorPayload,
    InvalidCursorError,
    decode_cursor,
    decode_cursor_int,
    decode_cursor_time,
    encode_cursor,
    encode_cursor_from_time,
 )
 ALLOWED = ("created_at", "updated_at", "name", "size")
 class TestRoundTrip:
    """Encoding a cursor and decoding it again must return the same fields."""

    @pytest.mark.parametrize(
        "sort_field, value, asset_id",
        [
            ("created_at", "1716200000000000", "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7"),
            ("size", "1024", "asset-123"),
            ("name", "my-asset.png", "asset-abc"),
            ("name", "résumé.txt", "asset-uni"),
        ],
    )
    def test_encode_decode(self, sort_field, value, asset_id):
        """Each (sort_field, value, id) triple survives encode followed by decode."""
        # `asset_id` rather than `id`, so the builtin is not shadowed in the test body.
        encoded = encode_cursor(sort_field, value, asset_id)
        assert encoded != ""
        payload = decode_cursor(encoded, ALLOWED)
        assert payload.sort_field == sort_field
        assert payload.value == value
        assert payload.id == asset_id
 class TestTimeCursor:
    """Time-valued cursors: microsecond encoding and timezone handling."""

    def test_microsecond_precision_preserved(self):
        # Non-zero microseconds: a millisecond-resolution encoding would drop the trailing 456.
        moment = datetime(2024, 5, 20, 12, 53, 20, 123456, tzinfo=timezone.utc)
        token = encode_cursor_from_time("created_at", moment, "id-1")
        decoded_payload = decode_cursor(token, ALLOWED)
        # The stored value is an integer count of microseconds, as a string.
        assert decoded_payload.value == "1716209600123456"
        assert decode_cursor_time(decoded_payload) == moment

    def test_decode_returns_utc(self):
        source = CursorPayload(sort_field="created_at", value="1716200000123456", id="id-1", order="desc")
        assert decode_cursor_time(source).tzinfo == timezone.utc

    def test_naive_datetime_rejected_on_encode(self):
        without_tz = datetime(2024, 5, 20, 12, 0, 0)
        with pytest.raises(ValueError):
            encode_cursor_from_time("created_at", without_tz, "id-1")

    def test_non_integer_value_rejected_on_decode(self):
        with pytest.raises(InvalidCursorError):
            bad = CursorPayload("created_at", "not-a-number", "id-1", "desc")
            decode_cursor_time(bad)

    def test_none_payload_rejected(self):
        with pytest.raises(InvalidCursorError):
            decode_cursor_time(None)

    def test_non_utc_aware_normalized(self):
        # One instant expressed in two zones has to produce one cursor.
        utc_ts = datetime(2024, 5, 20, 12, 0, 0, tzinfo=timezone.utc)
        shifted_ts = utc_ts.astimezone(timezone(timedelta(hours=-5)))
        from_utc = encode_cursor_from_time("created_at", utc_ts, "x")
        from_shifted = encode_cursor_from_time("created_at", shifted_ts, "x")
        assert from_utc == from_shifted
 class TestIntCursor:
    """Integer-valued cursors: decode to int, reject anything else."""

    def test_decode_int(self):
        size_payload = CursorPayload("size", "1024", "id-1", "desc")
        assert decode_cursor_int(size_payload) == 1024

    def test_decode_int_rejects_non_int(self):
        with pytest.raises(InvalidCursorError):
            not_numeric = CursorPayload("size", "abc", "id-1", "desc")
            decode_cursor_int(not_numeric)

    def test_decode_int_rejects_none(self):
        with pytest.raises(InvalidCursorError):
            decode_cursor_int(None)
 class TestInvalidInputs:
    """Malformed or out-of-bounds cursors must raise InvalidCursorError."""

    @staticmethod
    def _wire(raw: bytes) -> str:
        """Return *raw* as unpadded URL-safe base64 text."""
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    def test_oversized_cursor(self):
        too_long = "a" * (MAX_ENCODED_CURSOR_LENGTH + 1)
        with pytest.raises(InvalidCursorError, match="maximum length"):
            decode_cursor(too_long, ALLOWED)

    def test_not_base64(self):
        with pytest.raises(InvalidCursorError):
            decode_cursor("not base64!!!", ALLOWED)

    def test_not_json(self):
        token = self._wire(b"definitely not json")
        with pytest.raises(InvalidCursorError):
            decode_cursor(token, ALLOWED)

    def test_empty_id(self):
        # encode_cursor refuses an empty id itself, so the payload is written
        # by hand to reach the decoder's own check.
        token = self._wire(b'{"s":"created_at","v":"1","id":"","o":"desc"}')
        with pytest.raises(InvalidCursorError, match="missing id"):
            decode_cursor(token, ALLOWED)

    def test_oversized_id(self):
        # Hand-built for the same reason: the encoder would reject this id first.
        big_id = "a" * (MAX_CURSOR_ID_LENGTH + 1)
        raw = ('{"s":"created_at","v":"1","id":"' + big_id + '","o":"desc"}').encode("ascii")
        token = self._wire(raw)
        with pytest.raises(InvalidCursorError, match="id exceeds maximum length"):
            decode_cursor(token, ALLOWED)

    def test_oversized_value(self):
        # Hand-built for the same reason: the encoder would reject this value first.
        big_v = "v" * (MAX_CURSOR_VALUE_LENGTH + 1)
        raw = ('{"s":"created_at","v":"' + big_v + '","id":"id-1","o":"desc"}').encode("ascii")
        token = self._wire(raw)
        with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
            decode_cursor(token, ALLOWED)

    def test_unsupported_sort_field(self):
        token = encode_cursor("execution_time", "1", "id-1")
        with pytest.raises(InvalidCursorError, match="unsupported sort field"):
            decode_cursor(token, ALLOWED)

    def test_no_allowed_fields_rejects_everything(self):
        token = encode_cursor("created_at", "1", "id-1")
        with pytest.raises(InvalidCursorError):
            decode_cursor(token, ())

    def test_non_dict_payload_rejected(self):
        token = self._wire(b'["array","not","dict"]')
        with pytest.raises(InvalidCursorError, match="expected object"):
            decode_cursor(token, ALLOWED)
 class TestEncodeAtCapsFits:
    """The per-field caps and the encoded-length cap must be mutually consistent."""

    def test_max_field_lengths_fit_wire_cap(self):
        # Largest payload the per-field caps allow, plus a long sort field
        # name. If this encoded past MAX_ENCODED_CURSOR_LENGTH, the decoder's
        # length check would refuse a cursor the encoder had just produced.
        field = "very_long_sort_field_name"
        widest_value = "v" * MAX_CURSOR_VALUE_LENGTH
        widest_id = "i" * MAX_CURSOR_ID_LENGTH
        token = encode_cursor(field, widest_value, widest_id)
        assert len(token) <= MAX_ENCODED_CURSOR_LENGTH
        decoded = decode_cursor(token, (field,))
        assert decoded.value == widest_value
        assert decoded.id == widest_id
 class TestDatetimeOverflow:
    """Crafted cursors with extreme micros must map to InvalidCursorError,
    not OverflowError/OSError leaking as 500.
    """
    @pytest.mark.parametrize(
        "micros_str",
        [
            # ≈10^21 µs after the epoch. datetime stops at year 9999
            # (datetime.MAXYEAR), which is only ≈2.5×10^17 µs after the epoch.
            "999999999999999999999",   # positive overflow
            "-999999999999999999999",  # symmetric negative — pre-epoch overflow
        ],
    )
    def test_out_of_range_micros_rejected(self, micros_str):
        # The value is a plain string at this stage, so encode/decode succeed;
        # only the conversion to a datetime is expected to fail.
        encoded = encode_cursor("created_at", micros_str, "asset-x")
        payload = decode_cursor(encoded, ALLOWED)
        with pytest.raises(InvalidCursorError):
            decode_cursor_time(payload)
 class TestEncoderDecoderSymmetry:
    """Whatever the decoder refuses, the encoder has to refuse as well;
    otherwise a server could hand out a cursor and then answer 400 when the
    client sends it straight back.
    """

    def test_long_name_within_cap_round_trips(self):
        """A `name` value exactly MAX_CURSOR_VALUE_LENGTH characters long must
        come back unchanged from encode followed by decode."""
        name_at_cap = "n" * MAX_CURSOR_VALUE_LENGTH
        token = encode_cursor("name", name_at_cap, "asset-x")
        assert decode_cursor(token, ALLOWED).value == name_at_cap

    def test_encoder_rejects_empty_id(self):
        with pytest.raises(InvalidCursorError, match="id must be non-empty"):
            encode_cursor("created_at", "1", "")

    def test_encoder_rejects_oversized_id(self):
        id_over_cap = "a" * (MAX_CURSOR_ID_LENGTH + 1)
        with pytest.raises(InvalidCursorError, match="id exceeds maximum length"):
            encode_cursor("created_at", "1", id_over_cap)

    def test_encoder_rejects_oversized_value(self):
        value_over_cap = "v" * (MAX_CURSOR_VALUE_LENGTH + 1)
        with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
            encode_cursor("name", value_over_cap, "id-1")

    def test_encoder_rejects_multibyte_value_over_wire_cap(self):
        """A value can sit within the character-count cap and still be too
        large once serialized. MAX_CURSOR_VALUE_LENGTH copies of 'é' must be
        refused at encode time with the encoded-length error, rather than
        minted into a cursor that the next request would reject."""
        multibyte_name = "é" * MAX_CURSOR_VALUE_LENGTH
        with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
            encode_cursor("name", multibyte_name, "asset-multibyte")

    def test_encoder_rejects_escape_heavy_value_over_wire_cap(self):
        """The same limit reached through escaping: every `<` is written as
        the six-byte sequence `\\u003c`, so MAX_CURSOR_VALUE_LENGTH of them
        exceed the encoded cap although the raw character count is allowed."""
        escape_heavy_name = "<" * MAX_CURSOR_VALUE_LENGTH
        with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
            encode_cursor("name", escape_heavy_name, "asset-escape")
 class TestOrderBinding:
    """The sort direction is part of the cursor and is checked on decode."""

    def test_order_baked_into_payload(self):
        token = encode_cursor("created_at", "1", "id-1", order="asc")
        assert decode_cursor(token, ALLOWED).order == "asc"

    def test_mismatched_order_rejected(self):
        desc_token = encode_cursor("created_at", "1", "id-1", order="desc")
        with pytest.raises(InvalidCursorError, match="does not match request order"):
            decode_cursor(desc_token, ALLOWED, expected_order="asc")

    def test_matching_order_accepted(self):
        desc_token = encode_cursor("created_at", "1", "id-1", order="desc")
        decoded = decode_cursor(desc_token, ALLOWED, expected_order="desc")
        assert decoded.order == "desc"

    def test_invalid_order_token_rejected_on_encode(self):
        with pytest.raises(ValueError):
            encode_cursor("created_at", "1", "id-1", order="sideways")

    def test_invalid_order_token_rejected_on_decode(self):
        # Written by hand: the encoder would not produce this `o` value.
        handmade = b'{"s":"name","v":"x","id":"id-1","o":"sideways"}'
        token = base64.urlsafe_b64encode(handmade).rstrip(b"=").decode("ascii")
        with pytest.raises(InvalidCursorError, match="unsupported order"):
            decode_cursor(token, ALLOWED)

    def test_cursor_without_order_rejected(self):
        """`o` is required. A payload that omits it is treated as malformed
        instead of being walked in whichever direction the request names."""
        handmade = b'{"s":"name","v":"x","id":"id-1"}'
        token = base64.urlsafe_b64encode(handmade).rstrip(b"=").decode("ascii")
        with pytest.raises(InvalidCursorError, match="missing or non-string o"):
            decode_cursor(token, ALLOWED, expected_order="desc")
 class TestHtmlSignificantCharEscaping:
    """An asset name containing `<`, `>`, `&`, U+2028, or U+2029 must encode
    to the same escaped wire bytes as any compatible implementation of the
    same payload format. Drift here breaks cross-runtime byte-identity for
    those characters.
    """

    @pytest.mark.parametrize(
        "value, escaped_substring",
        [
            ("foo<bar>.png", "\\u003c"),  # `<` escaped
            ("foo<bar>.png", "\\u003e"),  # `>` escaped
            ("foo&bar.png", "\\u0026"),
            # U+2028 / U+2029 are spelled as escapes: as raw characters they
            # are invisible in most editors and easy to lose on copy/paste or
            # reformatting, which would silently turn the input into a space.
            ("foo\u2028bar.png", "\\u2028"),  # JS line separator
            ("foo\u2029bar.png", "\\u2029"),  # JS paragraph separator
        ],
    )
    def test_html_significant_chars_escaped(self, value, escaped_substring):
        """The serialized JSON contains the six-character escape for the special char."""
        encoded = encode_cursor("name", value, "id-1")
        # Restore the stripped base64 padding before decoding.
        decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
        # Decoding as ASCII additionally fails if any raw non-ASCII byte leaked through.
        assert escaped_substring in decoded_bytes.decode("ascii"), (
            f"Expected {escaped_substring!r} in serialized payload, got: {decoded_bytes!r}"
        )

    def test_value_round_trips_through_escape(self):
        """Encoding then decoding a value with `<>&` should yield the original
        string — the escape only affects the wire form, not the decoded value."""
        original = "foo<&>bar.png"
        encoded = encode_cursor("name", original, "id-1")
        payload = decode_cursor(encoded, ALLOWED)
        assert payload.value == original
 class TestByteIdentityFixtures:
    """Pin the wire format so it doesn't drift silently.

    These fixtures assert exact byte equality of the encoded JSON payload —
    a change in key order, escape choice, separator whitespace, or anything
    else that shifts a byte fails the test loudly rather than diverging
    silently from any external consumer of the same payload format.
    """

    @pytest.mark.parametrize(
        "sort_field, value, asset_id, order, expected_payload",
        [
            (
                "created_at",
                "1716200000000000",
                "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7",
                "desc",
                '{"s":"created_at","v":"1716200000000000","id":"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7","o":"desc"}',
            ),
            (
                "size",
                "1024",
                "asset-123",
                "asc",
                '{"s":"size","v":"1024","id":"asset-123","o":"asc"}',
            ),
            (
                "name",
                "my-asset.png",
                "asset-abc",
                "desc",
                '{"s":"name","v":"my-asset.png","id":"asset-abc","o":"desc"}',
            ),
            (
                "name",
                "foo<bar>&baz.png",
                "asset-html",
                "desc",
                # `<`, `>`, `&` must appear on the wire as the literal
                # six-character escapes \u003c, \u003e, \u0026.
                '{"s":"name","v":"foo\\u003cbar\\u003e\\u0026baz.png","id":"asset-html","o":"desc"}',
            ),
        ],
    )
    def test_encoded_payload_shape_pinned(self, sort_field, value, asset_id, order, expected_payload):
        """The base64-decoded cursor equals the expected JSON text exactly."""
        encoded = encode_cursor(sort_field, value, asset_id, order=order)
        # Restore the stripped base64 padding before decoding.
        decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
        actual_payload = decoded_bytes.decode("utf-8")
        assert actual_payload == expected_payload, (
            f"wire format drifted for sort={sort_field!r}, value={value!r}:\n"
            f"  expected: {expected_payload!r}\n"
            f"  actual:   {actual_payload!r}"
        )
--- a/tests-unit/assets_test/test_list_cursor.py
+++ b/tests-unit/assets_test/test_list_cursor.py
@ -0,0 +1,349 @@
 """Integration tests for cursor-based pagination on GET /api/assets.
 These tests exercise the handler/service/query path end-to-end;
 cursor-encoding-level tests live in
 tests-unit/assets_test/services/test_cursor.py.
 """
 import pytest
 import requests
 def _seed(asset_factory, make_asset_bytes, count: int, tag: str) -> list[str]:
    """Create `count` assets named ``cursor_NN.safetensors`` and return the names sorted.

    Each asset is created through `asset_factory` with the tags
    ``models``, ``checkpoints``, ``unit-tests`` and `tag`, empty metadata,
    and the bytes produced by ``make_asset_bytes(name, size=2048)``.
    """
    created = []
    for index in range(count):
        filename = f"cursor_{index:02d}.safetensors"
        tags = ["models", "checkpoints", "unit-tests", tag]
        asset_factory(filename, tags, {}, make_asset_bytes(filename, size=2048))
        created.append(filename)
    return sorted(created)
 def test_cursor_pages_all_items_in_order(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """Follow `next_cursor` from the first page to the last and check the name order."""
    expected = _seed(asset_factory, make_asset_bytes, count=5, tag="cursor-walk")
    base_query = {
        "include_tags": "unit-tests,cursor-walk",
        "sort": "name",
        "order": "asc",
        "limit": "2",
    }
    collected: list[str] = []
    cursor: str | None = None
    page_count = 0
    while True:
        query = dict(base_query)
        if cursor is not None:
            query["after"] = cursor
        resp = http.get(api_base + "/api/assets", params=query, timeout=120)
        assert resp.status_code == 200, resp.text
        body = resp.json()
        collected += [asset["name"] for asset in body["assets"]]
        page_count += 1
        cursor = body.get("next_cursor")
        if cursor is None:
            break
        # A page that hands out a cursor must also announce more results.
        assert body["has_more"] is True
        assert page_count < 10, "guard against runaway cursor loop"
    assert collected == expected, f"expected {expected}, got {collected}"
    # `body` is the page that ended the walk: nothing may follow it.
    assert body["has_more"] is False
    assert "next_cursor" not in body
 def test_cursor_invalid_returns_400(http: requests.Session, api_base: str):
    """An `after` value that is not a decodable cursor is answered with 400 INVALID_CURSOR."""
    query = {"after": "not-a-real-cursor", "sort": "created_at"}
    resp = http.get(api_base + "/api/assets", params=query, timeout=120)
    assert resp.status_code == 400, resp.text
    error = resp.json()["error"]
    assert error["code"] == "INVALID_CURSOR"
 def test_cursor_sort_mismatch_returns_400(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """A cursor minted for one sort field is rejected when replayed with another."""
    _seed(asset_factory, make_asset_bytes, count=2, tag="cursor-mismatch")
    # Take a real cursor minted for sort=name.
    r = http.get(
        api_base + "/api/assets",
        params={
            "include_tags": "unit-tests,cursor-mismatch",
            "sort": "name",
            "order": "asc",
            "limit": "1",
        },
        timeout=120,
    )
    # Attach the response body so a failing setup request can be diagnosed.
    assert r.status_code == 200, r.text
    cursor = r.json()["next_cursor"]
    assert cursor is not None
    # Replay against sort=created_at — should fail with INVALID_CURSOR.
    r2 = http.get(
        api_base + "/api/assets",
        params={"after": cursor, "sort": "created_at"},
        timeout=120,
    )
    assert r2.status_code == 400, r2.text
    assert r2.json()["error"]["code"] == "INVALID_CURSOR"
 def test_cursor_wins_over_offset(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """When both `after` and `offset` are sent, the cursor decides the page."""
    names = _seed(asset_factory, make_asset_bytes, count=4, tag="cursor-vs-offset")
    # Take a cursor that points past the first item.
    r = http.get(
        api_base + "/api/assets",
        params={
            "include_tags": "unit-tests,cursor-vs-offset",
            "sort": "name",
            "order": "asc",
            "limit": "1",
        },
        timeout=120,
    )
    assert r.status_code == 200, r.text
    cursor = r.json()["next_cursor"]
    assert cursor is not None
    # Pass both 'after' and a large offset. Cursor must win; offset is ignored.
    r2 = http.get(
        api_base + "/api/assets",
        params={
            "include_tags": "unit-tests,cursor-vs-offset",
            "sort": "name",
            "order": "asc",
            "limit": "1",
            "after": cursor,
            "offset": "999",
        },
        timeout=120,
    )
    # Attach the response body so a rejected after+offset request can be diagnosed.
    assert r2.status_code == 200, r2.text
    body = r2.json()
    # Should land on the second name in sorted order — not skip ahead by 999.
    assert [a["name"] for a in body["assets"]] == [names[1]]
 def test_next_cursor_absent_when_no_more_results(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """A limit larger than the result set yields one page without a cursor."""
    _seed(asset_factory, make_asset_bytes, count=2, tag="cursor-exhaust")
    query = {
        "include_tags": "unit-tests,cursor-exhaust",
        "sort": "name",
        "order": "asc",
        "limit": "50",
    }
    resp = http.get(api_base + "/api/assets", params=query, timeout=120)
    assert resp.status_code == 200, resp.text
    payload = resp.json()
    assert payload["has_more"] is False
    assert "next_cursor" not in payload
 def test_cursor_pagination_first_page_mints_cursor(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """A request without `after` still has to return `next_cursor` when more
    rows exist; otherwise a client starting cold could never paginate.
    """
    _seed(asset_factory, make_asset_bytes, count=3, tag="cursor-first-page")
    query = {
        "include_tags": "unit-tests,cursor-first-page",
        "sort": "name",
        "order": "asc",
        "limit": "2",
    }
    resp = http.get(api_base + "/api/assets", params=query, timeout=120)
    assert resp.status_code == 200, resp.text
    first_page = resp.json()
    assert first_page["has_more"] is True
    assert first_page.get("next_cursor"), "first page must mint a cursor when more rows exist"
 def test_cursor_no_spurious_cursor_when_page_size_equals_remainder(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """With a row count that is an exact multiple of `limit`, the last full
    page must not advertise a further page.
    """
    _seed(asset_factory, make_asset_bytes, count=4, tag="cursor-exact-multiple")
    url = api_base + "/api/assets"
    query = {
        "include_tags": "unit-tests,cursor-exact-multiple",
        "sort": "name",
        "order": "asc",
        "limit": "2",
    }
    # First page: two of the four rows, so a cursor is expected.
    first = http.get(url, params=dict(query), timeout=120)
    assert first.status_code == 200, first.text
    cursor = first.json()["next_cursor"]
    assert cursor is not None
    # Second page: the remaining two rows, and no cursor for an empty third page.
    second = http.get(url, params={**query, "after": cursor}, timeout=120)
    assert second.status_code == 200, second.text
    last_page = second.json()
    assert len(last_page["assets"]) == 2
    assert last_page["has_more"] is False
    assert "next_cursor" not in last_page
@pytest.mark.parametrize("sort_field", ["created_at", "updated_at", "size"])
def test_cursor_walks_for_non_name_sorts(sort_field, http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """Walk the full result set by cursor for each non-name sort field.

    This is the end-to-end coverage for the `created_at` / `updated_at`
    and `size` cursor paths.
    """
    # Each asset is one byte larger than the previous one, so `size desc`
    # has a single correct order. The two timestamp sorts may tie if rows are
    # inserted faster than the clock resolves; for them only coverage and
    # absence of duplicates are asserted, not a clock-dependent order.
    tag = f"cursor-{sort_field}"
    seeded = []
    for i in range(4):
        filename = f"cursor_{sort_field}_{i:02d}.safetensors"
        asset_factory(filename, ["models", "checkpoints", "unit-tests", tag], {}, make_asset_bytes(filename, size=2048 + i))
        seeded.append(filename)
    base_query = {
        "include_tags": f"unit-tests,{tag}",
        "sort": sort_field,
        "order": "desc",
        "limit": "2",
    }
    walked: list[str] = []
    cursor: str | None = None
    page_count = 0
    while True:
        query = dict(base_query)
        if cursor is not None:
            query["after"] = cursor
        resp = http.get(api_base + "/api/assets", params=query, timeout=120)
        assert resp.status_code == 200, resp.text
        body = resp.json()
        walked += [asset["name"] for asset in body["assets"]]
        cursor = body.get("next_cursor")
        page_count += 1
        if cursor is None:
            break
        assert page_count < 10, "guard against runaway cursor loop"
    # A keyset boundary that hands the same row to two pages shows up here.
    assert len(walked) == len(set(walked)), (
        f"cursor walk repeated rows for sort={sort_field}: {walked}"
    )
    # Every seeded asset must have been reached.
    assert set(walked) == set(seeded), (
        f"missing items for sort={sort_field}: expected {set(seeded)}, got {set(walked)}"
    )
    # Exact order is asserted only where it does not depend on the clock.
    if sort_field == "size":
        largest_first = seeded[::-1]
        assert walked == largest_first, (
            f"size cursor walked out of order: got {walked}, expected {largest_first}"
        )
 def test_cursor_order_mismatch_returns_400(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """Replaying a desc-minted cursor with order=asc must be answered with
    400 rather than paging in the opposite direction."""
    _seed(asset_factory, make_asset_bytes, count=3, tag="cursor-order-flip")
    url = api_base + "/api/assets"
    desc_query = {
        "include_tags": "unit-tests,cursor-order-flip",
        "sort": "name",
        "order": "desc",
        "limit": "1",
    }
    minted = http.get(url, params=desc_query, timeout=120)
    assert minted.status_code == 200, minted.text
    cursor = minted.json()["next_cursor"]
    assert cursor is not None
    # Same filters and sort, opposite direction, plus the desc cursor.
    asc_query = {
        "include_tags": "unit-tests,cursor-order-flip",
        "sort": "name",
        "order": "asc",
        "limit": "1",
        "after": cursor,
    }
    replay = http.get(url, params=asc_query, timeout=120)
    assert replay.status_code == 400, replay.text
    assert replay.json()["error"]["code"] == "INVALID_CURSOR"
 def test_cursor_invalid_cursor_at_microsecond_boundary(http: requests.Session, api_base: str):
    """A cursor carrying an out-of-range microsecond timestamp must map to
    400 INVALID_CURSOR, not 500."""
    import base64
    import json
    # The value is ≈10^21 microseconds (21 nines), i.e. tens of millions of
    # years after the epoch — far beyond datetime.MAXYEAR (9999).
    # `o` and `order=` must be set; otherwise decode fails earlier on the
    # missing-order branch and the µs-overflow path is never exercised.
    payload = {"s": "created_at", "o": "desc", "v": "999999999999999999999", "id": "asset-x"}
    # Build the cursor by hand: compact JSON, URL-safe base64, padding stripped.
    raw = json.dumps(payload, separators=(",", ":")).encode("utf-8")
    cursor = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
    r = http.get(
        api_base + "/api/assets",
        params={"after": cursor, "sort": "created_at", "order": "desc"},
        timeout=120,
    )
    assert r.status_code == 400, r.text
    assert r.json()["error"]["code"] == "INVALID_CURSOR"
 def test_cursor_pagination_stable_after_delete(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
    """Deleting an already-returned row must not shift the page the cursor leads to.

    Fetches page 1 of four seeded assets (limit 2), deletes the first asset
    of that page, then follows the cursor and expects the last two names.
    """
    names = _seed(asset_factory, make_asset_bytes, count=4, tag="cursor-delete")
    # Page 1.
    r = http.get(
        api_base + "/api/assets",
        params={
            "include_tags": "unit-tests,cursor-delete",
            "sort": "name",
            "order": "asc",
            "limit": "2",
        },
        timeout=120,
    )
    assert r.status_code == 200
    body = r.json()
    page1_names = [a["name"] for a in body["assets"]]
    cursor = body["next_cursor"]
    assert cursor is not None
    assert page1_names == names[:2]
    # Delete an item from page 1 (already returned) — cursor should still
    # locate the next page from where it was minted, not re-index.
    target_id = body["assets"][0]["id"]
    d = http.delete(api_base + f"/api/assets/{target_id}", timeout=120)
    # Either 200 or 204 is accepted as a successful delete.
    assert d.status_code in (200, 204), d.text
    # Page 2 via cursor.
    r2 = http.get(
        api_base + "/api/assets",
        params={
            "include_tags": "unit-tests,cursor-delete",
            "sort": "name",
            "order": "asc",
            "limit": "2",
            "after": cursor,
        },
        timeout=120,
    )
    assert r2.status_code == 200, r2.text
    body2 = r2.json()
    # The third and fourth seeded names, unaffected by the deletion on page 1.
    assert [a["name"] for a in body2["assets"]] == names[2:]