mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-05-30 19:07:25 +08:00
Add cursor pagination for GET api assets (#14014)
Amp-Thread-ID: https://ampcode.com/threads/T-019e4ca5-b71a-7168-8f56-58b2325f34c3 Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
parent
916b33c795
commit
00c88a4634
@ -39,6 +39,7 @@ from app.assets.services import (
|
||||
update_asset_metadata,
|
||||
upload_from_temp_path,
|
||||
)
|
||||
from app.assets.services.cursor import InvalidCursorError
|
||||
from app.assets.services.path_utils import compute_paths_for_response
|
||||
from app.assets.services.tagging import list_tag_histogram
|
||||
|
||||
@ -182,7 +183,7 @@ def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResu
|
||||
user_metadata=result.ref.user_metadata or {},
|
||||
metadata=result.ref.system_metadata,
|
||||
job_id=result.ref.job_id,
|
||||
prompt_id=result.ref.job_id, # deprecated: mirrors job_id for cloud compat
|
||||
prompt_id=result.ref.job_id, # deprecated alias of job_id, kept for compatibility
|
||||
created_at=result.ref.created_at,
|
||||
updated_at=result.ref.updated_at,
|
||||
last_access_time=result.ref.last_access_time,
|
||||
@ -219,24 +220,37 @@ async def list_assets_route(request: web.Request) -> web.Response:
|
||||
order_candidate = (q.order or "desc").lower()
|
||||
order = order_candidate if order_candidate in {"asc", "desc"} else "desc"
|
||||
|
||||
result = list_assets_page(
|
||||
owner_id=USER_MANAGER.get_request_user_id(request),
|
||||
include_tags=q.include_tags,
|
||||
exclude_tags=q.exclude_tags,
|
||||
name_contains=q.name_contains,
|
||||
metadata_filter=q.metadata_filter,
|
||||
limit=q.limit,
|
||||
offset=q.offset,
|
||||
sort=sort,
|
||||
order=order,
|
||||
)
|
||||
try:
|
||||
result = list_assets_page(
|
||||
owner_id=USER_MANAGER.get_request_user_id(request),
|
||||
include_tags=q.include_tags,
|
||||
exclude_tags=q.exclude_tags,
|
||||
name_contains=q.name_contains,
|
||||
metadata_filter=q.metadata_filter,
|
||||
limit=q.limit,
|
||||
offset=q.offset,
|
||||
sort=sort,
|
||||
order=order,
|
||||
after=q.after,
|
||||
)
|
||||
except InvalidCursorError as e:
|
||||
return _build_error_response(400, "INVALID_CURSOR", str(e))
|
||||
|
||||
summaries = [_build_asset_response(item) for item in result.items]
|
||||
|
||||
# has_more semantics differ by mode:
|
||||
# - cursor mode: a non-empty next_cursor means there are more results.
|
||||
# - offset mode: derived from total - (offset + page size).
|
||||
if q.after is not None:
|
||||
has_more = result.next_cursor is not None
|
||||
else:
|
||||
has_more = (q.offset + len(summaries)) < result.total
|
||||
|
||||
payload = schemas_out.AssetsList(
|
||||
assets=summaries,
|
||||
total=result.total,
|
||||
has_more=(q.offset + len(summaries)) < result.total,
|
||||
has_more=has_more,
|
||||
next_cursor=result.next_cursor,
|
||||
)
|
||||
return web.json_response(payload.model_dump(mode="json", exclude_none=True))
|
||||
|
||||
|
||||
@ -59,6 +59,11 @@ class ListAssetsQuery(BaseModel):
|
||||
|
||||
limit: conint(ge=1, le=500) = 20
|
||||
offset: conint(ge=0) = 0
|
||||
# Opaque keyset cursor. When supplied, `offset` is ignored. Cursor pagination
|
||||
# is supported for sort values `created_at`, `updated_at`, `name`, `size`.
|
||||
# Supplying `after` together with `sort=last_access_time` returns
|
||||
# 400 INVALID_CURSOR; that sort only supports offset/limit.
|
||||
after: str | None = None
|
||||
|
||||
sort: Literal["name", "created_at", "updated_at", "size", "last_access_time"] = (
|
||||
"created_at"
|
||||
|
||||
@ -43,6 +43,8 @@ class AssetsList(BaseModel):
|
||||
assets: list[Asset]
|
||||
total: int
|
||||
has_more: bool
|
||||
# Opaque cursor for the next page. Omitted when there are no more results.
|
||||
next_cursor: str | None = None
|
||||
|
||||
|
||||
class TagUsage(BaseModel):
|
||||
|
||||
@ -266,9 +266,18 @@ def list_references_page(
|
||||
metadata_filter: dict | None = None,
|
||||
sort: str | None = None,
|
||||
order: str | None = None,
|
||||
after_cursor_value: object | None = None,
|
||||
after_cursor_id: str | None = None,
|
||||
) -> tuple[list[AssetReference], dict[str, list[str]], int]:
|
||||
"""List references with pagination, filtering, and sorting.
|
||||
|
||||
When ``after_cursor_value``/``after_cursor_id`` are supplied the query uses
|
||||
keyset pagination — ``offset`` is ignored and a WHERE clause selects rows
|
||||
strictly after the given ``(sort_col, id)`` position in the active sort
|
||||
direction. The cursor value must already be typed for the column
|
||||
(datetime for time sorts, int for size, str for name); the caller decodes
|
||||
the opaque cursor string and resolves to the typed value.
|
||||
|
||||
Returns (references, tag_map, total_count).
|
||||
"""
|
||||
base = (
|
||||
@ -297,9 +306,31 @@ def list_references_page(
|
||||
"size": Asset.size_bytes,
|
||||
}
|
||||
sort_col = sort_map.get(sort, AssetReference.created_at)
|
||||
sort_exp = sort_col.desc() if order == "desc" else sort_col.asc()
|
||||
descending = order == "desc"
|
||||
|
||||
base = base.order_by(sort_exp).limit(limit).offset(offset)
|
||||
# Keyset WHERE: (sort_col, id) strictly less-than / greater-than the cursor.
|
||||
# Equivalent to: sort_col <op> v OR (sort_col = v AND id <op> cursor_id).
|
||||
if after_cursor_value is not None and after_cursor_id is not None:
|
||||
if descending:
|
||||
keyset = sa.or_(
|
||||
sort_col < after_cursor_value,
|
||||
sa.and_(sort_col == after_cursor_value, AssetReference.id < after_cursor_id),
|
||||
)
|
||||
else:
|
||||
keyset = sa.or_(
|
||||
sort_col > after_cursor_value,
|
||||
sa.and_(sort_col == after_cursor_value, AssetReference.id > after_cursor_id),
|
||||
)
|
||||
base = base.where(keyset)
|
||||
|
||||
# Secondary ORDER BY id (matching the primary direction) gives the keyset
|
||||
# comparison a deterministic tiebreaker on duplicate sort_col values.
|
||||
id_exp = AssetReference.id.desc() if descending else AssetReference.id.asc()
|
||||
sort_exp = sort_col.desc() if descending else sort_col.asc()
|
||||
|
||||
base = base.order_by(sort_exp, id_exp).limit(limit)
|
||||
if after_cursor_id is None:
|
||||
base = base.offset(offset)
|
||||
|
||||
count_stmt = (
|
||||
select(sa.func.count())
|
||||
|
||||
@ -1,8 +1,19 @@
|
||||
import contextlib
|
||||
import mimetypes
|
||||
import os
|
||||
from datetime import timezone
|
||||
from typing import Sequence
|
||||
|
||||
from app.assets.services.cursor import (
|
||||
CursorPayload,
|
||||
InvalidCursorError,
|
||||
decode_cursor,
|
||||
decode_cursor_int,
|
||||
decode_cursor_time,
|
||||
encode_cursor,
|
||||
encode_cursor_from_time,
|
||||
)
|
||||
|
||||
|
||||
from app.assets.database.models import Asset
|
||||
from app.assets.database.queries import (
|
||||
@ -242,6 +253,11 @@ def get_asset_by_hash(asset_hash: str) -> AssetData | None:
|
||||
return extract_asset_data(asset)
|
||||
|
||||
|
||||
# Sort fields that support cursor pagination. `last_access_time` is not
|
||||
# in this list — it falls back to offset/limit.
|
||||
_CURSOR_SORT_FIELDS = ("created_at", "updated_at", "name", "size")
|
||||
|
||||
|
||||
def list_assets_page(
|
||||
owner_id: str = "",
|
||||
include_tags: Sequence[str] | None = None,
|
||||
@ -252,7 +268,39 @@ def list_assets_page(
|
||||
offset: int = 0,
|
||||
sort: str = "created_at",
|
||||
order: str = "desc",
|
||||
after: str | None = None,
|
||||
) -> ListAssetsResult:
|
||||
"""List assets with optional cursor pagination.
|
||||
|
||||
When ``after`` is supplied it overrides ``offset``. The cursor's sort field
|
||||
must match ``sort`` and be in the cursor-supported allowlist; mismatches
|
||||
raise InvalidCursorError so the handler can map to 400 INVALID_CURSOR.
|
||||
"""
|
||||
cursor_value: object | None = None
|
||||
cursor_id: str | None = None
|
||||
# Mint next_cursor on every page where the sort is cursor-supported, not
|
||||
# only when the request itself arrived with a cursor. Otherwise a first
|
||||
# request (no `after`) returns next_cursor=None and the client can never
|
||||
# enter cursor mode.
|
||||
mint_cursor = sort in _CURSOR_SORT_FIELDS
|
||||
|
||||
if after is not None:
|
||||
if sort not in _CURSOR_SORT_FIELDS:
|
||||
raise InvalidCursorError(
|
||||
f"cursor pagination is not supported for sort={sort!r}"
|
||||
)
|
||||
payload = decode_cursor(after, _CURSOR_SORT_FIELDS, expected_order=order)
|
||||
if payload.sort_field != sort:
|
||||
raise InvalidCursorError(
|
||||
f"cursor sort field {payload.sort_field!r} does not match request sort {sort!r}"
|
||||
)
|
||||
cursor_value, cursor_id = _resolve_cursor_value(payload), payload.id
|
||||
|
||||
# Over-fetch by one row so we can distinguish "exactly `limit` rows total
|
||||
# remaining" from "more rows past this page" without a second query. Drop
|
||||
# the sentinel before returning.
|
||||
fetch_limit = limit + 1 if mint_cursor else limit
|
||||
|
||||
with create_session() as session:
|
||||
refs, tag_map, total = list_references_page(
|
||||
session,
|
||||
@ -261,12 +309,22 @@ def list_assets_page(
|
||||
exclude_tags=exclude_tags,
|
||||
name_contains=name_contains,
|
||||
metadata_filter=metadata_filter,
|
||||
limit=limit,
|
||||
limit=fetch_limit,
|
||||
offset=offset,
|
||||
sort=sort,
|
||||
order=order,
|
||||
after_cursor_value=cursor_value,
|
||||
after_cursor_id=cursor_id,
|
||||
)
|
||||
|
||||
next_cursor: str | None = None
|
||||
if mint_cursor and len(refs) > limit:
|
||||
# There's at least one more row past this page — mint a cursor from
|
||||
# the last row of the page (i.e. index `limit - 1`, since we
|
||||
# over-fetched), and drop the sentinel.
|
||||
next_cursor = _encode_next_cursor(refs[limit - 1], sort, order)
|
||||
refs = refs[:limit]
|
||||
|
||||
items: list[AssetSummaryData] = []
|
||||
for ref in refs:
|
||||
items.append(
|
||||
@ -277,7 +335,39 @@ def list_assets_page(
|
||||
)
|
||||
)
|
||||
|
||||
return ListAssetsResult(items=items, total=total)
|
||||
return ListAssetsResult(items=items, total=total, next_cursor=next_cursor)
|
||||
|
||||
|
||||
def _resolve_cursor_value(payload: CursorPayload) -> object:
|
||||
"""Map a decoded cursor payload to a column-typed Python value."""
|
||||
if payload.sort_field in ("created_at", "updated_at"):
|
||||
# DB stores naive UTC; strip tzinfo so the comparison binds against a
|
||||
# `TIMESTAMP WITHOUT TIME ZONE` column without an offset shift.
|
||||
return decode_cursor_time(payload).replace(tzinfo=None)
|
||||
if payload.sort_field == "size":
|
||||
return decode_cursor_int(payload)
|
||||
return payload.value # name, str-typed
|
||||
|
||||
|
||||
def _encode_next_cursor(ref, sort: str, order: str) -> str | None:
|
||||
"""Mint a cursor pointing at *ref* for the given sort dimension.
|
||||
|
||||
Returns None when the boundary row carries a NULL sort value (e.g. an asset
|
||||
record whose size_bytes hasn't been backfilled). Continuing pagination
|
||||
across a NULL boundary is undefined under keyset ordering — better to
|
||||
truncate cleanly here than to mint a cursor that mis-positions.
|
||||
"""
|
||||
if sort == "name":
|
||||
return encode_cursor("name", ref.name, ref.id, order=order)
|
||||
if sort == "size":
|
||||
if ref.asset is None or ref.asset.size_bytes is None:
|
||||
return None
|
||||
return encode_cursor("size", str(ref.asset.size_bytes), ref.id, order=order)
|
||||
# created_at / updated_at — DB datetimes are naive UTC; attach tz before encoding.
|
||||
value = ref.created_at if sort == "created_at" else ref.updated_at
|
||||
if value is None:
|
||||
return None
|
||||
return encode_cursor_from_time(sort, value.replace(tzinfo=timezone.utc), ref.id, order=order)
|
||||
|
||||
|
||||
def resolve_hash_to_path(
|
||||
|
||||
225
app/assets/services/cursor.py
Normal file
225
app/assets/services/cursor.py
Normal file
@ -0,0 +1,225 @@
|
||||
"""Opaque keyset-pagination cursor for /api/assets.
|
||||
|
||||
Payload JSON uses short keys to keep the encoded length small:
|
||||
|
||||
{"s": <sort_field>, "v": <value>, "id": <id>, "o": <order>}
|
||||
|
||||
The `o` key binds the cursor to the sort direction it was minted under,
|
||||
so replaying a `desc` cursor against an `asc` request fails with
|
||||
``INVALID_CURSOR`` rather than silently walking the wrong direction.
|
||||
`o` is mandatory on every payload — a cursor without it is rejected as
|
||||
malformed.
|
||||
|
||||
Encoding is base64url with no padding. JSON serialization escapes `<`,
|
||||
`>`, `&`, U+2028, and U+2029 in encoded string values so asset names
|
||||
containing those characters produce a stable, byte-identical wire form
|
||||
across any compatible implementation of the same payload format.
|
||||
|
||||
Time values are serialized as Unix microseconds (UTC) — microsecond
|
||||
precision is sufficient to round-trip the timestamps stored by the
|
||||
database without rounding rows in the same millisecond bucket.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Iterable, Optional
|
||||
|
||||
|
||||
class InvalidCursorError(ValueError):
|
||||
"""Raised on a malformed, oversized, or unsupported-sort-field cursor.
|
||||
|
||||
Map to a 400 response with code ``INVALID_CURSOR`` at the handler.
|
||||
"""
|
||||
|
||||
|
||||
# Wire-format length caps. Cursors are user-controlled, so caps protect the
|
||||
# decode path from oversized allocations and downstream SQL predicates from
|
||||
# unbounded strings.
|
||||
#
|
||||
# MAX_CURSOR_VALUE_LENGTH is 512 to fit the `AssetReference.name` column max
|
||||
# (`String(512)`) — otherwise a long-named asset would mint a cursor the same
|
||||
# server then refuses on the next request.
|
||||
MAX_ENCODED_CURSOR_LENGTH = 1024
|
||||
MAX_CURSOR_VALUE_LENGTH = 512
|
||||
MAX_CURSOR_ID_LENGTH = 128
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CursorPayload:
|
||||
sort_field: str
|
||||
value: str
|
||||
id: str
|
||||
order: str
|
||||
|
||||
|
||||
_VALID_ORDERS = ("asc", "desc")
|
||||
|
||||
|
||||
def encode_cursor(sort_field: str, value: str, id: str, order: str = "desc") -> str:
|
||||
"""Encode a cursor payload as a base64url (no-padding) string.
|
||||
|
||||
`order` binds the cursor to the sort direction it was minted under so a
|
||||
later request with a flipped `order` query parameter is rejected with
|
||||
``INVALID_CURSOR`` rather than silently walking the wrong direction.
|
||||
"""
|
||||
if order not in _VALID_ORDERS:
|
||||
raise InvalidCursorError(f"order must be one of {_VALID_ORDERS}, got {order!r}")
|
||||
# Symmetric input validation: the encoder must reject anything the
|
||||
# decoder rejects, or the same server will mint cursors it then 400s on
|
||||
# the next request.
|
||||
if not id:
|
||||
raise InvalidCursorError("id must be non-empty")
|
||||
if len(id) > MAX_CURSOR_ID_LENGTH:
|
||||
raise InvalidCursorError("id exceeds maximum length")
|
||||
if len(value) > MAX_CURSOR_VALUE_LENGTH:
|
||||
raise InvalidCursorError("value exceeds maximum length")
|
||||
payload = {"s": sort_field, "v": value, "id": id, "o": order}
|
||||
raw = json.dumps(payload, separators=(",", ":"), ensure_ascii=False)
|
||||
# Match the default JSON escaping of HTML-significant characters and JS
|
||||
# line/paragraph separators (U+2028 / U+2029) so an asset name carrying
|
||||
# any of them encodes to identical bytes across runtimes. None of these
|
||||
# characters appear in JSON structural syntax, so a global replace on the
|
||||
# serialized output can only touch encoded values. Use explicit \uXXXX
|
||||
# escapes for U+2028 / U+2029 so the source survives any editor / git
|
||||
# tooling that normalizes invisible separators.
|
||||
raw = (
|
||||
raw.replace("<", "\\u003c")
|
||||
.replace(">", "\\u003e")
|
||||
.replace("&", "\\u0026")
|
||||
.replace("\u2028", "\\u2028")
|
||||
.replace("\u2029", "\\u2029")
|
||||
)
|
||||
encoded = base64.urlsafe_b64encode(raw.encode("utf-8")).rstrip(b"=").decode("ascii")
|
||||
# Final wire-size guard: the per-field caps above are char-counted, but the
|
||||
# wire cap applies to the base64url of the UTF-8-encoded, escape-expanded
|
||||
# payload. A value full of multibyte or HTML-significant characters (e.g.
|
||||
# 512 \u00d7 "\u00e9" or 512 \u00d7 "<") inflates well past MAX_ENCODED_CURSOR_LENGTH even
|
||||
# though it passes the char-count check. Refuse to mint a cursor the decoder
|
||||
# on the next request would reject.
|
||||
if len(encoded) > MAX_ENCODED_CURSOR_LENGTH:
|
||||
raise InvalidCursorError("encoded cursor exceeds maximum length")
|
||||
return encoded
|
||||
|
||||
|
||||
def encode_cursor_from_time(sort_field: str, t: datetime, id: str, order: str = "desc") -> str:
|
||||
"""Encode a time-typed cursor at Unix microsecond precision.
|
||||
|
||||
Accepts an aware datetime (any timezone) and normalizes to UTC. Naive
|
||||
datetimes are rejected so callers can't accidentally encode the local
|
||||
wall-clock value of a UTC-stored timestamp.
|
||||
"""
|
||||
if t.tzinfo is None:
|
||||
raise ValueError("encode_cursor_from_time requires an aware datetime")
|
||||
micros = _datetime_to_unix_micros(t.astimezone(timezone.utc))
|
||||
return encode_cursor(sort_field, str(micros), id, order=order)
|
||||
|
||||
|
||||
def decode_cursor(
|
||||
cursor: str,
|
||||
allowed_sort_fields: Iterable[str],
|
||||
expected_order: str | None = None,
|
||||
) -> CursorPayload:
|
||||
"""Parse an opaque cursor.
|
||||
|
||||
``allowed_sort_fields`` is the endpoint's accepted sort-field list — a
|
||||
cursor carrying a field outside this set is rejected so a cursor minted
|
||||
for one column can't be replayed against another (e.g. a ``created_at``
|
||||
timestamp string compared against a ``name`` column).
|
||||
|
||||
``expected_order`` (``"asc"``/``"desc"``), when supplied, must match the
|
||||
payload's ``o`` field. ``o`` is required on every payload; a cursor
|
||||
missing it is rejected as malformed.
|
||||
|
||||
Passing no allowed fields rejects every cursor.
|
||||
"""
|
||||
if len(cursor) > MAX_ENCODED_CURSOR_LENGTH:
|
||||
raise InvalidCursorError("cursor exceeds maximum length")
|
||||
|
||||
try:
|
||||
# urlsafe_b64decode requires correct padding; we strip on encode, so
|
||||
# restore the trailing '=' pad here.
|
||||
padding = "=" * (-len(cursor) % 4)
|
||||
raw = base64.urlsafe_b64decode(cursor + padding)
|
||||
except (ValueError, base64.binascii.Error) as e:
|
||||
raise InvalidCursorError(f"encoding: {e}") from e
|
||||
|
||||
try:
|
||||
decoded = json.loads(raw)
|
||||
except (json.JSONDecodeError, UnicodeDecodeError) as e:
|
||||
raise InvalidCursorError(f"payload: {e}") from e
|
||||
|
||||
if not isinstance(decoded, dict):
|
||||
raise InvalidCursorError("payload: expected object")
|
||||
|
||||
sort_field = decoded.get("s")
|
||||
value = decoded.get("v")
|
||||
id = decoded.get("id")
|
||||
order = decoded.get("o")
|
||||
|
||||
if not isinstance(sort_field, str) or not isinstance(value, str) or not isinstance(id, str):
|
||||
raise InvalidCursorError("payload: missing or non-string s/v/id")
|
||||
|
||||
if id == "":
|
||||
raise InvalidCursorError("missing id")
|
||||
if len(id) > MAX_CURSOR_ID_LENGTH:
|
||||
raise InvalidCursorError("id exceeds maximum length")
|
||||
if len(value) > MAX_CURSOR_VALUE_LENGTH:
|
||||
raise InvalidCursorError("value exceeds maximum length")
|
||||
|
||||
if sort_field not in allowed_sort_fields:
|
||||
raise InvalidCursorError(f"unsupported sort field {sort_field!r}")
|
||||
|
||||
if not isinstance(order, str):
|
||||
raise InvalidCursorError("missing or non-string o")
|
||||
if order not in _VALID_ORDERS:
|
||||
raise InvalidCursorError(f"unsupported order {order!r}")
|
||||
if expected_order is not None and order != expected_order:
|
||||
raise InvalidCursorError(
|
||||
f"cursor order {order!r} does not match request order {expected_order!r}"
|
||||
)
|
||||
|
||||
return CursorPayload(sort_field=sort_field, value=value, id=id, order=order)
|
||||
|
||||
|
||||
def decode_cursor_time(payload: Optional[CursorPayload]) -> datetime:
|
||||
"""Parse a time-typed cursor value as Unix microseconds, returning UTC."""
|
||||
if payload is None:
|
||||
raise InvalidCursorError("nil cursor payload")
|
||||
try:
|
||||
micros = int(payload.value)
|
||||
except ValueError as e:
|
||||
raise InvalidCursorError(f"value is not a valid timestamp: {e}") from e
|
||||
try:
|
||||
return _unix_micros_to_datetime(micros)
|
||||
except (OverflowError, OSError, ValueError) as e:
|
||||
# Crafted out-of-range microseconds (e.g. > datetime.MAX_YEAR) blow up
|
||||
# in fromtimestamp / datetime construction. Map to 400, not 500.
|
||||
raise InvalidCursorError(f"value is out of representable range: {e}") from e
|
||||
|
||||
|
||||
def decode_cursor_int(payload: Optional[CursorPayload]) -> int:
|
||||
"""Parse a cursor value as a base-10 integer."""
|
||||
if payload is None:
|
||||
raise InvalidCursorError("nil cursor payload")
|
||||
try:
|
||||
return int(payload.value)
|
||||
except ValueError as e:
|
||||
raise InvalidCursorError(f"value is not a valid integer: {e}") from e
|
||||
|
||||
|
||||
_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _datetime_to_unix_micros(t: datetime) -> int:
|
||||
"""Convert an aware UTC datetime to Unix microseconds (integer math)."""
|
||||
delta = t - _EPOCH
|
||||
return (delta.days * 86_400 + delta.seconds) * 1_000_000 + delta.microseconds
|
||||
|
||||
|
||||
def _unix_micros_to_datetime(micros: int) -> datetime:
|
||||
"""Convert Unix microseconds to a UTC datetime, preserving precision."""
|
||||
seconds, micro_remainder = divmod(micros, 1_000_000)
|
||||
return datetime.fromtimestamp(seconds, tz=timezone.utc).replace(microsecond=micro_remainder)
|
||||
@ -71,6 +71,7 @@ class AssetSummaryData:
|
||||
class ListAssetsResult:
|
||||
items: list[AssetSummaryData]
|
||||
total: int
|
||||
next_cursor: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
|
||||
58
openapi.yaml
58
openapi.yaml
@ -1517,6 +1517,22 @@ paths:
|
||||
schema:
|
||||
type: integer
|
||||
default: 0
|
||||
description: |
|
||||
Offset-based pagination. Cursor pagination via `after` is preferred
|
||||
for sequential walks (stable across concurrent inserts/deletes) but
|
||||
`offset` remains fully supported for random access (jump-to-page
|
||||
UIs, "showing items X–Y of N" displays). When both are supplied,
|
||||
`after` wins and `offset` is ignored.
|
||||
- name: after
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: |
|
||||
Opaque cursor for keyset pagination. Pass the `next_cursor` value
|
||||
from a previous response to fetch the next page. Stable across
|
||||
inserts/deletes between pages. Supported with `sort` values
|
||||
`created_at`, `updated_at`, `name`, and `size`. Malformed or
|
||||
unsupported cursors return 400 with `INVALID_CURSOR`.
|
||||
- name: include_tags
|
||||
in: query
|
||||
schema:
|
||||
@ -1575,6 +1591,12 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ListAssetsResponse"
|
||||
"400":
|
||||
description: Malformed query or cursor (e.g. `INVALID_CURSOR`)
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/AssetsApiError"
|
||||
post:
|
||||
operationId: createAsset
|
||||
tags: [assets]
|
||||
@ -6761,6 +6783,42 @@ components:
|
||||
type: integer
|
||||
has_more:
|
||||
type: boolean
|
||||
next_cursor:
|
||||
type: string
|
||||
description: |
|
||||
Opaque cursor to fetch the next page. Pass back as the `after`
|
||||
query parameter. Omitted when there are no more results.
|
||||
|
||||
AssetsApiError:
|
||||
type: object
|
||||
description: Error envelope returned by the assets API on 400 responses.
|
||||
required:
|
||||
- error
|
||||
properties:
|
||||
error:
|
||||
type: object
|
||||
required:
|
||||
- code
|
||||
- message
|
||||
- details
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
description: |
|
||||
Machine-readable error code. `INVALID_CURSOR` is returned when the
|
||||
`after` cursor is malformed, oversized, or its sort field does
|
||||
not match the request's `sort`. `INVALID_QUERY` covers other
|
||||
Pydantic validation failures.
|
||||
enum: [INVALID_CURSOR, INVALID_QUERY]
|
||||
message:
|
||||
type: string
|
||||
details:
|
||||
type: object
|
||||
description: |
|
||||
Free-form, code-specific context. `INVALID_QUERY` populates this
|
||||
with an `errors` array of Pydantic validation entries;
|
||||
`INVALID_CURSOR` returns an empty object.
|
||||
additionalProperties: true
|
||||
|
||||
TagInfo:
|
||||
type: object
|
||||
|
||||
112
tests-unit/assets_test/queries/test_asset_reference_keyset.py
Normal file
112
tests-unit/assets_test/queries/test_asset_reference_keyset.py
Normal file
@ -0,0 +1,112 @@
|
||||
"""Keyset-pagination tiebreaker tests for list_references_page.
|
||||
|
||||
When multiple rows share the same primary sort value (e.g. four assets
|
||||
created in the same microsecond), the secondary `ORDER BY id` is what keeps
|
||||
keyset pagination from losing or repeating rows. This file exercises that
|
||||
branch directly against an in-memory SQLite session — engineering identical
|
||||
timestamps via HTTP is unreliable enough that we work at the query layer.
|
||||
"""
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.models import Asset, AssetReference
|
||||
from app.assets.database.queries.asset_reference import list_references_page
|
||||
|
||||
|
||||
def _make_ref(session: Session, created_at: datetime, name: str, owner: str = "") -> AssetReference:
|
||||
asset = Asset(hash=f"blake3:{uuid.uuid4().hex}", size_bytes=1024)
|
||||
session.add(asset)
|
||||
session.flush()
|
||||
ref = AssetReference(
|
||||
id=str(uuid.uuid4()),
|
||||
asset_id=asset.id,
|
||||
owner_id=owner,
|
||||
name=name,
|
||||
file_path=f"/tmp/{name}",
|
||||
created_at=created_at,
|
||||
updated_at=created_at,
|
||||
last_access_time=created_at,
|
||||
is_missing=False,
|
||||
)
|
||||
session.add(ref)
|
||||
return ref
|
||||
|
||||
|
||||
@pytest.mark.parametrize("order", ["desc", "asc"])
|
||||
def test_tiebreaker_walks_duplicate_sort_values(session: Session, order: str):
|
||||
"""Four rows with the SAME created_at must paginate cleanly under cursor
|
||||
mode — no row dropped, no row repeated, despite the primary sort column
|
||||
being non-discriminating.
|
||||
"""
|
||||
shared_ts = datetime(2024, 5, 20, 12, 0, 0) # naive UTC, like the DB stores
|
||||
refs = [_make_ref(session, shared_ts, f"tie_{i}.png") for i in range(4)]
|
||||
session.commit()
|
||||
|
||||
expected_ids = sorted([r.id for r in refs], reverse=(order == "desc"))
|
||||
|
||||
# Walk the cursor by hand: page size 2, take 3 pages (2 + 2 + 0).
|
||||
seen: list[str] = []
|
||||
after_value = None
|
||||
after_id = None
|
||||
for _ in range(4): # generous loop bound; ought to be 2 iterations
|
||||
page, _tag_map, _total = list_references_page(
|
||||
session,
|
||||
limit=2,
|
||||
sort="created_at",
|
||||
order=order,
|
||||
after_cursor_value=after_value,
|
||||
after_cursor_id=after_id,
|
||||
)
|
||||
if not page:
|
||||
break
|
||||
seen.extend(p.id for p in page)
|
||||
# Use the last row's (created_at, id) as the next cursor input.
|
||||
last = page[-1]
|
||||
after_value, after_id = last.created_at, last.id
|
||||
if len(page) < 2:
|
||||
break
|
||||
|
||||
assert seen == expected_ids, (
|
||||
f"keyset tiebreaker failed for order={order}: expected {expected_ids}, got {seen}"
|
||||
)
|
||||
|
||||
|
||||
def test_tiebreaker_no_duplicates_under_mixed_collisions(session: Session):
|
||||
"""Some rows share a timestamp, some don't. The cursor must still walk
|
||||
every row exactly once regardless of where ties sit relative to a
|
||||
page boundary."""
|
||||
t1 = datetime(2024, 5, 20, 12, 0, 0)
|
||||
t2 = datetime(2024, 5, 20, 12, 0, 1)
|
||||
layout = [t1, t1, t1, t2, t2] # three rows at t1, two at t2
|
||||
refs = [_make_ref(session, ts, f"mix_{i}.png") for i, ts in enumerate(layout)]
|
||||
session.commit()
|
||||
|
||||
all_ids = {r.id for r in refs}
|
||||
seen_set: set[str] = set()
|
||||
seen_list: list[str] = []
|
||||
after_value = None
|
||||
after_id = None
|
||||
for _ in range(6):
|
||||
page, _, _ = list_references_page(
|
||||
session,
|
||||
limit=2,
|
||||
sort="created_at",
|
||||
order="desc",
|
||||
after_cursor_value=after_value,
|
||||
after_cursor_id=after_id,
|
||||
)
|
||||
if not page:
|
||||
break
|
||||
for p in page:
|
||||
assert p.id not in seen_set, f"duplicate row {p.id} appeared in cursor walk"
|
||||
seen_set.add(p.id)
|
||||
seen_list.append(p.id)
|
||||
last = page[-1]
|
||||
after_value, after_id = last.created_at, last.id
|
||||
if len(page) < 2:
|
||||
break
|
||||
|
||||
assert seen_set == all_ids, f"missing rows: expected {all_ids}, got {seen_set}"
|
||||
354
tests-unit/assets_test/services/test_cursor.py
Normal file
354
tests-unit/assets_test/services/test_cursor.py
Normal file
@ -0,0 +1,354 @@
|
||||
"""Tests for app.assets.services.cursor.
|
||||
|
||||
The byte-identity fixtures below pin the wire format so a parallel
|
||||
implementation in another runtime can mint exchange-compatible cursors
|
||||
for the same payload. Drift here would break frontend pagination against
|
||||
any compatible backend.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from app.assets.services.cursor import (
|
||||
MAX_CURSOR_ID_LENGTH,
|
||||
MAX_CURSOR_VALUE_LENGTH,
|
||||
MAX_ENCODED_CURSOR_LENGTH,
|
||||
CursorPayload,
|
||||
InvalidCursorError,
|
||||
decode_cursor,
|
||||
decode_cursor_int,
|
||||
decode_cursor_time,
|
||||
encode_cursor,
|
||||
encode_cursor_from_time,
|
||||
)
|
||||
|
||||
|
||||
ALLOWED = ("created_at", "updated_at", "name", "size")
|
||||
|
||||
|
||||
class TestRoundTrip:
|
||||
@pytest.mark.parametrize(
|
||||
"sort_field, value, id",
|
||||
[
|
||||
("created_at", "1716200000000000", "a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7"),
|
||||
("size", "1024", "asset-123"),
|
||||
("name", "my-asset.png", "asset-abc"),
|
||||
("name", "résumé.txt", "asset-uni"),
|
||||
],
|
||||
)
|
||||
def test_encode_decode(self, sort_field, value, id):
|
||||
encoded = encode_cursor(sort_field, value, id)
|
||||
assert encoded != ""
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.sort_field == sort_field
|
||||
assert payload.value == value
|
||||
assert payload.id == id
|
||||
|
||||
|
||||
class TestTimeCursor:
|
||||
def test_microsecond_precision_preserved(self):
|
||||
# Pick a time with non-zero microseconds — encoding at ms would lose the µs.
|
||||
ts = datetime(2024, 5, 20, 12, 53, 20, 123456, tzinfo=timezone.utc)
|
||||
encoded = encode_cursor_from_time("created_at", ts, "id-1")
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
# Value must be a microsecond integer string, not a millisecond one.
|
||||
assert payload.value == "1716209600123456"
|
||||
decoded = decode_cursor_time(payload)
|
||||
assert decoded == ts
|
||||
|
||||
def test_decode_returns_utc(self):
|
||||
payload = CursorPayload(sort_field="created_at", value="1716200000123456", id="id-1", order="desc")
|
||||
decoded = decode_cursor_time(payload)
|
||||
assert decoded.tzinfo == timezone.utc
|
||||
|
||||
def test_naive_datetime_rejected_on_encode(self):
|
||||
naive = datetime(2024, 5, 20, 12, 0, 0)
|
||||
with pytest.raises(ValueError):
|
||||
encode_cursor_from_time("created_at", naive, "id-1")
|
||||
|
||||
def test_non_integer_value_rejected_on_decode(self):
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor_time(CursorPayload("created_at", "not-a-number", "id-1", "desc"))
|
||||
|
||||
def test_none_payload_rejected(self):
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor_time(None)
|
||||
|
||||
def test_non_utc_aware_normalized(self):
|
||||
# Same instant, different timezone — must encode to the same micros.
|
||||
utc_ts = datetime(2024, 5, 20, 12, 0, 0, tzinfo=timezone.utc)
|
||||
offset_ts = utc_ts.astimezone(timezone(timedelta(hours=-5)))
|
||||
assert encode_cursor_from_time("created_at", utc_ts, "x") == encode_cursor_from_time(
|
||||
"created_at", offset_ts, "x"
|
||||
)
|
||||
|
||||
|
||||
class TestIntCursor:
|
||||
def test_decode_int(self):
|
||||
assert decode_cursor_int(CursorPayload("size", "1024", "id-1", "desc")) == 1024
|
||||
|
||||
def test_decode_int_rejects_non_int(self):
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor_int(CursorPayload("size", "abc", "id-1", "desc"))
|
||||
|
||||
def test_decode_int_rejects_none(self):
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor_int(None)
|
||||
|
||||
|
||||
class TestInvalidInputs:
|
||||
def test_oversized_cursor(self):
|
||||
oversized = "a" * (MAX_ENCODED_CURSOR_LENGTH + 1)
|
||||
with pytest.raises(InvalidCursorError, match="maximum length"):
|
||||
decode_cursor(oversized, ALLOWED)
|
||||
|
||||
def test_not_base64(self):
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor("not base64!!!", ALLOWED)
|
||||
|
||||
def test_not_json(self):
|
||||
encoded = base64.urlsafe_b64encode(b"definitely not json").rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_empty_id(self):
|
||||
# Encoder rejects empty id symmetrically with the decoder, so build the
|
||||
# payload manually to exercise the decoder's missing-id branch.
|
||||
raw = b'{"s":"created_at","v":"1","id":"","o":"desc"}'
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="missing id"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_oversized_id(self):
|
||||
# Encoder enforces the cap symmetrically; hand-build to exercise decode.
|
||||
big_id = "a" * (MAX_CURSOR_ID_LENGTH + 1)
|
||||
raw = ('{"s":"created_at","v":"1","id":"' + big_id + '","o":"desc"}').encode("ascii")
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="id exceeds maximum length"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_oversized_value(self):
|
||||
# Encoder enforces the cap symmetrically; hand-build to exercise decode.
|
||||
big_v = "v" * (MAX_CURSOR_VALUE_LENGTH + 1)
|
||||
raw = ('{"s":"created_at","v":"' + big_v + '","id":"id-1","o":"desc"}').encode("ascii")
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_unsupported_sort_field(self):
|
||||
encoded = encode_cursor("execution_time", "1", "id-1")
|
||||
with pytest.raises(InvalidCursorError, match="unsupported sort field"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_no_allowed_fields_rejects_everything(self):
|
||||
encoded = encode_cursor("created_at", "1", "id-1")
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor(encoded, ())
|
||||
|
||||
def test_non_dict_payload_rejected(self):
|
||||
encoded = base64.urlsafe_b64encode(b'["array","not","dict"]').rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="expected object"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
|
||||
class TestEncodeAtCapsFits:
|
||||
def test_max_field_lengths_fit_wire_cap(self):
|
||||
# Worst-case payload: value and id at their per-field caps, with a long
|
||||
# sort field name. The encoded cursor must fit within MAX_ENCODED_CURSOR_LENGTH
|
||||
# so the wire cap cannot reject a cursor the encoder mints at the per-field caps.
|
||||
value = "v" * MAX_CURSOR_VALUE_LENGTH
|
||||
id = "i" * MAX_CURSOR_ID_LENGTH
|
||||
sort_field = "very_long_sort_field_name"
|
||||
|
||||
encoded = encode_cursor(sort_field, value, id)
|
||||
assert len(encoded) <= MAX_ENCODED_CURSOR_LENGTH
|
||||
payload = decode_cursor(encoded, (sort_field,))
|
||||
assert payload.value == value
|
||||
assert payload.id == id
|
||||
|
||||
|
||||
class TestDatetimeOverflow:
|
||||
"""Crafted cursors with extreme micros must map to InvalidCursorError,
|
||||
not OverflowError/OSError leaking as 500.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"micros_str",
|
||||
[
|
||||
"999999999999999999999", # 10^21 µs — past datetime.MAX_YEAR by ~14 orders
|
||||
"-999999999999999999999", # symmetric negative — pre-epoch overflow
|
||||
],
|
||||
)
|
||||
def test_out_of_range_micros_rejected(self, micros_str):
|
||||
encoded = encode_cursor("created_at", micros_str, "asset-x")
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
with pytest.raises(InvalidCursorError):
|
||||
decode_cursor_time(payload)
|
||||
|
||||
|
||||
class TestEncoderDecoderSymmetry:
|
||||
"""The encoder must reject inputs the decoder rejects, or the same server
|
||||
will mint a cursor it then 400s on the next request.
|
||||
"""
|
||||
|
||||
def test_long_name_within_cap_round_trips(self):
|
||||
"""Assets allow names up to 512 chars (`String(512)`); the cursor
|
||||
encoder must round-trip a value at that cap so a freshly minted
|
||||
cursor never fails decode on the next request."""
|
||||
long_name = "n" * MAX_CURSOR_VALUE_LENGTH
|
||||
encoded = encode_cursor("name", long_name, "asset-x")
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.value == long_name
|
||||
|
||||
def test_encoder_rejects_empty_id(self):
|
||||
with pytest.raises(InvalidCursorError, match="id must be non-empty"):
|
||||
encode_cursor("created_at", "1", "")
|
||||
|
||||
def test_encoder_rejects_oversized_id(self):
|
||||
with pytest.raises(InvalidCursorError, match="id exceeds maximum length"):
|
||||
encode_cursor("created_at", "1", "a" * (MAX_CURSOR_ID_LENGTH + 1))
|
||||
|
||||
def test_encoder_rejects_oversized_value(self):
|
||||
with pytest.raises(InvalidCursorError, match="value exceeds maximum length"):
|
||||
encode_cursor("name", "v" * (MAX_CURSOR_VALUE_LENGTH + 1), "id-1")
|
||||
|
||||
def test_encoder_rejects_multibyte_value_over_wire_cap(self):
|
||||
"""A value that passes the char-count cap can still inflate past the
|
||||
wire cap once UTF-8-encoded. Asset name made of 512 × multibyte
|
||||
characters (e.g. 'é' = 2 bytes) must be rejected at encode time, not
|
||||
minted into a cursor the next request will 400."""
|
||||
with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
|
||||
encode_cursor("name", "é" * MAX_CURSOR_VALUE_LENGTH, "asset-multibyte")
|
||||
|
||||
def test_encoder_rejects_escape_heavy_value_over_wire_cap(self):
|
||||
"""Same wire-cap concern via escape expansion: each `<` serializes to
|
||||
the six-byte sequence `\\u003c`, so 512 of them blow past the encoded
|
||||
cap even though the raw char count is within the per-field limit."""
|
||||
with pytest.raises(InvalidCursorError, match="encoded cursor exceeds maximum length"):
|
||||
encode_cursor("name", "<" * MAX_CURSOR_VALUE_LENGTH, "asset-escape")
|
||||
|
||||
|
||||
class TestOrderBinding:
|
||||
def test_order_baked_into_payload(self):
|
||||
encoded = encode_cursor("created_at", "1", "id-1", order="asc")
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.order == "asc"
|
||||
|
||||
def test_mismatched_order_rejected(self):
|
||||
encoded = encode_cursor("created_at", "1", "id-1", order="desc")
|
||||
with pytest.raises(InvalidCursorError, match="does not match request order"):
|
||||
decode_cursor(encoded, ALLOWED, expected_order="asc")
|
||||
|
||||
def test_matching_order_accepted(self):
|
||||
encoded = encode_cursor("created_at", "1", "id-1", order="desc")
|
||||
payload = decode_cursor(encoded, ALLOWED, expected_order="desc")
|
||||
assert payload.order == "desc"
|
||||
|
||||
def test_invalid_order_token_rejected_on_encode(self):
|
||||
with pytest.raises(ValueError):
|
||||
encode_cursor("created_at", "1", "id-1", order="sideways")
|
||||
|
||||
def test_invalid_order_token_rejected_on_decode(self):
|
||||
# Hand-craft a payload with an illegal `o` value.
|
||||
raw = b'{"s":"name","v":"x","id":"id-1","o":"sideways"}'
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="unsupported order"):
|
||||
decode_cursor(encoded, ALLOWED)
|
||||
|
||||
def test_cursor_without_order_rejected(self):
|
||||
"""`o` is mandatory. A cursor minted without it is rejected as
|
||||
malformed rather than silently walking the keyset in whatever
|
||||
direction the request happens to ask for."""
|
||||
raw = b'{"s":"name","v":"x","id":"id-1"}'
|
||||
encoded = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
with pytest.raises(InvalidCursorError, match="missing or non-string o"):
|
||||
decode_cursor(encoded, ALLOWED, expected_order="desc")
|
||||
|
||||
|
||||
class TestHtmlSignificantCharEscaping:
|
||||
"""An asset name containing `<`, `>`, `&`, U+2028, or U+2029 must encode
|
||||
to the same escaped wire bytes as any compatible implementation of the
|
||||
same payload format. Drift here breaks cross-runtime byte-identity for
|
||||
those characters.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value, escaped_substring",
|
||||
[
|
||||
("foo<bar>.png", "\\u003c"), # `<` escaped
|
||||
("foo<bar>.png", "\\u003e"), # `>` escaped
|
||||
("foo&bar.png", "\\u0026"),
|
||||
("foo
bar.png", "\\u2028"), # JS line separator
|
||||
("foo
bar.png", "\\u2029"), # JS paragraph separator
|
||||
],
|
||||
)
|
||||
def test_html_significant_chars_escaped(self, value, escaped_substring):
|
||||
encoded = encode_cursor("name", value, "id-1")
|
||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
||||
assert escaped_substring in decoded_bytes.decode("ascii"), (
|
||||
f"Expected {escaped_substring!r} in serialized payload, got: {decoded_bytes!r}"
|
||||
)
|
||||
|
||||
def test_value_round_trips_through_escape(self):
|
||||
"""Encoding then decoding a value with `<>&` should yield the original
|
||||
string — the escape only affects the wire form, not the decoded value."""
|
||||
original = "foo<&>bar.png"
|
||||
encoded = encode_cursor("name", original, "id-1")
|
||||
payload = decode_cursor(encoded, ALLOWED)
|
||||
assert payload.value == original
|
||||
|
||||
|
||||
class TestByteIdentityFixtures:
|
||||
"""Pin the wire format so it doesn't drift silently.
|
||||
|
||||
These fixtures assert exact byte equality of the encoded JSON payload —
|
||||
a change in key order, escape choice, separator whitespace, or anything
|
||||
else that shifts a byte fails the test loudly rather than diverging
|
||||
silently from any external consumer of the same payload format.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sort_field, value, id, order, expected_payload",
|
||||
[
|
||||
(
|
||||
"created_at",
|
||||
"1716200000000000",
|
||||
"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7",
|
||||
"desc",
|
||||
'{"s":"created_at","v":"1716200000000000","id":"a1b2c3d4-e5f6-7a89-b0c1-d2e3f4a5b6c7","o":"desc"}',
|
||||
),
|
||||
(
|
||||
"size",
|
||||
"1024",
|
||||
"asset-123",
|
||||
"asc",
|
||||
'{"s":"size","v":"1024","id":"asset-123","o":"asc"}',
|
||||
),
|
||||
(
|
||||
"name",
|
||||
"my-asset.png",
|
||||
"asset-abc",
|
||||
"desc",
|
||||
'{"s":"name","v":"my-asset.png","id":"asset-abc","o":"desc"}',
|
||||
),
|
||||
(
|
||||
"name",
|
||||
"foo<bar>&baz.png",
|
||||
"asset-html",
|
||||
"desc",
|
||||
# `<`, `>`, `&` escape to <, >, & in the value.
|
||||
'{"s":"name","v":"foo\\u003cbar\\u003e\\u0026baz.png","id":"asset-html","o":"desc"}',
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_encoded_payload_shape_pinned(self, sort_field, value, id, order, expected_payload):
|
||||
encoded = encode_cursor(sort_field, value, id, order=order)
|
||||
decoded_bytes = base64.urlsafe_b64decode(encoded + "=" * (-len(encoded) % 4))
|
||||
assert decoded_bytes.decode("utf-8") == expected_payload, (
|
||||
f"wire format drifted for sort={sort_field!r}, value={value!r}:\n"
|
||||
f" expected: {expected_payload!r}\n"
|
||||
f" actual: {decoded_bytes.decode('utf-8')!r}"
|
||||
)
|
||||
349
tests-unit/assets_test/test_list_cursor.py
Normal file
349
tests-unit/assets_test/test_list_cursor.py
Normal file
@ -0,0 +1,349 @@
|
||||
"""Integration tests for cursor-based pagination on GET /api/assets.
|
||||
|
||||
These tests exercise the handler/service/query path end-to-end;
|
||||
cursor-encoding-level tests live in
|
||||
tests-unit/assets_test/services/test_cursor.py.
|
||||
"""
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
|
||||
def _seed(asset_factory, make_asset_bytes, count: int, tag: str) -> list[str]:
|
||||
names = [f"cursor_{i:02d}.safetensors" for i in range(count)]
|
||||
for n in names:
|
||||
asset_factory(
|
||||
n,
|
||||
["models", "checkpoints", "unit-tests", tag],
|
||||
{},
|
||||
make_asset_bytes(n, size=2048),
|
||||
)
|
||||
return sorted(names)
|
||||
|
||||
|
||||
def test_cursor_pages_all_items_in_order(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
names = _seed(asset_factory, make_asset_bytes, count=5, tag="cursor-walk")
|
||||
|
||||
params = {
|
||||
"include_tags": "unit-tests,cursor-walk",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "2",
|
||||
}
|
||||
|
||||
seen: list[str] = []
|
||||
after: str | None = None
|
||||
pages = 0
|
||||
while True:
|
||||
page_params = dict(params)
|
||||
if after is not None:
|
||||
page_params["after"] = after
|
||||
r = http.get(api_base + "/api/assets", params=page_params, timeout=120)
|
||||
assert r.status_code == 200, r.text
|
||||
body = r.json()
|
||||
seen.extend(a["name"] for a in body["assets"])
|
||||
pages += 1
|
||||
after = body.get("next_cursor")
|
||||
if after is None:
|
||||
break
|
||||
assert body["has_more"] is True
|
||||
assert pages < 10, "guard against runaway cursor loop"
|
||||
|
||||
assert seen == names, f"expected {names}, got {seen}"
|
||||
# Last page should have has_more False
|
||||
assert body["has_more"] is False
|
||||
assert "next_cursor" not in body
|
||||
|
||||
|
||||
def test_cursor_invalid_returns_400(http: requests.Session, api_base: str):
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"after": "not-a-real-cursor", "sort": "created_at"},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 400, r.text
|
||||
body = r.json()
|
||||
assert body["error"]["code"] == "INVALID_CURSOR"
|
||||
|
||||
|
||||
def test_cursor_sort_mismatch_returns_400(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
_seed(asset_factory, make_asset_bytes, count=2, tag="cursor-mismatch")
|
||||
|
||||
# Take a real cursor minted for sort=name.
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-mismatch",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "1",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
cursor = r.json()["next_cursor"]
|
||||
assert cursor is not None
|
||||
|
||||
# Replay against sort=created_at — should fail with INVALID_CURSOR.
|
||||
r2 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"after": cursor, "sort": "created_at"},
|
||||
timeout=120,
|
||||
)
|
||||
assert r2.status_code == 400, r2.text
|
||||
assert r2.json()["error"]["code"] == "INVALID_CURSOR"
|
||||
|
||||
|
||||
def test_cursor_wins_over_offset(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
names = _seed(asset_factory, make_asset_bytes, count=4, tag="cursor-vs-offset")
|
||||
|
||||
# Take a cursor that points past the first item.
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-vs-offset",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "1",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200, r.text
|
||||
cursor = r.json()["next_cursor"]
|
||||
assert cursor is not None
|
||||
|
||||
# Pass both 'after' and a large offset. Cursor must win; offset is ignored.
|
||||
r2 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-vs-offset",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "1",
|
||||
"after": cursor,
|
||||
"offset": "999",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r2.status_code == 200
|
||||
body = r2.json()
|
||||
# Should land on the second name in sorted order — not skip ahead by 999.
|
||||
assert [a["name"] for a in body["assets"]] == [names[1]]
|
||||
|
||||
|
||||
def test_next_cursor_absent_when_no_more_results(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
_seed(asset_factory, make_asset_bytes, count=2, tag="cursor-exhaust")
|
||||
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-exhaust",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "50",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200, r.text
|
||||
body = r.json()
|
||||
assert body["has_more"] is False
|
||||
assert "next_cursor" not in body
|
||||
|
||||
|
||||
def test_cursor_pagination_first_page_mints_cursor(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
"""First-page request (no `after`) must still return `next_cursor` when
|
||||
more rows exist, or pagination is unreachable from a cold start.
|
||||
"""
|
||||
_seed(asset_factory, make_asset_bytes, count=3, tag="cursor-first-page")
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": "unit-tests,cursor-first-page", "sort": "name", "order": "asc", "limit": "2"},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200, r.text
|
||||
body = r.json()
|
||||
assert body["has_more"] is True
|
||||
assert body.get("next_cursor"), "first page must mint a cursor when more rows exist"
|
||||
|
||||
|
||||
def test_cursor_no_spurious_cursor_when_page_size_equals_remainder(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
"""When `total` is an exact multiple of `limit`, the final page must
|
||||
NOT carry a next_cursor — there is nothing past it.
|
||||
"""
|
||||
_seed(asset_factory, make_asset_bytes, count=4, tag="cursor-exact-multiple")
|
||||
# Page 1
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": "unit-tests,cursor-exact-multiple", "sort": "name", "order": "asc", "limit": "2"},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200, r.text
|
||||
cursor = r.json()["next_cursor"]
|
||||
assert cursor is not None
|
||||
# Page 2 — should exhaust the set with no cursor for a phantom page 3
|
||||
r2 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"include_tags": "unit-tests,cursor-exact-multiple", "sort": "name", "order": "asc", "limit": "2", "after": cursor},
|
||||
timeout=120,
|
||||
)
|
||||
assert r2.status_code == 200, r2.text
|
||||
body = r2.json()
|
||||
assert len(body["assets"]) == 2
|
||||
assert body["has_more"] is False
|
||||
assert "next_cursor" not in body
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sort_field", ["created_at", "updated_at", "size"])
|
||||
def test_cursor_walks_for_non_name_sorts(sort_field, http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
"""Cursor pagination must work for every sort field the contract claims.
|
||||
|
||||
Without this, the `created_at` / `updated_at` (time-encoded micros) and
|
||||
`size` (int-encoded) cursor paths go entirely unexercised end-to-end.
|
||||
"""
|
||||
# Sizes increase strictly by index, so `size desc` has a deterministic
|
||||
# expected order. Time-based sorts (created_at / updated_at) can tie when
|
||||
# rows are inserted faster than the DB's timestamp resolution; for those
|
||||
# we check coverage and no-duplicates and let the keyset tiebreaker do
|
||||
# the rest, instead of sleeping between inserts and asserting an order
|
||||
# that depends on clock granularity.
|
||||
names = []
|
||||
for i in range(4):
|
||||
n = f"cursor_{sort_field}_{i:02d}.safetensors"
|
||||
asset_factory(n, ["models", "checkpoints", "unit-tests", f"cursor-{sort_field}"], {}, make_asset_bytes(n, size=2048 + i))
|
||||
names.append(n)
|
||||
|
||||
params = {
|
||||
"include_tags": f"unit-tests,cursor-{sort_field}",
|
||||
"sort": sort_field,
|
||||
"order": "desc",
|
||||
"limit": "2",
|
||||
}
|
||||
seen: list[str] = []
|
||||
after: str | None = None
|
||||
pages = 0
|
||||
while True:
|
||||
page_params = dict(params)
|
||||
if after is not None:
|
||||
page_params["after"] = after
|
||||
r = http.get(api_base + "/api/assets", params=page_params, timeout=120)
|
||||
assert r.status_code == 200, r.text
|
||||
body = r.json()
|
||||
seen.extend(a["name"] for a in body["assets"])
|
||||
after = body.get("next_cursor")
|
||||
pages += 1
|
||||
if after is None:
|
||||
break
|
||||
assert pages < 10, "guard against runaway cursor loop"
|
||||
|
||||
# No duplicates: a faulty keyset boundary that returns the same row across
|
||||
# two pages must fail this check.
|
||||
assert len(seen) == len(set(seen)), (
|
||||
f"cursor walk repeated rows for sort={sort_field}: {seen}"
|
||||
)
|
||||
# Full coverage: every seeded asset reached exactly once.
|
||||
assert set(seen) == set(names), (
|
||||
f"missing items for sort={sort_field}: expected {set(names)}, got {set(seen)}"
|
||||
)
|
||||
# Strict order check for the only field with a clock-independent ordering.
|
||||
if sort_field == "size":
|
||||
assert seen == list(reversed(names)), (
|
||||
f"size cursor walked out of order: got {seen}, expected {list(reversed(names))}"
|
||||
)
|
||||
|
||||
|
||||
def test_cursor_order_mismatch_returns_400(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
"""A cursor minted under desc order replayed against asc must 400, not
|
||||
silently walk the wrong direction."""
|
||||
_seed(asset_factory, make_asset_bytes, count=3, tag="cursor-order-flip")
|
||||
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-order-flip",
|
||||
"sort": "name",
|
||||
"order": "desc",
|
||||
"limit": "1",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200, r.text
|
||||
cursor = r.json()["next_cursor"]
|
||||
assert cursor is not None
|
||||
|
||||
# Replay with order flipped to asc — server must reject the cursor.
|
||||
r2 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-order-flip",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "1",
|
||||
"after": cursor,
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r2.status_code == 400, r2.text
|
||||
assert r2.json()["error"]["code"] == "INVALID_CURSOR"
|
||||
|
||||
|
||||
def test_cursor_invalid_cursor_at_microsecond_boundary(http: requests.Session, api_base: str):
|
||||
"""A cursor carrying an out-of-range microsecond timestamp must map to
|
||||
400 INVALID_CURSOR, not 500."""
|
||||
import base64
|
||||
import json
|
||||
# 10^18 microseconds ≈ year 33658, well past datetime.MAX_YEAR.
|
||||
# `o` and `order=` must be set; otherwise decode fails earlier on the
|
||||
# missing-order branch and the µs-overflow path is never exercised.
|
||||
payload = {"s": "created_at", "o": "desc", "v": "999999999999999999999", "id": "asset-x"}
|
||||
raw = json.dumps(payload, separators=(",", ":")).encode("utf-8")
|
||||
cursor = base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={"after": cursor, "sort": "created_at", "order": "desc"},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 400, r.text
|
||||
assert r.json()["error"]["code"] == "INVALID_CURSOR"
|
||||
|
||||
|
||||
def test_cursor_pagination_stable_after_delete(http: requests.Session, api_base: str, asset_factory, make_asset_bytes):
|
||||
names = _seed(asset_factory, make_asset_bytes, count=4, tag="cursor-delete")
|
||||
|
||||
# Page 1.
|
||||
r = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-delete",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "2",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r.status_code == 200
|
||||
body = r.json()
|
||||
page1_names = [a["name"] for a in body["assets"]]
|
||||
cursor = body["next_cursor"]
|
||||
assert cursor is not None
|
||||
assert page1_names == names[:2]
|
||||
|
||||
# Delete an item from page 1 (already returned) — cursor should still
|
||||
# locate the next page from where it was minted, not re-index.
|
||||
target_id = body["assets"][0]["id"]
|
||||
d = http.delete(api_base + f"/api/assets/{target_id}", timeout=120)
|
||||
assert d.status_code in (200, 204), d.text
|
||||
|
||||
# Page 2 via cursor.
|
||||
r2 = http.get(
|
||||
api_base + "/api/assets",
|
||||
params={
|
||||
"include_tags": "unit-tests,cursor-delete",
|
||||
"sort": "name",
|
||||
"order": "asc",
|
||||
"limit": "2",
|
||||
"after": cursor,
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert r2.status_code == 200, r2.text
|
||||
body2 = r2.json()
|
||||
assert [a["name"] for a in body2["assets"]] == names[2:]
|
||||
Loading…
Reference in New Issue
Block a user