fix(assets): stagger added_at in set_reference_tags + add ordering tests

Cursor-reviews follow-up on PR #13994:

1. set_reference_tags / add_tags_to_reference now apply the same
   microsecond stagger as batch_insert_seed_assets. Per-tag get_utc_now()
   calls can collide at microsecond resolution on fast machines, dropping
   retrieval to the tag_name alphabetical tiebreaker. Using a single
   base_ts + timedelta(microseconds=i) preserves insertion order for any
   batch.

2. Docstring on get_name_and_tags_from_asset_path corrected: only the
   subpath is lowercased in code; the root category is lowercase by
   construction in get_asset_category_and_relative_path.

3. resolve_destination_from_tags docstring now states explicitly that
   hybrid shapes (mix of legacy multi-tag + new slash-joined within a
   single call) are accepted and resolve to the same destination.

4. New TestTagRetrievalOrder class in test_asset_info.py exercises the
   public write paths (set_reference_tags, add_tags_to_reference,
   remove_tags_from_reference) and asserts the public read paths
   (list_references_page, fetch_reference_asset_and_tags) return tags
   in insertion order rather than alphabetical. Tag names are chosen
   to fail loudly under alphabetical regression — "checkpoints" sorts
   before "models", "aaa-user-tag" sorts before every path tag, etc.

Full assets suite: 338 passed, 10 pre-existing skipped.
This commit is contained in:
Matt Miller 2026-05-19 21:05:47 -07:00
parent 19ba85bb2e
commit 7ff001d7c8
3 changed files with 128 additions and 13 deletions

View File

@ -1,4 +1,5 @@
from dataclasses import dataclass from dataclasses import dataclass
from datetime import timedelta
from typing import Iterable, Sequence from typing import Iterable, Sequence
import sqlalchemy as sa import sqlalchemy as sa
@ -98,15 +99,21 @@ def set_reference_tags(
if to_add: if to_add:
ensure_tags_exist(session, to_add, tag_type="user") ensure_tags_exist(session, to_add, tag_type="user")
# Stagger added_at by microsecond per tag so the retrieval ORDER BY
# added_at preserves input order. Per-tag get_utc_now() calls can
# collide at microsecond resolution on fast machines, dropping the
# query to the tag_name alphabetical tiebreaker — same fix as in
# batch_insert_seed_assets.
base_ts = get_utc_now()
session.add_all( session.add_all(
[ [
AssetReferenceTag( AssetReferenceTag(
asset_reference_id=reference_id, asset_reference_id=reference_id,
tag_name=t, tag_name=t,
origin=origin, origin=origin,
added_at=get_utc_now(), added_at=base_ts + timedelta(microseconds=i),
) )
for t in to_add for i, t in enumerate(to_add)
] ]
) )
session.flush() session.flush()
@ -150,6 +157,8 @@ def add_tags_to_reference(
to_add = sorted(want - current) to_add = sorted(want - current)
if to_add: if to_add:
# See set_reference_tags for the rationale behind the per-tag stagger.
base_ts = get_utc_now()
with session.begin_nested() as nested: with session.begin_nested() as nested:
try: try:
session.add_all( session.add_all(
@ -158,9 +167,9 @@ def add_tags_to_reference(
asset_reference_id=reference_id, asset_reference_id=reference_id,
tag_name=t, tag_name=t,
origin=origin, origin=origin,
added_at=get_utc_now(), added_at=base_ts + timedelta(microseconds=i),
) )
for t in to_add for i, t in enumerate(to_add)
] ]
) )
session.flush() session.flush()

View File

@ -31,9 +31,13 @@ def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]:
Accepts both the legacy one-tag-per-directory shape Accepts both the legacy one-tag-per-directory shape
(``["models", "diffusers", "Kolors", "text_encoder"]``) and the (``["models", "diffusers", "Kolors", "text_encoder"]``) and the
slash-joined shape emitted by :func:`get_name_and_tags_from_asset_path` slash-joined shape emitted by :func:`get_name_and_tags_from_asset_path`
(``["models", "diffusers/Kolors/text_encoder"]``). Either shape is (``["models", "diffusers/Kolors/text_encoder"]``). Hybrid shapes that
split into a category (for ``models``) plus subdirs, with the same mix the two within a single call (e.g.
safety checks applied to each component. ``["models", "diffusers", "Kolors/text_encoder"]``) are also
accepted: each entry after ``tags[0]`` is split on ``/`` and
concatenated, so the two shapes and any mix of them resolve to
the same destination. The same safety checks are applied to each
component after expansion.
""" """
if not tags: if not tags:
raise ValueError("tags must not be empty") raise ValueError("tags must not be empty")
@ -186,12 +190,14 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
consumers can use ``tags[1]`` as a stable category identifier that consumers can use ``tags[1]`` as a stable category identifier that
survives nested directory layouts (e.g. diffusers components). survives nested directory layouts (e.g. diffusers components).
Both the root category and the subpath are lowercased to match the The subpath is lowercased to match the canonicalization applied by
canonicalization applied by :func:`ensure_tags_exist`, otherwise the :func:`ensure_tags_exist`; without that, the
``asset_reference_tags.tag_name`` FK to the lowercased ``asset_reference_tags.tag_name`` FK to the lowercased ``tags.name``
``tags.name`` would fail for any path containing uppercase letters. would fail for any path containing uppercase letters. The root
Consumers that need to look up providers keyed on original-case category is lowercase by construction in
paths should normalize their lookup key to lowercase. :func:`get_asset_category_and_relative_path`, so no separate cast
is applied here. Consumers that need to look up providers keyed on
original-case paths should normalize their lookup key to lowercase.
Raises: Raises:
ValueError: path does not belong to any known root. ValueError: path does not belong to any known root.

View File

@ -21,6 +21,7 @@ from app.assets.database.queries import (
get_reference_ids_by_ids, get_reference_ids_by_ids,
ensure_tags_exist, ensure_tags_exist,
add_tags_to_reference, add_tags_to_reference,
set_reference_tags,
) )
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
@ -159,6 +160,105 @@ class TestListReferencesPage:
assert refs[0].name == "large" assert refs[0].name == "large"
class TestTagRetrievalOrder:
"""End-to-end check: tags written through the public write paths come
back from the public read paths in insertion order rather than the
composite-PK alphabetical order SQLite would otherwise impose.
Each test deliberately picks tag names that would sort differently
under alphabetical vs insertion order, so an alphabetical regression
fails loudly.
"""
def _make_ref(self, session: Session) -> AssetReference:
asset = _make_asset(session, "h1")
return _make_reference(session, asset, name="x.bin")
def test_set_reference_tags_preserves_input_order_in_list(self, session: Session):
ref = self._make_ref(session)
# "checkpoints" < "models" alphabetically; if added_at stagger
# works, list_references_page returns insertion order.
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
session.commit()
_, tag_map, _ = list_references_page(session)
assert tag_map[ref.id] == ["models", "checkpoints"]
def test_set_reference_tags_preserves_input_order_in_fetch(self, session: Session):
ref = self._make_ref(session)
# Subpath tag sorts before "models" alphabetically.
set_reference_tags(
session,
reference_id=ref.id,
tags=["models", "diffusers/kolors/text_encoder"],
)
session.commit()
result = fetch_reference_asset_and_tags(session, ref.id)
assert result is not None
_, _, tags = result
assert tags == ["models", "diffusers/kolors/text_encoder"]
def test_add_tags_to_reference_lands_after_path_tags(self, session: Session):
ref = self._make_ref(session)
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
session.commit()
# "aaa-..." sorts before both path tags alphabetically. If added_at
# stagger is missing, alphabetic tiebreak would hoist it to tags[0].
add_tags_to_reference(
session, reference_id=ref.id, tags=["aaa-user-tag"], origin="manual"
)
session.commit()
_, tag_map, _ = list_references_page(session)
assert tag_map[ref.id] == ["models", "checkpoints", "aaa-user-tag"]
def test_multi_tag_batch_lands_after_path_tags(self, session: Session):
ref = self._make_ref(session)
set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"])
session.commit()
# Three user tags inserted in non-alphabetical input order. Per-tag
# microsecond stagger should preserve at least the "user batch is
# after path tags" property; within the user batch insertion order
# is also preserved.
add_tags_to_reference(
session,
reference_id=ref.id,
tags=["zzz-z", "favorite", "experiment-q4"],
origin="manual",
)
session.commit()
_, tag_map, _ = list_references_page(session)
tags = tag_map[ref.id]
assert tags[0:2] == ["models", "checkpoints"]
assert set(tags[2:]) == {"zzz-z", "favorite", "experiment-q4"}
def test_remove_then_add_does_not_disrupt_path_tag_positions(
self, session: Session
):
ref = self._make_ref(session)
set_reference_tags(
session,
reference_id=ref.id,
tags=["models", "loras/my/custom/path"],
)
session.commit()
add_tags_to_reference(session, reference_id=ref.id, tags=["temp-tag"])
session.commit()
from app.assets.database.queries import remove_tags_from_reference
remove_tags_from_reference(session, reference_id=ref.id, tags=["temp-tag"])
session.commit()
add_tags_to_reference(session, reference_id=ref.id, tags=["second-tag"])
session.commit()
_, tag_map, _ = list_references_page(session)
assert tag_map[ref.id] == ["models", "loras/my/custom/path", "second-tag"]
class TestFetchReferenceAssetAndTags: class TestFetchReferenceAssetAndTags:
def test_returns_none_for_nonexistent(self, session: Session): def test_returns_none_for_nonexistent(self, session: Session):
result = fetch_reference_asset_and_tags(session, "nonexistent") result = fetch_reference_asset_and_tags(session, "nonexistent")