diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index f4126dba8..2ea4b187e 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import timedelta from typing import Iterable, Sequence import sqlalchemy as sa @@ -98,15 +99,21 @@ def set_reference_tags( if to_add: ensure_tags_exist(session, to_add, tag_type="user") + # Stagger added_at by microsecond per tag so the retrieval ORDER BY + # added_at preserves input order. Per-tag get_utc_now() calls can + # collide at microsecond resolution on fast machines, dropping the + # query to the tag_name alphabetical tiebreaker — same fix as in + # batch_insert_seed_assets. + base_ts = get_utc_now() session.add_all( [ AssetReferenceTag( asset_reference_id=reference_id, tag_name=t, origin=origin, - added_at=get_utc_now(), + added_at=base_ts + timedelta(microseconds=i), ) - for t in to_add + for i, t in enumerate(to_add) ] ) session.flush() @@ -150,6 +157,8 @@ def add_tags_to_reference( to_add = sorted(want - current) if to_add: + # See set_reference_tags for the rationale behind the per-tag stagger. + base_ts = get_utc_now() with session.begin_nested() as nested: try: session.add_all( @@ -158,9 +167,9 @@ def add_tags_to_reference( asset_reference_id=reference_id, tag_name=t, origin=origin, - added_at=get_utc_now(), + added_at=base_ts + timedelta(microseconds=i), ) - for t in to_add + for i, t in enumerate(to_add) ] ) session.flush() diff --git a/app/assets/services/path_utils.py b/app/assets/services/path_utils.py index d1732a250..c85bd95d8 100644 --- a/app/assets/services/path_utils.py +++ b/app/assets/services/path_utils.py @@ -31,9 +31,13 @@ def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]: Accepts both the legacy one-tag-per-directory shape (``["models", "diffusers", "Kolors", "text_encoder"]``) and the slash-joined shape emitted by :func:`get_name_and_tags_from_asset_path` - (``["models", "diffusers/Kolors/text_encoder"]``). Either shape is - split into a category (for ``models``) plus subdirs, with the same - safety checks applied to each component. + (``["models", "diffusers/Kolors/text_encoder"]``). Hybrid shapes that + mix the two within a single call (e.g. + ``["models", "diffusers", "Kolors/text_encoder"]``) are also + accepted: each entry after ``tags[0]`` is split on ``/`` and + concatenated, so the two shapes — and any mix of them — resolve to + the same destination. The same safety checks are applied to each + component after expansion. """ if not tags: raise ValueError("tags must not be empty") @@ -186,12 +190,14 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: consumers can use ``tags[1]`` as a stable category identifier that survives nested directory layouts (e.g. diffusers components). - Both the root category and the subpath are lowercased to match the - canonicalization applied by :func:`ensure_tags_exist`, otherwise the - ``asset_reference_tags.tag_name`` FK to the lowercased - ``tags.name`` would fail for any path containing uppercase letters. - Consumers that need to look up providers keyed on original-case - paths should normalize their lookup key to lowercase. + The subpath is lowercased to match the canonicalization applied by + :func:`ensure_tags_exist`; without that, the + ``asset_reference_tags.tag_name`` FK to the lowercased ``tags.name`` + would fail for any path containing uppercase letters. The root + category is lowercase by construction in + :func:`get_asset_category_and_relative_path`, so no separate cast + is applied here. Consumers that need to look up providers keyed on + original-case paths should normalize their lookup key to lowercase. Raises: ValueError: path does not belong to any known root. diff --git a/tests-unit/assets_test/queries/test_asset_info.py b/tests-unit/assets_test/queries/test_asset_info.py index fe510e342..784030ba4 100644 --- a/tests-unit/assets_test/queries/test_asset_info.py +++ b/tests-unit/assets_test/queries/test_asset_info.py @@ -21,6 +21,7 @@ from app.assets.database.queries import ( get_reference_ids_by_ids, ensure_tags_exist, add_tags_to_reference, + set_reference_tags, ) from app.assets.helpers import get_utc_now @@ -159,6 +160,105 @@ class TestListReferencesPage: assert refs[0].name == "large" +class TestTagRetrievalOrder: + """End-to-end check: tags written through the public write paths come + back from the public read paths in insertion order rather than the + composite-PK alphabetical order SQLite would otherwise impose. + + Each test deliberately picks tag names that would sort differently + under alphabetical vs insertion order, so an alphabetical regression + fails loudly. + """ + + def _make_ref(self, session: Session) -> AssetReference: + asset = _make_asset(session, "h1") + return _make_reference(session, asset, name="x.bin") + + def test_set_reference_tags_preserves_input_order_in_list(self, session: Session): + ref = self._make_ref(session) + # "checkpoints" < "models" alphabetically; if added_at stagger + # works, list_references_page returns insertion order. + set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"]) + session.commit() + + _, tag_map, _ = list_references_page(session) + assert tag_map[ref.id] == ["models", "checkpoints"] + + def test_set_reference_tags_preserves_input_order_in_fetch(self, session: Session): + ref = self._make_ref(session) + # Subpath tag sorts before "models" alphabetically. + set_reference_tags( + session, + reference_id=ref.id, + tags=["models", "diffusers/kolors/text_encoder"], + ) + session.commit() + + result = fetch_reference_asset_and_tags(session, ref.id) + assert result is not None + _, _, tags = result + assert tags == ["models", "diffusers/kolors/text_encoder"] + + def test_add_tags_to_reference_lands_after_path_tags(self, session: Session): + ref = self._make_ref(session) + set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"]) + session.commit() + + # "aaa-..." sorts before both path tags alphabetically. If added_at + # stagger is missing, alphabetic tiebreak would hoist it to tags[0]. + add_tags_to_reference( + session, reference_id=ref.id, tags=["aaa-user-tag"], origin="manual" + ) + session.commit() + + _, tag_map, _ = list_references_page(session) + assert tag_map[ref.id] == ["models", "checkpoints", "aaa-user-tag"] + + def test_multi_tag_batch_lands_after_path_tags(self, session: Session): + ref = self._make_ref(session) + set_reference_tags(session, reference_id=ref.id, tags=["models", "checkpoints"]) + session.commit() + + # Three user tags inserted in non-alphabetical input order. Per-tag + # microsecond stagger should preserve at least the "user batch is + # after path tags" property; within the user batch insertion order + # is also preserved. + add_tags_to_reference( + session, + reference_id=ref.id, + tags=["zzz-z", "favorite", "experiment-q4"], + origin="manual", + ) + session.commit() + + _, tag_map, _ = list_references_page(session) + tags = tag_map[ref.id] + assert tags[0:2] == ["models", "checkpoints"] + assert set(tags[2:]) == {"zzz-z", "favorite", "experiment-q4"} + + def test_remove_then_add_does_not_disrupt_path_tag_positions( + self, session: Session + ): + ref = self._make_ref(session) + set_reference_tags( + session, + reference_id=ref.id, + tags=["models", "loras/my/custom/path"], + ) + session.commit() + add_tags_to_reference(session, reference_id=ref.id, tags=["temp-tag"]) + session.commit() + from app.assets.database.queries import remove_tags_from_reference + + remove_tags_from_reference(session, reference_id=ref.id, tags=["temp-tag"]) + session.commit() + add_tags_to_reference(session, reference_id=ref.id, tags=["second-tag"]) + session.commit() + + _, tag_map, _ = list_references_page(session) + assert tag_map[ref.id] == ["models", "loras/my/custom/path", "second-tag"] + + class TestFetchReferenceAssetAndTags: def test_returns_none_for_nonexistent(self, session: Session): result = fetch_reference_asset_and_tags(session, "nonexistent")