From 00940fb24e1040306362b07697fa1ad5dd0f81d1 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 19 May 2026 21:10:53 -0700 Subject: [PATCH] fix(assets): preserve caller order in add_tags_to_reference + align response helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smoke test through the real HTTP upload + tag-add path exposed two ordering bugs the unit-layer tests missed: 1. add_tags_to_reference did `to_add = sorted(want - current)` — an alphabetical pre-sort defeating the microsecond-stagger fix from the previous commit. The stagger was encoding alphabetical positions, not the caller's insertion order. Fix: build to_add by walking the already-normalized caller list and filtering against the current set, so the staggered added_at timestamps reflect what the caller actually requested. 2. get_reference_tags used .order_by(tag_name.asc()) — alphabetical. It's called by the upload response path; meanwhile list_references_page and fetch_reference_asset_and_tags were already updated to order by added_at. The mismatch meant POST /api/assets returned tags in alphabetical order but a subsequent GET returned them in insertion order. Fix: order get_reference_tags by added_at too, so all three response-path helpers agree. New tests-unit/assets_test/test_user_tag_http_smoke.py exercises the full HTTP layer: POST /api/assets to upload, POST /api/assets/{id}/tags to add a user tag (using tag names like "aaa-user-tag" that would jump to position 0 under alphabetical), GET /api/assets/{id} to verify ordering. Catches the bugs above in CI going forward. Full assets suite: 340 passed, 10 pre-existing skipped. --- app/assets/database/queries/tags.py | 14 ++- .../assets_test/test_user_tag_http_smoke.py | 89 +++++++++++++++++++ 2 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 tests-unit/assets_test/test_user_tag_http_smoke.py diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index 2ea4b187e..2501665ac 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -78,7 +78,13 @@ def get_reference_tags(session: Session, reference_id: str) -> list[str]: session.execute( select(AssetReferenceTag.tag_name) .where(AssetReferenceTag.asset_reference_id == reference_id) - .order_by(AssetReferenceTag.tag_name.asc()) + # Match the response-path ordering used by + # list_references_page / fetch_reference_asset_and_tags so + # upload responses and subsequent GETs agree on tag order. + .order_by( + AssetReferenceTag.added_at.asc(), + AssetReferenceTag.tag_name.asc(), + ) ) ).all() ] @@ -153,8 +159,12 @@ def add_tags_to_reference( current = set(get_reference_tags(session, reference_id)) + # Preserve the caller's insertion order rather than alphabetizing — + # the retrieval ORDER BY added_at + microsecond stagger only meaningfully + # preserves insertion order if "the order we insert in" actually matches + # the caller's intent. want = set(norm) - to_add = sorted(want - current) + to_add = [t for t in norm if t not in current] if to_add: # See set_reference_tags for the rationale behind the per-tag stagger. diff --git a/tests-unit/assets_test/test_user_tag_http_smoke.py b/tests-unit/assets_test/test_user_tag_http_smoke.py new file mode 100644 index 000000000..e12a8b9d0 --- /dev/null +++ b/tests-unit/assets_test/test_user_tag_http_smoke.py @@ -0,0 +1,89 @@ +"""HTTP-layer smoke test: user-added tags via POST /api/assets/{id}/tags +land after path tags when read back via GET /api/assets. + +Exercises the full route handler -> service -> query path that the unit +tests at tests-unit/assets_test/queries/test_asset_info.py only cover at +the service layer. +""" +import json + +import pytest +import requests + + +@pytest.fixture +def smoke_asset(http: requests.Session, api_base: str): + """Upload a single asset into models/checkpoints/unit-tests/smoke + and delete it on teardown.""" + name = "smoke_user_tag.safetensors" + tags = ["models", "checkpoints", "unit-tests", "smoke"] + files = {"file": (name, b"S" * 4096, "application/octet-stream")} + form_data = { + "tags": json.dumps(tags), + "name": name, + "user_metadata": json.dumps({}), + } + r = http.post(api_base + "/api/assets", files=files, data=form_data, timeout=120) + assert r.status_code == 201, r.text + body = r.json() + yield body + http.delete( + f"{api_base}/api/assets/{body['id']}?delete_content=true", timeout=30 + ) + + +def _fetch_asset_tags(http, api_base, ref_id): + r = http.get(f"{api_base}/api/assets/{ref_id}", timeout=30) + assert r.status_code == 200, r.text + return r.json()["tags"] + + +def test_user_tag_lands_after_path_tags_via_http( + http: requests.Session, api_base: str, smoke_asset: dict +): + ref_id = smoke_asset["id"] + + initial_tags = _fetch_asset_tags(http, api_base, ref_id) + # Path tags should already be at the front in upload order. + assert initial_tags[:2] == ["models", "checkpoints"] + + # Add a user tag that would jump to position 0 under alphabetical sort. + r = http.post( + f"{api_base}/api/assets/{ref_id}/tags", + json={"tags": ["aaa-user-tag"]}, + timeout=30, + ) + assert r.status_code in (200, 201), r.text + + tags_after = _fetch_asset_tags(http, api_base, ref_id) + # Path tags must still be at the front; user tag goes to the end. + assert tags_after[0] == "models" + assert tags_after[1] == "checkpoints" + assert "aaa-user-tag" in tags_after + assert tags_after[-1] == "aaa-user-tag" + + +def test_user_tag_batch_lands_after_path_tags_via_http( + http: requests.Session, api_base: str, smoke_asset: dict +): + ref_id = smoke_asset["id"] + + # Add three user tags in a single request, in non-alphabetical input + # order. They should all land after the path tags (microsecond stagger + # in set_reference_tags / add_tags_to_reference is what makes this + # work — without it, "aaa" would jump to position 0). + r = http.post( + f"{api_base}/api/assets/{ref_id}/tags", + json={"tags": ["zzz-z", "favorite", "aaa-experiment"]}, + timeout=30, + ) + assert r.status_code in (200, 201), r.text + + tags_after = _fetch_asset_tags(http, api_base, ref_id) + assert tags_after[0] == "models" + assert tags_after[1] == "checkpoints" + user_tail = tags_after[len({"models", "checkpoints", "unit-tests", "smoke"}):] + assert set(user_tail) >= {"zzz-z", "favorite", "aaa-experiment"} + # Critically: alphabetical sort would put 'aaa-experiment' at position 0. + assert tags_after.index("aaa-experiment") > tags_after.index("models") + assert tags_after.index("aaa-experiment") > tags_after.index("checkpoints")