diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 68126b6a5..6555974e9 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -160,10 +160,12 @@ def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResu preview_url = None else: preview_url = _build_preview_url_from_view(result.tags, result.ref.user_metadata) + asset_content_hash = result.asset.hash if result.asset else None return schemas_out.Asset( id=result.ref.id, name=result.ref.name, - asset_hash=result.asset.hash if result.asset else None, + hash=asset_content_hash, + asset_hash=asset_content_hash, size=int(result.asset.size_bytes) if result.asset else None, mime_type=result.asset.mime_type if result.asset else None, tags=result.tags, diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index d99b1098d..0e748b907 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -10,6 +10,7 @@ class Asset(BaseModel): id: str name: str + hash: str | None = None asset_hash: str | None = None size: int | None = None mime_type: str | None = None diff --git a/tests-unit/assets_test/conftest.py b/tests-unit/assets_test/conftest.py index 6c5c56113..9867b4e14 100644 --- a/tests-unit/assets_test/conftest.py +++ b/tests-unit/assets_test/conftest.py @@ -236,6 +236,8 @@ def seeded_asset(request: pytest.FixtureRequest, http: requests.Session, api_bas r = http.post(api_base + "/api/assets", files=files, data=form_data, timeout=120) body = r.json() assert r.status_code == 201, body + from helpers import assert_hash_fields_consistent + assert_hash_fields_consistent(body) return body diff --git a/tests-unit/assets_test/helpers.py b/tests-unit/assets_test/helpers.py index 770e011f4..ae3de6dc3 100644 --- a/tests-unit/assets_test/helpers.py +++ b/tests-unit/assets_test/helpers.py @@ -26,3 +26,26 @@ def trigger_sync_seed_assets(session: requests.Session, base_url: str) -> None: def get_asset_filename(asset_hash: str, extension: str) 
-> str: return asset_hash.removeprefix("blake3:") + extension + + +def assert_hash_fields_consistent(body: dict, expected_hash: str | None = None) -> None: + """Assert hash and asset_hash invariants on an Asset response. + + Both must be present or both absent (so a regression that drops only one + is caught). When present, they must equal each other and, if expected_hash + is provided, must equal that value. + """ + hash_present = "hash" in body + asset_hash_present = "asset_hash" in body + assert hash_present == asset_hash_present, ( + f"hash and asset_hash must both be present or both absent: " + f"hash present={hash_present}, asset_hash present={asset_hash_present}" + ) + if hash_present: + h = body["hash"] + ah = body["asset_hash"] + assert h == ah, f"hash and asset_hash must match: hash={h!r}, asset_hash={ah!r}" + if expected_hash is not None: + assert h == expected_hash, ( + f"hash must equal expected: got {h!r}, expected {expected_hash!r}" + ) diff --git a/tests-unit/assets_test/test_assets_missing_sync.py b/tests-unit/assets_test/test_assets_missing_sync.py index 47dc130cb..29ec1d09d 100644 --- a/tests-unit/assets_test/test_assets_missing_sync.py +++ b/tests-unit/assets_test/test_assets_missing_sync.py @@ -40,7 +40,9 @@ def test_seed_asset_removed_when_file_is_deleted( # there should be exactly one with that name matches = [a for a in body1.get("assets", []) if a.get("name") == name] assert matches - assert matches[0].get("asset_hash") is None + # Seed assets have no hash; exclude_none drops both keys from the response + assert "asset_hash" not in matches[0] + assert "hash" not in matches[0] asset_info_id = matches[0]["id"] # Remove the underlying file and sync again diff --git a/tests-unit/assets_test/test_crud.py b/tests-unit/assets_test/test_crud.py index 07310223e..fd2e9a098 100644 --- a/tests-unit/assets_test/test_crud.py +++ b/tests-unit/assets_test/test_crud.py @@ -21,6 +21,8 @@ def test_create_from_hash_success( b1 = r1.json() assert r1.status_code 
== 201, b1 assert b1["asset_hash"] == h + assert b1["hash"] == h + assert b1["hash"] == b1["asset_hash"] assert b1["created_new"] is False aid = b1["id"] @@ -39,6 +41,7 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse detail = rg.json() assert rg.status_code == 200, detail assert detail["id"] == aid + assert detail["hash"] == detail["asset_hash"] assert "user_metadata" in detail assert "filename" in detail["user_metadata"] @@ -97,6 +100,7 @@ def test_delete_upon_reference_count( copy = r2.json() assert r2.status_code == 201, copy assert copy["asset_hash"] == src_hash + assert copy["hash"] == src_hash assert copy["created_new"] is False # Soft-delete original reference (default) -> asset identity must remain @@ -139,6 +143,7 @@ def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset body = ru.json() assert ru.status_code == 200, body assert body["name"] == payload["name"] + assert body["hash"] == body["asset_hash"] assert body["tags"] == original_tags # tags unchanged assert body["user_metadata"]["purpose"] == "updated" # filename should still be present and normalized by server @@ -289,7 +294,9 @@ def test_metadata_filename_is_set_for_seed_asset_without_hash( assert r1.status_code == 200, body matches = [a for a in body.get("assets", []) if a.get("name") == name] assert matches, "Seed asset should be visible after sync" - assert matches[0].get("asset_hash") is None # still a seed + # Seed assets have no hash; exclude_none drops both keys from the response + assert "asset_hash" not in matches[0] + assert "hash" not in matches[0] aid = matches[0]["id"] r2 = http.get(f"{api_base}/api/assets/{aid}", timeout=120) diff --git a/tests-unit/assets_test/test_list_filter.py b/tests-unit/assets_test/test_list_filter.py index dcb7a73ca..17bbea5c6 100644 --- a/tests-unit/assets_test/test_list_filter.py +++ b/tests-unit/assets_test/test_list_filter.py @@ -3,6 +3,7 @@ import uuid import pytest import requests +from 
def test_asset_created_inherits_hash_field():
    """Schema-level guard that ``AssetCreated`` exposes the same ``hash`` field as ``Asset``.

    Integration tests cover the wire shape of POST /api/assets; this check
    instead looks at the model definitions directly, so a change that leaves
    ``AssetCreated`` without ``hash`` (for example, if it stopped extending
    ``Asset``) fails here rather than silently dropping the key from responses.
    """
    base_fields = Asset.model_fields
    created_fields = AssetCreated.model_fields

    # The field must be declared on both models...
    assert "hash" in base_fields
    assert "hash" in created_fields

    # ...and carry the same type annotation on each.
    created_annotation = created_fields["hash"].annotation
    base_annotation = base_fields["hash"].annotation
    assert created_annotation == base_annotation
@@ -84,6 +103,7 @@ def test_upload_fastpath_with_known_hash_and_file( assert r2.status_code == 200, b2 assert b2["created_new"] is False assert b2["asset_hash"] == h + assert b2["hash"] == h def test_upload_multiple_tags_fields_are_merged(http: requests.Session, api_base: str): @@ -142,6 +162,8 @@ def test_concurrent_upload_identical_bytes_different_names( assert r1.status_code in (200, 201), b1 assert r2.status_code in (200, 201), b2 assert b1["asset_hash"] == b2["asset_hash"] + assert b1["hash"] == b2["hash"] + assert b1["hash"] == b1["asset_hash"] assert b1["id"] != b2["id"] created_flags = sorted([bool(b1.get("created_new")), bool(b2.get("created_new"))])