From b7f8a7a86c95f9f7e6e7ca9f641e8674600e37e0 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 6 May 2026 20:35:15 +0000 Subject: [PATCH 1/2] Emit `hash` alongside `asset_hash` on all Asset responses Add a `hash` field to the Asset response schema that carries the same value as the existing `asset_hash` field. Both fields are now populated in _build_asset_response, so every Asset-returning endpoint (GET, POST, PUT) includes both. No existing fields are removed. Tests updated to assert both fields. Co-authored-by: Matt Miller --- app/assets/api/routes.py | 1 + app/assets/api/schemas_out.py | 1 + tests-unit/assets_test/conftest.py | 1 + tests-unit/assets_test/test_assets_missing_sync.py | 1 + tests-unit/assets_test/test_crud.py | 6 ++++++ tests-unit/assets_test/test_uploads.py | 8 ++++++++ 6 files changed, 18 insertions(+) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 68126b6a5..0c87d867a 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -163,6 +163,7 @@ def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResu return schemas_out.Asset( id=result.ref.id, name=result.ref.name, + hash=result.asset.hash if result.asset else None, asset_hash=result.asset.hash if result.asset else None, size=int(result.asset.size_bytes) if result.asset else None, mime_type=result.asset.mime_type if result.asset else None, diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index d99b1098d..0e748b907 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -10,6 +10,7 @@ class Asset(BaseModel): id: str name: str + hash: str | None = None asset_hash: str | None = None size: int | None = None mime_type: str | None = None diff --git a/tests-unit/assets_test/conftest.py b/tests-unit/assets_test/conftest.py index 6c5c56113..5477167bd 100644 --- a/tests-unit/assets_test/conftest.py +++ b/tests-unit/assets_test/conftest.py @@ -236,6 +236,7 @@ def seeded_asset(request: pytest.FixtureRequest, http: requests.Session, api_bas r = http.post(api_base + "/api/assets", files=files, data=form_data, timeout=120) body = r.json() assert r.status_code == 201, body + assert body.get("hash") == body.get("asset_hash") return body diff --git a/tests-unit/assets_test/test_assets_missing_sync.py b/tests-unit/assets_test/test_assets_missing_sync.py index 47dc130cb..5610141e1 100644 --- a/tests-unit/assets_test/test_assets_missing_sync.py +++ b/tests-unit/assets_test/test_assets_missing_sync.py @@ -41,6 +41,7 @@ def test_seed_asset_removed_when_file_is_deleted( matches = [a for a in body1.get("assets", []) if a.get("name") == name] assert matches assert matches[0].get("asset_hash") is None + assert matches[0].get("hash") is None asset_info_id = matches[0]["id"] # Remove the underlying file and sync again diff --git a/tests-unit/assets_test/test_crud.py b/tests-unit/assets_test/test_crud.py index 07310223e..f0aa0466f 100644 --- a/tests-unit/assets_test/test_crud.py +++ b/tests-unit/assets_test/test_crud.py @@ -21,6 +21,8 @@ def test_create_from_hash_success( b1 = r1.json() assert r1.status_code == 201, b1 assert b1["asset_hash"] == h + assert b1["hash"] == h + assert b1["hash"] == b1["asset_hash"] assert b1["created_new"] is False aid = b1["id"] @@ -39,6 +41,7 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse detail = rg.json() assert rg.status_code == 200, detail assert detail["id"] == aid + assert detail["hash"] == detail["asset_hash"] assert "user_metadata" in detail assert "filename" in detail["user_metadata"] @@ -97,6 +100,7 @@ def test_delete_upon_reference_count( copy = r2.json() assert r2.status_code == 201, copy assert copy["asset_hash"] == src_hash + assert copy["hash"] == src_hash assert copy["created_new"] is False # Soft-delete original reference (default) -> asset identity must remain @@ -139,6 +143,7 @@ def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset body = ru.json() assert ru.status_code == 200, body assert body["name"] == payload["name"] + assert body["hash"] == body["asset_hash"] assert body["tags"] == original_tags # tags unchanged assert body["user_metadata"]["purpose"] == "updated" # filename should still be present and normalized by server @@ -290,6 +295,7 @@ def test_metadata_filename_is_set_for_seed_asset_without_hash( matches = [a for a in body.get("assets", []) if a.get("name") == name] assert matches, "Seed asset should be visible after sync" assert matches[0].get("asset_hash") is None # still a seed + assert matches[0].get("hash") is None # still a seed aid = matches[0]["id"] r2 = http.get(f"{api_base}/api/assets/{aid}", timeout=120) diff --git a/tests-unit/assets_test/test_uploads.py b/tests-unit/assets_test/test_uploads.py index 0f2b124a3..62f3338f2 100644 --- a/tests-unit/assets_test/test_uploads.py +++ b/tests-unit/assets_test/test_uploads.py @@ -17,6 +17,7 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma a1 = r1.json() assert r1.status_code == 201, a1 assert a1["created_new"] is True + assert a1["hash"] == a1["asset_hash"] # Second upload with the same data and name creates a new AssetReference (duplicates allowed) # Returns 200 because Asset already exists, but a new AssetReference is created @@ -26,6 +27,7 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma a2 = r2.json() assert r2.status_code in (200, 201), a2 assert a2["asset_hash"] == a1["asset_hash"] + assert a2["hash"] == a1["hash"] assert a2["id"] != a1["id"] # new reference with same content # Third upload with the same data but different name also creates new AssetReference @@ -50,6 +52,7 @@ def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_ b1 = r1.json() assert r1.status_code == 201, b1 h = b1["asset_hash"] + assert b1["hash"] == h # Now POST /api/assets with only hash and no file files = [ @@ -63,6 +66,7 @@ def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_ assert r2.status_code == 200, b2 # fast path returns 200 with created_new == False assert b2["created_new"] is False assert b2["asset_hash"] == h + assert b2["hash"] == h def test_upload_fastpath_with_known_hash_and_file( @@ -75,6 +79,7 @@ def test_upload_fastpath_with_known_hash_and_file( b1 = r1.json() assert r1.status_code == 201, b1 h = b1["asset_hash"] + assert b1["hash"] == h # Send both file and hash of existing content -> server must drain file and create from hash (200) files = {"file": ("ignored.bin", b"ignored" * 10, "application/octet-stream")} @@ -84,6 +89,7 @@ def test_upload_fastpath_with_known_hash_and_file( assert r2.status_code == 200, b2 assert b2["created_new"] is False assert b2["asset_hash"] == h + assert b2["hash"] == h def test_upload_multiple_tags_fields_are_merged(http: requests.Session, api_base: str): @@ -142,6 +148,8 @@ def test_concurrent_upload_identical_bytes_different_names( assert r1.status_code in (200, 201), b1 assert r2.status_code in (200, 201), b2 assert b1["asset_hash"] == b2["asset_hash"] + assert b1["hash"] == b2["hash"] + assert b1["hash"] == b1["asset_hash"] assert b1["id"] != b2["id"] created_flags = sorted([bool(b1.get("created_new")), bool(b2.get("created_new"))]) From 5130628ed5797283458dd21f659a389a033d2080 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 6 May 2026 14:56:46 -0700 Subject: [PATCH 2/2] Tighten hash field tests and DRY response builder - Extract assert_hash_fields_consistent() helper that verifies presence parity and value equality, replacing body.get()-based assertions that treated missing keys and explicit nulls identically. - Conftest seeded_asset fixture and seed-asset list assertions now check key absence directly, so a regression that surfaces null fields would be caught (validates exclude_none behavior). - DRY duplicate hash expression in _build_asset_response. - Add list-endpoint coverage asserting hash is present and consistent on populated assets. - Add schema-level test asserting AssetCreated inherits the hash field from Asset, guarding against future inheritance drift. --- app/assets/api/routes.py | 5 ++-- tests-unit/assets_test/conftest.py | 3 ++- tests-unit/assets_test/helpers.py | 23 +++++++++++++++++++ .../assets_test/test_assets_missing_sync.py | 5 ++-- tests-unit/assets_test/test_crud.py | 5 ++-- tests-unit/assets_test/test_list_filter.py | 5 ++++ tests-unit/assets_test/test_uploads.py | 14 +++++++++++ 7 files changed, 53 insertions(+), 7 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 0c87d867a..6555974e9 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -160,11 +160,12 @@ def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResu preview_url = None else: preview_url = _build_preview_url_from_view(result.tags, result.ref.user_metadata) + asset_content_hash = result.asset.hash if result.asset else None return schemas_out.Asset( id=result.ref.id, name=result.ref.name, - hash=result.asset.hash if result.asset else None, - asset_hash=result.asset.hash if result.asset else None, + hash=asset_content_hash, + asset_hash=asset_content_hash, size=int(result.asset.size_bytes) if result.asset else None, mime_type=result.asset.mime_type if result.asset else None, tags=result.tags, diff --git a/tests-unit/assets_test/conftest.py b/tests-unit/assets_test/conftest.py index 5477167bd..9867b4e14 100644 --- a/tests-unit/assets_test/conftest.py +++ b/tests-unit/assets_test/conftest.py @@ -236,7 +236,8 @@ def seeded_asset(request: pytest.FixtureRequest, http: requests.Session, api_bas r = http.post(api_base + "/api/assets", files=files, data=form_data, timeout=120) body = r.json() assert r.status_code == 201, body - assert body.get("hash") == body.get("asset_hash") + from helpers import assert_hash_fields_consistent + assert_hash_fields_consistent(body) return body diff --git a/tests-unit/assets_test/helpers.py b/tests-unit/assets_test/helpers.py index 770e011f4..ae3de6dc3 100644 --- a/tests-unit/assets_test/helpers.py +++ b/tests-unit/assets_test/helpers.py @@ -26,3 +26,26 @@ def trigger_sync_seed_assets(session: requests.Session, base_url: str) -> None: def get_asset_filename(asset_hash: str, extension: str) -> str: return asset_hash.removeprefix("blake3:") + extension + + +def assert_hash_fields_consistent(body: dict, expected_hash: str | None = None) -> None: + """Assert hash and asset_hash invariants on an Asset response. + + Both must be present or both absent (so a regression that drops only one + is caught). When present, they must equal each other and, if expected_hash + is provided, must equal that value. + """ + hash_present = "hash" in body + asset_hash_present = "asset_hash" in body + assert hash_present == asset_hash_present, ( + f"hash and asset_hash must both be present or both absent: " + f"hash present={hash_present}, asset_hash present={asset_hash_present}" + ) + if hash_present: + h = body["hash"] + ah = body["asset_hash"] + assert h == ah, f"hash and asset_hash must match: hash={h!r}, asset_hash={ah!r}" + if expected_hash is not None: + assert h == expected_hash, ( + f"hash must equal expected: got {h!r}, expected {expected_hash!r}" + ) diff --git a/tests-unit/assets_test/test_assets_missing_sync.py b/tests-unit/assets_test/test_assets_missing_sync.py index 5610141e1..29ec1d09d 100644 --- a/tests-unit/assets_test/test_assets_missing_sync.py +++ b/tests-unit/assets_test/test_assets_missing_sync.py @@ -40,8 +40,9 @@ def test_seed_asset_removed_when_file_is_deleted( # there should be exactly one with that name matches = [a for a in body1.get("assets", []) if a.get("name") == name] assert matches - assert matches[0].get("asset_hash") is None - assert matches[0].get("hash") is None + # Seed assets have no hash; exclude_none drops both keys from the response + assert "asset_hash" not in matches[0] + assert "hash" not in matches[0] asset_info_id = matches[0]["id"] # Remove the underlying file and sync again diff --git a/tests-unit/assets_test/test_crud.py b/tests-unit/assets_test/test_crud.py index f0aa0466f..fd2e9a098 100644 --- a/tests-unit/assets_test/test_crud.py +++ b/tests-unit/assets_test/test_crud.py @@ -294,8 +294,9 @@ def test_metadata_filename_is_set_for_seed_asset_without_hash( assert r1.status_code == 200, body matches = [a for a in body.get("assets", []) if a.get("name") == name] assert matches, "Seed asset should be visible after sync" - assert matches[0].get("asset_hash") is None # still a seed - assert matches[0].get("hash") is None # still a seed + # Seed assets have no hash; exclude_none drops both keys from the response + assert "asset_hash" not in matches[0] + assert "hash" not in matches[0] aid = matches[0]["id"] r2 = http.get(f"{api_base}/api/assets/{aid}", timeout=120) diff --git a/tests-unit/assets_test/test_list_filter.py b/tests-unit/assets_test/test_list_filter.py index dcb7a73ca..17bbea5c6 100644 --- a/tests-unit/assets_test/test_list_filter.py +++ b/tests-unit/assets_test/test_list_filter.py @@ -3,6 +3,7 @@ import uuid import pytest import requests +from helpers import assert_hash_fields_consistent def test_list_assets_paging_and_sort(http: requests.Session, api_base: str, asset_factory, make_asset_bytes): @@ -26,6 +27,10 @@ def test_list_assets_paging_and_sort(http: requests.Session, api_base: str, asse got1 = [a["name"] for a in b1["assets"]] assert got1 == sorted(names)[:2] assert b1["has_more"] is True + # Populated assets in list responses must carry both `hash` and `asset_hash` consistently + for asset in b1["assets"]: + assert_hash_fields_consistent(asset) + assert "hash" in asset, "populated asset must emit hash on list endpoint" r2 = http.get( api_base + "/api/assets", diff --git a/tests-unit/assets_test/test_uploads.py b/tests-unit/assets_test/test_uploads.py index 62f3338f2..427a417cc 100644 --- a/tests-unit/assets_test/test_uploads.py +++ b/tests-unit/assets_test/test_uploads.py @@ -5,6 +5,20 @@ from concurrent.futures import ThreadPoolExecutor import requests import pytest +from app.assets.api.schemas_out import Asset, AssetCreated + + +def test_asset_created_inherits_hash_field(): + """AssetCreated must inherit `hash` from Asset so POST /api/assets responses emit it. + + Schema-level guard: integration tests cover the wire shape, but inheritance + drift (e.g. AssetCreated ever being redefined to no longer extend Asset) + would silently drop `hash` from a major endpoint without this check. + """ + assert "hash" in Asset.model_fields + assert "hash" in AssetCreated.model_fields + assert AssetCreated.model_fields["hash"].annotation == Asset.model_fields["hash"].annotation + def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, make_asset_bytes): name = "dup_a.safetensors"