From 058692a79b258a3a7ca24c2137946327e0b83e7f Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Wed, 1 Jul 2026 00:41:42 -0700 Subject: [PATCH] fix(assets): treat explicit empty hash filter as exact-match miss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review findings on the listAssets contract PR: - Empty `?hash=` no longer collapses to "no filter" (which returned a full unfiltered page); it now stays "" and is matched exactly, yielding an empty page — consistent with the documented "malformed -> empty page" contract. Omitting the param entirely still disables the filter. - Guard `list_references_page` on `asset_hash is not None` (page + count) so the present-empty vs omitted distinction is preserved. - Add tests for hash normalization (uppercase/whitespace) and for the explicit-empty -> empty-page behavior. - Clarify the `include_public` comment: core reads are owner-scoped with no separate public pool, so the flag is inert here; cloud enforces it in its own service layer. --- app/assets/api/schemas_in.py | 14 +++--- .../database/queries/asset_reference.py | 6 ++- tests-unit/assets_test/test_list_filter.py | 45 +++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 4bab066a2..fe56b3f5c 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -59,9 +59,11 @@ class ListAssetsQuery(BaseModel): # is `asset_hash`; the query param is `hash`). hash: str | None = None - # Declared for cloud/core contract parity. Core has no public asset pool, so - # this is inert: results are always the caller's own assets. Accepted (not - # rejected) so the FE needs no isCloud branch. + # Declared for cloud/core contract parity. In core, reads are owner-scoped + # (owner_id == "") and there is no separate shared/public pool for this flag + # to include or exclude, so it is inert here and intentionally not threaded + # into the query. Accepted (not rejected) so the FE needs no isCloud branch; + # cloud enforces the flag in its own service layer. include_public: bool = True # Accept either a JSON string (query param) or a dict @@ -102,9 +104,11 @@ class ListAssetsQuery(BaseModel): # Normalize for an exact match against stored hashes (which are # lowercase `blake3:`). Liberal in what we accept — no pattern # enforcement; a non-matching value simply yields an empty page. + # An explicitly-supplied-but-empty value (`?hash=`) stays `""` so it + # is treated as an exact-match miss (empty page), not silently dropped + # to "no filter" — omit the param entirely to disable the filter. if isinstance(v, str): - v = v.strip().lower() - return v or None + return v.strip().lower() return v @field_validator("metadata_filter", mode="before") diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 45efb674d..87f612f48 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -294,7 +294,9 @@ def list_references_page( escaped, esc = escape_sql_like_string(name_contains) base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc)) - if asset_hash: + # `is not None` (not truthiness): an explicit empty hash is an exact-match + # miss (empty page), while an omitted hash (None) disables the filter. + if asset_hash is not None: base = base.where(Asset.hash == asset_hash) base = apply_tag_filters(base, include_tags, exclude_tags) @@ -349,7 +351,7 @@ def list_references_page( count_stmt = count_stmt.where( AssetReference.name.ilike(f"%{escaped}%", escape=esc) ) - if asset_hash: + if asset_hash is not None: count_stmt = count_stmt.where(Asset.hash == asset_hash) count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags) count_stmt = apply_metadata_filter(count_stmt, metadata_filter) diff --git a/tests-unit/assets_test/test_list_filter.py b/tests-unit/assets_test/test_list_filter.py index 49c1bdf8d..f4157367b 100644 --- a/tests-unit/assets_test/test_list_filter.py +++ b/tests-unit/assets_test/test_list_filter.py @@ -366,6 +366,51 @@ def test_list_assets_hash_filter_no_match(http, api_base, asset_factory, make_as assert body["total"] == 0 +def test_list_assets_hash_filter_normalizes_case_and_whitespace( + http, api_base, asset_factory, make_asset_bytes +): + """`hash` is trimmed and lowercased before matching, so an upper-cased, + space-padded value still matches the stored lowercase hash.""" + scope = f"lf-hashnorm-{uuid.uuid4().hex[:6]}" + tags = ["models", "checkpoints", "unit-tests", scope] + a = asset_factory("hnorm_a.safetensors", tags, {}, make_asset_bytes("hnorm_a", 1024)) + + target = a["hash"] + assert target == target.lower(), "stored hash is expected to be lowercase" + messy = f" {target.upper()} " + + r = http.get( + api_base + "/api/assets", + params={"hash": messy, "limit": "50"}, + timeout=120, + ) + body = r.json() + assert r.status_code == 200, body + names = [x["name"] for x in body["assets"]] + assert names == [a["name"]] + assert body["total"] == 1 + + +def test_list_assets_hash_filter_empty_returns_empty_page( + http, api_base, asset_factory, make_asset_bytes +): + """An explicitly-supplied but empty `hash` (`?hash=`) is an exact-match miss + and returns an empty page, rather than silently disabling the filter.""" + scope = f"lf-hashempty-{uuid.uuid4().hex[:6]}" + tags = ["models", "checkpoints", "unit-tests", scope] + asset_factory("he_a.safetensors", tags, {}, make_asset_bytes("he_a", 800)) + + r = http.get( + api_base + "/api/assets", + params={"hash": "", "limit": "50"}, + timeout=120, + ) + body = r.json() + assert r.status_code == 200, body + assert body["assets"] == [] + assert body["total"] == 0 + + def test_list_assets_include_public_accepted(http, api_base, asset_factory, make_asset_bytes): """`include_public` is accepted for contract parity; core results are always the caller's own assets regardless of its value (the param is inert)."""