revert(assets): drop job_ids filter from GET /api/assets (#14408)

The job_ids query filter added in #13998 has no live consumer: the
frontend Generated tab kept sourcing from GET /jobs, and the cloud side
removed its equivalent filter from the shared asset spec. Carrying it on
the local server only re-introduces Core<->Cloud drift on the shared
contract, so remove it to match.

Removed: the job_ids field + validator on ListAssetsQuery, the IN(...)
clauses in list_references_page, the service/route passthrough, and the
filter-only tests.

Kept: the canonical-UUID prompt_id enforcement at job creation (also
landed in #13998). It stands on its own -- job ids are matched verbatim
by history keys, websocket correlation, and /interrupt -- and cloud
inherits it by running core for execution, so no divergence is created.
This commit is contained in:
Matt Miller 2026-06-10 19:23:01 -07:00 committed by GitHub
parent ce200c0850
commit 431a1888d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 9 additions and 165 deletions

View File

@ -219,7 +219,6 @@ async def list_assets_route(request: web.Request) -> web.Response:
exclude_tags=q.exclude_tags,
name_contains=q.name_contains,
metadata_filter=q.metadata_filter,
job_ids=q.job_ids,
limit=q.limit,
offset=q.offset,
sort=sort,

View File

@ -1,5 +1,4 @@
import json
import uuid
from dataclasses import dataclass
from typing import Any, Literal
@ -54,7 +53,6 @@ class ListAssetsQuery(BaseModel):
include_tags: list[str] = Field(default_factory=list)
exclude_tags: list[str] = Field(default_factory=list)
name_contains: str | None = None
job_ids: list[str] = Field(default_factory=list, max_length=500)
# Accept either a JSON string (query param) or a dict
metadata_filter: dict[str, Any] | None = None
@ -88,40 +86,6 @@ class ListAssetsQuery(BaseModel):
return out
return v
@field_validator("job_ids", mode="before")
@classmethod
def _split_and_validate_job_ids(cls, v):
# Accept "uuid1,uuid2" or ["uuid1","uuid2"] or repeated query params.
# Each entry must parse as a UUID; canonicalized to lowercase hyphenated form.
if v is None:
return []
if isinstance(v, str):
raw = [t.strip() for t in v.split(",") if t.strip()]
elif isinstance(v, list):
raw = []
for item in v:
if not isinstance(item, str):
raise ValueError(
f"job_ids entries must be strings, got {type(item).__name__}"
)
raw.extend([t.strip() for t in item.split(",") if t.strip()])
else:
raise ValueError(
f"job_ids must be a string or list of strings, got {type(v).__name__}"
)
out: list[str] = []
seen: set[str] = set()
for s in raw:
try:
canonical = str(uuid.UUID(s))
except ValueError as e:
raise ValueError(f"job_ids must be UUIDs: {s!r}") from e
if canonical not in seen:
seen.add(canonical)
out.append(canonical)
return out
@field_validator("metadata_filter", mode="before")
@classmethod
def _parse_metadata_json(cls, v):

View File

@ -264,7 +264,6 @@ def list_references_page(
include_tags: Sequence[str] | None = None,
exclude_tags: Sequence[str] | None = None,
metadata_filter: dict | None = None,
job_ids: Sequence[str] | None = None,
sort: str | None = None,
order: str | None = None,
after_cursor_value: object | None = None,
@ -294,9 +293,6 @@ def list_references_page(
escaped, esc = escape_sql_like_string(name_contains)
base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc))
if job_ids:
base = base.where(AssetReference.job_id.in_(list(job_ids)))
base = apply_tag_filters(base, include_tags, exclude_tags)
base = apply_metadata_filter(base, metadata_filter)
@ -349,8 +345,6 @@ def list_references_page(
count_stmt = count_stmt.where(
AssetReference.name.ilike(f"%{escaped}%", escape=esc)
)
if job_ids:
count_stmt = count_stmt.where(AssetReference.job_id.in_(list(job_ids)))
count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags)
count_stmt = apply_metadata_filter(count_stmt, metadata_filter)

View File

@ -274,7 +274,6 @@ def list_assets_page(
exclude_tags: Sequence[str] | None = None,
name_contains: str | None = None,
metadata_filter: dict | None = None,
job_ids: Sequence[str] | None = None,
limit: int = 20,
offset: int = 0,
sort: str = "created_at",
@ -320,7 +319,6 @@ def list_assets_page(
exclude_tags=exclude_tags,
name_contains=name_contains,
metadata_filter=metadata_filter,
job_ids=job_ids,
limit=fetch_limit,
offset=offset,
sort=sort,

View File

@ -25,10 +25,9 @@ def validate_job_id(value) -> str:
Job ids must be UUIDs in the canonical lowercase hyphenated form. The id
is stored and compared verbatim everywhere downstream history keys,
websocket events, /interrupt matching, and the assets ``job_ids`` filter
(a String(36) column matched exactly) so accepting another spelling
would either rewrite the client's id behind its back or mint a job whose
outputs the filter can never find. Rejecting loudly beats both.
websocket events, and /interrupt matching so accepting another spelling
would silently rewrite the client's id and then miss every exact-match
lookup. Rejecting loudly beats that.
Returns the id unchanged. Raises ValueError when the value is not a
string in canonical UUID form.

View File

@ -158,56 +158,6 @@ class TestListReferencesPage:
refs, _, _ = list_references_page(session, sort="name", order="asc")
assert refs[0].name == "large"
def test_job_ids_filter(self, session: Session):
asset = _make_asset(session, "hash1")
job_a = str(uuid.uuid4())
job_b = str(uuid.uuid4())
ref_a = _make_reference(session, asset, name="from_job_a")
ref_a.job_id = job_a
ref_b = _make_reference(session, asset, name="from_job_b")
ref_b.job_id = job_b
_make_reference(session, asset, name="no_job")
session.commit()
# Single job filter
refs, _, total = list_references_page(session, job_ids=[job_a])
assert total == 1
assert refs[0].name == "from_job_a"
# Multi-job filter (IN)
refs, _, total = list_references_page(session, job_ids=[job_a, job_b])
names = sorted(r.name for r in refs)
assert total == 2
assert names == ["from_job_a", "from_job_b"]
# Unknown job id matches nothing
refs, _, total = list_references_page(session, job_ids=[str(uuid.uuid4())])
assert total == 0
assert refs == []
# Empty/None means no filter -> all three references
refs, _, total = list_references_page(session, job_ids=[])
assert total == 3
refs, _, total = list_references_page(session, job_ids=None)
assert total == 3
def test_job_ids_combined_with_other_filters(self, session: Session):
asset = _make_asset(session, "hash1")
job_a = str(uuid.uuid4())
ref_match = _make_reference(session, asset, name="match.bin")
ref_match.job_id = job_a
ref_wrong_name = _make_reference(session, asset, name="other.bin")
ref_wrong_name.job_id = job_a
ref_wrong_job = _make_reference(session, asset, name="match.bin")
ref_wrong_job.job_id = str(uuid.uuid4())
session.commit()
refs, _, total = list_references_page(
session, job_ids=[job_a], name_contains="match"
)
assert total == 1
assert refs[0].id == ref_match.id
class TestFetchReferenceAssetAndTags:
def test_returns_none_for_nonexistent(self, session: Session):

View File

@ -1,60 +0,0 @@
"""Schema-level unit tests for ListAssetsQuery (no DB required)."""
import uuid
import pytest
from pydantic import ValidationError
from app.assets.api.schemas_in import ListAssetsQuery
class TestJobIdsValidator:
def test_csv_string_parses_and_canonicalizes(self):
a = "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE"
b = "11111111-2222-3333-4444-555555555555"
q = ListAssetsQuery.model_validate({"job_ids": f"{a},{b}"})
# Canonicalized to lowercase
assert q.job_ids == [a.lower(), b]
def test_repeated_query_params_as_list(self):
a = "11111111-1111-1111-1111-111111111111"
b = "22222222-2222-2222-2222-222222222222"
q = ListAssetsQuery.model_validate({"job_ids": [a, b]})
assert q.job_ids == [a, b]
def test_dedup_preserves_first_seen_order(self):
a = "11111111-1111-1111-1111-111111111111"
b = "22222222-2222-2222-2222-222222222222"
q = ListAssetsQuery.model_validate({"job_ids": [a, b, a]})
assert q.job_ids == [a, b]
def test_default_empty(self):
q = ListAssetsQuery.model_validate({})
assert q.job_ids == []
def test_invalid_uuid_rejected(self):
with pytest.raises(ValidationError) as exc:
ListAssetsQuery.model_validate({"job_ids": "not-a-uuid"})
assert "must be UUIDs" in str(exc.value)
def test_non_string_list_item_rejected(self):
with pytest.raises(ValidationError) as exc:
ListAssetsQuery.model_validate(
{"job_ids": ["11111111-1111-1111-1111-111111111111", 42]}
)
assert "must be strings" in str(exc.value)
def test_non_string_non_list_value_rejected(self):
with pytest.raises(ValidationError) as exc:
ListAssetsQuery.model_validate({"job_ids": {"bad": "shape"}})
assert "must be a string or list of strings" in str(exc.value)
def test_max_length_enforced(self):
too_many = [str(uuid.uuid4()) for _ in range(501)]
with pytest.raises(ValidationError) as exc:
ListAssetsQuery.model_validate({"job_ids": too_many})
assert exc.value.errors()[0]["type"] == "too_long"
def test_max_length_boundary_accepted(self):
at_cap = [str(uuid.uuid4()) for _ in range(500)]
q = ListAssetsQuery.model_validate({"job_ids": at_cap})
assert len(q.job_ids) == 500

View File

@ -1,9 +1,9 @@
"""POST /prompt enforces canonical-UUID job ids at creation time.
Lives in assets_test because it uses this suite's booted-server fixture and
because the invariant exists for the assets pipeline: the GET /api/assets
``job_ids`` filter matches stored job ids exactly, so a job minted with a
non-canonical id would produce assets the filter can never find.
Lives in assets_test because it uses this suite's booted-server fixture. The
invariant itself is pipeline-wide: a job id is stored and compared verbatim
downstream history keys, websocket correlation, and /interrupt matching
so a job minted with a non-canonical id would miss every exact-match lookup.
The prompt bodies here are intentionally invalid workflows prompt_id
validation happens before workflow validation, so a rejected id returns

View File

@ -35,8 +35,8 @@ class TestValidateJobId:
)
def test_non_canonical_spellings_rejected(self, variant):
# uuid.UUID parses all of these, but accepting them would silently
# rewrite the client's id (history keys, websocket events, and the
# assets job_ids filter all match the stored form exactly).
# rewrite the client's id (history keys, websocket events, and
# /interrupt matching all match the stored form exactly).
with pytest.raises(ValueError):
validate_job_id(variant)