From 560e6ee5c1174cb4ed741b58fd561108728eeceb Mon Sep 17 00:00:00 2001
From: Simon Pinfold <synap5e@users.noreply.github.com>
Date: Fri, 22 May 2026 10:53:50 +1200
Subject: [PATCH] Add job_ids filter for GET api assets (#13998)

Amp-Thread-ID: https://ampcode.com/threads/T-019e4ca5-b71a-7168-8f56-58b2325f34c3
Co-authored-by: Amp <amp@ampcode.com>
---
 app/assets/api/routes.py                      |  1 +
 app/assets/api/schemas_in.py                  | 36 +++++++++++
 .../database/queries/asset_reference.py       |  6 ++
 app/assets/services/asset_management.py       |  2 +
 openapi.yaml                                  | 11 ++++
 .../assets_test/queries/test_asset_info.py    | 50 ++++++++++++++++
 .../queries/test_list_assets_query.py         | 60 +++++++++++++++++++
 7 files changed, 166 insertions(+)
 create mode 100644 tests-unit/assets_test/queries/test_list_assets_query.py

diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py
index 20abde960..a6d4ffc0b 100644
--- a/app/assets/api/routes.py
+++ b/app/assets/api/routes.py
@@ -227,6 +227,7 @@ async def list_assets_route(request: web.Request) -> web.Response:
             exclude_tags=q.exclude_tags,
             name_contains=q.name_contains,
             metadata_filter=q.metadata_filter,
+            job_ids=q.job_ids,
             limit=q.limit,
             offset=q.offset,
             sort=sort,
diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py
index af666746d..4ae18c65a 100644
--- a/app/assets/api/schemas_in.py
+++ b/app/assets/api/schemas_in.py
@@ -1,4 +1,5 @@
 import json
+import uuid
 from dataclasses import dataclass
 from typing import Any, Literal
 
@@ -53,6 +54,7 @@ class ListAssetsQuery(BaseModel):
     include_tags: list[str] = Field(default_factory=list)
     exclude_tags: list[str] = Field(default_factory=list)
     name_contains: str | None = None
+    job_ids: list[str] = Field(default_factory=list, max_length=500)
 
     # Accept either a JSON string (query param) or a dict
     metadata_filter: dict[str, Any] | None = None
@@ -86,6 +88,40 @@ class ListAssetsQuery(BaseModel):
             return out
         return v
 
+    @field_validator("job_ids", mode="before")
+    @classmethod
+    def _split_and_validate_job_ids(cls, v):
+        # Accept "uuid1,uuid2" or ["uuid1","uuid2"] or repeated query params.
+        # Each entry must parse as a UUID; canonicalized to lowercase hyphenated form.
+        if v is None:
+            return []
+        if isinstance(v, str):
+            raw = [t.strip() for t in v.split(",") if t.strip()]
+        elif isinstance(v, list):
+            raw = []
+            for item in v:
+                if not isinstance(item, str):
+                    raise ValueError(
+                        f"job_ids entries must be strings, got {type(item).__name__}"
+                    )
+                raw.extend([t.strip() for t in item.split(",") if t.strip()])
+        else:
+            raise ValueError(
+                f"job_ids must be a string or list of strings, got {type(v).__name__}"
+            )
+
+        out: list[str] = []
+        seen: set[str] = set()
+        for s in raw:
+            try:
+                canonical = str(uuid.UUID(s))
+            except ValueError as e:
+                raise ValueError(f"job_ids must be UUIDs: {s!r}") from e
+            if canonical not in seen:
+                seen.add(canonical)
+                out.append(canonical)
+        return out
+
     @field_validator("metadata_filter", mode="before")
     @classmethod
     def _parse_metadata_json(cls, v):
diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py
index e0a01e871..37118cbda 100644
--- a/app/assets/database/queries/asset_reference.py
+++ b/app/assets/database/queries/asset_reference.py
@@ -264,6 +264,7 @@ def list_references_page(
     include_tags: Sequence[str] | None = None,
     exclude_tags: Sequence[str] | None = None,
     metadata_filter: dict | None = None,
+    job_ids: Sequence[str] | None = None,
     sort: str | None = None,
     order: str | None = None,
     after_cursor_value: object | None = None,
@@ -293,6 +294,9 @@ def list_references_page(
         escaped, esc = escape_sql_like_string(name_contains)
         base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc))
 
+    if job_ids:
+        base = base.where(AssetReference.job_id.in_(list(job_ids)))
+
     base = apply_tag_filters(base, include_tags, exclude_tags)
     base = apply_metadata_filter(base, metadata_filter)
 
@@ -345,6 +349,8 @@ def list_references_page(
         count_stmt = count_stmt.where(
             AssetReference.name.ilike(f"%{escaped}%", escape=esc)
         )
+    if job_ids:
+        count_stmt = count_stmt.where(AssetReference.job_id.in_(list(job_ids)))
     count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags)
     count_stmt = apply_metadata_filter(count_stmt, metadata_filter)
 
diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py
index 1072c95fa..f22e98aa3 100644
--- a/app/assets/services/asset_management.py
+++ b/app/assets/services/asset_management.py
@@ -264,6 +264,7 @@ def list_assets_page(
     exclude_tags: Sequence[str] | None = None,
     name_contains: str | None = None,
     metadata_filter: dict | None = None,
+    job_ids: Sequence[str] | None = None,
     limit: int = 20,
     offset: int = 0,
     sort: str = "created_at",
@@ -309,6 +310,7 @@ def list_assets_page(
             exclude_tags=exclude_tags,
             name_contains=name_contains,
             metadata_filter=metadata_filter,
+            job_ids=job_ids,
             limit=fetch_limit,
             offset=offset,
             sort=sort,
diff --git a/openapi.yaml b/openapi.yaml
index 6eb5e2d1d..0dd904048 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -1572,6 +1572,17 @@ paths:
             type: string
             enum: [asc, desc]
           description: Sort direction
+        - name: job_ids
+          in: query
+          schema:
+            type: array
+            maxItems: 500
+            items:
+              type: string
+              format: uuid
+          style: form
+          explode: true
+          description: "Filter assets by associated job UUIDs. Accepts repeated query params (e.g. `?job_ids=a&job_ids=b`) or a single comma-separated value (`?job_ids=a,b`)."
         - name: include_public
           in: query
           schema:
diff --git a/tests-unit/assets_test/queries/test_asset_info.py b/tests-unit/assets_test/queries/test_asset_info.py
index 62e59fe72..f95df0d44 100644
--- a/tests-unit/assets_test/queries/test_asset_info.py
+++ b/tests-unit/assets_test/queries/test_asset_info.py
@@ -159,6 +159,56 @@ class TestListReferencesPage:
         refs, _, _ = list_references_page(session, sort="name", order="asc")
         assert refs[0].name == "large"
 
+    def test_job_ids_filter(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        job_a = str(uuid.uuid4())
+        job_b = str(uuid.uuid4())
+        ref_a = _make_reference(session, asset, name="from_job_a")
+        ref_a.job_id = job_a
+        ref_b = _make_reference(session, asset, name="from_job_b")
+        ref_b.job_id = job_b
+        _make_reference(session, asset, name="no_job")
+        session.commit()
+
+        # Single job filter
+        refs, _, total = list_references_page(session, job_ids=[job_a])
+        assert total == 1
+        assert refs[0].name == "from_job_a"
+
+        # Multi-job filter (IN)
+        refs, _, total = list_references_page(session, job_ids=[job_a, job_b])
+        names = sorted(r.name for r in refs)
+        assert total == 2
+        assert names == ["from_job_a", "from_job_b"]
+
+        # Unknown job id matches nothing
+        refs, _, total = list_references_page(session, job_ids=[str(uuid.uuid4())])
+        assert total == 0
+        assert refs == []
+
+        # Empty/None means no filter -> all three references
+        refs, _, total = list_references_page(session, job_ids=[])
+        assert total == 3
+        refs, _, total = list_references_page(session, job_ids=None)
+        assert total == 3
+
+    def test_job_ids_combined_with_other_filters(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        job_a = str(uuid.uuid4())
+        ref_match = _make_reference(session, asset, name="match.bin")
+        ref_match.job_id = job_a
+        ref_wrong_name = _make_reference(session, asset, name="other.bin")
+        ref_wrong_name.job_id = job_a
+        ref_wrong_job = _make_reference(session, asset, name="match.bin")
+        ref_wrong_job.job_id = str(uuid.uuid4())
+        session.commit()
+
+        refs, _, total = list_references_page(
+            session, job_ids=[job_a], name_contains="match"
+        )
+        assert total == 1
+        assert refs[0].id == ref_match.id
+
 
 class TestTagRetrievalOrder:
     """End-to-end check: tags written through the public write paths come
diff --git a/tests-unit/assets_test/queries/test_list_assets_query.py b/tests-unit/assets_test/queries/test_list_assets_query.py
new file mode 100644
index 000000000..e8d3430e2
--- /dev/null
+++ b/tests-unit/assets_test/queries/test_list_assets_query.py
@@ -0,0 +1,60 @@
+"""Schema-level unit tests for ListAssetsQuery (no DB required)."""
+import uuid
+
+import pytest
+from pydantic import ValidationError
+
+from app.assets.api.schemas_in import ListAssetsQuery
+
+
+class TestJobIdsValidator:
+    def test_csv_string_parses_and_canonicalizes(self):
+        a = "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE"
+        b = "11111111-2222-3333-4444-555555555555"
+        q = ListAssetsQuery.model_validate({"job_ids": f"{a},{b}"})
+        # Canonicalized to lowercase
+        assert q.job_ids == [a.lower(), b]
+
+    def test_repeated_query_params_as_list(self):
+        a = "11111111-1111-1111-1111-111111111111"
+        b = "22222222-2222-2222-2222-222222222222"
+        q = ListAssetsQuery.model_validate({"job_ids": [a, b]})
+        assert q.job_ids == [a, b]
+
+    def test_dedup_preserves_first_seen_order(self):
+        a = "11111111-1111-1111-1111-111111111111"
+        b = "22222222-2222-2222-2222-222222222222"
+        q = ListAssetsQuery.model_validate({"job_ids": [a, b, a]})
+        assert q.job_ids == [a, b]
+
+    def test_default_empty(self):
+        q = ListAssetsQuery.model_validate({})
+        assert q.job_ids == []
+
+    def test_invalid_uuid_rejected(self):
+        with pytest.raises(ValidationError) as exc:
+            ListAssetsQuery.model_validate({"job_ids": "not-a-uuid"})
+        assert "must be UUIDs" in str(exc.value)
+
+    def test_non_string_list_item_rejected(self):
+        with pytest.raises(ValidationError) as exc:
+            ListAssetsQuery.model_validate(
+                {"job_ids": ["11111111-1111-1111-1111-111111111111", 42]}
+            )
+        assert "must be strings" in str(exc.value)
+
+    def test_non_string_non_list_value_rejected(self):
+        with pytest.raises(ValidationError) as exc:
+            ListAssetsQuery.model_validate({"job_ids": {"bad": "shape"}})
+        assert "must be a string or list of strings" in str(exc.value)
+
+    def test_max_length_enforced(self):
+        too_many = [str(uuid.uuid4()) for _ in range(501)]
+        with pytest.raises(ValidationError) as exc:
+            ListAssetsQuery.model_validate({"job_ids": too_many})
+        assert exc.value.errors()[0]["type"] == "too_long"
+
+    def test_max_length_boundary_accepted(self):
+        at_cap = [str(uuid.uuid4()) for _ in range(500)]
+        q = ListAssetsQuery.model_validate({"job_ids": at_cap})
+        assert len(q.job_ids) == 500