From a7078705e38ccf94a7fc58b47939ec11731be347 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Mon, 11 May 2026 21:18:32 -0700 Subject: [PATCH] fix(assets): validate, deduplicate, and bound job_ids query param - Validate each token is a valid UUID (normalizes case); invalid input returns 422 - Raise on non-string list items instead of silently dropping - Raise on unexpected input types instead of forwarding raw value - Deduplicate tokens to avoid redundant IN clause bind params - Cap list at max_length=100 to prevent oversized IN clauses --- app/assets/api/schemas_in.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 58cc656a9..6c5fdb07c 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -1,4 +1,5 @@ import json +import uuid from dataclasses import dataclass from typing import Any, Literal @@ -52,7 +53,7 @@ class ParsedUpload: class ListAssetsQuery(BaseModel): include_tags: list[str] = Field(default_factory=list) exclude_tags: list[str] = Field(default_factory=list) - job_ids: list[str] = Field(default_factory=list) + job_ids: list[str] = Field(default_factory=list, max_length=100) name_contains: str | None = None # Accept either a JSON string (query param) or a dict @@ -72,14 +73,28 @@ class ListAssetsQuery(BaseModel): if v is None: return [] if isinstance(v, str): - return [t.strip() for t in v.split(",") if t.strip()] - if isinstance(v, list): - out: list[str] = [] + tokens = [t.strip() for t in v.split(",") if t.strip()] + elif isinstance(v, list): + tokens = [] for item in v: - if isinstance(item, str): - out.extend([t.strip() for t in item.split(",") if t.strip()]) - return out - return v + if not isinstance(item, str): + raise ValueError( + f"job_ids items must be strings, got {type(item).__name__}" + ) + tokens.extend([t.strip() for t in item.split(",") if t.strip()]) + else: + raise ValueError("job_ids must be a string or list of strings") + seen: set[str] = set() + out: list[str] = [] + for t in tokens: + try: + normalized = str(uuid.UUID(t)) + except ValueError: + raise ValueError(f"invalid UUID in job_ids: {t!r}") + if normalized not in seen: + seen.add(normalized) + out.append(normalized) + return out @field_validator("include_tags", "exclude_tags", mode="before") @classmethod