From cf72cf3fcb8df2f5816e399e6dad2b583cbc6be9 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Mon, 9 Mar 2026 22:36:00 -0700 Subject: [PATCH 01/34] feat(assets): align local API with cloud spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify response models, add missing fields, and align input schemas with the cloud OpenAPI spec at cloud.comfy.org/openapi. - Replace AssetSummary/AssetDetail/AssetUpdated with single Asset model - Add is_immutable, metadata (system_metadata), prompt_id fields - Support mime_type and preview_id in update endpoint - Make CreateFromHashBody.name optional, add mime_type, require >=1 tag - Add id/mime_type/preview_id to upload, relax tags to optional - Rename total_tags → tags in tag add/remove responses - Add GET /api/assets/tags/refine histogram endpoint - Add DB migration for system_metadata and prompt_id columns Co-Authored-By: Claude Opus 4.6 --- .../versions/0003_add_metadata_prompt.py | 31 ++++ app/assets/api/routes.py | 147 +++++++++++------- app/assets/api/schemas_in.py | 86 ++++++++-- app/assets/api/schemas_out.py | 65 +++----- app/assets/api/upload.py | 12 ++ app/assets/database/models.py | 4 + app/assets/database/queries/__init__.py | 2 + .../database/queries/asset_reference.py | 87 +---------- app/assets/database/queries/common.py | 87 ++++++++++- app/assets/database/queries/tags.py | 50 ++++++ app/assets/services/asset_management.py | 17 ++ app/assets/services/ingest.py | 13 +- app/assets/services/schemas.py | 10 +- app/assets/services/tagging.py | 23 +++ tests-unit/assets_test/test_tags_api.py | 2 +- 15 files changed, 426 insertions(+), 210 deletions(-) create mode 100644 alembic_db/versions/0003_add_metadata_prompt.py diff --git a/alembic_db/versions/0003_add_metadata_prompt.py b/alembic_db/versions/0003_add_metadata_prompt.py new file mode 100644 index 000000000..484d92923 --- /dev/null +++ b/alembic_db/versions/0003_add_metadata_prompt.py @@ -0,0 +1,31 @@ +""" +Add system_metadata and prompt_id columns to asset_references. + +Revision ID: 0003_add_metadata_prompt +Revises: 0002_merge_to_asset_references +Create Date: 2026-03-09 +""" + +from alembic import op +import sqlalchemy as sa + +revision = "0003_add_metadata_prompt" +down_revision = "0002_merge_to_asset_references" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + with op.batch_alter_table("asset_references") as batch_op: + batch_op.add_column( + sa.Column("system_metadata", sa.JSON(), nullable=True) + ) + batch_op.add_column( + sa.Column("prompt_id", sa.String(length=36), nullable=True) + ) + + +def downgrade() -> None: + with op.batch_alter_table("asset_references") as batch_op: + batch_op.drop_column("prompt_id") + batch_op.drop_column("system_metadata") diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 40dee9f46..489ace2f1 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -38,6 +38,7 @@ from app.assets.services import ( update_asset_metadata, upload_from_temp_path, ) +from app.assets.services.tagging import list_tag_histogram ROUTES = web.RouteTableDef() USER_MANAGER: user_manager.UserManager | None = None @@ -122,6 +123,29 @@ def _validate_sort_field(requested: str | None) -> str: return "created_at" +def _build_asset_response(result) -> schemas_out.Asset: + """Build an Asset response from a service result.""" + preview_url = None + if result.ref.preview_id: + preview_url = f"/api/assets/{result.ref.preview_id}/content?disposition=inline" + return schemas_out.Asset( + id=result.ref.id, + name=result.ref.name, + asset_hash=result.asset.hash if result.asset else None, + size=int(result.asset.size_bytes) if result.asset else 0, + mime_type=result.asset.mime_type if result.asset else None, + tags=result.tags, + preview_url=preview_url, + preview_id=result.ref.preview_id, + user_metadata=result.ref.user_metadata or {}, + metadata=result.ref.system_metadata, + prompt_id=result.ref.prompt_id, + created_at=result.ref.created_at, + updated_at=result.ref.updated_at, + last_access_time=result.ref.last_access_time, + ) + + @ROUTES.head("/api/assets/hash/{hash}") @_require_assets_feature_enabled async def head_asset_by_hash(request: web.Request) -> web.Response: @@ -164,20 +188,7 @@ async def list_assets_route(request: web.Request) -> web.Response: order=order, ) - summaries = [ - schemas_out.AssetSummary( - id=item.ref.id, - name=item.ref.name, - asset_hash=item.asset.hash if item.asset else None, - size=int(item.asset.size_bytes) if item.asset else None, - mime_type=item.asset.mime_type if item.asset else None, - tags=item.tags, - created_at=item.ref.created_at, - updated_at=item.ref.updated_at, - last_access_time=item.ref.last_access_time, - ) - for item in result.items - ] + summaries = [_build_asset_response(item) for item in result.items] payload = schemas_out.AssetsList( assets=summaries, @@ -207,18 +218,7 @@ async def get_asset_route(request: web.Request) -> web.Response: {"id": reference_id}, ) - payload = schemas_out.AssetDetail( - id=result.ref.id, - name=result.ref.name, - asset_hash=result.asset.hash if result.asset else None, - size=int(result.asset.size_bytes) if result.asset else None, - mime_type=result.asset.mime_type if result.asset else None, - tags=result.tags, - user_metadata=result.ref.user_metadata or {}, - preview_id=result.ref.preview_id, - created_at=result.ref.created_at, - last_access_time=result.ref.last_access_time, - ) + payload = _build_asset_response(result) except ValueError as e: return _build_error_response( 404, "ASSET_NOT_FOUND", str(e), {"id": reference_id} @@ -312,29 +312,27 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response: 400, "INVALID_JSON", "Request body must be valid JSON." ) + # Derive name from hash if not provided + name = body.name + if name is None: + name = body.hash.split(":", 1)[1] if ":" in body.hash else body.hash + result = create_from_hash( hash_str=body.hash, - name=body.name, + name=name, tags=body.tags, user_metadata=body.user_metadata, owner_id=USER_MANAGER.get_request_user_id(request), + mime_type=body.mime_type, ) if result is None: return _build_error_response( 404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist" ) + asset = _build_asset_response(result) payload_out = schemas_out.AssetCreated( - id=result.ref.id, - name=result.ref.name, - asset_hash=result.asset.hash, - size=int(result.asset.size_bytes), - mime_type=result.asset.mime_type, - tags=result.tags, - user_metadata=result.ref.user_metadata or {}, - preview_id=result.ref.preview_id, - created_at=result.ref.created_at, - last_access_time=result.ref.last_access_time, + **asset.model_dump(), created_new=result.created_new, ) return web.json_response(payload_out.model_dump(mode="json"), status=201) @@ -358,6 +356,9 @@ async def upload_asset(request: web.Request) -> web.Response: "name": parsed.provided_name, "user_metadata": parsed.user_metadata_raw, "hash": parsed.provided_hash, + "id": parsed.provided_id, + "mime_type": parsed.provided_mime_type, + "preview_id": parsed.provided_preview_id, } ) except ValidationError as ve: @@ -378,6 +379,21 @@ async def upload_asset(request: web.Request) -> web.Response: ) try: + # Idempotent create: if spec.id is provided, check if reference already exists + if spec.id: + existing = get_asset_detail( + reference_id=spec.id, + owner_id=owner_id, + ) + if existing: + delete_temp_file_if_exists(parsed.tmp_path) + asset = _build_asset_response(existing) + payload_out = schemas_out.AssetCreated( + **asset.model_dump(), + created_new=False, + ) + return web.json_response(payload_out.model_dump(mode="json"), status=200) + # Fast path: hash exists, create AssetReference without writing anything if spec.hash and parsed.provided_hash_exists is True: result = create_from_hash( @@ -386,6 +402,7 @@ async def upload_asset(request: web.Request) -> web.Response: tags=spec.tags, user_metadata=spec.user_metadata or {}, owner_id=owner_id, + mime_type=spec.mime_type, ) if result is None: delete_temp_file_if_exists(parsed.tmp_path) @@ -410,6 +427,9 @@ async def upload_asset(request: web.Request) -> web.Response: client_filename=parsed.file_client_name, owner_id=owner_id, expected_hash=spec.hash, + mime_type=spec.mime_type, + preview_id=spec.preview_id, + asset_id=spec.id, ) except AssetValidationError as e: delete_temp_file_if_exists(parsed.tmp_path) @@ -428,21 +448,13 @@ async def upload_asset(request: web.Request) -> web.Response: logging.exception("upload_asset failed for owner_id=%s", owner_id) return _build_error_response(500, "INTERNAL", "Unexpected server error.") - payload = schemas_out.AssetCreated( - id=result.ref.id, - name=result.ref.name, - asset_hash=result.asset.hash, - size=int(result.asset.size_bytes), - mime_type=result.asset.mime_type, - tags=result.tags, - user_metadata=result.ref.user_metadata or {}, - preview_id=result.ref.preview_id, - created_at=result.ref.created_at, - last_access_time=result.ref.last_access_time, + asset = _build_asset_response(result) + payload_out = schemas_out.AssetCreated( + **asset.model_dump(), created_new=result.created_new, ) status = 201 if result.created_new else 200 - return web.json_response(payload.model_dump(mode="json"), status=status) + return web.json_response(payload_out.model_dump(mode="json"), status=status) @ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}") @@ -464,15 +476,10 @@ async def update_asset_route(request: web.Request) -> web.Response: name=body.name, user_metadata=body.user_metadata, owner_id=USER_MANAGER.get_request_user_id(request), + mime_type=body.mime_type, + preview_id=body.preview_id, ) - payload = schemas_out.AssetUpdated( - id=result.ref.id, - name=result.ref.name, - asset_hash=result.asset.hash if result.asset else None, - tags=result.tags, - user_metadata=result.ref.user_metadata or {}, - updated_at=result.ref.updated_at, - ) + payload = _build_asset_response(result) except PermissionError as pe: return _build_error_response(403, "FORBIDDEN", str(pe), {"id": reference_id}) except ValueError as ve: @@ -587,7 +594,7 @@ async def add_asset_tags(request: web.Request) -> web.Response: payload = schemas_out.TagsAdd( added=result.added, already_present=result.already_present, - total_tags=result.total_tags, + tags=result.total_tags, ) except PermissionError as pe: return _build_error_response(403, "FORBIDDEN", str(pe), {"id": reference_id}) @@ -634,7 +641,7 @@ async def delete_asset_tags(request: web.Request) -> web.Response: payload = schemas_out.TagsRemove( removed=result.removed, not_present=result.not_present, - total_tags=result.total_tags, + tags=result.total_tags, ) except PermissionError as pe: return _build_error_response(403, "FORBIDDEN", str(pe), {"id": reference_id}) @@ -653,6 +660,28 @@ async def delete_asset_tags(request: web.Request) -> web.Response: return web.json_response(payload.model_dump(mode="json"), status=200) +@ROUTES.get("/api/assets/tags/refine") +@_require_assets_feature_enabled +async def get_tags_refine(request: web.Request) -> web.Response: + """GET request to get tag histogram for filtered assets.""" + query_dict = get_query_dict(request) + try: + q = schemas_in.TagsRefineQuery.model_validate(query_dict) + except ValidationError as ve: + return _build_validation_error_response("INVALID_QUERY", ve) + + tag_counts = list_tag_histogram( + owner_id=USER_MANAGER.get_request_user_id(request), + include_tags=q.include_tags, + exclude_tags=q.exclude_tags, + name_contains=q.name_contains, + metadata_filter=q.metadata_filter, + limit=q.limit, + ) + payload = schemas_out.TagHistogram(tag_counts=tag_counts) + return web.json_response(payload.model_dump(mode="json"), status=200) + + @ROUTES.post("/api/assets/seed") @_require_assets_feature_enabled async def seed_assets(request: web.Request) -> web.Response: diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index d255c938e..48d11a391 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -45,6 +45,9 @@ class ParsedUpload: user_metadata_raw: str | None provided_hash: str | None provided_hash_exists: bool | None + provided_id: str | None = None + provided_mime_type: str | None = None + provided_preview_id: str | None = None class ListAssetsQuery(BaseModel): @@ -98,11 +101,18 @@ class ListAssetsQuery(BaseModel): class UpdateAssetBody(BaseModel): name: str | None = None user_metadata: dict[str, Any] | None = None + mime_type: str | None = None + preview_id: str | None = None @model_validator(mode="after") def _validate_at_least_one_field(self): - if self.name is None and self.user_metadata is None: - raise ValueError("Provide at least one of: name, user_metadata.") + if all( + v is None + for v in (self.name, self.user_metadata, self.mime_type, self.preview_id) + ): + raise ValueError( + "Provide at least one of: name, user_metadata, mime_type, preview_id." + ) return self @@ -110,9 +120,10 @@ class CreateFromHashBody(BaseModel): model_config = ConfigDict(extra="ignore", str_strip_whitespace=True) hash: str - name: str - tags: list[str] = Field(default_factory=list) + name: str | None = None + tags: list[str] = Field(default_factory=list, min_length=1) user_metadata: dict[str, Any] = Field(default_factory=dict) + mime_type: str | None = None @field_validator("hash") @classmethod @@ -138,6 +149,44 @@ class CreateFromHashBody(BaseModel): return [] +class TagsRefineQuery(BaseModel): + include_tags: list[str] = Field(default_factory=list) + exclude_tags: list[str] = Field(default_factory=list) + name_contains: str | None = None + metadata_filter: dict[str, Any] | None = None + limit: conint(ge=1, le=1000) = 100 + + @field_validator("include_tags", "exclude_tags", mode="before") + @classmethod + def _split_csv_tags(cls, v): + if v is None: + return [] + if isinstance(v, str): + return [t.strip() for t in v.split(",") if t.strip()] + if isinstance(v, list): + out: list[str] = [] + for item in v: + if isinstance(item, str): + out.extend([t.strip() for t in item.split(",") if t.strip()]) + return out + return v + + @field_validator("metadata_filter", mode="before") + @classmethod + def _parse_metadata_json(cls, v): + if v is None or isinstance(v, dict): + return v + if isinstance(v, str) and v.strip(): + try: + parsed = json.loads(v) + except Exception as e: + raise ValueError(f"metadata_filter must be JSON: {e}") from e + if not isinstance(parsed, dict): + raise ValueError("metadata_filter must be a JSON object") + return parsed + return None + + class TagsListQuery(BaseModel): model_config = ConfigDict(extra="ignore", str_strip_whitespace=True) @@ -186,21 +235,27 @@ class TagsRemove(TagsAdd): class UploadAssetSpec(BaseModel): """Upload Asset operation. - - tags: ordered; first is root ('models'|'input'|'output'); + - tags: optional list; if provided, first is root ('models'|'input'|'output'); if root == 'models', second must be a valid category - name: display name - user_metadata: arbitrary JSON object (optional) - hash: optional canonical 'blake3:' for validation / fast-path + - id: optional UUID for idempotent creation + - mime_type: optional MIME type override + - preview_id: optional asset ID for preview Files are stored using the content hash as filename stem. """ model_config = ConfigDict(extra="ignore", str_strip_whitespace=True) - tags: list[str] = Field(..., min_length=1) + tags: list[str] = Field(default_factory=list) name: str | None = Field(default=None, max_length=512, description="Display Name") user_metadata: dict[str, Any] = Field(default_factory=dict) hash: str | None = Field(default=None) + id: str | None = Field(default=None) + mime_type: str | None = Field(default=None) + preview_id: str | None = Field(default=None) @field_validator("hash", mode="before") @classmethod @@ -278,14 +333,13 @@ class UploadAssetSpec(BaseModel): @model_validator(mode="after") def _validate_order(self): - if not self.tags: - raise ValueError("tags must be provided and non-empty") - root = self.tags[0] - if root not in {"models", "input", "output"}: - raise ValueError("first tag must be one of: models, input, output") - if root == "models": - if len(self.tags) < 2: - raise ValueError( - "models uploads require a category tag as the second tag" - ) + if self.tags: + root = self.tags[0] + if root not in {"models", "input", "output"}: + raise ValueError("first tag must be one of: models, input, output") + if root == "models": + if len(self.tags) < 2: + raise ValueError( + "models uploads require a category tag as the second tag" + ) return self diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index f36447856..e2d52c75f 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -4,16 +4,21 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field, field_serializer -class AssetSummary(BaseModel): +class Asset(BaseModel): id: str name: str asset_hash: str | None = None - size: int | None = None + size: int = 0 mime_type: str | None = None tags: list[str] = Field(default_factory=list) preview_url: str | None = None - created_at: datetime | None = None - updated_at: datetime | None = None + preview_id: str | None = None + user_metadata: dict[str, Any] = Field(default_factory=dict) + is_immutable: bool = False + metadata: dict[str, Any] | None = None + prompt_id: str | None = None + created_at: datetime + updated_at: datetime last_access_time: datetime | None = None model_config = ConfigDict(from_attributes=True) @@ -23,50 +28,16 @@ class AssetSummary(BaseModel): return v.isoformat() if v else None +class AssetCreated(Asset): + created_new: bool + + class AssetsList(BaseModel): - assets: list[AssetSummary] + assets: list[Asset] total: int has_more: bool -class AssetUpdated(BaseModel): - id: str - name: str - asset_hash: str | None = None - tags: list[str] = Field(default_factory=list) - user_metadata: dict[str, Any] = Field(default_factory=dict) - updated_at: datetime | None = None - - model_config = ConfigDict(from_attributes=True) - - @field_serializer("updated_at") - def _serialize_updated_at(self, v: datetime | None, _info): - return v.isoformat() if v else None - - -class AssetDetail(BaseModel): - id: str - name: str - asset_hash: str | None = None - size: int | None = None - mime_type: str | None = None - tags: list[str] = Field(default_factory=list) - user_metadata: dict[str, Any] = Field(default_factory=dict) - preview_id: str | None = None - created_at: datetime | None = None - last_access_time: datetime | None = None - - model_config = ConfigDict(from_attributes=True) - - @field_serializer("created_at", "last_access_time") - def _serialize_datetime(self, v: datetime | None, _info): - return v.isoformat() if v else None - - -class AssetCreated(AssetDetail): - created_new: bool - - class TagUsage(BaseModel): name: str count: int @@ -83,11 +54,15 @@ class TagsAdd(BaseModel): model_config = ConfigDict(str_strip_whitespace=True) added: list[str] = Field(default_factory=list) already_present: list[str] = Field(default_factory=list) - total_tags: list[str] = Field(default_factory=list) + tags: list[str] = Field(default_factory=list) class TagsRemove(BaseModel): model_config = ConfigDict(str_strip_whitespace=True) removed: list[str] = Field(default_factory=list) not_present: list[str] = Field(default_factory=list) - total_tags: list[str] = Field(default_factory=list) + tags: list[str] = Field(default_factory=list) + + +class TagHistogram(BaseModel): + tag_counts: dict[str, int] diff --git a/app/assets/api/upload.py b/app/assets/api/upload.py index 721c12f4d..c36257ae0 100644 --- a/app/assets/api/upload.py +++ b/app/assets/api/upload.py @@ -52,6 +52,9 @@ async def parse_multipart_upload( user_metadata_raw: str | None = None provided_hash: str | None = None provided_hash_exists: bool | None = None + provided_id: str | None = None + provided_mime_type: str | None = None + provided_preview_id: str | None = None file_written = 0 tmp_path: str | None = None @@ -128,6 +131,12 @@ async def parse_multipart_upload( provided_name = (await field.text()) or None elif fname == "user_metadata": user_metadata_raw = (await field.text()) or None + elif fname == "id": + provided_id = ((await field.text()) or "").strip() or None + elif fname == "mime_type": + provided_mime_type = ((await field.text()) or "").strip() or None + elif fname == "preview_id": + provided_preview_id = ((await field.text()) or "").strip() or None if not file_present and not (provided_hash and provided_hash_exists): raise UploadError( @@ -152,6 +161,9 @@ async def parse_multipart_upload( user_metadata_raw=user_metadata_raw, provided_hash=provided_hash, provided_hash_exists=provided_hash_exists, + provided_id=provided_id, + provided_mime_type=provided_mime_type, + provided_preview_id=provided_preview_id, ) diff --git a/app/assets/database/models.py b/app/assets/database/models.py index 03c1c1707..22340ebd5 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -96,6 +96,10 @@ class AssetReference(Base): user_metadata: Mapped[dict[str, Any] | None] = mapped_column( JSON(none_as_null=True) ) + system_metadata: Mapped[dict[str, Any] | None] = mapped_column( + JSON(none_as_null=True), nullable=True, default=None + ) + prompt_id: Mapped[str | None] = mapped_column(String(36), nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=False), nullable=False, default=get_utc_now ) diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 7888d0645..5283b400e 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -54,6 +54,7 @@ from app.assets.database.queries.tags import ( bulk_insert_tags_and_meta, ensure_tags_exist, get_reference_tags, + list_tag_counts_for_filtered_assets, list_tags_with_usage, remove_missing_tag_for_asset_id, remove_tags_from_reference, @@ -99,6 +100,7 @@ __all__ = [ "insert_reference", "list_references_by_asset_id", "list_references_page", + "list_tag_counts_for_filtered_assets", "list_tags_with_usage", "mark_references_missing_outside_prefixes", "reassign_asset_references", diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 6524791cc..d096670c1 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -24,6 +24,8 @@ from app.assets.database.models import ( ) from app.assets.database.queries.common import ( MAX_BIND_PARAMS, + apply_metadata_filter, + apply_tag_filters, build_prefix_like_conditions, build_visible_owner_clause, calculate_rows_per_statement, @@ -79,83 +81,6 @@ def convert_metadata_to_rows(key: str, value) -> list[dict]: return [{"key": key, "ordinal": 0, "val_json": value}] -def _apply_tag_filters( - stmt: sa.sql.Select, - include_tags: Sequence[str] | None = None, - exclude_tags: Sequence[str] | None = None, -) -> sa.sql.Select: - """include_tags: every tag must be present; exclude_tags: none may be present.""" - include_tags = normalize_tags(include_tags) - exclude_tags = normalize_tags(exclude_tags) - - if include_tags: - for tag_name in include_tags: - stmt = stmt.where( - exists().where( - (AssetReferenceTag.asset_reference_id == AssetReference.id) - & (AssetReferenceTag.tag_name == tag_name) - ) - ) - - if exclude_tags: - stmt = stmt.where( - ~exists().where( - (AssetReferenceTag.asset_reference_id == AssetReference.id) - & (AssetReferenceTag.tag_name.in_(exclude_tags)) - ) - ) - return stmt - - -def _apply_metadata_filter( - stmt: sa.sql.Select, - metadata_filter: dict | None = None, -) -> sa.sql.Select: - """Apply filters using asset_reference_meta projection table.""" - if not metadata_filter: - return stmt - - def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement: - return sa.exists().where( - AssetReferenceMeta.asset_reference_id == AssetReference.id, - AssetReferenceMeta.key == key, - *preds, - ) - - def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement: - if value is None: - no_row_for_key = sa.not_( - sa.exists().where( - AssetReferenceMeta.asset_reference_id == AssetReference.id, - AssetReferenceMeta.key == key, - ) - ) - null_row = _exists_for_pred( - key, - AssetReferenceMeta.val_json.is_(None), - AssetReferenceMeta.val_str.is_(None), - AssetReferenceMeta.val_num.is_(None), - AssetReferenceMeta.val_bool.is_(None), - ) - return sa.or_(no_row_for_key, null_row) - - if isinstance(value, bool): - return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value)) - if isinstance(value, (int, float, Decimal)): - num = value if isinstance(value, Decimal) else Decimal(str(value)) - return _exists_for_pred(key, AssetReferenceMeta.val_num == num) - if isinstance(value, str): - return _exists_for_pred(key, AssetReferenceMeta.val_str == value) - return _exists_for_pred(key, AssetReferenceMeta.val_json == value) - - for k, v in metadata_filter.items(): - if isinstance(v, list): - ors = [_exists_clause_for_value(k, elem) for elem in v] - if ors: - stmt = stmt.where(sa.or_(*ors)) - else: - stmt = stmt.where(_exists_clause_for_value(k, v)) - return stmt def get_reference_by_id( @@ -336,8 +261,8 @@ def list_references_page( escaped, esc = escape_sql_like_string(name_contains) base = base.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc)) - base = _apply_tag_filters(base, include_tags, exclude_tags) - base = _apply_metadata_filter(base, metadata_filter) + base = apply_tag_filters(base, include_tags, exclude_tags) + base = apply_metadata_filter(base, metadata_filter) sort = (sort or "created_at").lower() order = (order or "desc").lower() @@ -366,8 +291,8 @@ def list_references_page( count_stmt = count_stmt.where( AssetReference.name.ilike(f"%{escaped}%", escape=esc) ) - count_stmt = _apply_tag_filters(count_stmt, include_tags, exclude_tags) - count_stmt = _apply_metadata_filter(count_stmt, metadata_filter) + count_stmt = apply_tag_filters(count_stmt, include_tags, exclude_tags) + count_stmt = apply_metadata_filter(count_stmt, metadata_filter) total = int(session.execute(count_stmt).scalar_one() or 0) refs = session.execute(base).unique().scalars().all() diff --git a/app/assets/database/queries/common.py b/app/assets/database/queries/common.py index 194c39a1e..94ec5a526 100644 --- a/app/assets/database/queries/common.py +++ b/app/assets/database/queries/common.py @@ -1,12 +1,14 @@ """Shared utilities for database query modules.""" import os -from typing import Iterable +from decimal import Decimal +from typing import Iterable, Sequence import sqlalchemy as sa +from sqlalchemy import exists -from app.assets.database.models import AssetReference -from app.assets.helpers import escape_sql_like_string +from app.assets.database.models import AssetReference, AssetReferenceMeta, AssetReferenceTag +from app.assets.helpers import escape_sql_like_string, normalize_tags MAX_BIND_PARAMS = 800 @@ -52,3 +54,82 @@ def build_prefix_like_conditions( escaped, esc = escape_sql_like_string(base) conds.append(AssetReference.file_path.like(escaped + "%", escape=esc)) return conds + + +def apply_tag_filters( + stmt: sa.sql.Select, + include_tags: Sequence[str] | None = None, + exclude_tags: Sequence[str] | None = None, +) -> sa.sql.Select: + """include_tags: every tag must be present; exclude_tags: none may be present.""" + include_tags = normalize_tags(include_tags) + exclude_tags = normalize_tags(exclude_tags) + + if include_tags: + for tag_name in include_tags: + stmt = stmt.where( + exists().where( + (AssetReferenceTag.asset_reference_id == AssetReference.id) + & (AssetReferenceTag.tag_name == tag_name) + ) + ) + + if exclude_tags: + stmt = stmt.where( + ~exists().where( + (AssetReferenceTag.asset_reference_id == AssetReference.id) + & (AssetReferenceTag.tag_name.in_(exclude_tags)) + ) + ) + return stmt + + +def apply_metadata_filter( + stmt: sa.sql.Select, + metadata_filter: dict | None = None, +) -> sa.sql.Select: + """Apply filters using asset_reference_meta projection table.""" + if not metadata_filter: + return stmt + + def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement: + return sa.exists().where( + AssetReferenceMeta.asset_reference_id == AssetReference.id, + AssetReferenceMeta.key == key, + *preds, + ) + + def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement: + if value is None: + no_row_for_key = sa.not_( + sa.exists().where( + AssetReferenceMeta.asset_reference_id == AssetReference.id, + AssetReferenceMeta.key == key, + ) + ) + null_row = _exists_for_pred( + key, + AssetReferenceMeta.val_json.is_(None), + AssetReferenceMeta.val_str.is_(None), + AssetReferenceMeta.val_num.is_(None), + AssetReferenceMeta.val_bool.is_(None), + ) + return sa.or_(no_row_for_key, null_row) + + if isinstance(value, bool): + return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value)) + if isinstance(value, (int, float, Decimal)): + num = value if isinstance(value, Decimal) else Decimal(str(value)) + return _exists_for_pred(key, AssetReferenceMeta.val_num == num) + if isinstance(value, str): + return _exists_for_pred(key, AssetReferenceMeta.val_str == value) + return _exists_for_pred(key, AssetReferenceMeta.val_json == value) + + for k, v in metadata_filter.items(): + if isinstance(v, list): + ors = [_exists_clause_for_value(k, elem) for elem in v] + if ors: + stmt = stmt.where(sa.or_(*ors)) + else: + stmt = stmt.where(_exists_clause_for_value(k, v)) + return stmt diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index 8b25fee67..05acbdbd9 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -8,12 +8,15 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from app.assets.database.models import ( + Asset, AssetReference, AssetReferenceMeta, AssetReferenceTag, Tag, ) from app.assets.database.queries.common import ( + apply_metadata_filter, + apply_tag_filters, build_visible_owner_clause, iter_row_chunks, ) @@ -320,6 +323,53 @@ def list_tags_with_usage( return rows_norm, int(total or 0) +def list_tag_counts_for_filtered_assets( + session: Session, + owner_id: str = "", + include_tags: Sequence[str] | None = None, + exclude_tags: Sequence[str] | None = None, + name_contains: str | None = None, + metadata_filter: dict | None = None, + limit: int = 100, +) -> dict[str, int]: + """Return tag counts for assets matching the given filters. + + Uses the same filtering logic as list_references_page but returns + {tag_name: count} instead of paginated references. + """ + # Build a subquery of matching reference IDs + ref_sq = ( + select(AssetReference.id) + .join(Asset, Asset.id == AssetReference.asset_id) + .where(build_visible_owner_clause(owner_id)) + .where(AssetReference.is_missing == False) # noqa: E712 + .where(AssetReference.deleted_at.is_(None)) + ) + + if name_contains: + escaped, esc = escape_sql_like_string(name_contains) + ref_sq = ref_sq.where(AssetReference.name.ilike(f"%{escaped}%", escape=esc)) + + ref_sq = apply_tag_filters(ref_sq, include_tags, exclude_tags) + ref_sq = apply_metadata_filter(ref_sq, metadata_filter) + ref_sq = ref_sq.subquery() + + # Count tags across those references + q = ( + select( + AssetReferenceTag.tag_name, + func.count(AssetReferenceTag.asset_reference_id).label("cnt"), + ) + .where(AssetReferenceTag.asset_reference_id.in_(select(ref_sq.c.id))) + .group_by(AssetReferenceTag.tag_name) + .order_by(func.count(AssetReferenceTag.asset_reference_id).desc()) + .limit(limit) + ) + + rows = session.execute(q).all() + return {tag_name: int(cnt) for tag_name, cnt in rows} + + def bulk_insert_tags_and_meta( session: Session, tag_rows: list[dict], diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index 3fe7115c8..b85e77edb 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -20,6 +20,7 @@ from app.assets.database.queries import ( set_reference_metadata, set_reference_preview, set_reference_tags, + update_asset_hash_and_mime, update_reference_access_time, update_reference_name, update_reference_updated_at, @@ -67,6 +68,8 @@ def update_asset_metadata( user_metadata: UserMetadata = None, tag_origin: str = "manual", owner_id: str = "", + mime_type: str | None = None, + preview_id: str | None = None, ) -> AssetDetailResult: with create_session() as session: ref = get_reference_with_owner_check(session, reference_id, owner_id) @@ -103,6 +106,20 @@ def update_asset_metadata( ) touched = True + if mime_type is not None: + update_asset_hash_and_mime( + session, asset_id=ref.asset_id, mime_type=mime_type + ) + touched = True + + if preview_id is not None: + set_reference_preview( + session, + reference_id=reference_id, + preview_asset_id=preview_id, + ) + touched = True + if touched and user_metadata is None: update_reference_updated_at(session, reference_id=reference_id) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 44d7aef36..42ce08c41 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -18,6 +18,7 @@ from app.assets.database.queries import ( remove_missing_tag_for_asset_id, set_reference_metadata, set_reference_tags, + update_asset_hash_and_mime, upsert_asset, upsert_reference, validate_tags_exist, @@ -242,6 +243,9 @@ def upload_from_temp_path( client_filename: str | None = None, owner_id: str = "", expected_hash: str | None = None, + mime_type: str | None = None, + preview_id: str | None = None, + asset_id: str | None = None, ) -> UploadResult: try: digest, _ = hashing.compute_blake3_hash(temp_path) @@ -291,7 +295,7 @@ def upload_from_temp_path( dest_abs = os.path.abspath(os.path.join(dest_dir, hashed_basename)) validate_path_within_base(dest_abs, base_dir) - content_type = ( + content_type = mime_type or ( mimetypes.guess_type(os.path.basename(src_for_ext), strict=False)[0] or mimetypes.guess_type(hashed_basename, strict=False)[0] or "application/octet-stream" @@ -315,7 +319,7 @@ def upload_from_temp_path( mime_type=content_type, info_name=_sanitize_filename(name or client_filename, fallback=digest), owner_id=owner_id, - preview_id=None, + preview_id=preview_id, user_metadata=user_metadata or {}, tags=tags, tag_origin="manual", @@ -348,6 +352,7 @@ def create_from_hash( tags: list[str] | None = None, user_metadata: dict | None = None, owner_id: str = "", + mime_type: str | None = None, ) -> UploadResult | None: canonical = hash_str.strip().lower() @@ -356,6 +361,10 @@ def create_from_hash( if not asset: return None + if mime_type and asset.mime_type != mime_type: + update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=mime_type) + session.commit() + result = _register_existing_asset( asset_hash=canonical, name=_sanitize_filename( diff --git a/app/assets/services/schemas.py b/app/assets/services/schemas.py index 8b1f1f4dc..d63c1f60d 100644 --- a/app/assets/services/schemas.py +++ b/app/assets/services/schemas.py @@ -23,9 +23,11 @@ class ReferenceData: file_path: str | None user_metadata: UserMetadata preview_id: str | None - created_at: datetime - updated_at: datetime - last_access_time: datetime | None + system_metadata: dict[str, Any] | None = None + prompt_id: str | None = None + created_at: datetime = None # type: ignore[assignment] + updated_at: datetime = None # type: ignore[assignment] + last_access_time: datetime | None = None @dataclass(frozen=True) @@ -93,6 +95,8 @@ def extract_reference_data(ref: AssetReference) -> ReferenceData: file_path=ref.file_path, user_metadata=ref.user_metadata, preview_id=ref.preview_id, + system_metadata=ref.system_metadata, + prompt_id=ref.prompt_id, created_at=ref.created_at, updated_at=ref.updated_at, last_access_time=ref.last_access_time, diff --git a/app/assets/services/tagging.py b/app/assets/services/tagging.py index 28900464d..37b612753 100644 --- a/app/assets/services/tagging.py +++ b/app/assets/services/tagging.py @@ -1,3 +1,5 @@ +from typing import Sequence + from app.assets.database.queries import ( AddTagsResult, RemoveTagsResult, @@ -6,6 +8,7 @@ from app.assets.database.queries import ( list_tags_with_usage, remove_tags_from_reference, ) +from app.assets.database.queries.tags import list_tag_counts_for_filtered_assets from app.assets.services.schemas import TagUsage from app.database.db import create_session @@ -73,3 +76,23 @@ def list_tags( ) return [TagUsage(name, tag_type, count) for name, tag_type, count in rows], total + + +def list_tag_histogram( + owner_id: str = "", + include_tags: Sequence[str] | None = None, + exclude_tags: Sequence[str] | None = None, + name_contains: str | None = None, + metadata_filter: dict | None = None, + limit: int = 100, +) -> dict[str, int]: + with create_session() as session: + return list_tag_counts_for_filtered_assets( + session, + owner_id=owner_id, + include_tags=include_tags, + exclude_tags=exclude_tags, + name_contains=name_contains, + metadata_filter=metadata_filter, + limit=limit, + ) diff --git a/tests-unit/assets_test/test_tags_api.py b/tests-unit/assets_test/test_tags_api.py index 595bf29c6..cc351ef1b 100644 --- a/tests-unit/assets_test/test_tags_api.py +++ b/tests-unit/assets_test/test_tags_api.py @@ -97,7 +97,7 @@ def test_add_and_remove_tags(http: requests.Session, api_base: str, seeded_asset # normalized, deduplicated; 'unit-tests' was already present from the seed assert set(b1["added"]) == {"newtag", "beta"} assert set(b1["already_present"]) == {"unit-tests"} - assert "newtag" in b1["total_tags"] and "beta" in b1["total_tags"] + assert "newtag" in b1["tags"] and "beta" in b1["tags"] rg = http.get(f"{api_base}/api/assets/{aid}", timeout=120) g = rg.json() From 8f9b81bf7bdd8fa2645f00d0596c8e5bda3773d6 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Tue, 10 Mar 2026 14:23:02 -0700 Subject: [PATCH 02/34] Fix review issues: tags validation, size nullability, type annotation, hash mismatch check, and add tag histogram tests - Remove contradictory min_length=1 from CreateFromHashBody.tags default - Restore size field to int|None=None for proper null semantics - Add Union type annotation to _build_asset_response result param - Add hash mismatch validation on idempotent upload path (409 HASH_MISMATCH) - Add unit tests for list_tag_histogram service function Amp-Thread-ID: https://ampcode.com/threads/T-019cd993-f43c-704e-b3d7-6cfc3d4d4a80 Co-authored-by: Amp --- app/assets/api/routes.py | 13 +- app/assets/api/schemas_in.py | 2 +- app/assets/api/schemas_out.py | 2 +- .../services/test_tag_histogram.py | 123 ++++++++++++++++++ 4 files changed, 136 insertions(+), 4 deletions(-) create mode 100644 tests-unit/assets_test/services/test_tag_histogram.py diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 489ace2f1..c848e4c89 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -13,6 +13,7 @@ from pydantic import ValidationError import folder_paths from app import user_manager from app.assets.api import schemas_in, schemas_out +from app.assets.services import schemas from app.assets.api.schemas_in import ( AssetValidationError, UploadError, @@ -123,7 +124,7 @@ def _validate_sort_field(requested: str | None) -> str: return "created_at" -def _build_asset_response(result) -> schemas_out.Asset: +def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResult) -> schemas_out.Asset: """Build an Asset response from a service result.""" preview_url = None if result.ref.preview_id: @@ -132,7 +133,7 @@ def _build_asset_response(result) -> schemas_out.Asset: id=result.ref.id, name=result.ref.name, asset_hash=result.asset.hash if result.asset else None, - size=int(result.asset.size_bytes) if result.asset else 0, + size=int(result.asset.size_bytes) if result.asset else None, mime_type=result.asset.mime_type if result.asset else None, tags=result.tags, preview_url=preview_url, @@ -386,6 +387,14 @@ async def upload_asset(request: web.Request) -> web.Response: owner_id=owner_id, ) if existing: + # Validate that uploaded content matches existing asset + if spec.hash and existing.asset and existing.asset.hash != spec.hash: + delete_temp_file_if_exists(parsed.tmp_path) + return _build_error_response( + 409, + "HASH_MISMATCH", + "Uploaded file hash does not match existing asset.", + ) delete_temp_file_if_exists(parsed.tmp_path) asset = _build_asset_response(existing) payload_out = schemas_out.AssetCreated( diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 48d11a391..7593e617a 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -121,7 +121,7 @@ class CreateFromHashBody(BaseModel): hash: str name: str | None = None - tags: list[str] = Field(default_factory=list, min_length=1) + tags: list[str] = Field(default_factory=list) user_metadata: dict[str, Any] = Field(default_factory=dict) mime_type: str | None = None diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index e2d52c75f..972d88022 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -8,7 +8,7 @@ class Asset(BaseModel): id: str name: str asset_hash: str | None = None - size: int = 0 + size: int | None = None mime_type: str | None = None tags: list[str] = Field(default_factory=list) preview_url: str | None = None diff --git a/tests-unit/assets_test/services/test_tag_histogram.py b/tests-unit/assets_test/services/test_tag_histogram.py new file mode 100644 index 000000000..7bcd518ec --- /dev/null +++ b/tests-unit/assets_test/services/test_tag_histogram.py @@ -0,0 +1,123 @@ +"""Tests for list_tag_histogram service function.""" +from sqlalchemy.orm import Session + +from app.assets.database.models import Asset, AssetReference +from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference +from app.assets.helpers import get_utc_now +from app.assets.services.tagging import list_tag_histogram + + +def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset: + asset = Asset(hash=hash_val, size_bytes=1024) + session.add(asset) + session.flush() + return asset + + +def _make_reference( + session: Session, + asset: Asset, + name: str = "test", + owner_id: str = "", +) -> AssetReference: + now = get_utc_now() + ref = AssetReference( + owner_id=owner_id, + name=name, + asset_id=asset.id, + created_at=now, + updated_at=now, + last_access_time=now, + ) + session.add(ref) + session.flush() + return ref + + +class TestListTagHistogram: + def test_returns_counts_for_all_tags(self, mock_create_session, session: Session): + ensure_tags_exist(session, ["alpha", "beta"]) + a1 = _make_asset(session, "blake3:aaa") + r1 = _make_reference(session, a1, name="r1") + add_tags_to_reference(session, reference_id=r1.id, tags=["alpha", "beta"]) + + a2 = _make_asset(session, "blake3:bbb") + r2 = _make_reference(session, a2, name="r2") + add_tags_to_reference(session, reference_id=r2.id, tags=["alpha"]) + session.commit() + + result = list_tag_histogram() + + assert result["alpha"] == 2 + assert result["beta"] == 1 + + def test_empty_when_no_assets(self, mock_create_session, session: Session): + ensure_tags_exist(session, ["unused"]) + session.commit() + + result = list_tag_histogram() + + assert result == {} + + def test_include_tags_filter(self, mock_create_session, session: Session): + ensure_tags_exist(session, ["models", "loras", "input"]) + a1 = _make_asset(session, "blake3:aaa") + r1 = _make_reference(session, a1, name="r1") + add_tags_to_reference(session, reference_id=r1.id, tags=["models", "loras"]) + + a2 = _make_asset(session, "blake3:bbb") + r2 = _make_reference(session, a2, name="r2") + add_tags_to_reference(session, reference_id=r2.id, tags=["input"]) + session.commit() + + result = list_tag_histogram(include_tags=["models"]) + + # Only r1 has "models", so only its tags appear + assert "models" in result + assert "loras" in result + assert "input" not in result + + def test_exclude_tags_filter(self, mock_create_session, session: Session): + ensure_tags_exist(session, ["models", "loras", "input"]) + a1 = _make_asset(session, "blake3:aaa") + r1 = _make_reference(session, a1, name="r1") + add_tags_to_reference(session, reference_id=r1.id, tags=["models", "loras"]) + + a2 = _make_asset(session, "blake3:bbb") + r2 = _make_reference(session, a2, name="r2") + add_tags_to_reference(session, reference_id=r2.id, tags=["input"]) + session.commit() + + result = list_tag_histogram(exclude_tags=["models"]) + + # r1 excluded, only r2's tags remain + assert "input" in result + assert "loras" not in result + + def test_name_contains_filter(self, mock_create_session, session: Session): + ensure_tags_exist(session, ["alpha", "beta"]) + a1 = _make_asset(session, "blake3:aaa") + r1 = _make_reference(session, a1, name="my_model.safetensors") + add_tags_to_reference(session, reference_id=r1.id, tags=["alpha"]) + + a2 = _make_asset(session, "blake3:bbb") + r2 = _make_reference(session, a2, name="picture.png") + add_tags_to_reference(session, reference_id=r2.id, tags=["beta"]) + session.commit() + + result = list_tag_histogram(name_contains="model") + + assert "alpha" in result + assert "beta" not in result + + def test_limit_caps_results(self, mock_create_session, session: Session): + tags = [f"tag{i}" for i in range(10)] + ensure_tags_exist(session, tags) + a = _make_asset(session, "blake3:aaa") + r = _make_reference(session, a, name="r1") + add_tags_to_reference(session, reference_id=r.id, tags=tags) + session.commit() + + result = list_tag_histogram(limit=3) + + assert len(result) == 3 From a0c2fe9f0cec2b5daeac13e228901c1cc46530a2 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Tue, 10 Mar 2026 17:59:23 -0700 Subject: [PATCH 03/34] Add preview_url to /assets API response using /api/view endpoint For input and output assets, generate a preview_url pointing to the existing /api/view endpoint using the asset's filename and tag-derived type (input/output). Handles subdirectories via subfolder param and URL-encodes filenames with spaces, unicode, and special characters. This aligns the OSS backend response with the frontend AssetCard expectation for thumbnail rendering. Amp-Thread-ID: https://ampcode.com/threads/T-019cda3f-5c2c-751a-a906-ac6c9153ac5c Co-authored-by: Amp --- app/assets/api/routes.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index c848e4c89..d9a8babd3 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -124,11 +124,35 @@ def _validate_sort_field(requested: str | None) -> str: return "created_at" +def _build_preview_url_from_view(tags: list[str], user_metadata: dict[str, Any] | None) -> str | None: + """Build a /api/view preview URL from asset tags and user_metadata filename.""" + if not user_metadata: + return None + filename = user_metadata.get("filename") + if not filename: + return None + + if "input" in tags: + view_type = "input" + elif "output" in tags: + view_type = "output" + else: + return None + + subfolder = "" + if "/" in filename: + subfolder, filename = filename.rsplit("/", 1) + + encoded_filename = urllib.parse.quote(filename, safe="") + url = f"/api/view?type={view_type}&filename={encoded_filename}" + if subfolder: + url += f"&subfolder={urllib.parse.quote(subfolder, safe='')}" + return url + + def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResult) -> schemas_out.Asset: """Build an Asset response from a service result.""" - preview_url = None - if result.ref.preview_id: - preview_url = f"/api/assets/{result.ref.preview_id}/content?disposition=inline" + preview_url = _build_preview_url_from_view(result.tags, result.ref.user_metadata) return schemas_out.Asset( id=result.ref.id, name=result.ref.name, From c8370c03f6186c05df0fb6cdd3472befc65e407a Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Tue, 10 Mar 2026 18:28:35 -0700 Subject: [PATCH 04/34] chore: remove unused imports from asset_reference queries Amp-Thread-ID: https://ampcode.com/threads/T-019cda7d-cb21-77b4-a51b-b965af60208c Co-authored-by: Amp --- app/assets/database/queries/asset_reference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index d096670c1..4c9965e3b 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -10,7 +10,7 @@ from decimal import Decimal from typing import NamedTuple, Sequence import sqlalchemy as sa -from sqlalchemy import delete, exists, select +from sqlalchemy import delete, select from sqlalchemy.dialects import sqlite from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session, noload @@ -31,7 +31,7 @@ from app.assets.database.queries.common import ( calculate_rows_per_statement, iter_chunks, ) -from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags +from app.assets.helpers import escape_sql_like_string, get_utc_now def _check_is_scalar(v): From 818494764aea9905a2f54fb934ec83b78e8497a1 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Tue, 10 Mar 2026 18:31:05 -0700 Subject: [PATCH 05/34] feat: resolve blake3 hashes in /view endpoint via asset database Amp-Thread-ID: https://ampcode.com/threads/T-019cda7d-cb21-77b4-a51b-b965af60208c Co-authored-by: Amp --- server.py | 68 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/server.py b/server.py index 85a8964be..5aeffa5ec 100644 --- a/server.py +++ b/server.py @@ -479,30 +479,37 @@ class PromptServer(): async def view_image(request): if "filename" in request.rel_url.query: filename = request.rel_url.query["filename"] - filename, output_dir = folder_paths.annotated_filepath(filename) - if not filename: - return web.Response(status=400) + # If the filename is a blake3 hash, resolve it via the asset database + if filename.startswith("blake3:"): + file, filename = self._resolve_blake3_to_path(filename) + if file is None: + return web.Response(status=404) + else: + filename, output_dir = folder_paths.annotated_filepath(filename) - # validation for security: prevent accessing arbitrary path - if filename[0] == '/' or '..' in filename: - return web.Response(status=400) + if not filename: + return web.Response(status=400) - if output_dir is None: - type = request.rel_url.query.get("type", "output") - output_dir = folder_paths.get_directory_by_type(type) + # validation for security: prevent accessing arbitrary path + if filename[0] == '/' or '..' in filename: + return web.Response(status=400) - if output_dir is None: - return web.Response(status=400) + if output_dir is None: + type = request.rel_url.query.get("type", "output") + output_dir = folder_paths.get_directory_by_type(type) - if "subfolder" in request.rel_url.query: - full_output_dir = os.path.join(output_dir, request.rel_url.query["subfolder"]) - if os.path.commonpath((os.path.abspath(full_output_dir), output_dir)) != output_dir: - return web.Response(status=403) - output_dir = full_output_dir + if output_dir is None: + return web.Response(status=400) - filename = os.path.basename(filename) - file = os.path.join(output_dir, filename) + if "subfolder" in request.rel_url.query: + full_output_dir = os.path.join(output_dir, request.rel_url.query["subfolder"]) + if os.path.commonpath((os.path.abspath(full_output_dir), output_dir)) != output_dir: + return web.Response(status=403) + output_dir = full_output_dir + + filename = os.path.basename(filename) + file = os.path.join(output_dir, filename) if os.path.isfile(file): if 'preview' in request.rel_url.query: @@ -995,6 +1002,31 @@ class PromptServer(): timeout = aiohttp.ClientTimeout(total=None) # no timeout self.client_session = aiohttp.ClientSession(timeout=timeout) + def _resolve_blake3_to_path(self, asset_hash: str) -> tuple[str | None, str]: + """Resolve a blake3 hash to an absolute file path via the asset database. + + Returns (abs_path, display_filename) or (None, "") if not found. + """ + from app.database.db import create_session + from app.assets.database.queries import get_asset_by_hash, list_references_by_asset_id + from app.assets.helpers import select_best_live_path + + with create_session() as session: + asset = get_asset_by_hash(session, asset_hash) + if not asset: + return None, "" + refs = list_references_by_asset_id(session, asset_id=asset.id) + abs_path = select_best_live_path(refs) + if not abs_path: + return None, "" + display_name = os.path.basename(abs_path) + # Prefer the reference name if available + for ref in refs: + if ref.file_path == abs_path and ref.name: + display_name = ref.name + break + return abs_path, display_name + def add_routes(self): self.user_manager.add_routes(self.routes) self.model_file_manager.add_routes(self.routes) From 9c97dab90eb6efadd1c6d27ea9a05cd83bbe04e5 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Wed, 11 Mar 2026 21:35:18 -0700 Subject: [PATCH 06/34] Register uploaded images in asset database when --enable-assets is set Add register_file_in_place() service function to ingest module for registering already-saved files without moving them. Call it from the /upload/image endpoint to return asset metadata in the response. Amp-Thread-ID: https://ampcode.com/threads/T-019ce023-3384-7560-bacf-de40b0de0dd2 Co-authored-by: Amp --- app/assets/services/ingest.py | 55 +++++++++++++++++++++++++++++++++++ server.py | 26 +++++++++++++++-- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 42ce08c41..64a1ef68b 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -346,6 +346,61 @@ def upload_from_temp_path( ) +def register_file_in_place( + abs_path: str, + name: str, + tags: list[str], + owner_id: str = "", + mime_type: str | None = None, +) -> UploadResult: + """Register an already-saved file in the asset database without moving it.""" + try: + digest, _ = hashing.compute_blake3_hash(abs_path) + except ImportError as e: + raise DependencyMissingError(str(e)) + except Exception as e: + raise RuntimeError(f"failed to hash file: {e}") + asset_hash = "blake3:" + digest + + size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path) + content_type = mime_type or ( + mimetypes.guess_type(abs_path, strict=False)[0] + or "application/octet-stream" + ) + + ingest_result = _ingest_file_from_path( + abs_path=abs_path, + asset_hash=asset_hash, + size_bytes=size_bytes, + mtime_ns=mtime_ns, + mime_type=content_type, + info_name=_sanitize_filename(name, fallback=digest), + owner_id=owner_id, + tags=tags, + tag_origin="upload", + require_existing_tags=False, + ) + reference_id = ingest_result.reference_id + if not reference_id: + raise RuntimeError("failed to create asset reference") + + with create_session() as session: + pair = fetch_reference_and_asset( + session, reference_id=reference_id, owner_id=owner_id + ) + if not pair: + raise RuntimeError("inconsistent DB state after ingest") + ref, asset = pair + tag_names = get_reference_tags(session, reference_id=ref.id) + + return UploadResult( + ref=extract_reference_data(ref), + asset=extract_asset_data(asset), + tags=tag_names, + created_new=ingest_result.asset_created, + ) + + def create_from_hash( hash_str: str, name: str, diff --git a/server.py b/server.py index 5aeffa5ec..75b69d31b 100644 --- a/server.py +++ b/server.py @@ -35,6 +35,7 @@ from app.frontend_management import FrontendManager, parse_version from comfy_api.internal import _ComfyNodeInternal from app.assets.seeder import asset_seeder from app.assets.api.routes import register_assets_routes +from app.assets.services.ingest import register_file_in_place from app.user_manager import UserManager from app.model_manager import ModelFileManager @@ -163,7 +164,11 @@ def create_origin_only_middleware(): if host_domain_parsed.port is None: origin_domain = parsed.hostname - if loopback and host_domain is not None and origin_domain is not None and len(host_domain) > 0 and len(origin_domain) > 0: + # When both host and origin are loopback, allow port differences + # (e.g. frontend dev server on :5173 proxying to backend on :8188) + if loopback and is_loopback(parsed.hostname): + pass + elif loopback and host_domain is not None and origin_domain is not None and len(host_domain) > 0 and len(origin_domain) > 0: if host_domain != origin_domain: logging.warning("WARNING: request with non matching host and origin {} != {}, returning 403".format(host_domain, origin_domain)) return web.Response(status=403) @@ -419,7 +424,24 @@ class PromptServer(): with open(filepath, "wb") as f: f.write(image.file.read()) - return web.json_response({"name" : filename, "subfolder": subfolder, "type": image_upload_type}) + resp = {"name" : filename, "subfolder": subfolder, "type": image_upload_type} + + if args.enable_assets: + try: + tag = image_upload_type if image_upload_type in ("input", "output") else "input" + result = register_file_in_place(abs_path=filepath, name=filename, tags=[tag]) + resp["asset"] = { + "id": result.ref.id, + "name": result.ref.name, + "asset_hash": result.asset.hash if result.asset else None, + "size": result.asset.size_bytes if result.asset else None, + "mime_type": result.asset.mime_type if result.asset else None, + "tags": result.tags, + } + except Exception: + logging.warning("Failed to register uploaded image as asset", exc_info=True) + + return web.json_response(resp) else: return web.Response(status=400) From a6079b40b4ed562e71c59db2599be031bec188d3 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Wed, 11 Mar 2026 21:38:54 -0700 Subject: [PATCH 07/34] Exclude None fields from asset API JSON responses Add exclude_none=True to model_dump() calls across asset routes to keep response payloads clean by omitting unset optional fields. Amp-Thread-ID: https://ampcode.com/threads/T-019ce023-3384-7560-bacf-de40b0de0dd2 Co-authored-by: Amp --- app/assets/api/routes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index d9a8babd3..c6683d88a 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -255,7 +255,7 @@ async def get_asset_route(request: web.Request) -> web.Response: USER_MANAGER.get_request_user_id(request), ) return _build_error_response(500, "INTERNAL", "Unexpected server error.") - return web.json_response(payload.model_dump(mode="json"), status=200) + return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200) @ROUTES.get(f"/api/assets/{{id:{UUID_RE}}}/content") @@ -526,7 +526,7 @@ async def update_asset_route(request: web.Request) -> web.Response: USER_MANAGER.get_request_user_id(request), ) return _build_error_response(500, "INTERNAL", "Unexpected server error.") - return web.json_response(payload.model_dump(mode="json"), status=200) + return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200) @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}") @@ -595,7 +595,7 @@ async def get_tags(request: web.Request) -> web.Response: payload = schemas_out.TagsList( tags=tags, total=total, has_more=(query.offset + len(tags)) < total ) - return web.json_response(payload.model_dump(mode="json")) + return web.json_response(payload.model_dump(mode="json", exclude_none=True)) @ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags") @@ -643,7 +643,7 @@ async def add_asset_tags(request: web.Request) -> web.Response: ) return _build_error_response(500, "INTERNAL", "Unexpected server error.") - return web.json_response(payload.model_dump(mode="json"), status=200) + return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200) @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags") @@ -690,7 +690,7 @@ async def delete_asset_tags(request: web.Request) -> web.Response: ) return _build_error_response(500, "INTERNAL", "Unexpected server error.") - return web.json_response(payload.model_dump(mode="json"), status=200) + return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200) @ROUTES.get("/api/assets/tags/refine") @@ -712,7 +712,7 @@ async def get_tags_refine(request: web.Request) -> web.Response: limit=q.limit, ) payload = schemas_out.TagHistogram(tag_counts=tag_counts) - return web.json_response(payload.model_dump(mode="json"), status=200) + return web.json_response(payload.model_dump(mode="json", exclude_none=True), status=200) @ROUTES.post("/api/assets/seed") From 8270a0954f8a7f1fbec9ec1f59992f0d1eb431c8 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Wed, 11 Mar 2026 21:47:56 -0700 Subject: [PATCH 08/34] Add comment explaining why /view resolves blake3 hashes Amp-Thread-ID: https://ampcode.com/threads/T-019ce023-3384-7560-bacf-de40b0de0dd2 Co-authored-by: Amp --- server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 75b69d31b..eaeacfa45 100644 --- a/server.py +++ b/server.py @@ -502,7 +502,10 @@ class PromptServer(): if "filename" in request.rel_url.query: filename = request.rel_url.query["filename"] - # If the filename is a blake3 hash, resolve it via the asset database + # The frontend's LoadImage combo widget uses asset_hash values + # (e.g. "blake3:...") as widget values. When litegraph renders the + # node preview, it constructs /view?filename=, so this + # endpoint must resolve blake3 hashes to their on-disk file paths. if filename.startswith("blake3:"): file, filename = self._resolve_blake3_to_path(filename) if file is None: From c776199f5fc29d251ed56b242d8ca08f66cc3678 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Wed, 11 Mar 2026 22:00:12 -0700 Subject: [PATCH 09/34] Move blake3 hash resolution to asset_management service Extract resolve_hash_to_path() into asset_management.py and remove _resolve_blake3_to_path from server.py. Also revert loopback origin check to original logic. Amp-Thread-ID: https://ampcode.com/threads/T-019ce023-3384-7560-bacf-de40b0de0dd2 Co-authored-by: Amp --- app/assets/services/asset_management.py | 33 ++++++++++++++++++++++ server.py | 37 ++++--------------------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index b85e77edb..bcdbf8689 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -280,6 +280,39 @@ def list_assets_page( return ListAssetsResult(items=items, total=total) +def resolve_hash_to_path( + asset_hash: str, +) -> DownloadResolutionResult | None: + """Resolve a blake3 hash to an on-disk file path. + + Returns a DownloadResolutionResult with abs_path, content_type, and + download_name, or None if no asset or live path is found. + """ + with create_session() as session: + asset = queries_get_asset_by_hash(session, asset_hash) + if not asset: + return None + refs = list_references_by_asset_id(session, asset_id=asset.id) + abs_path = select_best_live_path(refs) + if not abs_path: + return None + display_name = os.path.basename(abs_path) + for ref in refs: + if ref.file_path == abs_path and ref.name: + display_name = ref.name + break + ctype = ( + asset.mime_type + or mimetypes.guess_type(display_name)[0] + or "application/octet-stream" + ) + return DownloadResolutionResult( + abs_path=abs_path, + content_type=ctype, + download_name=display_name, + ) + + def resolve_asset_for_download( reference_id: str, owner_id: str = "", diff --git a/server.py b/server.py index eaeacfa45..7cc3574fe 100644 --- a/server.py +++ b/server.py @@ -36,6 +36,7 @@ from comfy_api.internal import _ComfyNodeInternal from app.assets.seeder import asset_seeder from app.assets.api.routes import register_assets_routes from app.assets.services.ingest import register_file_in_place +from app.assets.services.asset_management import resolve_hash_to_path from app.user_manager import UserManager from app.model_manager import ModelFileManager @@ -164,11 +165,7 @@ def create_origin_only_middleware(): if host_domain_parsed.port is None: origin_domain = parsed.hostname - # When both host and origin are loopback, allow port differences - # (e.g. frontend dev server on :5173 proxying to backend on :8188) - if loopback and is_loopback(parsed.hostname): - pass - elif loopback and host_domain is not None and origin_domain is not None and len(host_domain) > 0 and len(origin_domain) > 0: + if loopback and host_domain is not None and origin_domain is not None and len(host_domain) > 0 and len(origin_domain) > 0: if host_domain != origin_domain: logging.warning("WARNING: request with non matching host and origin {} != {}, returning 403".format(host_domain, origin_domain)) return web.Response(status=403) @@ -507,9 +504,10 @@ class PromptServer(): # node preview, it constructs /view?filename=, so this # endpoint must resolve blake3 hashes to their on-disk file paths. if filename.startswith("blake3:"): - file, filename = self._resolve_blake3_to_path(filename) - if file is None: + result = resolve_hash_to_path(filename) + if result is None: return web.Response(status=404) + file, filename = result.abs_path, result.download_name else: filename, output_dir = folder_paths.annotated_filepath(filename) @@ -1027,31 +1025,6 @@ class PromptServer(): timeout = aiohttp.ClientTimeout(total=None) # no timeout self.client_session = aiohttp.ClientSession(timeout=timeout) - def _resolve_blake3_to_path(self, asset_hash: str) -> tuple[str | None, str]: - """Resolve a blake3 hash to an absolute file path via the asset database. - - Returns (abs_path, display_filename) or (None, "") if not found. - """ - from app.database.db import create_session - from app.assets.database.queries import get_asset_by_hash, list_references_by_asset_id - from app.assets.helpers import select_best_live_path - - with create_session() as session: - asset = get_asset_by_hash(session, asset_hash) - if not asset: - return None, "" - refs = list_references_by_asset_id(session, asset_id=asset.id) - abs_path = select_best_live_path(refs) - if not abs_path: - return None, "" - display_name = os.path.basename(abs_path) - # Prefer the reference name if available - for ref in refs: - if ref.file_path == abs_path and ref.name: - display_name = ref.name - break - return abs_path, display_name - def add_routes(self): self.user_manager.add_routes(self.routes) self.model_file_manager.add_routes(self.routes) From 2e4aa301349ec6f290556b694dd61150468902b1 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 12:59:22 -0700 Subject: [PATCH 10/34] Require at least one tag in UploadAssetSpec Enforce non-empty tags at the Pydantic validation layer so uploads with no tags are rejected with a 400 before reaching ingest. Adds test_upload_empty_tags_rejected to cover this case. Amp-Thread-ID: https://ampcode.com/threads/T-019ce377-8bde-7048-bc28-a9df063409f9 Co-authored-by: Amp --- app/assets/api/schemas_in.py | 19 ++++++++++--------- tests-unit/assets_test/test_uploads.py | 9 +++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 7593e617a..9ef406756 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -333,13 +333,14 @@ class UploadAssetSpec(BaseModel): @model_validator(mode="after") def _validate_order(self): - if self.tags: - root = self.tags[0] - if root not in {"models", "input", "output"}: - raise ValueError("first tag must be one of: models, input, output") - if root == "models": - if len(self.tags) < 2: - raise ValueError( - "models uploads require a category tag as the second tag" - ) + if not self.tags: + raise ValueError("at least one tag is required for uploads") + root = self.tags[0] + if root not in {"models", "input", "output"}: + raise ValueError("first tag must be one of: models, input, output") + if root == "models": + if len(self.tags) < 2: + raise ValueError( + "models uploads require a category tag as the second tag" + ) return self diff --git a/tests-unit/assets_test/test_uploads.py b/tests-unit/assets_test/test_uploads.py index d68e5b5d7..0f2b124a3 100644 --- a/tests-unit/assets_test/test_uploads.py +++ b/tests-unit/assets_test/test_uploads.py @@ -243,6 +243,15 @@ def test_upload_tags_traversal_guard(http: requests.Session, api_base: str): assert body["error"]["code"] in ("BAD_REQUEST", "INVALID_BODY") +def test_upload_empty_tags_rejected(http: requests.Session, api_base: str): + files = {"file": ("notags.bin", b"A" * 64, "application/octet-stream")} + form = {"tags": json.dumps([]), "name": "notags.bin", "user_metadata": json.dumps({})} + r = http.post(api_base + "/api/assets", data=form, files=files, timeout=120) + body = r.json() + assert r.status_code == 400 + assert body["error"]["code"] == "INVALID_BODY" + + @pytest.mark.parametrize("root", ["input", "output"]) def test_duplicate_upload_same_display_name_does_not_clobber( root: str, From b27f1a1e17686af919dbd1b13bbb80c20d5b51d9 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 13:03:47 -0700 Subject: [PATCH 11/34] Add owner_id check to resolve_hash_to_path Filter asset references by owner visibility so the /view endpoint only resolves hashes for assets the requesting user can access. Adds table-driven tests for owner visibility cases. Amp-Thread-ID: https://ampcode.com/threads/T-019ce377-8bde-7048-bc28-a9df063409f9 Co-authored-by: Amp --- app/assets/services/asset_management.py | 12 +++++- server.py | 3 +- .../services/test_asset_management.py | 40 +++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index bcdbf8689..a82a04c40 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -282,9 +282,13 @@ def list_assets_page( def resolve_hash_to_path( asset_hash: str, + owner_id: str = "", ) -> DownloadResolutionResult | None: """Resolve a blake3 hash to an on-disk file path. + Only references visible to *owner_id* are considered (owner-less + references are always visible). + Returns a DownloadResolutionResult with abs_path, content_type, and download_name, or None if no asset or live path is found. """ @@ -293,11 +297,15 @@ def resolve_hash_to_path( if not asset: return None refs = list_references_by_asset_id(session, asset_id=asset.id) - abs_path = select_best_live_path(refs) + visible = [ + r for r in refs + if r.owner_id == "" or r.owner_id == owner_id + ] + abs_path = select_best_live_path(visible) if not abs_path: return None display_name = os.path.basename(abs_path) - for ref in refs: + for ref in visible: if ref.file_path == abs_path and ref.name: display_name = ref.name break diff --git a/server.py b/server.py index 7cc3574fe..3f1d1b6a7 100644 --- a/server.py +++ b/server.py @@ -504,7 +504,8 @@ class PromptServer(): # node preview, it constructs /view?filename=, so this # endpoint must resolve blake3 hashes to their on-disk file paths. if filename.startswith("blake3:"): - result = resolve_hash_to_path(filename) + owner_id = self.user_manager.get_request_user_id(request) + result = resolve_hash_to_path(filename, owner_id=owner_id) if result is None: return web.Response(status=404) file, filename = result.abs_path, result.download_name diff --git a/tests-unit/assets_test/services/test_asset_management.py b/tests-unit/assets_test/services/test_asset_management.py index 101ef7292..2413b39db 100644 --- a/tests-unit/assets_test/services/test_asset_management.py +++ b/tests-unit/assets_test/services/test_asset_management.py @@ -11,6 +11,7 @@ from app.assets.services import ( delete_asset_reference, set_asset_preview, ) +from app.assets.services.asset_management import resolve_hash_to_path def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 1024) -> Asset: @@ -266,3 +267,42 @@ class TestSetAssetPreview: preview_asset_id=None, owner_id="user2", ) + + +class TestResolveHashToPath: + def test_returns_none_for_unknown_hash(self, mock_create_session): + result = resolve_hash_to_path("blake3:" + "a" * 64) + assert result is None + + @pytest.mark.parametrize( + "ref_owner, query_owner, expect_found", + [ + ("user1", "user1", True), + ("user1", "user2", False), + ("", "anyone", True), + ("", "", True), + ], + ids=[ + "owner_sees_own_ref", + "other_owner_blocked", + "ownerless_visible_to_anyone", + "ownerless_visible_to_empty", + ], + ) + def test_owner_visibility( + self, ref_owner, query_owner, expect_found, + mock_create_session, session: Session, temp_dir, + ): + f = temp_dir / "file.bin" + f.write_bytes(b"data") + asset = _make_asset(session, hash_val="blake3:" + "b" * 64) + ref = _make_reference(session, asset, name="file.bin", owner_id=ref_owner) + ref.file_path = str(f) + session.commit() + + result = resolve_hash_to_path(asset.hash, owner_id=query_owner) + if expect_found: + assert result is not None + assert result.abs_path == str(f) + else: + assert result is None From 8ccd89d695e51fc89ca2ab90557088e92395e961 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 13:06:12 -0700 Subject: [PATCH 12/34] Make ReferenceData.created_at and updated_at required Remove None defaults and type: ignore comments. Move fields before optional fields to satisfy dataclass ordering. Amp-Thread-ID: https://ampcode.com/threads/T-019ce377-8bde-7048-bc28-a9df063409f9 Co-authored-by: Amp --- app/assets/services/schemas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/assets/services/schemas.py b/app/assets/services/schemas.py index d63c1f60d..b744283b2 100644 --- a/app/assets/services/schemas.py +++ b/app/assets/services/schemas.py @@ -23,10 +23,10 @@ class ReferenceData: file_path: str | None user_metadata: UserMetadata preview_id: str | None + created_at: datetime + updated_at: datetime system_metadata: dict[str, Any] | None = None prompt_id: str | None = None - created_at: datetime = None # type: ignore[assignment] - updated_at: datetime = None # type: ignore[assignment] last_access_time: datetime | None = None From 808c17abd0256a38923db0ddaf380e6473224057 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 13:21:58 -0700 Subject: [PATCH 13/34] Fix double commit in create_from_hash Move mime_type update into _register_existing_asset so it shares a single transaction with reference creation. Log a warning when the hash is not found instead of silently returning None. Amp-Thread-ID: https://ampcode.com/threads/T-019ce377-8bde-7048-bc28-a9df063409f9 Co-authored-by: Amp --- app/assets/services/ingest.py | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 64a1ef68b..3d6640223 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -136,6 +136,7 @@ def _register_existing_asset( tags: list[str] | None = None, tag_origin: str = "manual", owner_id: str = "", + mime_type: str | None = None, ) -> RegisterAssetResult: user_metadata = user_metadata or {} @@ -144,6 +145,9 @@ def _register_existing_asset( if not asset: raise ValueError(f"No asset with hash {asset_hash}") + if mime_type and asset.mime_type != mime_type: + update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=mime_type) + ref, ref_created = get_or_create_reference( session, asset_id=asset.id, @@ -411,25 +415,21 @@ def create_from_hash( ) -> UploadResult | None: canonical = hash_str.strip().lower() - with create_session() as session: - asset = get_asset_by_hash(session, asset_hash=canonical) - if not asset: - return None - - if mime_type and asset.mime_type != mime_type: - update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=mime_type) - session.commit() - - result = _register_existing_asset( - asset_hash=canonical, - name=_sanitize_filename( - name, fallback=canonical.split(":", 1)[1] if ":" in canonical else canonical - ), - user_metadata=user_metadata or {}, - tags=tags or [], - tag_origin="manual", - owner_id=owner_id, - ) + try: + result = _register_existing_asset( + asset_hash=canonical, + name=_sanitize_filename( + name, fallback=canonical.split(":", 1)[1] if ":" in canonical else canonical + ), + user_metadata=user_metadata or {}, + tags=tags or [], + tag_origin="manual", + owner_id=owner_id, + mime_type=mime_type, + ) + except ValueError: + logging.warning("create_from_hash: no asset found for hash %s", canonical) + return None return UploadResult( ref=result.ref, From e8f67bfdbb065b8982794d90d141098586740cd7 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 13:22:04 -0700 Subject: [PATCH 14/34] Add exclude_none=True to create/upload responses Align with get/update/list endpoints for consistent JSON output. Amp-Thread-ID: https://ampcode.com/threads/T-019ce377-8bde-7048-bc28-a9df063409f9 Co-authored-by: Amp --- app/assets/api/routes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index c6683d88a..1ec511021 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -360,7 +360,7 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response: **asset.model_dump(), created_new=result.created_new, ) - return web.json_response(payload_out.model_dump(mode="json"), status=201) + return web.json_response(payload_out.model_dump(mode="json", exclude_none=True), status=201) @ROUTES.post("/api/assets") @@ -425,7 +425,7 @@ async def upload_asset(request: web.Request) -> web.Response: **asset.model_dump(), created_new=False, ) - return web.json_response(payload_out.model_dump(mode="json"), status=200) + return web.json_response(payload_out.model_dump(mode="json", exclude_none=True), status=200) # Fast path: hash exists, create AssetReference without writing anything if spec.hash and parsed.provided_hash_exists is True: @@ -487,7 +487,7 @@ async def upload_asset(request: web.Request) -> web.Response: created_new=result.created_new, ) status = 201 if result.created_new else 200 - return web.json_response(payload_out.model_dump(mode="json"), status=status) + return web.json_response(payload_out.model_dump(mode="json", exclude_none=True), status=status) @ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}") From 2b450b7a92a3e935b9235c1be82a812024af6ffd Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 14:33:29 -0700 Subject: [PATCH 15/34] Change preview_id to reference asset by reference ID, not content ID Clients receive preview_id in API responses but could not dereference it through public routes (which use reference IDs). Now preview_id is a self-referential FK to asset_references.id so the value is directly usable in the public API. Co-Authored-By: Claude Opus 4.6 --- .../versions/0003_add_metadata_prompt.py | 29 +++++++++++++++++++ app/assets/database/models.py | 16 ++++------ app/assets/database/queries/__init__.py | 2 ++ .../database/queries/asset_reference.py | 25 ++++++++++++---- app/assets/services/asset_management.py | 6 ++-- app/assets/services/ingest.py | 4 +-- .../assets_test/queries/test_asset_info.py | 21 ++++++++------ .../services/test_asset_management.py | 14 +++++---- .../assets_test/services/test_ingest.py | 12 ++++++-- 9 files changed, 91 insertions(+), 38 deletions(-) diff --git a/alembic_db/versions/0003_add_metadata_prompt.py b/alembic_db/versions/0003_add_metadata_prompt.py index 484d92923..c55a30065 100644 --- a/alembic_db/versions/0003_add_metadata_prompt.py +++ b/alembic_db/versions/0003_add_metadata_prompt.py @@ -1,5 +1,6 @@ """ Add system_metadata and prompt_id columns to asset_references. +Change preview_id FK from assets.id to asset_references.id. Revision ID: 0003_add_metadata_prompt Revises: 0002_merge_to_asset_references @@ -24,8 +25,36 @@ def upgrade() -> None: sa.Column("prompt_id", sa.String(length=36), nullable=True) ) + # Change preview_id FK from assets.id to asset_references.id (self-ref). + # Existing values are asset-content IDs that won't match reference IDs, + # so null them out first. + op.execute("UPDATE asset_references SET preview_id = NULL WHERE preview_id IS NOT NULL") + with op.batch_alter_table("asset_references") as batch_op: + batch_op.drop_constraint( + "fk_asset_references_preview_id_assets", type_="foreignkey" + ) + batch_op.create_foreign_key( + "fk_asset_references_preview_id_asset_references", + "asset_references", + ["preview_id"], + ["id"], + ondelete="SET NULL", + ) + def downgrade() -> None: + with op.batch_alter_table("asset_references") as batch_op: + batch_op.drop_constraint( + "fk_asset_references_preview_id_asset_references", type_="foreignkey" + ) + batch_op.create_foreign_key( + "fk_asset_references_preview_id_assets", + "assets", + ["preview_id"], + ["id"], + ondelete="SET NULL", + ) + with op.batch_alter_table("asset_references") as batch_op: batch_op.drop_column("prompt_id") batch_op.drop_column("system_metadata") diff --git a/app/assets/database/models.py b/app/assets/database/models.py index 22340ebd5..5534e89d5 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -45,13 +45,7 @@ class Asset(Base): passive_deletes=True, ) - preview_of: Mapped[list[AssetReference]] = relationship( - "AssetReference", - back_populates="preview_asset", - primaryjoin=lambda: Asset.id == foreign(AssetReference.preview_id), - foreign_keys=lambda: [AssetReference.preview_id], - viewonly=True, - ) + # preview_id on AssetReference is a self-referential FK to asset_references.id __table_args__ = ( Index("uq_assets_hash", "hash", unique=True), @@ -91,7 +85,7 @@ class AssetReference(Base): owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="") name: Mapped[str] = mapped_column(String(512), nullable=False) preview_id: Mapped[str | None] = mapped_column( - String(36), ForeignKey("assets.id", ondelete="SET NULL") + String(36), ForeignKey("asset_references.id", ondelete="SET NULL") ) user_metadata: Mapped[dict[str, Any] | None] = mapped_column( JSON(none_as_null=True) @@ -119,10 +113,10 @@ class AssetReference(Base): foreign_keys=[asset_id], lazy="selectin", ) - preview_asset: Mapped[Asset | None] = relationship( - "Asset", - back_populates="preview_of", + preview_ref: Mapped[AssetReference | None] = relationship( + "AssetReference", foreign_keys=[preview_id], + remote_side=lambda: [AssetReference.id], ) metadata_entries: Mapped[list[AssetReferenceMeta]] = relationship( diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 5283b400e..9b04baf17 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -34,6 +34,7 @@ from app.assets.database.queries.asset_reference import ( list_references_by_asset_id, list_references_page, mark_references_missing_outside_prefixes, + reference_exists, reference_exists_for_asset_id, restore_references_by_paths, set_reference_metadata, @@ -104,6 +105,7 @@ __all__ = [ "list_tags_with_usage", "mark_references_missing_outside_prefixes", "reassign_asset_references", + "reference_exists", "reference_exists_for_asset_id", "remove_missing_tag_for_asset_id", "remove_tags_from_reference", diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 4c9965e3b..c63d39fd6 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -137,6 +137,21 @@ def reference_exists_for_asset_id( return session.execute(q).first() is not None +def reference_exists( + session: Session, + reference_id: str, +) -> bool: + """Return True if a reference with the given ID exists (not soft-deleted).""" + q = ( + select(sa.literal(True)) + .select_from(AssetReference) + .where(AssetReference.id == reference_id) + .where(AssetReference.deleted_at.is_(None)) + .limit(1) + ) + return session.execute(q).first() is not None + + def insert_reference( session: Session, asset_id: str, @@ -496,19 +511,19 @@ def soft_delete_reference_by_id( def set_reference_preview( session: Session, reference_id: str, - preview_asset_id: str | None = None, + preview_reference_id: str | None = None, ) -> None: """Set or clear preview_id and bump updated_at. Raises on unknown IDs.""" ref = session.get(AssetReference, reference_id) if not ref: raise ValueError(f"AssetReference {reference_id} not found") - if preview_asset_id is None: + if preview_reference_id is None: ref.preview_id = None else: - if not session.get(Asset, preview_asset_id): - raise ValueError(f"Preview Asset {preview_asset_id} not found") - ref.preview_id = preview_asset_id + if not session.get(AssetReference, preview_reference_id): + raise ValueError(f"Preview AssetReference {preview_reference_id} not found") + ref.preview_id = preview_reference_id ref.updated_at = get_utc_now() session.flush() diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index a82a04c40..b02490871 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -116,7 +116,7 @@ def update_asset_metadata( set_reference_preview( session, reference_id=reference_id, - preview_asset_id=preview_id, + preview_reference_id=preview_id, ) touched = True @@ -202,7 +202,7 @@ def delete_asset_reference( def set_asset_preview( reference_id: str, - preview_asset_id: str | None = None, + preview_reference_id: str | None = None, owner_id: str = "", ) -> AssetDetailResult: with create_session() as session: @@ -211,7 +211,7 @@ def set_asset_preview( set_reference_preview( session, reference_id=reference_id, - preview_asset_id=preview_asset_id, + preview_reference_id=preview_reference_id, ) result = fetch_reference_asset_and_tags( diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 3d6640223..5be09f8e3 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -11,10 +11,10 @@ from app.assets.database.queries import ( add_tags_to_reference, fetch_reference_and_asset, get_asset_by_hash, - get_existing_asset_ids, get_reference_by_file_path, get_reference_tags, get_or_create_reference, + reference_exists, remove_missing_tag_for_asset_id, set_reference_metadata, set_reference_tags, @@ -66,7 +66,7 @@ def _ingest_file_from_path( with create_session() as session: if preview_id: - if preview_id not in get_existing_asset_ids(session, [preview_id]): + if not reference_exists(session, preview_id): preview_id = None asset, asset_created, asset_updated = upsert_asset( diff --git a/tests-unit/assets_test/queries/test_asset_info.py b/tests-unit/assets_test/queries/test_asset_info.py index 8f6c7fcdb..fe510e342 100644 --- a/tests-unit/assets_test/queries/test_asset_info.py +++ b/tests-unit/assets_test/queries/test_asset_info.py @@ -242,22 +242,24 @@ class TestSetReferencePreview: asset = _make_asset(session, "hash1") preview_asset = _make_asset(session, "preview_hash") ref = _make_reference(session, asset) + preview_ref = _make_reference(session, preview_asset, name="preview.png") session.commit() - set_reference_preview(session, reference_id=ref.id, preview_asset_id=preview_asset.id) + set_reference_preview(session, reference_id=ref.id, preview_reference_id=preview_ref.id) session.commit() session.refresh(ref) - assert ref.preview_id == preview_asset.id + assert ref.preview_id == preview_ref.id def test_clears_preview(self, session: Session): asset = _make_asset(session, "hash1") preview_asset = _make_asset(session, "preview_hash") ref = _make_reference(session, asset) - ref.preview_id = preview_asset.id + preview_ref = _make_reference(session, preview_asset, name="preview.png") + ref.preview_id = preview_ref.id session.commit() - set_reference_preview(session, reference_id=ref.id, preview_asset_id=None) + set_reference_preview(session, reference_id=ref.id, preview_reference_id=None) session.commit() session.refresh(ref) @@ -265,15 +267,15 @@ class TestSetReferencePreview: def test_raises_for_nonexistent_reference(self, session: Session): with pytest.raises(ValueError, match="not found"): - set_reference_preview(session, reference_id="nonexistent", preview_asset_id=None) + set_reference_preview(session, reference_id="nonexistent", preview_reference_id=None) def test_raises_for_nonexistent_preview(self, session: Session): asset = _make_asset(session, "hash1") ref = _make_reference(session, asset) session.commit() - with pytest.raises(ValueError, match="Preview Asset"): - set_reference_preview(session, reference_id=ref.id, preview_asset_id="nonexistent") + with pytest.raises(ValueError, match="Preview AssetReference"): + set_reference_preview(session, reference_id=ref.id, preview_reference_id="nonexistent") class TestInsertReference: @@ -351,13 +353,14 @@ class TestUpdateReferenceTimestamps: asset = _make_asset(session, "hash1") preview_asset = _make_asset(session, "preview_hash") ref = _make_reference(session, asset) + preview_ref = _make_reference(session, preview_asset, name="preview.png") session.commit() - update_reference_timestamps(session, ref, preview_id=preview_asset.id) + update_reference_timestamps(session, ref, preview_id=preview_ref.id) session.commit() session.refresh(ref) - assert ref.preview_id == preview_asset.id + assert ref.preview_id == preview_ref.id class TestSetReferenceMetadata: diff --git a/tests-unit/assets_test/services/test_asset_management.py b/tests-unit/assets_test/services/test_asset_management.py index 2413b39db..e8ff989e9 100644 --- a/tests-unit/assets_test/services/test_asset_management.py +++ b/tests-unit/assets_test/services/test_asset_management.py @@ -220,31 +220,33 @@ class TestSetAssetPreview: asset = _make_asset(session, hash_val="blake3:main") preview_asset = _make_asset(session, hash_val="blake3:preview") ref = _make_reference(session, asset) + preview_ref = _make_reference(session, preview_asset, name="preview.png") ref_id = ref.id - preview_id = preview_asset.id + preview_ref_id = preview_ref.id session.commit() set_asset_preview( reference_id=ref_id, - preview_asset_id=preview_id, + preview_reference_id=preview_ref_id, ) # Verify by re-fetching from DB session.expire_all() updated_ref = session.get(AssetReference, ref_id) - assert updated_ref.preview_id == preview_id + assert updated_ref.preview_id == preview_ref_id def test_clears_preview(self, mock_create_session, session: Session): asset = _make_asset(session) preview_asset = _make_asset(session, hash_val="blake3:preview") ref = _make_reference(session, asset) - ref.preview_id = preview_asset.id + preview_ref = _make_reference(session, preview_asset, name="preview.png") + ref.preview_id = preview_ref.id ref_id = ref.id session.commit() set_asset_preview( reference_id=ref_id, - preview_asset_id=None, + preview_reference_id=None, ) # Verify by re-fetching from DB @@ -264,7 +266,7 @@ class TestSetAssetPreview: with pytest.raises(PermissionError, match="not owner"): set_asset_preview( reference_id=ref.id, - preview_asset_id=None, + preview_reference_id=None, owner_id="user2", ) diff --git a/tests-unit/assets_test/services/test_ingest.py b/tests-unit/assets_test/services/test_ingest.py index 367bc7721..dbb8441c2 100644 --- a/tests-unit/assets_test/services/test_ingest.py +++ b/tests-unit/assets_test/services/test_ingest.py @@ -113,11 +113,19 @@ class TestIngestFileFromPath: file_path = temp_dir / "with_preview.bin" file_path.write_bytes(b"data") - # Create a preview asset first + # Create a preview asset and reference preview_asset = Asset(hash="blake3:preview", size_bytes=100) session.add(preview_asset) + session.flush() + from app.assets.helpers import get_utc_now + now = get_utc_now() + preview_ref = AssetReference( + asset_id=preview_asset.id, name="preview.png", owner_id="", + created_at=now, updated_at=now, last_access_time=now, + ) + session.add(preview_ref) session.commit() - preview_id = preview_asset.id + preview_id = preview_ref.id result = _ingest_file_from_path( abs_path=str(file_path), From 85cc3cad5731ffd3dd9b2d38d495cda8c9ee5021 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 14:43:06 -0700 Subject: [PATCH 16/34] Filter soft-deleted and missing refs from visibility queries list_references_by_asset_id and list_tags_with_usage were not filtering out deleted_at/is_missing refs, allowing /view?filename=blake3:... to serve files through hidden references and inflating tag usage counts. Add list_all_file_paths_by_asset_id for orphan cleanup which intentionally needs unfiltered access. Co-Authored-By: Claude Opus 4.6 --- app/assets/database/queries/__init__.py | 2 ++ .../database/queries/asset_reference.py | 21 +++++++++++++++++++ app/assets/database/queries/tags.py | 2 ++ app/assets/services/asset_management.py | 9 ++++---- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 9b04baf17..17ea8f27a 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -31,6 +31,7 @@ from app.assets.database.queries.asset_reference import ( get_unenriched_references, get_unreferenced_unhashed_asset_ids, insert_reference, + list_all_file_paths_by_asset_id, list_references_by_asset_id, list_references_page, mark_references_missing_outside_prefixes, @@ -99,6 +100,7 @@ __all__ = [ "get_unenriched_references", "get_unreferenced_unhashed_asset_ids", "insert_reference", + "list_all_file_paths_by_asset_id", "list_references_by_asset_id", "list_references_page", "list_tag_counts_for_filtered_assets", diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index c63d39fd6..24e7743a2 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -549,6 +549,8 @@ def list_references_by_asset_id( session.execute( select(AssetReference) .where(AssetReference.asset_id == asset_id) + .where(AssetReference.is_missing == False) # noqa: E712 + .where(AssetReference.deleted_at.is_(None)) .order_by(AssetReference.id.asc()) ) .scalars() @@ -556,6 +558,25 @@ def list_references_by_asset_id( ) +def list_all_file_paths_by_asset_id( + session: Session, + asset_id: str, +) -> list[str]: + """Return every file_path for an asset, including soft-deleted/missing refs. + + Used for orphan cleanup where all on-disk files must be removed. + """ + return list( + session.execute( + select(AssetReference.file_path) + .where(AssetReference.asset_id == asset_id) + .where(AssetReference.file_path.isnot(None)) + ) + .scalars() + .all() + ) + + def upsert_reference( session: Session, asset_id: str, diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index 05acbdbd9..fbca80743 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -275,6 +275,7 @@ def list_tags_with_usage( .select_from(AssetReferenceTag) .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id) .where(build_visible_owner_clause(owner_id)) + .where(AssetReference.is_missing == False) # noqa: E712 .where(AssetReference.deleted_at.is_(None)) .group_by(AssetReferenceTag.tag_name) .subquery() @@ -311,6 +312,7 @@ def list_tags_with_usage( select(AssetReferenceTag.tag_name) .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id) .where(build_visible_owner_clause(owner_id)) + .where(AssetReference.is_missing == False) # noqa: E712 .where(AssetReference.deleted_at.is_(None)) .group_by(AssetReferenceTag.tag_name) ) diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index b02490871..a51f0f48f 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -16,6 +16,7 @@ from app.assets.database.queries import ( get_reference_by_id, get_reference_with_owner_check, list_references_page, + list_all_file_paths_by_asset_id, list_references_by_asset_id, set_reference_metadata, set_reference_preview, @@ -176,11 +177,9 @@ def delete_asset_reference( session.commit() return True - # Orphaned asset - delete it and its files - refs = list_references_by_asset_id(session, asset_id=asset_id) - file_paths = [ - r.file_path for r in (refs or []) if getattr(r, "file_path", None) - ] + # Orphaned asset - gather ALL file paths (including + # soft-deleted / missing refs) so their on-disk files get cleaned up. + file_paths = list_all_file_paths_by_asset_id(session, asset_id=asset_id) # Also include the just-deleted file path if file_path: file_paths.append(file_path) From a832837af80f7fd779caeb1c4f1e29f87be3624e Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 14:52:15 -0700 Subject: [PATCH 17/34] Pass preview_id and mime_type through all asset creation fast paths The duplicate-content upload path and hash-based creation paths were silently dropping preview_id and mime_type. This wires both fields through _register_existing_asset, create_from_hash, and all route call sites so behavior is consistent regardless of whether the asset content already exists. Co-Authored-By: Claude Opus 4.6 --- app/assets/api/routes.py | 2 ++ app/assets/api/schemas_in.py | 1 + app/assets/services/ingest.py | 13 +++++++++++++ 3 files changed, 16 insertions(+) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 1ec511021..c1c319893 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -349,6 +349,7 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response: user_metadata=body.user_metadata, owner_id=USER_MANAGER.get_request_user_id(request), mime_type=body.mime_type, + preview_id=body.preview_id, ) if result is None: return _build_error_response( @@ -436,6 +437,7 @@ async def upload_asset(request: web.Request) -> web.Response: user_metadata=spec.user_metadata or {}, owner_id=owner_id, mime_type=spec.mime_type, + preview_id=spec.preview_id, ) if result is None: delete_temp_file_if_exists(parsed.tmp_path) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 9ef406756..a4ca72a37 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -124,6 +124,7 @@ class CreateFromHashBody(BaseModel): tags: list[str] = Field(default_factory=list) user_metadata: dict[str, Any] = Field(default_factory=dict) mime_type: str | None = None + preview_id: str | None = None @field_validator("hash") @classmethod diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 5be09f8e3..3edb97470 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -137,6 +137,7 @@ def _register_existing_asset( tag_origin: str = "manual", owner_id: str = "", mime_type: str | None = None, + preview_id: str | None = None, ) -> RegisterAssetResult: user_metadata = user_metadata or {} @@ -148,14 +149,22 @@ def _register_existing_asset( if mime_type and asset.mime_type != mime_type: update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=mime_type) + if preview_id: + if not reference_exists(session, preview_id): + preview_id = None + ref, ref_created = get_or_create_reference( session, asset_id=asset.id, owner_id=owner_id, name=name, + preview_id=preview_id, ) if not ref_created: + if preview_id and ref.preview_id != preview_id: + ref.preview_id = preview_id + tag_names = get_reference_tags(session, reference_id=ref.id) result = RegisterAssetResult( ref=extract_reference_data(ref), @@ -278,6 +287,8 @@ def upload_from_temp_path( tags=tags or [], tag_origin="manual", owner_id=owner_id, + mime_type=mime_type, + preview_id=preview_id, ) return UploadResult( ref=result.ref, @@ -412,6 +423,7 @@ def create_from_hash( user_metadata: dict | None = None, owner_id: str = "", mime_type: str | None = None, + preview_id: str | None = None, ) -> UploadResult | None: canonical = hash_str.strip().lower() @@ -426,6 +438,7 @@ def create_from_hash( tag_origin="manual", owner_id=owner_id, mime_type=mime_type, + preview_id=preview_id, ) except ValueError: logging.warning("create_from_hash: no asset found for hash %s", canonical) From d5601fc9a989841b314e970d89badd4d9ed906b0 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 14:57:26 -0700 Subject: [PATCH 18/34] Remove unimplemented client-provided ID from upload API The `id` field on UploadAssetSpec was advertised for idempotent creation but never actually honored when creating new references. Remove it rather than implementing the feature. Co-Authored-By: Claude Opus 4.6 --- app/assets/api/routes.py | 24 ------------------------ app/assets/api/schemas_in.py | 2 -- app/assets/services/ingest.py | 1 - 3 files changed, 27 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index c1c319893..e011f9a83 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -405,29 +405,6 @@ async def upload_asset(request: web.Request) -> web.Response: ) try: - # Idempotent create: if spec.id is provided, check if reference already exists - if spec.id: - existing = get_asset_detail( - reference_id=spec.id, - owner_id=owner_id, - ) - if existing: - # Validate that uploaded content matches existing asset - if spec.hash and existing.asset and existing.asset.hash != spec.hash: - delete_temp_file_if_exists(parsed.tmp_path) - return _build_error_response( - 409, - "HASH_MISMATCH", - "Uploaded file hash does not match existing asset.", - ) - delete_temp_file_if_exists(parsed.tmp_path) - asset = _build_asset_response(existing) - payload_out = schemas_out.AssetCreated( - **asset.model_dump(), - created_new=False, - ) - return web.json_response(payload_out.model_dump(mode="json", exclude_none=True), status=200) - # Fast path: hash exists, create AssetReference without writing anything if spec.hash and parsed.provided_hash_exists is True: result = create_from_hash( @@ -464,7 +441,6 @@ async def upload_asset(request: web.Request) -> web.Response: expected_hash=spec.hash, mime_type=spec.mime_type, preview_id=spec.preview_id, - asset_id=spec.id, ) except AssetValidationError as e: delete_temp_file_if_exists(parsed.tmp_path) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index a4ca72a37..535b31bf0 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -241,7 +241,6 @@ class UploadAssetSpec(BaseModel): - name: display name - user_metadata: arbitrary JSON object (optional) - hash: optional canonical 'blake3:' for validation / fast-path - - id: optional UUID for idempotent creation - mime_type: optional MIME type override - preview_id: optional asset ID for preview @@ -254,7 +253,6 @@ class UploadAssetSpec(BaseModel): name: str | None = Field(default=None, max_length=512, description="Display Name") user_metadata: dict[str, Any] = Field(default_factory=dict) hash: str | None = Field(default=None) - id: str | None = Field(default=None) mime_type: str | None = Field(default=None) preview_id: str | None = Field(default=None) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 3edb97470..9502588de 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -258,7 +258,6 @@ def upload_from_temp_path( expected_hash: str | None = None, mime_type: str | None = None, preview_id: str | None = None, - asset_id: str | None = None, ) -> UploadResult: try: digest, _ = hashing.compute_blake3_hash(temp_path) From 1285231ce2ea4cc436cf000d8e662b069dd2b674 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 15:05:16 -0700 Subject: [PATCH 19/34] Make asset mime_type immutable after first ingest Prevents cross-tenant metadata mutation when multiple references share the same content-addressed Asset row. mime_type can now only be set when NULL (first ingest); subsequent attempts to change it are silently ignored. Co-Authored-By: Claude Opus 4.6 --- app/assets/database/queries/asset.py | 4 +- app/assets/services/asset_management.py | 5 ++- app/assets/services/ingest.py | 2 +- tests-unit/assets_test/queries/test_asset.py | 43 ++++++++++++++++++++ 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/app/assets/database/queries/asset.py b/app/assets/database/queries/asset.py index a21f5b68f..594d1f1b2 100644 --- a/app/assets/database/queries/asset.py +++ b/app/assets/database/queries/asset.py @@ -69,7 +69,7 @@ def upsert_asset( if asset.size_bytes != int(size_bytes) and int(size_bytes) > 0: asset.size_bytes = int(size_bytes) changed = True - if mime_type and asset.mime_type != mime_type: + if mime_type and not asset.mime_type: asset.mime_type = mime_type changed = True if changed: @@ -118,7 +118,7 @@ def update_asset_hash_and_mime( return False if asset_hash is not None: asset.hash = asset_hash - if mime_type is not None: + if mime_type is not None and not asset.mime_type: asset.mime_type = mime_type return True diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index a51f0f48f..5aefd9956 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -108,10 +108,11 @@ def update_asset_metadata( touched = True if mime_type is not None: - update_asset_hash_and_mime( + updated = update_asset_hash_and_mime( session, asset_id=ref.asset_id, mime_type=mime_type ) - touched = True + if updated: + touched = True if preview_id is not None: set_reference_preview( diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 9502588de..b1c789c8c 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -146,7 +146,7 @@ def _register_existing_asset( if not asset: raise ValueError(f"No asset with hash {asset_hash}") - if mime_type and asset.mime_type != mime_type: + if mime_type and not asset.mime_type: update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=mime_type) if preview_id: diff --git a/tests-unit/assets_test/queries/test_asset.py b/tests-unit/assets_test/queries/test_asset.py index 08f84cd11..9b7eb4bac 100644 --- a/tests-unit/assets_test/queries/test_asset.py +++ b/tests-unit/assets_test/queries/test_asset.py @@ -10,6 +10,7 @@ from app.assets.database.queries import ( get_asset_by_hash, upsert_asset, bulk_insert_assets, + update_asset_hash_and_mime, ) @@ -142,3 +143,45 @@ class TestBulkInsertAssets: session.commit() assert session.query(Asset).count() == 200 + + +class TestMimeTypeImmutability: + """mime_type on Asset is write-once: set on first ingest, never overwritten.""" + + @pytest.mark.parametrize( + "initial_mime,second_mime,expected_mime", + [ + ("image/png", "image/jpeg", "image/png"), + (None, "image/png", "image/png"), + ], + ids=["preserves_existing", "fills_null"], + ) + def test_upsert_mime_immutability(self, session: Session, initial_mime, second_mime, expected_mime): + h = f"blake3:upsert_{initial_mime}_{second_mime}" + upsert_asset(session, asset_hash=h, size_bytes=100, mime_type=initial_mime) + session.commit() + + asset, created, _ = upsert_asset(session, asset_hash=h, size_bytes=100, mime_type=second_mime) + assert created is False + assert asset.mime_type == expected_mime + + @pytest.mark.parametrize( + "initial_mime,update_mime,update_hash,expected_mime,expected_hash", + [ + (None, "image/png", None, "image/png", "blake3:upd0"), + ("image/png", "image/jpeg", None, "image/png", "blake3:upd1"), + ("image/png", "image/jpeg", "blake3:upd2_new", "image/png", "blake3:upd2_new"), + ], + ids=["fills_null", "preserves_existing", "hash_updates_mime_locked"], + ) + def test_update_asset_hash_and_mime_immutability( + self, session: Session, initial_mime, update_mime, update_hash, expected_mime, expected_hash, + ): + h = expected_hash.removesuffix("_new") + asset = Asset(hash=h, size_bytes=100, mime_type=initial_mime) + session.add(asset) + session.flush() + + update_asset_hash_and_mime(session, asset_id=asset.id, mime_type=update_mime, asset_hash=update_hash) + assert asset.mime_type == expected_mime + assert asset.hash == expected_hash From c8c1168ab36e1b7752dcf3cb27cbe208f03cc446 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 15:08:18 -0700 Subject: [PATCH 20/34] Use resolved content_type from asset lookup in /view endpoint The /view endpoint was discarding the content_type computed by resolve_hash_to_path() and re-guessing from the filename, which produced wrong results for extensionless files or mismatched extensions. Co-Authored-By: Claude Opus 4.6 --- server.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index 3f1d1b6a7..e07bdfbc7 100644 --- a/server.py +++ b/server.py @@ -508,8 +508,9 @@ class PromptServer(): result = resolve_hash_to_path(filename, owner_id=owner_id) if result is None: return web.Response(status=404) - file, filename = result.abs_path, result.download_name + file, filename, resolved_content_type = result.abs_path, result.download_name, result.content_type else: + resolved_content_type = None filename, output_dir = folder_paths.annotated_filepath(filename) if not filename: @@ -593,8 +594,13 @@ class PromptServer(): return web.Response(body=alpha_buffer.read(), content_type='image/png', headers={"Content-Disposition": f"filename=\"{filename}\""}) else: - # Get content type from mimetype, defaulting to 'application/octet-stream' - content_type = mimetypes.guess_type(filename)[0] or 'application/octet-stream' + # Use the content type from asset resolution if available, + # otherwise guess from the filename. + content_type = ( + resolved_content_type + or mimetypes.guess_type(filename)[0] + or 'application/octet-stream' + ) # For security, force certain mimetypes to download instead of display if content_type in {'text/html', 'text/html-sandboxed', 'application/xhtml+xml', 'text/javascript', 'text/css'}: From dd227a431105bd07cc77612d4ecdb58a1ae242da Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 15:31:35 -0700 Subject: [PATCH 21/34] Merge system+user metadata into filter projection Extract rebuild_metadata_projection() to build AssetReferenceMeta rows from {**system_metadata, **user_metadata}, so system-generated metadata is queryable via metadata_filter and user keys override system keys. Co-Authored-By: Claude Opus 4.6 --- app/assets/database/queries/__init__.py | 4 + .../database/queries/asset_reference.py | 77 +++++++++++++------ .../assets_test/queries/test_metadata.py | 51 +++++++++++- 3 files changed, 105 insertions(+), 27 deletions(-) diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 17ea8f27a..3f9dd6b2b 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -35,11 +35,13 @@ from app.assets.database.queries.asset_reference import ( list_references_by_asset_id, list_references_page, mark_references_missing_outside_prefixes, + rebuild_metadata_projection, reference_exists, reference_exists_for_asset_id, restore_references_by_paths, set_reference_metadata, set_reference_preview, + set_reference_system_metadata, soft_delete_reference_by_id, update_reference_access_time, update_reference_name, @@ -107,6 +109,7 @@ __all__ = [ "list_tags_with_usage", "mark_references_missing_outside_prefixes", "reassign_asset_references", + "rebuild_metadata_projection", "reference_exists", "reference_exists_for_asset_id", "remove_missing_tag_for_asset_id", @@ -114,6 +117,7 @@ __all__ = [ "restore_references_by_paths", "set_reference_metadata", "set_reference_preview", + "set_reference_system_metadata", "soft_delete_reference_by_id", "set_reference_tags", "update_asset_hash_and_mime", diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 24e7743a2..cab824edf 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -432,6 +432,42 @@ def update_reference_updated_at( ) +def rebuild_metadata_projection(session: Session, ref: AssetReference) -> None: + """Delete and rebuild AssetReferenceMeta rows from merged system+user metadata. + + The merged dict is ``{**system_metadata, **user_metadata}`` so user keys + override system keys of the same name. + """ + session.execute( + delete(AssetReferenceMeta).where( + AssetReferenceMeta.asset_reference_id == ref.id + ) + ) + session.flush() + + merged = {**(ref.system_metadata or {}), **(ref.user_metadata or {})} + if not merged: + return + + rows: list[AssetReferenceMeta] = [] + for k, v in merged.items(): + for r in convert_metadata_to_rows(k, v): + rows.append( + AssetReferenceMeta( + asset_reference_id=ref.id, + key=r["key"], + ordinal=int(r["ordinal"]), + val_str=r.get("val_str"), + val_num=r.get("val_num"), + val_bool=r.get("val_bool"), + val_json=r.get("val_json"), + ) + ) + if rows: + session.add_all(rows) + session.flush() + + def set_reference_metadata( session: Session, reference_id: str, @@ -445,33 +481,24 @@ def set_reference_metadata( ref.updated_at = get_utc_now() session.flush() - session.execute( - delete(AssetReferenceMeta).where( - AssetReferenceMeta.asset_reference_id == reference_id - ) - ) + rebuild_metadata_projection(session, ref) + + +def set_reference_system_metadata( + session: Session, + reference_id: str, + system_metadata: dict | None = None, +) -> None: + """Set system_metadata on a reference and rebuild the merged projection.""" + ref = session.get(AssetReference, reference_id) + if not ref: + raise ValueError(f"AssetReference {reference_id} not found") + + ref.system_metadata = system_metadata or {} + ref.updated_at = get_utc_now() session.flush() - if not user_metadata: - return - - rows: list[AssetReferenceMeta] = [] - for k, v in user_metadata.items(): - for r in convert_metadata_to_rows(k, v): - rows.append( - AssetReferenceMeta( - asset_reference_id=reference_id, - key=r["key"], - ordinal=int(r["ordinal"]), - val_str=r.get("val_str"), - val_num=r.get("val_num"), - val_bool=r.get("val_bool"), - val_json=r.get("val_json"), - ) - ) - if rows: - session.add_all(rows) - session.flush() + rebuild_metadata_projection(session, ref) def delete_reference_by_id( diff --git a/tests-unit/assets_test/queries/test_metadata.py b/tests-unit/assets_test/queries/test_metadata.py index 6a545e819..d7a747789 100644 --- a/tests-unit/assets_test/queries/test_metadata.py +++ b/tests-unit/assets_test/queries/test_metadata.py @@ -20,6 +20,7 @@ def _make_reference( asset: Asset, name: str, metadata: dict | None = None, + system_metadata: dict | None = None, ) -> AssetReference: now = get_utc_now() ref = AssetReference( @@ -27,6 +28,7 @@ def _make_reference( name=name, asset_id=asset.id, user_metadata=metadata, + system_metadata=system_metadata, created_at=now, updated_at=now, last_access_time=now, @@ -34,8 +36,10 @@ def _make_reference( session.add(ref) session.flush() - if metadata: - for key, val in metadata.items(): + # Build merged projection: {**system_metadata, **user_metadata} + merged = {**(system_metadata or {}), **(metadata or {})} + if merged: + for key, val in merged.items(): for row in convert_metadata_to_rows(key, val): meta_row = AssetReferenceMeta( asset_reference_id=ref.id, @@ -182,3 +186,46 @@ class TestMetadataFilterEmptyDict: refs, _, total = list_references_page(session, metadata_filter={}) assert total == 2 + + +class TestSystemMetadataProjection: + """Tests for system_metadata merging into the filter projection.""" + + def test_system_metadata_keys_are_filterable(self, session: Session): + """system_metadata keys should appear in the merged projection.""" + asset = _make_asset(session, "hash1") + _make_reference( + session, asset, "with_sys", + system_metadata={"source": "scanner"}, + ) + _make_reference(session, asset, "without_sys") + session.commit() + + refs, _, total = list_references_page( + session, metadata_filter={"source": "scanner"} + ) + assert total == 1 + assert refs[0].name == "with_sys" + + def test_user_metadata_overrides_system_metadata(self, session: Session): + """user_metadata should win when both have the same key.""" + asset = _make_asset(session, "hash1") + _make_reference( + session, asset, "overridden", + metadata={"origin": "user_upload"}, + system_metadata={"origin": "auto_scan"}, + ) + session.commit() + + # Should match the user value, not the system value + refs, _, total = list_references_page( + session, metadata_filter={"origin": "user_upload"} + ) + assert total == 1 + assert refs[0].name == "overridden" + + # Should NOT match the system value (it was overridden) + refs, _, total = list_references_page( + session, metadata_filter={"origin": "auto_scan"} + ) + assert total == 0 From c8cc9c0f8f27108c2d24c9e76a4a170846444391 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 15:36:10 -0700 Subject: [PATCH 22/34] Standardize tag ordering to alphabetical across all endpoints Co-Authored-By: Claude Opus 4.6 --- app/assets/database/queries/asset_reference.py | 2 +- app/assets/database/queries/tags.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index cab824edf..3735d0ba8 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -319,7 +319,7 @@ def list_references_page( select(AssetReferenceTag.asset_reference_id, Tag.name) .join(Tag, Tag.name == AssetReferenceTag.tag_name) .where(AssetReferenceTag.asset_reference_id.in_(id_list)) - .order_by(AssetReferenceTag.added_at) + .order_by(AssetReferenceTag.tag_name.asc()) ) for ref_id, tag_name in rows.all(): tag_map[ref_id].append(tag_name) diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index fbca80743..2074a93f6 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -75,9 +75,9 @@ def get_reference_tags(session: Session, reference_id: str) -> list[str]: tag_name for (tag_name,) in ( session.execute( - select(AssetReferenceTag.tag_name).where( - AssetReferenceTag.asset_reference_id == reference_id - ) + select(AssetReferenceTag.tag_name) + .where(AssetReferenceTag.asset_reference_id == reference_id) + .order_by(AssetReferenceTag.tag_name.asc()) ) ).all() ] @@ -120,7 +120,7 @@ def set_reference_tags( ) session.flush() - return SetTagsResult(added=to_add, removed=to_remove, total=desired) + return SetTagsResult(added=sorted(to_add), removed=sorted(to_remove), total=sorted(desired)) def add_tags_to_reference( @@ -364,7 +364,7 @@ def list_tag_counts_for_filtered_assets( ) .where(AssetReferenceTag.asset_reference_id.in_(select(ref_sq.c.id))) .group_by(AssetReferenceTag.tag_name) - .order_by(func.count(AssetReferenceTag.asset_reference_id).desc()) + .order_by(func.count(AssetReferenceTag.asset_reference_id).desc(), AssetReferenceTag.tag_name.asc()) .limit(limit) ) From 7f47b74ba6d7b26cdad53f0561257a5890f3c997 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 15:42:07 -0700 Subject: [PATCH 23/34] Derive subfolder tags from path in register_file_in_place Co-Authored-By: Claude Opus 4.6 --- app/assets/services/ingest.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index b1c789c8c..90c51994f 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -27,6 +27,7 @@ from app.assets.helpers import normalize_tags from app.assets.services.file_utils import get_size_and_mtime_ns from app.assets.services.path_utils import ( compute_relative_filename, + get_name_and_tags_from_asset_path, resolve_destination_from_tags, validate_path_within_base, ) @@ -367,7 +368,18 @@ def register_file_in_place( owner_id: str = "", mime_type: str | None = None, ) -> UploadResult: - """Register an already-saved file in the asset database without moving it.""" + """Register an already-saved file in the asset database without moving it. + + Tags are derived from the filesystem path (root category + subfolder names), + merged with any caller-provided tags, matching the behavior of the scanner. + If the path is not under a known root, only the caller-provided tags are used. + """ + try: + _, path_tags = get_name_and_tags_from_asset_path(abs_path) + except ValueError: + path_tags = [] + merged_tags = normalize_tags([*path_tags, *tags]) + try: digest, _ = hashing.compute_blake3_hash(abs_path) except ImportError as e: @@ -390,7 +402,7 @@ def register_file_in_place( mime_type=content_type, info_name=_sanitize_filename(name, fallback=digest), owner_id=owner_id, - tags=tags, + tags=merged_tags, tag_origin="upload", require_existing_tags=False, ) From 2967c6b6af62325027a973cc466d74199cc2b8bd Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 16:29:51 -0700 Subject: [PATCH 24/34] =?UTF-8?q?Reject=20client-provided=20id,=20fix=20pr?= =?UTF-8?q?eview=20URLs,=20rename=20tags=E2=86=92total=5Ftags?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reject 'id' field in multipart upload with 400 UNSUPPORTED_FIELD instead of silently ignoring it - Build preview URL from the preview asset's own metadata rather than the parent asset's - Rename 'tags' to 'total_tags' in TagsAdd/TagsRemove response schemas for clarity Co-Authored-By: Claude Opus 4.6 --- app/assets/api/routes.py | 14 ++++++++++---- app/assets/api/schemas_in.py | 1 - app/assets/api/schemas_out.py | 4 ++-- app/assets/api/upload.py | 8 +++++--- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index e011f9a83..46a9af698 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -152,7 +152,14 @@ def _build_preview_url_from_view(tags: list[str], user_metadata: dict[str, Any] def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResult) -> schemas_out.Asset: """Build an Asset response from a service result.""" - preview_url = _build_preview_url_from_view(result.tags, result.ref.user_metadata) + if result.ref.preview_id: + preview_detail = get_asset_detail(result.ref.preview_id) + if preview_detail: + preview_url = _build_preview_url_from_view(preview_detail.tags, preview_detail.ref.user_metadata) + else: + preview_url = None + else: + preview_url = _build_preview_url_from_view(result.tags, result.ref.user_metadata) return schemas_out.Asset( id=result.ref.id, name=result.ref.name, @@ -382,7 +389,6 @@ async def upload_asset(request: web.Request) -> web.Response: "name": parsed.provided_name, "user_metadata": parsed.user_metadata_raw, "hash": parsed.provided_hash, - "id": parsed.provided_id, "mime_type": parsed.provided_mime_type, "preview_id": parsed.provided_preview_id, } @@ -605,7 +611,7 @@ async def add_asset_tags(request: web.Request) -> web.Response: payload = schemas_out.TagsAdd( added=result.added, already_present=result.already_present, - tags=result.total_tags, + total_tags=result.total_tags, ) except PermissionError as pe: return _build_error_response(403, "FORBIDDEN", str(pe), {"id": reference_id}) @@ -652,7 +658,7 @@ async def delete_asset_tags(request: web.Request) -> web.Response: payload = schemas_out.TagsRemove( removed=result.removed, not_present=result.not_present, - tags=result.total_tags, + total_tags=result.total_tags, ) except PermissionError as pe: return _build_error_response(403, "FORBIDDEN", str(pe), {"id": reference_id}) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 535b31bf0..efb9c5439 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -45,7 +45,6 @@ class ParsedUpload: user_metadata_raw: str | None provided_hash: str | None provided_hash_exists: bool | None - provided_id: str | None = None provided_mime_type: str | None = None provided_preview_id: str | None = None diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index 972d88022..8db642b45 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -54,14 +54,14 @@ class TagsAdd(BaseModel): model_config = ConfigDict(str_strip_whitespace=True) added: list[str] = Field(default_factory=list) already_present: list[str] = Field(default_factory=list) - tags: list[str] = Field(default_factory=list) + total_tags: list[str] = Field(default_factory=list) class TagsRemove(BaseModel): model_config = ConfigDict(str_strip_whitespace=True) removed: list[str] = Field(default_factory=list) not_present: list[str] = Field(default_factory=list) - tags: list[str] = Field(default_factory=list) + total_tags: list[str] = Field(default_factory=list) class TagHistogram(BaseModel): diff --git a/app/assets/api/upload.py b/app/assets/api/upload.py index c36257ae0..13d3d372c 100644 --- a/app/assets/api/upload.py +++ b/app/assets/api/upload.py @@ -52,7 +52,6 @@ async def parse_multipart_upload( user_metadata_raw: str | None = None provided_hash: str | None = None provided_hash_exists: bool | None = None - provided_id: str | None = None provided_mime_type: str | None = None provided_preview_id: str | None = None @@ -132,7 +131,11 @@ async def parse_multipart_upload( elif fname == "user_metadata": user_metadata_raw = (await field.text()) or None elif fname == "id": - provided_id = ((await field.text()) or "").strip() or None + raise UploadError( + 400, + "UNSUPPORTED_FIELD", + "Client-provided 'id' is not supported. Asset IDs are assigned by the server.", + ) elif fname == "mime_type": provided_mime_type = ((await field.text()) or "").strip() or None elif fname == "preview_id": @@ -161,7 +164,6 @@ async def parse_multipart_upload( user_metadata_raw=user_metadata_raw, provided_hash=provided_hash, provided_hash_exists=provided_hash_exists, - provided_id=provided_id, provided_mime_type=provided_mime_type, provided_preview_id=provided_preview_id, ) From aac08609ce0671572cd1ee770f7df77133e45fbc Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 16:41:45 -0700 Subject: [PATCH 25/34] fix: SQLite migration 0003 FK drop fails on file-backed DBs (MB-2) Add naming_convention to Base.metadata so Alembic batch-mode reflection can match unnamed FK constraints created by migration 0002. Pass naming_convention and render_as_batch=True through env.py online config. Add migration roundtrip tests (upgrade/downgrade/cycle from baseline). Amp-Thread-ID: https://ampcode.com/threads/T-019ce466-1683-7471-b6e1-bb078223cda0 Co-authored-by: Amp --- alembic_db/env.py | 7 ++- .../versions/0003_add_metadata_prompt.py | 10 +++- app/database/models.py | 11 +++- tests-unit/app_test/test_migrations.py | 57 +++++++++++++++++++ 4 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 tests-unit/app_test/test_migrations.py diff --git a/alembic_db/env.py b/alembic_db/env.py index 4d7770679..4ce37c012 100644 --- a/alembic_db/env.py +++ b/alembic_db/env.py @@ -8,7 +8,7 @@ from alembic import context config = context.config -from app.database.models import Base +from app.database.models import Base, NAMING_CONVENTION target_metadata = Base.metadata # other values from the config, defined by the needs of env.py, @@ -51,7 +51,10 @@ def run_migrations_online() -> None: with connectable.connect() as connection: context.configure( - connection=connection, target_metadata=target_metadata + connection=connection, + target_metadata=target_metadata, + render_as_batch=True, + naming_convention=NAMING_CONVENTION, ) with context.begin_transaction(): diff --git a/alembic_db/versions/0003_add_metadata_prompt.py b/alembic_db/versions/0003_add_metadata_prompt.py index c55a30065..522d02c33 100644 --- a/alembic_db/versions/0003_add_metadata_prompt.py +++ b/alembic_db/versions/0003_add_metadata_prompt.py @@ -10,6 +10,8 @@ Create Date: 2026-03-09 from alembic import op import sqlalchemy as sa +from app.database.models import NAMING_CONVENTION + revision = "0003_add_metadata_prompt" down_revision = "0002_merge_to_asset_references" branch_labels = None @@ -29,7 +31,9 @@ def upgrade() -> None: # Existing values are asset-content IDs that won't match reference IDs, # so null them out first. op.execute("UPDATE asset_references SET preview_id = NULL WHERE preview_id IS NOT NULL") - with op.batch_alter_table("asset_references") as batch_op: + with op.batch_alter_table( + "asset_references", naming_convention=NAMING_CONVENTION + ) as batch_op: batch_op.drop_constraint( "fk_asset_references_preview_id_assets", type_="foreignkey" ) @@ -43,7 +47,9 @@ def upgrade() -> None: def downgrade() -> None: - with op.batch_alter_table("asset_references") as batch_op: + with op.batch_alter_table( + "asset_references", naming_convention=NAMING_CONVENTION + ) as batch_op: batch_op.drop_constraint( "fk_asset_references_preview_id_asset_references", type_="foreignkey" ) diff --git a/app/database/models.py b/app/database/models.py index e7572677a..b02856f6e 100644 --- a/app/database/models.py +++ b/app/database/models.py @@ -1,9 +1,18 @@ from typing import Any from datetime import datetime +from sqlalchemy import MetaData from sqlalchemy.orm import DeclarativeBase +NAMING_CONVENTION = { + "ix": "ix_%(table_name)s_%(column_0_N_name)s", + "uq": "uq_%(table_name)s_%(column_0_N_name)s", + "ck": "ck_%(table_name)s_%(constraint_name)s", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", +} + class Base(DeclarativeBase): - pass + metadata = MetaData(naming_convention=NAMING_CONVENTION) def to_dict(obj: Any, include_none: bool = False) -> dict[str, Any]: fields = obj.__table__.columns.keys() diff --git a/tests-unit/app_test/test_migrations.py b/tests-unit/app_test/test_migrations.py new file mode 100644 index 000000000..fa10c1727 --- /dev/null +++ b/tests-unit/app_test/test_migrations.py @@ -0,0 +1,57 @@ +"""Test that Alembic migrations run cleanly on a file-backed SQLite DB. + +This catches problems like unnamed FK constraints that prevent batch-mode +drop_constraint from working on real SQLite files (see MB-2). + +Migrations 0001 and 0002 are already shipped, so we only exercise +upgrade/downgrade for 0003+. +""" + +import os + +import pytest +from alembic import command +from alembic.config import Config + + +# Oldest shipped revision — we upgrade to here as a baseline and never +# downgrade past it. +_BASELINE = "0002_merge_to_asset_references" + + +def _make_config(db_path: str) -> Config: + root = os.path.join(os.path.dirname(__file__), "../..") + config_path = os.path.abspath(os.path.join(root, "alembic.ini")) + scripts_path = os.path.abspath(os.path.join(root, "alembic_db")) + + cfg = Config(config_path) + cfg.set_main_option("script_location", scripts_path) + cfg.set_main_option("sqlalchemy.url", f"sqlite:///{db_path}") + return cfg + + +@pytest.fixture +def migration_db(tmp_path): + """Yield an alembic Config pre-upgraded to the baseline revision.""" + db_path = str(tmp_path / "test_migration.db") + cfg = _make_config(db_path) + command.upgrade(cfg, _BASELINE) + yield cfg + + +def test_upgrade_to_head(migration_db): + """Upgrade from baseline to head must succeed on a file-backed DB.""" + command.upgrade(migration_db, "head") + + +def test_downgrade_to_baseline(migration_db): + """Upgrade to head then downgrade back to baseline.""" + command.upgrade(migration_db, "head") + command.downgrade(migration_db, _BASELINE) + + +def test_upgrade_downgrade_cycle(migration_db): + """Full cycle: upgrade → downgrade → upgrade again.""" + command.upgrade(migration_db, "head") + command.downgrade(migration_db, _BASELINE) + command.upgrade(migration_db, "head") From 0068664df6980ad84c89b6ca0a7f747a016f1bef Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 17:19:09 -0700 Subject: [PATCH 26/34] Fix missing tag count for is_missing references and update test for total_tags field - Allow is_missing=True references to be counted in list_tags_with_usage when the tag is 'missing', so the missing tag count reflects all references that have been tagged as missing - Add update_is_missing_by_asset_id query helper for bulk updates by asset - Update test_add_and_remove_tags to use 'total_tags' matching the API schema Amp-Thread-ID: https://ampcode.com/threads/T-019ce482-05e7-7324-a1b0-a56a929cc7ef Co-authored-by: Amp --- app/assets/database/queries/__init__.py | 2 ++ app/assets/database/queries/asset_reference.py | 16 ++++++++++++++++ app/assets/database/queries/tags.py | 14 ++++++++++++-- app/assets/scanner.py | 6 ++++-- tests-unit/assets_test/test_tags_api.py | 2 +- 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 3f9dd6b2b..1632937b2 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -45,6 +45,7 @@ from app.assets.database.queries.asset_reference import ( soft_delete_reference_by_id, update_reference_access_time, update_reference_name, + update_is_missing_by_asset_id, update_reference_timestamps, update_reference_updated_at, upsert_reference, @@ -121,6 +122,7 @@ __all__ = [ "soft_delete_reference_by_id", "set_reference_tags", "update_asset_hash_and_mime", + "update_is_missing_by_asset_id", "update_reference_access_time", "update_reference_name", "update_reference_timestamps", diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 3735d0ba8..04019b374 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -843,6 +843,22 @@ def bulk_update_is_missing( return total +def update_is_missing_by_asset_id( + session: Session, asset_id: str, value: bool +) -> int: + """Set is_missing flag for ALL references belonging to an asset. + + Returns: Number of rows updated + """ + result = session.execute( + sa.update(AssetReference) + .where(AssetReference.asset_id == asset_id) + .where(AssetReference.deleted_at.is_(None)) + .values(is_missing=value) + ) + return result.rowcount + + def delete_references_by_ids(session: Session, reference_ids: list[str]) -> int: """Delete references by their IDs. diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index 2074a93f6..f4126dba8 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -275,7 +275,12 @@ def list_tags_with_usage( .select_from(AssetReferenceTag) .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id) .where(build_visible_owner_clause(owner_id)) - .where(AssetReference.is_missing == False) # noqa: E712 + .where( + sa.or_( + AssetReference.is_missing == False, # noqa: E712 + AssetReferenceTag.tag_name == "missing", + ) + ) .where(AssetReference.deleted_at.is_(None)) .group_by(AssetReferenceTag.tag_name) .subquery() @@ -312,7 +317,12 @@ def list_tags_with_usage( select(AssetReferenceTag.tag_name) .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id) .where(build_visible_owner_clause(owner_id)) - .where(AssetReference.is_missing == False) # noqa: E712 + .where( + sa.or_( + AssetReference.is_missing == False, # noqa: E712 + AssetReferenceTag.tag_name == "missing", + ) + ) .where(AssetReference.deleted_at.is_(None)) .group_by(AssetReferenceTag.tag_name) ) diff --git a/app/assets/scanner.py b/app/assets/scanner.py index e27ea5123..3a49d12e0 100644 --- a/app/assets/scanner.py +++ b/app/assets/scanner.py @@ -19,7 +19,9 @@ from app.assets.database.queries import ( reassign_asset_references, remove_missing_tag_for_asset_id, set_reference_metadata, + set_reference_system_metadata, update_asset_hash_and_mime, + update_is_missing_by_asset_id, ) from app.assets.services.bulk_ingest import ( SeedAssetSpec, @@ -490,8 +492,8 @@ def enrich_asset( logging.warning("Failed to hash %s: %s", file_path, e) if extract_metadata and metadata: - user_metadata = metadata.to_user_metadata() - set_reference_metadata(session, reference_id, user_metadata) + system_metadata = metadata.to_user_metadata() + set_reference_system_metadata(session, reference_id, system_metadata) if full_hash: existing = get_asset_by_hash(session, full_hash) diff --git a/tests-unit/assets_test/test_tags_api.py b/tests-unit/assets_test/test_tags_api.py index cc351ef1b..595bf29c6 100644 --- a/tests-unit/assets_test/test_tags_api.py +++ b/tests-unit/assets_test/test_tags_api.py @@ -97,7 +97,7 @@ def test_add_and_remove_tags(http: requests.Session, api_base: str, seeded_asset # normalized, deduplicated; 'unit-tests' was already present from the seed assert set(b1["added"]) == {"newtag", "beta"} assert set(b1["already_present"]) == {"unit-tests"} - assert "newtag" in b1["tags"] and "beta" in b1["tags"] + assert "newtag" in b1["total_tags"] and "beta" in b1["total_tags"] rg = http.get(f"{api_base}/api/assets/{aid}", timeout=120) g = rg.json() From c7ec6e0819ae7139691d2d5730f04f0b79a6265d Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Thu, 12 Mar 2026 17:37:06 -0700 Subject: [PATCH 27/34] Remove unused imports in scanner.py Co-Authored-By: Claude Opus 4.6 --- app/assets/scanner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/app/assets/scanner.py b/app/assets/scanner.py index 3a49d12e0..4e05a97b5 100644 --- a/app/assets/scanner.py +++ b/app/assets/scanner.py @@ -18,10 +18,8 @@ from app.assets.database.queries import ( mark_references_missing_outside_prefixes, reassign_asset_references, remove_missing_tag_for_asset_id, - set_reference_metadata, set_reference_system_metadata, update_asset_hash_and_mime, - update_is_missing_by_asset_id, ) from app.assets.services.bulk_ingest import ( SeedAssetSpec, From dde5a60357def710d27616ae6a56916731a2cf6c Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 22:13:00 -0400 Subject: [PATCH 28/34] Rename prompt_id to job_id on asset_references Rename the column in the DB model, migration, and service schemas. The API response emits both job_id and prompt_id (deprecated alias) for backward compatibility with the cloud API. Amp-Thread-ID: https://ampcode.com/threads/T-019cef41-60b0-752a-aa3c-ed7f20fda2f7 Co-authored-by: Amp --- ..._metadata_prompt.py => 0003_add_metadata_job_id.py} | 10 +++++----- app/assets/api/routes.py | 3 ++- app/assets/api/schemas_out.py | 3 ++- app/assets/database/models.py | 2 +- app/assets/services/schemas.py | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) rename alembic_db/versions/{0003_add_metadata_prompt.py => 0003_add_metadata_job_id.py} (88%) diff --git a/alembic_db/versions/0003_add_metadata_prompt.py b/alembic_db/versions/0003_add_metadata_job_id.py similarity index 88% rename from alembic_db/versions/0003_add_metadata_prompt.py rename to alembic_db/versions/0003_add_metadata_job_id.py index 522d02c33..66c0458ed 100644 --- a/alembic_db/versions/0003_add_metadata_prompt.py +++ b/alembic_db/versions/0003_add_metadata_job_id.py @@ -1,8 +1,8 @@ """ -Add system_metadata and prompt_id columns to asset_references. +Add system_metadata and job_id columns to asset_references. Change preview_id FK from assets.id to asset_references.id. -Revision ID: 0003_add_metadata_prompt +Revision ID: 0003_add_metadata_job_id Revises: 0002_merge_to_asset_references Create Date: 2026-03-09 """ @@ -12,7 +12,7 @@ import sqlalchemy as sa from app.database.models import NAMING_CONVENTION -revision = "0003_add_metadata_prompt" +revision = "0003_add_metadata_job_id" down_revision = "0002_merge_to_asset_references" branch_labels = None depends_on = None @@ -24,7 +24,7 @@ def upgrade() -> None: sa.Column("system_metadata", sa.JSON(), nullable=True) ) batch_op.add_column( - sa.Column("prompt_id", sa.String(length=36), nullable=True) + sa.Column("job_id", sa.String(length=36), nullable=True) ) # Change preview_id FK from assets.id to asset_references.id (self-ref). @@ -62,5 +62,5 @@ def downgrade() -> None: ) with op.batch_alter_table("asset_references") as batch_op: - batch_op.drop_column("prompt_id") + batch_op.drop_column("job_id") batch_op.drop_column("system_metadata") diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 46a9af698..02b4d1726 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -171,7 +171,8 @@ def _build_asset_response(result: schemas.AssetDetailResult | schemas.UploadResu preview_id=result.ref.preview_id, user_metadata=result.ref.user_metadata or {}, metadata=result.ref.system_metadata, - prompt_id=result.ref.prompt_id, + job_id=result.ref.job_id, + prompt_id=result.ref.job_id, # deprecated: mirrors job_id for cloud compat created_at=result.ref.created_at, updated_at=result.ref.updated_at, last_access_time=result.ref.last_access_time, diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index 8db642b45..48d6292e4 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -16,7 +16,8 @@ class Asset(BaseModel): user_metadata: dict[str, Any] = Field(default_factory=dict) is_immutable: bool = False metadata: dict[str, Any] | None = None - prompt_id: str | None = None + job_id: str | None = None + prompt_id: str | None = None # deprecated: use job_id created_at: datetime updated_at: datetime last_access_time: datetime | None = None diff --git a/app/assets/database/models.py b/app/assets/database/models.py index 5534e89d5..c3df8d669 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -93,7 +93,7 @@ class AssetReference(Base): system_metadata: Mapped[dict[str, Any] | None] = mapped_column( JSON(none_as_null=True), nullable=True, default=None ) - prompt_id: Mapped[str | None] = mapped_column(String(36), nullable=True, default=None) + job_id: Mapped[str | None] = mapped_column(String(36), nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=False), nullable=False, default=get_utc_now ) diff --git a/app/assets/services/schemas.py b/app/assets/services/schemas.py index b744283b2..0eb128f58 100644 --- a/app/assets/services/schemas.py +++ b/app/assets/services/schemas.py @@ -26,7 +26,7 @@ class ReferenceData: created_at: datetime updated_at: datetime system_metadata: dict[str, Any] | None = None - prompt_id: str | None = None + job_id: str | None = None last_access_time: datetime | None = None @@ -96,7 +96,7 @@ def extract_reference_data(ref: AssetReference) -> ReferenceData: user_metadata=ref.user_metadata, preview_id=ref.preview_id, system_metadata=ref.system_metadata, - prompt_id=ref.prompt_id, + job_id=ref.job_id, created_at=ref.created_at, updated_at=ref.updated_at, last_access_time=ref.last_access_time, From c12d4c0755eb5b31aa374a711eb093a1cde2ba46 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 22:17:27 -0400 Subject: [PATCH 29/34] Add index on asset_references.preview_id for FK cascade performance Amp-Thread-ID: https://ampcode.com/threads/T-019cef45-a4d2-7548-86d2-d46bcd3db419 Co-authored-by: Amp --- alembic_db/versions/0003_add_metadata_job_id.py | 4 ++++ app/assets/database/models.py | 1 + 2 files changed, 5 insertions(+) diff --git a/alembic_db/versions/0003_add_metadata_job_id.py b/alembic_db/versions/0003_add_metadata_job_id.py index 66c0458ed..3a0e0d0cf 100644 --- a/alembic_db/versions/0003_add_metadata_job_id.py +++ b/alembic_db/versions/0003_add_metadata_job_id.py @@ -44,12 +44,16 @@ def upgrade() -> None: ["id"], ondelete="SET NULL", ) + batch_op.create_index( + "ix_asset_references_preview_id", ["preview_id"] + ) def downgrade() -> None: with op.batch_alter_table( "asset_references", naming_convention=NAMING_CONVENTION ) as batch_op: + batch_op.drop_index("ix_asset_references_preview_id") batch_op.drop_constraint( "fk_asset_references_preview_id_asset_references", type_="foreignkey" ) diff --git a/app/assets/database/models.py b/app/assets/database/models.py index c3df8d669..733e807d0 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -150,6 +150,7 @@ class AssetReference(Base): Index("ix_asset_references_created_at", "created_at"), Index("ix_asset_references_last_access_time", "last_access_time"), Index("ix_asset_references_deleted_at", "deleted_at"), + Index("ix_asset_references_preview_id", "preview_id"), Index("ix_asset_references_owner_name", "owner_id", "name"), CheckConstraint( "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_ar_mtime_nonneg" From 79c2bd51f72b144a2ffa2d3b78644bec33a592c2 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 22:22:24 -0400 Subject: [PATCH 30/34] Add clarifying comments for Asset/AssetReference naming and preview_id Amp-Thread-ID: https://ampcode.com/threads/T-019cef49-f94e-7348-bf23-9a19ebf65e0d Co-authored-by: Amp --- app/assets/api/schemas_in.py | 8 ++++---- app/assets/api/schemas_out.py | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index efb9c5439..8bcbc6dca 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -101,7 +101,7 @@ class UpdateAssetBody(BaseModel): name: str | None = None user_metadata: dict[str, Any] | None = None mime_type: str | None = None - preview_id: str | None = None + preview_id: str | None = None # references an asset_reference id, not an asset id @model_validator(mode="after") def _validate_at_least_one_field(self): @@ -123,7 +123,7 @@ class CreateFromHashBody(BaseModel): tags: list[str] = Field(default_factory=list) user_metadata: dict[str, Any] = Field(default_factory=dict) mime_type: str | None = None - preview_id: str | None = None + preview_id: str | None = None # references an asset_reference id, not an asset id @field_validator("hash") @classmethod @@ -241,7 +241,7 @@ class UploadAssetSpec(BaseModel): - user_metadata: arbitrary JSON object (optional) - hash: optional canonical 'blake3:' for validation / fast-path - mime_type: optional MIME type override - - preview_id: optional asset ID for preview + - preview_id: optional asset_reference ID for preview Files are stored using the content hash as filename stem. """ @@ -253,7 +253,7 @@ class UploadAssetSpec(BaseModel): user_metadata: dict[str, Any] = Field(default_factory=dict) hash: str | None = Field(default=None) mime_type: str | None = Field(default=None) - preview_id: str | None = Field(default=None) + preview_id: str | None = Field(default=None) # references an asset_reference id @field_validator("hash", mode="before") @classmethod diff --git a/app/assets/api/schemas_out.py b/app/assets/api/schemas_out.py index 48d6292e4..d99b1098d 100644 --- a/app/assets/api/schemas_out.py +++ b/app/assets/api/schemas_out.py @@ -5,6 +5,9 @@ from pydantic import BaseModel, ConfigDict, Field, field_serializer class Asset(BaseModel): + """API view of an asset. Maps to DB ``AssetReference`` joined with its ``Asset`` blob; + ``id`` here is the AssetReference id, not the content-addressed Asset id.""" + id: str name: str asset_hash: str | None = None @@ -12,7 +15,7 @@ class Asset(BaseModel): mime_type: str | None = None tags: list[str] = Field(default_factory=list) preview_url: str | None = None - preview_id: str | None = None + preview_id: str | None = None # references an asset_reference id, not an asset id user_metadata: dict[str, Any] = Field(default_factory=dict) is_immutable: bool = False metadata: dict[str, Any] | None = None From d6e6c3990a4db518e14908d5e8a395f10007793a Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 22:29:45 -0400 Subject: [PATCH 31/34] Disallow all-null meta rows: add CHECK constraint, skip null values on write - convert_metadata_to_rows returns [] for None values instead of an all-null row - Remove dead None branch from _scalar_to_row - Simplify null filter in common.py to just check for row absence - Add CHECK constraint ck_asset_reference_meta_has_value to model and migration 0003 Amp-Thread-ID: https://ampcode.com/threads/T-019cef4e-5240-7749-bb25-1f17fcf9c09c Co-authored-by: Amp --- .../versions/0003_add_metadata_job_id.py | 20 +++++++++++++++++++ app/assets/database/models.py | 4 ++++ .../database/queries/asset_reference.py | 11 +--------- app/assets/database/queries/common.py | 10 +--------- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/alembic_db/versions/0003_add_metadata_job_id.py b/alembic_db/versions/0003_add_metadata_job_id.py index 3a0e0d0cf..1d75e836e 100644 --- a/alembic_db/versions/0003_add_metadata_job_id.py +++ b/alembic_db/versions/0003_add_metadata_job_id.py @@ -48,8 +48,28 @@ def upgrade() -> None: "ix_asset_references_preview_id", ["preview_id"] ) + # Purge any all-null meta rows before adding the constraint + op.execute( + "DELETE FROM asset_reference_meta" + " WHERE val_str IS NULL AND val_num IS NULL AND val_bool IS NULL AND val_json IS NULL" + ) + with op.batch_alter_table( + "asset_reference_meta", naming_convention=NAMING_CONVENTION + ) as batch_op: + batch_op.create_check_constraint( + "ck_asset_reference_meta_has_value", + "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL", + ) + def downgrade() -> None: + with op.batch_alter_table( + "asset_reference_meta", naming_convention=NAMING_CONVENTION + ) as batch_op: + batch_op.drop_constraint( + "ck_asset_reference_meta_has_value", type_="check" + ) + with op.batch_alter_table( "asset_references", naming_convention=NAMING_CONVENTION ) as batch_op: diff --git a/app/assets/database/models.py b/app/assets/database/models.py index 733e807d0..5c7ff8154 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -191,6 +191,10 @@ class AssetReferenceMeta(Base): Index("ix_asset_reference_meta_key_val_str", "key", "val_str"), Index("ix_asset_reference_meta_key_val_num", "key", "val_num"), Index("ix_asset_reference_meta_key_val_bool", "key", "val_bool"), + CheckConstraint( + "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL", + name="ck_asset_reference_meta_has_value", + ), ) diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 04019b374..7af552483 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -46,15 +46,6 @@ def _check_is_scalar(v): def _scalar_to_row(key: str, ordinal: int, value) -> dict: """Convert a scalar value to a typed projection row.""" - if value is None: - return { - "key": key, - "ordinal": ordinal, - "val_str": None, - "val_num": None, - "val_bool": None, - "val_json": None, - } if isinstance(value, bool): return {"key": key, "ordinal": ordinal, "val_bool": bool(value)} if isinstance(value, (int, float, Decimal)): @@ -68,7 +59,7 @@ def _scalar_to_row(key: str, ordinal: int, value) -> dict: def convert_metadata_to_rows(key: str, value) -> list[dict]: """Turn a metadata key/value into typed projection rows.""" if value is None: - return [_scalar_to_row(key, 0, None)] + return [] if _check_is_scalar(value): return [_scalar_to_row(key, 0, value)] diff --git a/app/assets/database/queries/common.py b/app/assets/database/queries/common.py index 94ec5a526..89bb49327 100644 --- a/app/assets/database/queries/common.py +++ b/app/assets/database/queries/common.py @@ -101,20 +101,12 @@ def apply_metadata_filter( def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement: if value is None: - no_row_for_key = sa.not_( + return sa.not_( sa.exists().where( AssetReferenceMeta.asset_reference_id == AssetReference.id, AssetReferenceMeta.key == key, ) ) - null_row = _exists_for_pred( - key, - AssetReferenceMeta.val_json.is_(None), - AssetReferenceMeta.val_str.is_(None), - AssetReferenceMeta.val_num.is_(None), - AssetReferenceMeta.val_bool.is_(None), - ) - return sa.or_(no_row_for_key, null_row) if isinstance(value, bool): return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value)) From ecc20c9a27dec60451128defed0ad4f54e38f27c Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 22:38:40 -0400 Subject: [PATCH 32/34] Remove dead None guards on result.asset in upload handler register_file_in_place guarantees a non-None asset, so the 'if result.asset else None' checks were unreachable. Amp-Thread-ID: https://ampcode.com/threads/T-019cef5b-4cf8-723c-8a98-8fb8f333c133 Co-authored-by: Amp --- server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index e07bdfbc7..173a28376 100644 --- a/server.py +++ b/server.py @@ -430,9 +430,9 @@ class PromptServer(): resp["asset"] = { "id": result.ref.id, "name": result.ref.name, - "asset_hash": result.asset.hash if result.asset else None, - "size": result.asset.size_bytes if result.asset else None, - "mime_type": result.asset.mime_type if result.asset else None, + "asset_hash": result.asset.hash, + "size": result.asset.size_bytes, + "mime_type": result.asset.mime_type, "tags": result.tags, } except Exception: From a8d524a0225ea0a93279ccd938c9c1933b0f8e85 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 22:46:14 -0400 Subject: [PATCH 33/34] Remove mime_type from asset update API Clients can no longer modify mime_type after asset creation via the PUT /api/assets/{id} endpoint. This reduces the risk of mime_type spoofing. The internal update_asset_hash_and_mime function remains available for server-side use (e.g., enrichment). Amp-Thread-ID: https://ampcode.com/threads/T-019cef5d-8d61-75cc-a1c6-2841ac395648 Co-authored-by: Amp --- app/assets/api/routes.py | 1 - app/assets/api/schemas_in.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 02b4d1726..68126b6a5 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -494,7 +494,6 @@ async def update_asset_route(request: web.Request) -> web.Response: name=body.name, user_metadata=body.user_metadata, owner_id=USER_MANAGER.get_request_user_id(request), - mime_type=body.mime_type, preview_id=body.preview_id, ) payload = _build_asset_response(result) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 8bcbc6dca..186a6ae1e 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -100,17 +100,16 @@ class ListAssetsQuery(BaseModel): class UpdateAssetBody(BaseModel): name: str | None = None user_metadata: dict[str, Any] | None = None - mime_type: str | None = None preview_id: str | None = None # references an asset_reference id, not an asset id @model_validator(mode="after") def _validate_at_least_one_field(self): if all( v is None - for v in (self.name, self.user_metadata, self.mime_type, self.preview_id) + for v in (self.name, self.user_metadata, self.preview_id) ): raise ValueError( - "Provide at least one of: name, user_metadata, mime_type, preview_id." + "Provide at least one of: name, user_metadata, preview_id." ) return self From 63bae494ad906028b5afd9cd7375bce95aea6030 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Sat, 14 Mar 2026 23:30:15 -0400 Subject: [PATCH 34/34] Fix migration constraint naming double-prefix and NULL in mixed metadata lists - Use fully-rendered constraint names in migration 0003 to avoid the naming convention doubling the ck_ prefix on batch operations. - Add table_args to downgrade so SQLite batch mode can find the CHECK constraint (not exposed by SQLite reflection). - Fix model CheckConstraint name to use bare 'has_value' (convention auto-prefixes). - Skip None items when converting metadata lists to rows, preventing all-NULL rows that violate the has_value check constraint. Amp-Thread-ID: https://ampcode.com/threads/T-019cef87-94f9-7172-a6af-c6282290ce4f Co-authored-by: Amp --- alembic_db/versions/0003_add_metadata_job_id.py | 16 ++++++++++++---- app/assets/database/models.py | 2 +- app/assets/database/queries/asset_reference.py | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/alembic_db/versions/0003_add_metadata_job_id.py b/alembic_db/versions/0003_add_metadata_job_id.py index 1d75e836e..2a14ee924 100644 --- a/alembic_db/versions/0003_add_metadata_job_id.py +++ b/alembic_db/versions/0003_add_metadata_job_id.py @@ -53,9 +53,7 @@ def upgrade() -> None: "DELETE FROM asset_reference_meta" " WHERE val_str IS NULL AND val_num IS NULL AND val_bool IS NULL AND val_json IS NULL" ) - with op.batch_alter_table( - "asset_reference_meta", naming_convention=NAMING_CONVENTION - ) as batch_op: + with op.batch_alter_table("asset_reference_meta") as batch_op: batch_op.create_check_constraint( "ck_asset_reference_meta_has_value", "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL", @@ -63,8 +61,18 @@ def upgrade() -> None: def downgrade() -> None: + # SQLite doesn't reflect CHECK constraints, so we must declare it + # explicitly via table_args for the batch recreate to find it. + # Use the fully-rendered constraint name to avoid the naming convention + # doubling the prefix. with op.batch_alter_table( - "asset_reference_meta", naming_convention=NAMING_CONVENTION + "asset_reference_meta", + table_args=[ + sa.CheckConstraint( + "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL", + name="ck_asset_reference_meta_has_value", + ), + ], ) as batch_op: batch_op.drop_constraint( "ck_asset_reference_meta_has_value", type_="check" diff --git a/app/assets/database/models.py b/app/assets/database/models.py index 5c7ff8154..a3af8a192 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -193,7 +193,7 @@ class AssetReferenceMeta(Base): Index("ix_asset_reference_meta_key_val_bool", "key", "val_bool"), CheckConstraint( "val_str IS NOT NULL OR val_num IS NOT NULL OR val_bool IS NOT NULL OR val_json IS NOT NULL", - name="ck_asset_reference_meta_has_value", + name="has_value", ), ) diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 7af552483..084a32512 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -66,8 +66,8 @@ def convert_metadata_to_rows(key: str, value) -> list[dict]: if isinstance(value, list): if all(_check_is_scalar(x) for x in value): - return [_scalar_to_row(key, i, x) for i, x in enumerate(value)] - return [{"key": key, "ordinal": i, "val_json": x} for i, x in enumerate(value)] + return [_scalar_to_row(key, i, x) for i, x in enumerate(value) if x is not None] + return [{"key": key, "ordinal": i, "val_json": x} for i, x in enumerate(value) if x is not None] return [{"key": key, "ordinal": 0, "val_json": value}]