From b9286572d3ac581bcd5eff5aae3bea7656777b38 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Tue, 17 Mar 2026 15:54:47 -0700 Subject: [PATCH] Use ExtractedMetadata in ingest_existing_file instead of passing raw dict Have ingest_existing_file call extract_file_metadata() to build a proper ExtractedMetadata object, matching what the scanner does. This tightens SeedAssetSpec.metadata to ExtractedMetadata | None and removes dict-handling branches in bulk_ingest.py that would have raised AttributeError on to_meta_rows()/to_user_metadata(). Amp-Thread-ID: https://ampcode.com/threads/T-019cfdf9-2379-723a-82cf-306755e54396 Co-authored-by: Amp --- app/assets/services/bulk_ingest.py | 8 +++----- app/assets/services/ingest.py | 6 ++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/assets/services/bulk_ingest.py b/app/assets/services/bulk_ingest.py index 658a08067..67aad838f 100644 --- a/app/assets/services/bulk_ingest.py +++ b/app/assets/services/bulk_ingest.py @@ -34,7 +34,7 @@ class SeedAssetSpec(TypedDict): info_name: str tags: list[str] fname: str - metadata: ExtractedMetadata | dict[str, Any] | None + metadata: ExtractedMetadata | None hash: str | None mime_type: str | None job_id: str | None @@ -152,10 +152,8 @@ def batch_insert_seed_assets( # Build user_metadata from extracted metadata or fallback to filename extracted_metadata = spec.get("metadata") - if isinstance(extracted_metadata, dict): - user_metadata: dict[str, Any] | None = extracted_metadata - elif extracted_metadata: - user_metadata = extracted_metadata.to_user_metadata() + if extracted_metadata: + user_metadata: dict[str, Any] | None = extracted_metadata.to_user_metadata() elif spec["fname"]: user_metadata = {"filename": spec["fname"]} else: diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index f6391e1a9..dc53276dc 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -26,6 +26,7 @@ from app.assets.database.queries import ( from app.assets.helpers import get_utc_now, normalize_tags from app.assets.services.bulk_ingest import batch_insert_seed_assets from app.assets.services.file_utils import get_size_and_mtime_ns +from app.assets.services.metadata_extract import extract_file_metadata from app.assets.services.path_utils import ( compute_relative_filename, get_name_and_tags_from_asset_path, @@ -196,6 +197,7 @@ def ingest_existing_file( session.commit() return True + metadata = extract_file_metadata(locator) spec = { "abs_path": abs_path, "size_bytes": size_bytes, @@ -203,9 +205,9 @@ def ingest_existing_file( "info_name": name, "tags": tags, "fname": os.path.basename(abs_path), - "metadata": user_metadata, + "metadata": metadata, "hash": None, - "mime_type": mime_type, + "mime_type": mime_type or metadata.content_type, "job_id": job_id, } result = batch_insert_seed_assets(session, [spec], owner_id=owner_id)