diff --git a/app/assets/services/bulk_ingest.py b/app/assets/services/bulk_ingest.py index 658a08067..67aad838f 100644 --- a/app/assets/services/bulk_ingest.py +++ b/app/assets/services/bulk_ingest.py @@ -34,7 +34,7 @@ class SeedAssetSpec(TypedDict): info_name: str tags: list[str] fname: str - metadata: ExtractedMetadata | dict[str, Any] | None + metadata: ExtractedMetadata | None hash: str | None mime_type: str | None job_id: str | None @@ -152,10 +152,8 @@ def batch_insert_seed_assets( # Build user_metadata from extracted metadata or fallback to filename extracted_metadata = spec.get("metadata") - if isinstance(extracted_metadata, dict): - user_metadata: dict[str, Any] | None = extracted_metadata - elif extracted_metadata: - user_metadata = extracted_metadata.to_user_metadata() + if extracted_metadata: + user_metadata: dict[str, Any] | None = extracted_metadata.to_user_metadata() elif spec["fname"]: user_metadata = {"filename": spec["fname"]} else: diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index f6391e1a9..dc53276dc 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -26,6 +26,7 @@ from app.assets.database.queries import ( from app.assets.helpers import get_utc_now, normalize_tags from app.assets.services.bulk_ingest import batch_insert_seed_assets from app.assets.services.file_utils import get_size_and_mtime_ns +from app.assets.services.metadata_extract import extract_file_metadata from app.assets.services.path_utils import ( compute_relative_filename, get_name_and_tags_from_asset_path, @@ -196,6 +197,7 @@ def ingest_existing_file( session.commit() return True + metadata = extract_file_metadata(locator) spec = { "abs_path": abs_path, "size_bytes": size_bytes, @@ -203,9 +205,9 @@ def ingest_existing_file( "info_name": name, "tags": tags, "fname": os.path.basename(abs_path), - "metadata": user_metadata, + "metadata": metadata, "hash": None, - "mime_type": mime_type, + "mime_type": mime_type or metadata.content_type, "job_id": job_id, } result = batch_insert_seed_assets(session, [spec], owner_id=owner_id)