From 6436190143adc9c7c7b8989d78d26264b7c9e974 Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Mon, 23 Feb 2026 17:03:00 -0800 Subject: [PATCH] fix(assets): fix 12 bugs found during code review - Fix _hash_file_obj not restoring file position when orig_pos == 0 - Fix validate_path_within_base swallowing its own ValueError - Guard against tags=None crash in upload_from_temp_path - Filter is_missing references from list_references_page results - Include Decimal in metadata filter isinstance check - Return 403 instead of 404 for PermissionError in update/add_tags routes - Re-disable seeder after non-blocking start with --disable-assets-autoscan - Remove duplicate HashMismatchError/DependencyMissingError from schemas_in - Move _emit_event outside lock in seeder resume() to prevent deadlock - Fix total_q ignoring owner_id in list_tags_with_usage - Capture ORM attributes before commit in resolve_asset_for_download - Remove dead code (if mime_type is None: pass) Co-Authored-By: Claude Opus 4.6 --- app/assets/api/routes.py | 17 +++++++++++++++-- app/assets/api/schemas_in.py | 16 ---------------- app/assets/database/queries/asset_reference.py | 4 +++- app/assets/database/queries/tags.py | 10 ++++++---- app/assets/scanner.py | 2 -- app/assets/seeder.py | 8 ++++---- app/assets/services/asset_management.py | 10 +++++++--- app/assets/services/hashing.py | 3 +-- app/assets/services/ingest.py | 2 ++ app/assets/services/metadata_extract.py | 2 -- app/assets/services/path_utils.py | 5 +++-- main.py | 4 ++-- 12 files changed, 43 insertions(+), 40 deletions(-) diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index 2f845b228..c02d3e96f 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -433,7 +433,11 @@ async def update_asset_route(request: web.Request) -> web.Response: user_metadata=result.ref.user_metadata or {}, updated_at=result.ref.updated_at, ) - except (ValueError, PermissionError) as ve: + except PermissionError as pe: + return _build_error_response( + 403, "FORBIDDEN", str(pe), {"id": reference_id} + ) + except ValueError as ve: return _build_error_response( 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id} ) @@ -544,7 +548,11 @@ async def add_asset_tags(request: web.Request) -> web.Response: already_present=result.already_present, total_tags=result.total_tags, ) - except (ValueError, PermissionError) as ve: + except PermissionError as pe: + return _build_error_response( + 403, "FORBIDDEN", str(pe), {"id": reference_id} + ) + except ValueError as ve: return _build_error_response( 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id} ) @@ -659,6 +667,11 @@ async def seed_assets(request: web.Request) -> web.Response: status=200, ) + # Re-disable after starting: the running thread doesn't check _disabled, + # so this only prevents new scans from auto-starting while this one runs. + if was_disabled: + asset_seeder.disable() + return web.json_response({"status": "started"}, status=202) diff --git a/app/assets/api/schemas_in.py b/app/assets/api/schemas_in.py index 6a976129f..8bfc221d3 100644 --- a/app/assets/api/schemas_in.py +++ b/app/assets/api/schemas_in.py @@ -39,22 +39,6 @@ class AssetNotFoundError(Exception): self.message = message -class HashMismatchError(Exception): - """Uploaded file hash does not match provided hash.""" - - def __init__(self, message: str): - super().__init__(message) - self.message = message - - -class DependencyMissingError(Exception): - """A required dependency is not installed.""" - - def __init__(self, message: str): - super().__init__(message) - self.message = message - - @dataclass class ParsedUpload: """Result of parsing a multipart upload request.""" diff --git a/app/assets/database/queries/asset_reference.py b/app/assets/database/queries/asset_reference.py index 55e087b6e..4a569ba5e 100644 --- a/app/assets/database/queries/asset_reference.py +++ b/app/assets/database/queries/asset_reference.py @@ -141,7 +141,7 @@ def _apply_metadata_filter( if isinstance(value, bool): return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value)) - if isinstance(value, (int, float)): + if isinstance(value, (int, float, Decimal)): num = value if isinstance(value, Decimal) else Decimal(str(value)) return _exists_for_pred(key, AssetReferenceMeta.val_num == num) if isinstance(value, str): @@ -307,6 +307,7 @@ def list_references_page( select(AssetReference) .join(Asset, Asset.id == AssetReference.asset_id) .where(build_visible_owner_clause(owner_id)) + .where(AssetReference.is_missing == False) # noqa: E712 .options(noload(AssetReference.tags)) ) @@ -336,6 +337,7 @@ def list_references_page( .select_from(AssetReference) .join(Asset, Asset.id == AssetReference.asset_id) .where(build_visible_owner_clause(owner_id)) + .where(AssetReference.is_missing == False) # noqa: E712 ) if name_contains: escaped, esc = escape_sql_like_string(name_contains) diff --git a/app/assets/database/queries/tags.py b/app/assets/database/queries/tags.py index d4f64ae1b..047c88793 100644 --- a/app/assets/database/queries/tags.py +++ b/app/assets/database/queries/tags.py @@ -315,11 +315,13 @@ def list_tags_with_usage( escaped, esc = escape_sql_like_string(prefix.strip().lower()) total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc)) if not include_zero: - total_q = total_q.where( - Tag.name.in_( - select(AssetReferenceTag.tag_name).group_by(AssetReferenceTag.tag_name) - ) + visible_tags_sq = ( + select(AssetReferenceTag.tag_name) + .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id) + .where(build_visible_owner_clause(owner_id)) + .group_by(AssetReferenceTag.tag_name) ) + total_q = total_q.where(Tag.name.in_(visible_tags_sq)) rows = (session.execute(q.limit(limit).offset(offset))).all() total = (session.execute(total_q)).scalar_one() diff --git a/app/assets/scanner.py b/app/assets/scanner.py index 003a1d85f..d60082207 100644 --- a/app/assets/scanner.py +++ b/app/assets/scanner.py @@ -329,8 +329,6 @@ def build_asset_specs( logging.warning("Failed to hash %s: %s", abs_p, e) mime_type = metadata.content_type if metadata else None - if mime_type is None: - pass specs.append( { "abs_path": abs_p, diff --git a/app/assets/seeder.py b/app/assets/seeder.py index 7ded8107b..aeb213fcd 100644 --- a/app/assets/seeder.py +++ b/app/assets/seeder.py @@ -247,8 +247,8 @@ class AssetSeeder: return True def resume(self) -> bool: - """Resume a paused scan. - + """Resume a paused scan. + This is a noop if the scan is not in the PAUSED state Returns: @@ -260,8 +260,8 @@ class AssetSeeder: logging.info("Asset seeder resuming") self._state = State.RUNNING self._pause_event.set() - self._emit_event("assets.seed.resumed", {}) - return True + self._emit_event("assets.seed.resumed", {}) + return True def restart( self, diff --git a/app/assets/services/asset_management.py b/app/assets/services/asset_management.py index a29deb435..40233ebd7 100644 --- a/app/assets/services/asset_management.py +++ b/app/assets/services/asset_management.py @@ -287,15 +287,19 @@ def resolve_asset_for_download( f"(asset id={asset.id}, name={ref.name})" ) + # Capture ORM attributes before commit (commit expires loaded objects) + ref_name = ref.name + asset_mime = asset.mime_type + update_reference_access_time(session, reference_id=reference_id) session.commit() ctype = ( - asset.mime_type - or mimetypes.guess_type(ref.name or abs_path)[0] + asset_mime + or mimetypes.guess_type(ref_name or abs_path)[0] or "application/octet-stream" ) - download_name = ref.name or os.path.basename(abs_path) + download_name = ref_name or os.path.basename(abs_path) return DownloadResolutionResult( abs_path=abs_path, content_type=ctype, diff --git a/app/assets/services/hashing.py b/app/assets/services/hashing.py index 38aeae4d7..e92d34aaf 100644 --- a/app/assets/services/hashing.py +++ b/app/assets/services/hashing.py @@ -50,5 +50,4 @@ def _hash_file_obj(file_obj: IO, chunk_size: int = DEFAULT_CHUNK) -> str: h.update(chunk) return h.hexdigest() finally: - if orig_pos != 0: - file_obj.seek(orig_pos) + file_obj.seek(orig_pos) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 38c286fa1..2e046dbd9 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -283,6 +283,8 @@ def upload_from_temp_path( created_new=False, ) + if not tags: + raise ValueError("tags are required for new asset uploads") base_dir, subdirs = resolve_destination_from_tags(tags) dest_dir = os.path.join(base_dir, *subdirs) if subdirs else base_dir os.makedirs(dest_dir, exist_ok=True) diff --git a/app/assets/services/metadata_extract.py b/app/assets/services/metadata_extract.py index 5868036e7..0e0fb8b1c 100644 --- a/app/assets/services/metadata_extract.py +++ b/app/assets/services/metadata_extract.py @@ -325,8 +325,6 @@ def extract_file_metadata( _register_custom_mime_types() mime_type, _ = mimetypes.guess_type(abs_path) meta.content_type = mime_type - if mime_type is None: - pass # Size from stat if stat_result is None: diff --git a/app/assets/services/path_utils.py b/app/assets/services/path_utils.py index 8fdd59c23..d4d21f2e3 100644 --- a/app/assets/services/path_utils.py +++ b/app/assets/services/path_utils.py @@ -54,10 +54,11 @@ def validate_path_within_base(candidate: str, base: str) -> None: cand_abs = os.path.abspath(candidate) base_abs = os.path.abspath(base) try: - if os.path.commonpath([cand_abs, base_abs]) != base_abs: - raise ValueError("destination escapes base directory") + common = os.path.commonpath([cand_abs, base_abs]) except Exception: raise ValueError("invalid destination path") + if common != base_abs: + raise ValueError("destination escapes base directory") def compute_relative_filename(file_path: str) -> str | None: diff --git a/main.py b/main.py index 0249da689..25ef3abd6 100644 --- a/main.py +++ b/main.py @@ -259,9 +259,9 @@ def prompt_worker(q, server_instance): extra_data[k] = sensitive[k] asset_seeder.pause() - + e.execute(item[2], prompt_id, extra_data, item[4]) - + asset_seeder.resume() need_gc = True