fix(assets): fix 12 bugs found during code review

- Fix _hash_file_obj not restoring file position when orig_pos == 0
- Fix validate_path_within_base swallowing its own ValueError
- Guard against tags=None crash in upload_from_temp_path
- Filter is_missing references from list_references_page results
- Include Decimal in metadata filter isinstance check
- Return 403 instead of 404 for PermissionError in update/add_tags routes
- Re-disable seeder after non-blocking start with --disable-assets-autoscan
- Remove duplicate HashMismatchError/DependencyMissingError from schemas_in
- Move _emit_event outside lock in seeder resume() to prevent deadlock
- Fix total_q ignoring owner_id in list_tags_with_usage
- Capture ORM attributes before commit in resolve_asset_for_download
- Remove dead code (if mime_type is None: pass)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Luke Mino-Altherr 2026-02-23 17:03:00 -08:00
parent 196959472a
commit 6436190143
12 changed files with 43 additions and 40 deletions

View File

@ -433,7 +433,11 @@ async def update_asset_route(request: web.Request) -> web.Response:
user_metadata=result.ref.user_metadata or {}, user_metadata=result.ref.user_metadata or {},
updated_at=result.ref.updated_at, updated_at=result.ref.updated_at,
) )
except (ValueError, PermissionError) as ve: except PermissionError as pe:
return _build_error_response(
403, "FORBIDDEN", str(pe), {"id": reference_id}
)
except ValueError as ve:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id} 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
) )
@ -544,7 +548,11 @@ async def add_asset_tags(request: web.Request) -> web.Response:
already_present=result.already_present, already_present=result.already_present,
total_tags=result.total_tags, total_tags=result.total_tags,
) )
except (ValueError, PermissionError) as ve: except PermissionError as pe:
return _build_error_response(
403, "FORBIDDEN", str(pe), {"id": reference_id}
)
except ValueError as ve:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id} 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
) )
@ -659,6 +667,11 @@ async def seed_assets(request: web.Request) -> web.Response:
status=200, status=200,
) )
# Re-disable after starting: the running thread doesn't check _disabled,
# so this only prevents new scans from auto-starting while this one runs.
if was_disabled:
asset_seeder.disable()
return web.json_response({"status": "started"}, status=202) return web.json_response({"status": "started"}, status=202)

View File

@ -39,22 +39,6 @@ class AssetNotFoundError(Exception):
self.message = message self.message = message
class HashMismatchError(Exception):
"""Uploaded file hash does not match provided hash."""
def __init__(self, message: str):
super().__init__(message)
self.message = message
class DependencyMissingError(Exception):
"""A required dependency is not installed."""
def __init__(self, message: str):
super().__init__(message)
self.message = message
@dataclass @dataclass
class ParsedUpload: class ParsedUpload:
"""Result of parsing a multipart upload request.""" """Result of parsing a multipart upload request."""

View File

@ -141,7 +141,7 @@ def _apply_metadata_filter(
if isinstance(value, bool): if isinstance(value, bool):
return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value)) return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value))
if isinstance(value, (int, float)): if isinstance(value, (int, float, Decimal)):
num = value if isinstance(value, Decimal) else Decimal(str(value)) num = value if isinstance(value, Decimal) else Decimal(str(value))
return _exists_for_pred(key, AssetReferenceMeta.val_num == num) return _exists_for_pred(key, AssetReferenceMeta.val_num == num)
if isinstance(value, str): if isinstance(value, str):
@ -307,6 +307,7 @@ def list_references_page(
select(AssetReference) select(AssetReference)
.join(Asset, Asset.id == AssetReference.asset_id) .join(Asset, Asset.id == AssetReference.asset_id)
.where(build_visible_owner_clause(owner_id)) .where(build_visible_owner_clause(owner_id))
.where(AssetReference.is_missing == False) # noqa: E712
.options(noload(AssetReference.tags)) .options(noload(AssetReference.tags))
) )
@ -336,6 +337,7 @@ def list_references_page(
.select_from(AssetReference) .select_from(AssetReference)
.join(Asset, Asset.id == AssetReference.asset_id) .join(Asset, Asset.id == AssetReference.asset_id)
.where(build_visible_owner_clause(owner_id)) .where(build_visible_owner_clause(owner_id))
.where(AssetReference.is_missing == False) # noqa: E712
) )
if name_contains: if name_contains:
escaped, esc = escape_sql_like_string(name_contains) escaped, esc = escape_sql_like_string(name_contains)

View File

@ -315,11 +315,13 @@ def list_tags_with_usage(
escaped, esc = escape_sql_like_string(prefix.strip().lower()) escaped, esc = escape_sql_like_string(prefix.strip().lower())
total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc)) total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc))
if not include_zero: if not include_zero:
total_q = total_q.where( visible_tags_sq = (
Tag.name.in_( select(AssetReferenceTag.tag_name)
select(AssetReferenceTag.tag_name).group_by(AssetReferenceTag.tag_name) .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
) .where(build_visible_owner_clause(owner_id))
.group_by(AssetReferenceTag.tag_name)
) )
total_q = total_q.where(Tag.name.in_(visible_tags_sq))
rows = (session.execute(q.limit(limit).offset(offset))).all() rows = (session.execute(q.limit(limit).offset(offset))).all()
total = (session.execute(total_q)).scalar_one() total = (session.execute(total_q)).scalar_one()

View File

@ -329,8 +329,6 @@ def build_asset_specs(
logging.warning("Failed to hash %s: %s", abs_p, e) logging.warning("Failed to hash %s: %s", abs_p, e)
mime_type = metadata.content_type if metadata else None mime_type = metadata.content_type if metadata else None
if mime_type is None:
pass
specs.append( specs.append(
{ {
"abs_path": abs_p, "abs_path": abs_p,

View File

@ -247,8 +247,8 @@ class AssetSeeder:
return True return True
def resume(self) -> bool: def resume(self) -> bool:
"""Resume a paused scan. """Resume a paused scan.
This is a noop if the scan is not in the PAUSED state This is a noop if the scan is not in the PAUSED state
Returns: Returns:
@ -260,8 +260,8 @@ class AssetSeeder:
logging.info("Asset seeder resuming") logging.info("Asset seeder resuming")
self._state = State.RUNNING self._state = State.RUNNING
self._pause_event.set() self._pause_event.set()
self._emit_event("assets.seed.resumed", {}) self._emit_event("assets.seed.resumed", {})
return True return True
def restart( def restart(
self, self,

View File

@ -287,15 +287,19 @@ def resolve_asset_for_download(
f"(asset id={asset.id}, name={ref.name})" f"(asset id={asset.id}, name={ref.name})"
) )
# Capture ORM attributes before commit (commit expires loaded objects)
ref_name = ref.name
asset_mime = asset.mime_type
update_reference_access_time(session, reference_id=reference_id) update_reference_access_time(session, reference_id=reference_id)
session.commit() session.commit()
ctype = ( ctype = (
asset.mime_type asset_mime
or mimetypes.guess_type(ref.name or abs_path)[0] or mimetypes.guess_type(ref_name or abs_path)[0]
or "application/octet-stream" or "application/octet-stream"
) )
download_name = ref.name or os.path.basename(abs_path) download_name = ref_name or os.path.basename(abs_path)
return DownloadResolutionResult( return DownloadResolutionResult(
abs_path=abs_path, abs_path=abs_path,
content_type=ctype, content_type=ctype,

View File

@ -50,5 +50,4 @@ def _hash_file_obj(file_obj: IO, chunk_size: int = DEFAULT_CHUNK) -> str:
h.update(chunk) h.update(chunk)
return h.hexdigest() return h.hexdigest()
finally: finally:
if orig_pos != 0: file_obj.seek(orig_pos)
file_obj.seek(orig_pos)

View File

@ -283,6 +283,8 @@ def upload_from_temp_path(
created_new=False, created_new=False,
) )
if not tags:
raise ValueError("tags are required for new asset uploads")
base_dir, subdirs = resolve_destination_from_tags(tags) base_dir, subdirs = resolve_destination_from_tags(tags)
dest_dir = os.path.join(base_dir, *subdirs) if subdirs else base_dir dest_dir = os.path.join(base_dir, *subdirs) if subdirs else base_dir
os.makedirs(dest_dir, exist_ok=True) os.makedirs(dest_dir, exist_ok=True)

View File

@ -325,8 +325,6 @@ def extract_file_metadata(
_register_custom_mime_types() _register_custom_mime_types()
mime_type, _ = mimetypes.guess_type(abs_path) mime_type, _ = mimetypes.guess_type(abs_path)
meta.content_type = mime_type meta.content_type = mime_type
if mime_type is None:
pass
# Size from stat # Size from stat
if stat_result is None: if stat_result is None:

View File

@ -54,10 +54,11 @@ def validate_path_within_base(candidate: str, base: str) -> None:
cand_abs = os.path.abspath(candidate) cand_abs = os.path.abspath(candidate)
base_abs = os.path.abspath(base) base_abs = os.path.abspath(base)
try: try:
if os.path.commonpath([cand_abs, base_abs]) != base_abs: common = os.path.commonpath([cand_abs, base_abs])
raise ValueError("destination escapes base directory")
except Exception: except Exception:
raise ValueError("invalid destination path") raise ValueError("invalid destination path")
if common != base_abs:
raise ValueError("destination escapes base directory")
def compute_relative_filename(file_path: str) -> str | None: def compute_relative_filename(file_path: str) -> str | None:

View File

@ -259,9 +259,9 @@ def prompt_worker(q, server_instance):
extra_data[k] = sensitive[k] extra_data[k] = sensitive[k]
asset_seeder.pause() asset_seeder.pause()
e.execute(item[2], prompt_id, extra_data, item[4]) e.execute(item[2], prompt_id, extra_data, item[4])
asset_seeder.resume() asset_seeder.resume()
need_gc = True need_gc = True