fix(assets): fix 12 bugs found during code review

- Fix _hash_file_obj not restoring file position when orig_pos == 0
- Fix validate_path_within_base swallowing its own ValueError
- Guard against tags=None crash in upload_from_temp_path
- Filter is_missing references from list_references_page results
- Include Decimal in metadata filter isinstance check
- Return 403 instead of 404 for PermissionError in update/add_tags routes
- Re-disable seeder after non-blocking start with --disable-assets-autoscan
- Remove duplicate HashMismatchError/DependencyMissingError from schemas_in
- Move _emit_event outside lock in seeder resume() to prevent deadlock
- Fix total_q ignoring owner_id in list_tags_with_usage
- Capture ORM attributes before commit in resolve_asset_for_download
- Remove dead code (if mime_type is None: pass)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Luke Mino-Altherr 2026-02-23 17:03:00 -08:00
parent 196959472a
commit 6436190143
12 changed files with 43 additions and 40 deletions

View File

@ -433,7 +433,11 @@ async def update_asset_route(request: web.Request) -> web.Response:
user_metadata=result.ref.user_metadata or {},
updated_at=result.ref.updated_at,
)
except (ValueError, PermissionError) as ve:
except PermissionError as pe:
return _build_error_response(
403, "FORBIDDEN", str(pe), {"id": reference_id}
)
except ValueError as ve:
return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
)
@ -544,7 +548,11 @@ async def add_asset_tags(request: web.Request) -> web.Response:
already_present=result.already_present,
total_tags=result.total_tags,
)
except (ValueError, PermissionError) as ve:
except PermissionError as pe:
return _build_error_response(
403, "FORBIDDEN", str(pe), {"id": reference_id}
)
except ValueError as ve:
return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
)
@ -659,6 +667,11 @@ async def seed_assets(request: web.Request) -> web.Response:
status=200,
)
# Re-disable after starting: the running thread doesn't check _disabled,
# so this only prevents new scans from auto-starting while this one runs.
if was_disabled:
asset_seeder.disable()
return web.json_response({"status": "started"}, status=202)

View File

@ -39,22 +39,6 @@ class AssetNotFoundError(Exception):
self.message = message
class HashMismatchError(Exception):
"""Uploaded file hash does not match provided hash."""
def __init__(self, message: str):
super().__init__(message)
self.message = message
class DependencyMissingError(Exception):
"""A required dependency is not installed."""
def __init__(self, message: str):
super().__init__(message)
self.message = message
@dataclass
class ParsedUpload:
"""Result of parsing a multipart upload request."""

View File

@ -141,7 +141,7 @@ def _apply_metadata_filter(
if isinstance(value, bool):
return _exists_for_pred(key, AssetReferenceMeta.val_bool == bool(value))
if isinstance(value, (int, float)):
if isinstance(value, (int, float, Decimal)):
num = value if isinstance(value, Decimal) else Decimal(str(value))
return _exists_for_pred(key, AssetReferenceMeta.val_num == num)
if isinstance(value, str):
@ -307,6 +307,7 @@ def list_references_page(
select(AssetReference)
.join(Asset, Asset.id == AssetReference.asset_id)
.where(build_visible_owner_clause(owner_id))
.where(AssetReference.is_missing == False) # noqa: E712
.options(noload(AssetReference.tags))
)
@ -336,6 +337,7 @@ def list_references_page(
.select_from(AssetReference)
.join(Asset, Asset.id == AssetReference.asset_id)
.where(build_visible_owner_clause(owner_id))
.where(AssetReference.is_missing == False) # noqa: E712
)
if name_contains:
escaped, esc = escape_sql_like_string(name_contains)

View File

@ -315,11 +315,13 @@ def list_tags_with_usage(
escaped, esc = escape_sql_like_string(prefix.strip().lower())
total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc))
if not include_zero:
total_q = total_q.where(
Tag.name.in_(
select(AssetReferenceTag.tag_name).group_by(AssetReferenceTag.tag_name)
)
visible_tags_sq = (
select(AssetReferenceTag.tag_name)
.join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
.where(build_visible_owner_clause(owner_id))
.group_by(AssetReferenceTag.tag_name)
)
total_q = total_q.where(Tag.name.in_(visible_tags_sq))
rows = (session.execute(q.limit(limit).offset(offset))).all()
total = (session.execute(total_q)).scalar_one()

View File

@ -329,8 +329,6 @@ def build_asset_specs(
logging.warning("Failed to hash %s: %s", abs_p, e)
mime_type = metadata.content_type if metadata else None
if mime_type is None:
pass
specs.append(
{
"abs_path": abs_p,

View File

@ -247,8 +247,8 @@ class AssetSeeder:
return True
def resume(self) -> bool:
"""Resume a paused scan.
"""Resume a paused scan.
This is a noop if the scan is not in the PAUSED state
Returns:
@ -260,8 +260,8 @@ class AssetSeeder:
logging.info("Asset seeder resuming")
self._state = State.RUNNING
self._pause_event.set()
self._emit_event("assets.seed.resumed", {})
return True
self._emit_event("assets.seed.resumed", {})
return True
def restart(
self,

View File

@ -287,15 +287,19 @@ def resolve_asset_for_download(
f"(asset id={asset.id}, name={ref.name})"
)
# Capture ORM attributes before commit (commit expires loaded objects)
ref_name = ref.name
asset_mime = asset.mime_type
update_reference_access_time(session, reference_id=reference_id)
session.commit()
ctype = (
asset.mime_type
or mimetypes.guess_type(ref.name or abs_path)[0]
asset_mime
or mimetypes.guess_type(ref_name or abs_path)[0]
or "application/octet-stream"
)
download_name = ref.name or os.path.basename(abs_path)
download_name = ref_name or os.path.basename(abs_path)
return DownloadResolutionResult(
abs_path=abs_path,
content_type=ctype,

View File

@ -50,5 +50,4 @@ def _hash_file_obj(file_obj: IO, chunk_size: int = DEFAULT_CHUNK) -> str:
h.update(chunk)
return h.hexdigest()
finally:
if orig_pos != 0:
file_obj.seek(orig_pos)
file_obj.seek(orig_pos)

View File

@ -283,6 +283,8 @@ def upload_from_temp_path(
created_new=False,
)
if not tags:
raise ValueError("tags are required for new asset uploads")
base_dir, subdirs = resolve_destination_from_tags(tags)
dest_dir = os.path.join(base_dir, *subdirs) if subdirs else base_dir
os.makedirs(dest_dir, exist_ok=True)

View File

@ -325,8 +325,6 @@ def extract_file_metadata(
_register_custom_mime_types()
mime_type, _ = mimetypes.guess_type(abs_path)
meta.content_type = mime_type
if mime_type is None:
pass
# Size from stat
if stat_result is None:

View File

@ -54,10 +54,11 @@ def validate_path_within_base(candidate: str, base: str) -> None:
cand_abs = os.path.abspath(candidate)
base_abs = os.path.abspath(base)
try:
if os.path.commonpath([cand_abs, base_abs]) != base_abs:
raise ValueError("destination escapes base directory")
common = os.path.commonpath([cand_abs, base_abs])
except Exception:
raise ValueError("invalid destination path")
if common != base_abs:
raise ValueError("destination escapes base directory")
def compute_relative_filename(file_path: str) -> str | None:

View File

@ -259,9 +259,9 @@ def prompt_worker(q, server_instance):
extra_data[k] = sensitive[k]
asset_seeder.pause()
e.execute(item[2], prompt_id, extra_data, item[4])
asset_seeder.resume()
need_gc = True