refactor: use explicit dataclasses instead of ORM objects in service layer
Some checks failed
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run
Build package / Build Test (3.10) (push) Has been cancelled
Build package / Build Test (3.11) (push) Has been cancelled
Build package / Build Test (3.12) (push) Has been cancelled
Build package / Build Test (3.13) (push) Has been cancelled
Build package / Build Test (3.14) (push) Has been cancelled

Replace dict/ORM object returns with explicit dataclasses to fix
DetachedInstanceError when accessing ORM attributes after session closes.

- Add app/assets/services/schemas.py with AssetData, AssetInfoData,
  AssetDetailResult, and RegisterAssetResult dataclasses
- Update asset_management.py and ingest.py to return dataclasses
- Update manager.py to use attribute access on dataclasses
- Fix created_new to be False in create_asset_from_hash (content exists)
- Add DependencyMissingError for better blake3 missing error handling
- Update tests to use attribute access instead of dict subscripting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Luke Mino-Altherr 2026-02-03 18:39:07 -08:00
parent ea01cd665d
commit e3b8e512ca
8 changed files with 169 additions and 69 deletions

View File

@ -12,6 +12,7 @@ from app.assets.api import schemas_in
from app.assets.api.schemas_in import (
AssetNotFoundError,
AssetValidationError,
DependencyMissingError,
HashMismatchError,
UploadError,
)
@ -205,6 +206,8 @@ async def upload_asset(request: web.Request) -> web.Response:
return _build_error_response(404, "ASSET_NOT_FOUND", str(e))
except HashMismatchError as e:
return _build_error_response(400, "HASH_MISMATCH", str(e))
except DependencyMissingError as e:
return _build_error_response(503, "DEPENDENCY_MISSING", e.message)
except Exception:
logging.exception("process_upload failed for owner_id=%s", owner_id)
return _build_error_response(500, "INTERNAL", "Unexpected server error.")

View File

@ -43,6 +43,14 @@ class HashMismatchError(Exception):
pass
class DependencyMissingError(Exception):
"""A required dependency is not installed."""
def __init__(self, message: str):
super().__init__(message)
self.message = message
@dataclass
class ParsedUpload:
"""Result of parsing a multipart upload request."""

View File

@ -21,6 +21,7 @@ from app.assets.api import schemas_out, schemas_in
from app.assets.api.schemas_in import (
AssetNotFoundError,
AssetValidationError,
DependencyMissingError,
HashMismatchError,
ParsedUpload,
)
@ -139,9 +140,8 @@ def get_asset(
if not result:
raise ValueError(f"AssetInfo {asset_info_id} not found")
info = result["info"]
asset = result["asset"]
tag_names = result["tags"]
info = result.info
asset = result.asset
return schemas_out.AssetDetail(
id=info.id,
@ -149,7 +149,7 @@ def get_asset(
asset_hash=asset.hash if asset else None,
size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None,
mime_type=asset.mime_type if asset else None,
tags=tag_names,
tags=result.tags,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
@ -189,6 +189,8 @@ def upload_asset_from_temp_path(
) -> schemas_out.AssetCreated:
try:
digest = hashing.compute_blake3_hash(temp_path)
except ImportError as e:
raise DependencyMissingError(str(e))
except Exception as e:
raise RuntimeError(f"failed to hash uploaded file: {e}")
asset_hash = "blake3:" + digest
@ -214,9 +216,8 @@ def upload_asset_from_temp_path(
tag_origin="manual",
owner_id=owner_id,
)
info = result["info"]
asset = result["asset"]
tag_names = result["tags"]
info = result.info
asset = result.asset
return schemas_out.AssetCreated(
id=info.id,
@ -224,7 +225,7 @@ def upload_asset_from_temp_path(
asset_hash=asset.hash,
size=int(asset.size_bytes) if asset.size_bytes is not None else None,
mime_type=asset.mime_type,
tags=tag_names,
tags=result.tags,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
@ -390,15 +391,14 @@ def update_asset(
tag_origin="manual",
owner_id=owner_id,
)
info = result["info"]
asset = result["asset"]
tag_names = result["tags"]
info = result.info
asset = result.asset
return schemas_out.AssetUpdated(
id=info.id,
name=info.name,
asset_hash=asset.hash if asset else None,
tags=tag_names,
tags=result.tags,
user_metadata=info.user_metadata or {},
updated_at=info.updated_at,
)
@ -414,9 +414,8 @@ def set_asset_preview(
preview_asset_id=preview_asset_id,
owner_id=owner_id,
)
info = result["info"]
asset = result["asset"]
tags = result["tags"]
info = result.info
asset = result.asset
return schemas_out.AssetDetail(
id=info.id,
@ -424,7 +423,7 @@ def set_asset_preview(
asset_hash=asset.hash if asset else None,
size=int(asset.size_bytes) if asset and asset.size_bytes is not None else None,
mime_type=asset.mime_type if asset else None,
tags=tags,
tags=result.tags,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
@ -462,22 +461,23 @@ def create_asset_from_hash(
tag_origin="manual",
owner_id=owner_id,
)
info = result["info"]
asset = result["asset"]
tag_names = result["tags"]
info = result.info
asset = result.asset
# created_new indicates whether new CONTENT was created, not whether a new AssetInfo was created.
# Since we're referencing an existing hash, the content already exists.
return schemas_out.AssetCreated(
id=info.id,
name=info.name,
asset_hash=asset.hash,
size=int(asset.size_bytes),
mime_type=asset.mime_type,
tags=tag_names,
tags=result.tags,
user_metadata=info.user_metadata or {},
preview_id=info.preview_id,
created_at=info.created_at,
last_access_time=info.last_access_time,
created_new=result["created"],
created_new=False,
)

View File

@ -15,6 +15,11 @@ from app.assets.database.models import Asset
from app.database.db import create_session
from app.assets.helpers import select_best_live_path, get_utc_now
from app.assets.services.path_utils import compute_relative_filename
from app.assets.services.schemas import (
AssetDetailResult,
extract_asset_data,
extract_info_data,
)
from app.assets.database.queries import (
asset_info_exists_for_asset_id,
delete_asset_info_by_id,
@ -30,10 +35,10 @@ from app.assets.database.queries import (
def get_asset_detail(
asset_info_id: str,
owner_id: str = "",
) -> dict | None:
) -> AssetDetailResult | None:
"""
Fetch full asset details including tags.
Returns dict with info, asset, and tags, or None if not found.
Returns AssetDetailResult or None if not found.
"""
with create_session() as session:
result = fetch_asset_info_asset_and_tags(
@ -45,11 +50,11 @@ def get_asset_detail(
return None
info, asset, tags = result
return {
"info": info,
"asset": asset,
"tags": tags,
}
return AssetDetailResult(
info=extract_info_data(info),
asset=extract_asset_data(asset),
tags=tags,
)
def update_asset_metadata(
@ -59,10 +64,10 @@ def update_asset_metadata(
user_metadata: dict | None = None,
tag_origin: str = "manual",
owner_id: str = "",
) -> dict:
) -> AssetDetailResult:
"""
Update name, tags, and/or metadata on an AssetInfo.
Returns updated info dict with tags.
Returns AssetDetailResult with updated data.
"""
with create_session() as session:
info = get_asset_info_by_id(session, asset_info_id=asset_info_id)
@ -115,17 +120,19 @@ def update_asset_metadata(
asset_info_id=asset_info_id,
owner_id=owner_id,
)
session.commit()
if not result:
raise RuntimeError("State changed during update")
info, asset, tag_list = result
return {
"info": info,
"asset": asset,
"tags": tag_list,
}
# Extract plain data before session closes
detail = AssetDetailResult(
info=extract_info_data(info),
asset=extract_asset_data(asset),
tags=tag_list,
)
session.commit()
return detail
def delete_asset_reference(
@ -179,10 +186,10 @@ def set_asset_preview(
asset_info_id: str,
preview_asset_id: str | None = None,
owner_id: str = "",
) -> dict:
) -> AssetDetailResult:
"""
Set or clear preview_id on an AssetInfo.
Returns updated asset detail dict.
Returns AssetDetailResult with updated data.
"""
with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
@ -204,13 +211,15 @@ def set_asset_preview(
raise RuntimeError("State changed during preview update")
info, asset, tags = result
# Extract plain data before session closes
detail = AssetDetailResult(
info=extract_info_data(info),
asset=extract_asset_data(asset),
tags=tags,
)
session.commit()
return {
"info": info,
"asset": asset,
"tags": tags,
}
return detail
def _compute_filename_for_asset(session, asset_id: str) -> str | None:

View File

@ -15,6 +15,11 @@ from app.assets.database.models import Asset, Tag
from app.database.db import create_session
from app.assets.helpers import normalize_tags, select_best_live_path
from app.assets.services.path_utils import compute_relative_filename
from app.assets.services.schemas import (
RegisterAssetResult,
extract_asset_data,
extract_info_data,
)
from app.assets.database.queries import (
get_asset_by_hash,
get_or_create_asset_info,
@ -143,11 +148,11 @@ def register_existing_asset(
tags: list[str] | None = None,
tag_origin: str = "manual",
owner_id: str = "",
) -> dict:
) -> RegisterAssetResult:
"""
Create or return existing AssetInfo for an asset that already exists by hash.
Returns dict with asset and info details, or raises ValueError if hash not found.
Returns RegisterAssetResult with plain data.
Raises ValueError if hash not found.
"""
with create_session() as session:
asset = get_asset_by_hash(session, asset_hash=asset_hash)
@ -165,13 +170,15 @@ def register_existing_asset(
if not info_created:
# Return existing info
tag_names = get_asset_tags(session, asset_info_id=info.id)
# Extract plain data before session closes
result = RegisterAssetResult(
info=extract_info_data(info),
asset=extract_asset_data(asset),
tags=tag_names,
created=False,
)
session.commit()
return {
"info": info,
"asset": asset,
"tags": tag_names,
"created": False,
}
return result
# New info - apply metadata and tags
new_meta = dict(user_metadata or {})
@ -195,14 +202,18 @@ def register_existing_asset(
)
tag_names = get_asset_tags(session, asset_info_id=info.id)
# Refresh to get updated metadata after set_asset_info_metadata
session.refresh(info)
# Extract plain data before session closes
result = RegisterAssetResult(
info=extract_info_data(info),
asset=extract_asset_data(asset),
tags=tag_names,
created=True,
)
session.commit()
return {
"info": info,
"asset": asset,
"tags": tag_names,
"created": True,
}
return result
def _validate_tags_exist(session, tags: list[str]) -> None:

View File

@ -0,0 +1,69 @@
"""
Service layer data transfer objects.
These dataclasses represent the data returned by service functions,
providing explicit types instead of raw dicts or ORM objects.
"""
from dataclasses import dataclass
from datetime import datetime
@dataclass(frozen=True)
class AssetData:
"""Plain data extracted from an Asset ORM object."""
hash: str
size_bytes: int | None
mime_type: str | None
@dataclass(frozen=True)
class AssetInfoData:
"""Plain data extracted from an AssetInfo ORM object."""
id: str
name: str
user_metadata: dict | None
preview_id: str | None
created_at: datetime
updated_at: datetime
last_access_time: datetime | None
@dataclass(frozen=True)
class AssetDetailResult:
"""Result from get_asset_detail and similar operations."""
info: AssetInfoData
asset: AssetData | None
tags: list[str]
@dataclass(frozen=True)
class RegisterAssetResult:
"""Result from register_existing_asset."""
info: AssetInfoData
asset: AssetData
tags: list[str]
created: bool
def extract_info_data(info) -> AssetInfoData:
"""Extract plain data from an AssetInfo ORM object."""
return AssetInfoData(
id=info.id,
name=info.name,
user_metadata=info.user_metadata,
preview_id=info.preview_id,
created_at=info.created_at,
updated_at=info.updated_at,
last_access_time=info.last_access_time,
)
def extract_asset_data(asset) -> AssetData | None:
"""Extract plain data from an Asset ORM object."""
if asset is None:
return None
return AssetData(
hash=asset.hash,
size_bytes=asset.size_bytes,
mime_type=asset.mime_type,
)

View File

@ -55,9 +55,9 @@ class TestGetAssetDetail:
result = get_asset_detail(asset_info_id=info.id)
assert result is not None
assert result["info"].id == info.id
assert result["asset"].id == asset.id
assert set(result["tags"]) == {"alpha", "beta"}
assert result.info.id == info.id
assert result.asset.hash == asset.hash
assert set(result.tags) == {"alpha", "beta"}
def test_respects_owner_visibility(self, mock_create_session, session: Session):
asset = _make_asset(session)
@ -102,8 +102,8 @@ class TestUpdateAssetMetadata:
tags=["new1", "new2"],
)
assert set(result["tags"]) == {"new1", "new2"}
assert "old" not in result["tags"]
assert set(result.tags) == {"new1", "new2"}
assert "old" not in result.tags
def test_updates_user_metadata(self, mock_create_session, session: Session):
asset = _make_asset(session)

View File

@ -163,8 +163,8 @@ class TestRegisterExistingAsset:
tags=["models"],
)
assert result["created"] is True
assert "models" in result["tags"]
assert result.created is True
assert "models" in result.tags
# Verify by re-fetching from DB
session.expire_all()
@ -197,7 +197,7 @@ class TestRegisterExistingAsset:
owner_id="",
)
assert result["created"] is False
assert result.created is False
# Verify only one AssetInfo exists for this name
session.expire_all()
@ -223,5 +223,5 @@ class TestRegisterExistingAsset:
tags=["alpha", "beta"],
)
assert result["created"] is True
assert set(result["tags"]) == {"alpha", "beta"}
assert result.created is True
assert set(result.tags) == {"alpha", "beta"}