From f2ea0bc22c74ca0158c39ffc64a4baa6058798a5 Mon Sep 17 00:00:00 2001
From: bigcat88
Date: Sun, 24 Aug 2025 14:15:21 +0300
Subject: [PATCH] added create_asset_from_hash endpoint

---
 app/api/assets_routes.py |  30 ++++++++++++
 app/api/schemas_in.py    |  41 +++++++++++++++-
 app/api/schemas_out.py   |  20 ++++++++
 app/assets_manager.py    | 101 ++++++++++++++++++++++++++++++---------
 app/database/services.py |  54 +++++++++++++++++++++
 5 files changed, 221 insertions(+), 25 deletions(-)

diff --git a/app/api/assets_routes.py b/app/api/assets_routes.py
index 014e324d7..636100998 100644
--- a/app/api/assets_routes.py
+++ b/app/api/assets_routes.py
@@ -11,6 +11,15 @@ from . import schemas_in
 
 ROUTES = web.RouteTableDef()
 
+@ROUTES.head("/api/assets/hash/{hash}")
+async def head_asset_by_hash(request: web.Request) -> web.Response:
+    hash_str = request.match_info.get("hash", "").strip().lower()
+    if not hash_str or ":" not in hash_str:
+        return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
+    exists = await assets_manager.asset_exists(asset_hash=hash_str)
+    return web.Response(status=200 if exists else 404)
+
+
 @ROUTES.get("/api/assets")
 async def list_assets(request: web.Request) -> web.Response:
     query_dict = dict(request.rel_url.query)
@@ -95,6 +104,27 @@ async def update_asset(request: web.Request) -> web.Response:
     return web.json_response(result.model_dump(mode="json"), status=200)
 
 
+@ROUTES.post("/api/assets/from-hash")
+async def create_asset_from_hash(request: web.Request) -> web.Response:
+    try:
+        payload = await request.json()
+        body = schemas_in.CreateFromHashBody.model_validate(payload)
+    except ValidationError as ve:
+        return _validation_error_response("INVALID_BODY", ve)
+    except Exception:
+        return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
+
+    result = await assets_manager.create_asset_from_hash(
+        hash_str=body.hash,
+        name=body.name,
+        tags=body.tags,
+        user_metadata=body.user_metadata,
+    )
+    if result is None:
+        return _error_response(404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist")
+    return web.json_response(result.model_dump(mode="json"), status=201)
+
+
 @ROUTES.delete("/api/assets/{id}")
 async def delete_asset(request: web.Request) -> web.Response:
     asset_info_id_raw = request.match_info.get("id")
diff --git a/app/api/schemas_in.py b/app/api/schemas_in.py
index 4e0eb6253..0f07bf19d 100644
--- a/app/api/schemas_in.py
+++ b/app/api/schemas_in.py
@@ -1,4 +1,4 @@
-from __future__ import annotations
+import json
 
 from typing import Any, Optional, Literal
 from pydantic import BaseModel, Field, ConfigDict, field_validator, model_validator, conint
@@ -40,7 +40,6 @@ class ListAssetsQuery(BaseModel):
         if v is None or isinstance(v, dict):
             return v
         if isinstance(v, str) and v.strip():
-            import json
             try:
                 parsed = json.loads(v)
             except Exception as e:
@@ -66,6 +65,44 @@
         return self
 
 
+class CreateFromHashBody(BaseModel):
+    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
+
+    hash: str
+    name: str
+    tags: list[str] = Field(default_factory=list)
+    user_metadata: dict[str, Any] = Field(default_factory=dict)
+
+    @field_validator("hash")
+    @classmethod
+    def _require_blake3(cls, v):
+        s = (v or "").strip().lower()
+        if ":" not in s:
+            raise ValueError("hash must be 'blake3:<hex>'")
+        algo, digest = s.split(":", 1)
+        if algo != "blake3":
+            raise ValueError("only canonical 'blake3:<hex>' is accepted here")
+        if not digest or any(c for c in digest if c not in "0123456789abcdef"):
+            raise ValueError("hash digest must be lowercase hex")
+        return s
+
+    @field_validator("tags", mode="before")
+    @classmethod
+    def _tags_norm(cls, v):
+        if v is None:
+            return []
+        if isinstance(v, list):
+            out = [str(t).strip().lower() for t in v if str(t).strip()]
+            seen = set(); dedup = []
+            for t in out:
+                if t not in seen:
+                    seen.add(t); dedup.append(t)
+            return dedup
+        if isinstance(v, str):
+            return [t.strip().lower() for t in v.split(",") if t.strip()]
+        return []
+
+
 class TagsListQuery(BaseModel):
     model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)
 
diff --git a/app/api/schemas_out.py b/app/api/schemas_out.py
index f86da3523..0a71b8bc9 100644
--- a/app/api/schemas_out.py
+++ b/app/api/schemas_out.py
@@ -43,6 +43,26 @@ class AssetUpdated(BaseModel):
         return v.isoformat() if v else None
 
 
+class AssetCreated(BaseModel):
+    id: int
+    name: str
+    asset_hash: str
+    size: Optional[int] = None
+    mime_type: Optional[str] = None
+    tags: list[str] = Field(default_factory=list)
+    user_metadata: dict[str, Any] = Field(default_factory=dict)
+    preview_hash: Optional[str] = None
+    created_at: Optional[datetime] = None
+    last_access_time: Optional[datetime] = None
+    created_new: bool
+
+    model_config = ConfigDict(from_attributes=True)
+
+    @field_serializer("created_at", "last_access_time")
+    def _ser_dt(self, v: Optional[datetime], _info):
+        return v.isoformat() if v else None
+
+
 class TagUsage(BaseModel):
     name: str
     count: int
diff --git a/app/assets_manager.py b/app/assets_manager.py
index 2c07db4b2..f92232a3d 100644
--- a/app/assets_manager.py
+++ b/app/assets_manager.py
@@ -20,10 +20,18 @@ from .database.services import (
     fetch_asset_info_and_asset,
     touch_asset_info_by_id,
     delete_asset_info_by_id,
+    asset_exists_by_hash,
+    get_asset_by_hash,
+    create_asset_info_for_existing_asset,
 )
 from .api import schemas_out
 
 
+async def asset_exists(*, asset_hash: str) -> bool:
+    async with await create_session() as session:
+        return await asset_exists_by_hash(session, asset_hash=asset_hash)
+
+
 def populate_db_with_asset(tags: list[str], file_name: str, file_path: str) -> None:
     if not args.disable_model_processing:
         async_to_sync.AsyncToSyncConverter.run_async_in_thread(
@@ -69,14 +77,14 @@
 
 async def list_assets(
     *,
-    include_tags: Sequence[str] | None = None,
-    exclude_tags: Sequence[str] | None = None,
+    include_tags: Optional[Sequence[str]] = None,
+    exclude_tags: Optional[Sequence[str]] = None,
     name_contains: Optional[str] = None,
     metadata_filter: Optional[dict] = None,
     limit: int = 20,
     offset: int = 0,
-    sort: str | None = "created_at",
-    order: str | None = "desc",
+    sort: str = "created_at",
+    order: str = "desc",
 ) -> schemas_out.AssetsList:
     sort = _safe_sort_field(sort)
     order = "desc" if (order or "desc").lower() not in {"asc", "desc"} else order.lower()
@@ -157,9 +165,9 @@ async def resolve_asset_content_for_download(
 async def update_asset(
     *,
     asset_info_id: int,
-    name: str | None = None,
-    tags: list[str] | None = None,
-    user_metadata: dict | None = None,
+    name: Optional[str] = None,
+    tags: Optional[list[str]] = None,
+    user_metadata: Optional[dict] = None,
 ) -> schemas_out.AssetUpdated:
     async with await create_session() as session:
         info = await update_asset_info_full(
@@ -192,9 +200,49 @@ async def delete_asset_reference(*, asset_info_id: int) -> bool:
     return r
 
+async def create_asset_from_hash(
+    *,
+    hash_str: str,
+    name: str,
+    tags: Optional[list[str]] = None,
+    user_metadata: Optional[dict] = None,
+) -> Optional[schemas_out.AssetCreated]:
+    canonical = hash_str.strip().lower()
+    async with await create_session() as session:
+        asset = await get_asset_by_hash(session, asset_hash=canonical)
+        if not asset:
+            return None
+
+        info = await create_asset_info_for_existing_asset(
+            session,
+            asset_hash=canonical,
+            name=_safe_filename(name, fallback=canonical.split(":", 1)[1]),
+            user_metadata=user_metadata or {},
+            tags=tags or [],
+            tag_origin="manual",
+            added_by=None,
+        )
+        tag_names = await get_asset_tags(session, asset_info_id=info.id)
+        await session.commit()
+
+    return schemas_out.AssetCreated(
+        id=info.id,
+        name=info.name,
+        asset_hash=info.asset_hash,
+        size=int(asset.size_bytes),
+        mime_type=asset.mime_type,
+        tags=tag_names,
+        user_metadata=info.user_metadata or {},
+        preview_hash=info.preview_hash,
+        created_at=info.created_at,
+        last_access_time=info.last_access_time,
+        created_new=False,
+    )
+
+
 async def list_tags(
     *,
-    prefix: str | None = None,
+    prefix: Optional[str] = None,
     limit: int = 100,
     offset: int = 0,
     order: str = "count_desc",
 ) -> schemas_out.TagsList:
@@ -217,26 +265,12 @@
     return schemas_out.TagsList(tags=tags, total=total, has_more=(offset + len(tags)) < total)
 
 
-def _safe_sort_field(requested: str | None) -> str:
-    if not requested:
-        return "created_at"
-    v = requested.lower()
-    if v in {"name", "created_at", "updated_at", "size", "last_access_time"}:
-        return v
-    return "created_at"
-
-
-def _get_size_mtime_ns(path: str) -> tuple[int, int]:
-    st = os.stat(path, follow_symlinks=True)
-    return st.st_size, getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000))
-
-
 async def add_tags_to_asset(
     *,
     asset_info_id: int,
     tags: list[str],
     origin: str = "manual",
-    added_by: str | None = None,
+    added_by: Optional[str] = None,
 ) -> schemas_out.TagsAdd:
     async with await create_session() as session:
         data = await add_tags_to_asset_info(
@@ -264,3 +298,24 @@ async def remove_tags_from_asset(
     )
     await session.commit()
     return schemas_out.TagsRemove(**data)
+
+
+def _safe_sort_field(requested: Optional[str]) -> str:
+    if not requested:
+        return "created_at"
+    v = requested.lower()
+    if v in {"name", "created_at", "updated_at", "size", "last_access_time"}:
+        return v
+    return "created_at"
+
+
+def _get_size_mtime_ns(path: str) -> tuple[int, int]:
+    st = os.stat(path, follow_symlinks=True)
+    return st.st_size, getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000))
+
+
+def _safe_filename(name: Optional[str], fallback: str) -> str:
+    n = os.path.basename((name or "").strip() or fallback)
+    if n:
+        return n
+    return fallback
diff --git a/app/database/services.py b/app/database/services.py
index 98a5ae624..b916a2055 100644
--- a/app/database/services.py
+++ b/app/database/services.py
@@ -15,6 +15,20 @@ from .models import Asset, AssetInfo, AssetInfoTag, AssetLocatorState, Tag, Asse
 from .timeutil import utcnow
 
 
+async def asset_exists_by_hash(session: AsyncSession, *, asset_hash: str) -> bool:
+    row = (
+        await session.execute(
+            select(sa.literal(True)).select_from(Asset).where(Asset.hash == asset_hash).limit(1)
+        )
+    ).first()
+    return row is not None
+
+
+async def get_asset_by_hash(session: AsyncSession, *, asset_hash: str) -> Optional[Asset]:
+    return await session.get(Asset, asset_hash)
+
+
 async def check_fs_asset_exists_quick(
     session,
     *,
@@ -393,6 +407,46 @@ async def fetch_asset_info_and_asset(session: AsyncSession, *, asset_info_id: in
     return pair[0], pair[1]
 
 
+async def create_asset_info_for_existing_asset(
+    session: AsyncSession,
+    *,
+    asset_hash: str,
+    name: str,
+    user_metadata: Optional[dict] = None,
+    tags: Optional[Sequence[str]] = None,
+    tag_origin: str = "manual",
+    added_by: Optional[str] = None,
+) -> AssetInfo:
+    """Create a new AssetInfo referencing an existing Asset (no content write)."""
+    now = utcnow()
+    info = AssetInfo(
+        owner_id=None,
+        name=name,
+        asset_hash=asset_hash,
+        preview_hash=None,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(info)
+    await session.flush()  # get info.id
+
+    if user_metadata is not None:
+        await replace_asset_info_metadata_projection(
+            session, asset_info_id=info.id, user_metadata=user_metadata
+        )
+
+    if tags is not None:
+        await set_asset_info_tags(
+            session,
+            asset_info_id=info.id,
+            tags=tags,
+            origin=tag_origin,
+            added_by=added_by,
+        )
+    return info
+
+
 async def set_asset_info_tags(
     session: AsyncSession,
     *,