Add create_asset_from_hash endpoint

This commit is contained in:
bigcat88 2025-08-24 14:15:21 +03:00
parent 0755e5320a
commit f2ea0bc22c
No known key found for this signature in database
GPG Key ID: 1F0BF0EC3CF22721
5 changed files with 221 additions and 25 deletions

View File

@ -11,6 +11,15 @@ from . import schemas_in
ROUTES = web.RouteTableDef() ROUTES = web.RouteTableDef()
@ROUTES.head("/api/assets/hash/{hash}")
async def head_asset_by_hash(request: web.Request) -> web.Response:
    """HEAD /api/assets/hash/{hash}: 200 when content with this hash exists, 404 otherwise.

    Returns 400 when the hash is not in the canonical ``blake3:<hex>`` form.
    """
    hash_str = request.match_info.get("hash", "").strip().lower()
    algo, _, digest = hash_str.partition(":")
    # Enforce the same canonical form as CreateFromHashBody._require_blake3.
    # The previous check only required a ':' to be present, so values such as
    # 'md5:xyz' slipped through even though the error message promises
    # 'blake3:<hex>'.
    if algo != "blake3" or not digest or any(c not in "0123456789abcdef" for c in digest):
        return _error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
    exists = await assets_manager.asset_exists(asset_hash=hash_str)
    return web.Response(status=200 if exists else 404)
@ROUTES.get("/api/assets") @ROUTES.get("/api/assets")
async def list_assets(request: web.Request) -> web.Response: async def list_assets(request: web.Request) -> web.Response:
query_dict = dict(request.rel_url.query) query_dict = dict(request.rel_url.query)
@ -95,6 +104,27 @@ async def update_asset(request: web.Request) -> web.Response:
return web.json_response(result.model_dump(mode="json"), status=200) return web.json_response(result.model_dump(mode="json"), status=200)
@ROUTES.post("/api/assets/from-hash")
async def create_asset_from_hash(request: web.Request) -> web.Response:
    """POST /api/assets/from-hash: register a new asset reference for existing content.

    Responds 400 on malformed JSON or an invalid body, 404 when no content with
    the requested hash is stored, and 201 with the created record otherwise.
    """
    try:
        body = schemas_in.CreateFromHashBody.model_validate(await request.json())
    except ValidationError as exc:
        return _validation_error_response("INVALID_BODY", exc)
    except Exception:
        return _error_response(400, "INVALID_JSON", "Request body must be valid JSON.")

    created = await assets_manager.create_asset_from_hash(
        hash_str=body.hash,
        name=body.name,
        tags=body.tags,
        user_metadata=body.user_metadata,
    )
    if created is None:
        return _error_response(404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist")
    return web.json_response(created.model_dump(mode="json"), status=201)
@ROUTES.delete("/api/assets/{id}") @ROUTES.delete("/api/assets/{id}")
async def delete_asset(request: web.Request) -> web.Response: async def delete_asset(request: web.Request) -> web.Response:
asset_info_id_raw = request.match_info.get("id") asset_info_id_raw = request.match_info.get("id")

View File

@ -1,4 +1,4 @@
from __future__ import annotations import json
from typing import Any, Optional, Literal from typing import Any, Optional, Literal
from pydantic import BaseModel, Field, ConfigDict, field_validator, model_validator, conint from pydantic import BaseModel, Field, ConfigDict, field_validator, model_validator, conint
@ -40,7 +40,6 @@ class ListAssetsQuery(BaseModel):
if v is None or isinstance(v, dict): if v is None or isinstance(v, dict):
return v return v
if isinstance(v, str) and v.strip(): if isinstance(v, str) and v.strip():
import json
try: try:
parsed = json.loads(v) parsed = json.loads(v)
except Exception as e: except Exception as e:
@ -66,6 +65,44 @@ class UpdateAssetBody(BaseModel):
return self return self
class CreateFromHashBody(BaseModel):
    """Request body for POST /api/assets/from-hash."""

    model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

    hash: str
    name: str
    tags: list[str] = Field(default_factory=list)
    user_metadata: dict[str, Any] = Field(default_factory=dict)

    @field_validator("hash")
    @classmethod
    def _require_blake3(cls, v):
        # Normalize, then require the canonical 'blake3:<lowercase hex>' form.
        normalized = (v or "").strip().lower()
        if ":" not in normalized:
            raise ValueError("hash must be 'blake3:<hex>'")
        algo, digest = normalized.split(":", 1)
        if algo != "blake3":
            raise ValueError("only canonical 'blake3:<hex>' is accepted here")
        if not digest or not set(digest) <= set("0123456789abcdef"):
            raise ValueError("hash digest must be lowercase hex")
        return normalized

    @field_validator("tags", mode="before")
    @classmethod
    def _tags_norm(cls, v):
        # Accept None, a comma-separated string, or a list; normalize to a
        # lowercased, stripped, order-preserving de-duplicated list.
        if v is None:
            return []
        if isinstance(v, str):
            return [part.strip().lower() for part in v.split(",") if part.strip()]
        if isinstance(v, list):
            seen: set[str] = set()
            deduped: list[str] = []
            for item in v:
                normalized = str(item).strip().lower()
                if normalized and normalized not in seen:
                    seen.add(normalized)
                    deduped.append(normalized)
            return deduped
        return []
class TagsListQuery(BaseModel): class TagsListQuery(BaseModel):
model_config = ConfigDict(extra="ignore", str_strip_whitespace=True) model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

View File

@ -43,6 +43,26 @@ class AssetUpdated(BaseModel):
return v.isoformat() if v else None return v.isoformat() if v else None
class AssetCreated(BaseModel):
    """Response model for an asset reference created from an existing content hash.

    Populated by ``assets_manager.create_asset_from_hash`` and returned with
    HTTP 201 from ``POST /api/assets/from-hash``.
    """

    id: int
    name: str
    asset_hash: str  # canonical "blake3:<hex>" content hash
    size: Optional[int] = None  # content size in bytes (from asset.size_bytes)
    mime_type: Optional[str] = None
    tags: list[str] = Field(default_factory=list)
    user_metadata: dict[str, Any] = Field(default_factory=dict)
    preview_hash: Optional[str] = None
    created_at: Optional[datetime] = None
    last_access_time: Optional[datetime] = None
    created_new: bool  # False when the underlying content already existed

    # Allow constructing the model directly from ORM-style attribute objects.
    model_config = ConfigDict(from_attributes=True)

    @field_serializer("created_at", "last_access_time")
    def _ser_dt(self, v: Optional[datetime], _info):
        # Serialize datetimes as ISO-8601 strings; None passes through as null.
        return v.isoformat() if v else None
class TagUsage(BaseModel): class TagUsage(BaseModel):
name: str name: str
count: int count: int

View File

@ -20,10 +20,18 @@ from .database.services import (
fetch_asset_info_and_asset, fetch_asset_info_and_asset,
touch_asset_info_by_id, touch_asset_info_by_id,
delete_asset_info_by_id, delete_asset_info_by_id,
asset_exists_by_hash,
get_asset_by_hash,
create_asset_info_for_existing_asset,
) )
from .api import schemas_out from .api import schemas_out
async def asset_exists(*, asset_hash: str) -> bool:
    """Return True when an Asset row with ``asset_hash`` exists.

    Opens a fresh database session per call and delegates the actual query
    to ``asset_exists_by_hash``.
    """
    async with await create_session() as session:
        return await asset_exists_by_hash(session, asset_hash=asset_hash)
def populate_db_with_asset(tags: list[str], file_name: str, file_path: str) -> None: def populate_db_with_asset(tags: list[str], file_name: str, file_path: str) -> None:
if not args.disable_model_processing: if not args.disable_model_processing:
async_to_sync.AsyncToSyncConverter.run_async_in_thread( async_to_sync.AsyncToSyncConverter.run_async_in_thread(
@ -69,14 +77,14 @@ async def add_local_asset(tags: list[str], file_name: str, file_path: str) -> No
async def list_assets( async def list_assets(
*, *,
include_tags: Sequence[str] | None = None, include_tags: Optional[Sequence[str]] = None,
exclude_tags: Sequence[str] | None = None, exclude_tags: Optional[Sequence[str]] = None,
name_contains: Optional[str] = None, name_contains: Optional[str] = None,
metadata_filter: Optional[dict] = None, metadata_filter: Optional[dict] = None,
limit: int = 20, limit: int = 20,
offset: int = 0, offset: int = 0,
sort: str | None = "created_at", sort: str = "created_at",
order: str | None = "desc", order: str = "desc",
) -> schemas_out.AssetsList: ) -> schemas_out.AssetsList:
sort = _safe_sort_field(sort) sort = _safe_sort_field(sort)
order = "desc" if (order or "desc").lower() not in {"asc", "desc"} else order.lower() order = "desc" if (order or "desc").lower() not in {"asc", "desc"} else order.lower()
@ -157,9 +165,9 @@ async def resolve_asset_content_for_download(
async def update_asset( async def update_asset(
*, *,
asset_info_id: int, asset_info_id: int,
name: str | None = None, name: Optional[str] = None,
tags: list[str] | None = None, tags: Optional[list[str]] = None,
user_metadata: dict | None = None, user_metadata: Optional[dict] = None,
) -> schemas_out.AssetUpdated: ) -> schemas_out.AssetUpdated:
async with await create_session() as session: async with await create_session() as session:
info = await update_asset_info_full( info = await update_asset_info_full(
@ -192,9 +200,49 @@ async def delete_asset_reference(*, asset_info_id: int) -> bool:
return r return r
async def create_asset_from_hash(
    *,
    hash_str: str,
    name: str,
    tags: Optional[list[str]] = None,
    user_metadata: Optional[dict] = None,
) -> Optional[schemas_out.AssetCreated]:
    """Register a new AssetInfo for content already stored under ``hash_str``.

    Returns None when no Asset row matches the (normalized) hash; otherwise
    returns the created record serialized as AssetCreated. ``created_new`` is
    False because the content itself already existed — only a new reference
    (AssetInfo) is created here.
    """
    canonical = hash_str.strip().lower()
    async with await create_session() as session:
        asset = await get_asset_by_hash(session, asset_hash=canonical)
        if asset is None:
            return None

        # Fall back to the bare digest when the requested name is unusable.
        fallback_name = canonical.split(":", 1)[1]
        info = await create_asset_info_for_existing_asset(
            session,
            asset_hash=canonical,
            name=_safe_filename(name, fallback=fallback_name),
            user_metadata=user_metadata or {},
            tags=tags or [],
            tag_origin="manual",
            added_by=None,
        )
        # Re-read tags so the response reflects exactly what was persisted.
        tag_names = await get_asset_tags(session, asset_info_id=info.id)
        await session.commit()

        return schemas_out.AssetCreated(
            id=info.id,
            name=info.name,
            asset_hash=info.asset_hash,
            size=int(asset.size_bytes),
            mime_type=asset.mime_type,
            tags=tag_names,
            user_metadata=info.user_metadata or {},
            preview_hash=info.preview_hash,
            created_at=info.created_at,
            last_access_time=info.last_access_time,
            created_new=False,
        )
async def list_tags( async def list_tags(
*, *,
prefix: str | None = None, prefix: Optional[str] = None,
limit: int = 100, limit: int = 100,
offset: int = 0, offset: int = 0,
order: str = "count_desc", order: str = "count_desc",
@ -217,26 +265,12 @@ async def list_tags(
return schemas_out.TagsList(tags=tags, total=total, has_more=(offset + len(tags)) < total) return schemas_out.TagsList(tags=tags, total=total, has_more=(offset + len(tags)) < total)
def _safe_sort_field(requested: str | None) -> str:
if not requested:
return "created_at"
v = requested.lower()
if v in {"name", "created_at", "updated_at", "size", "last_access_time"}:
return v
return "created_at"
def _get_size_mtime_ns(path: str) -> tuple[int, int]:
st = os.stat(path, follow_symlinks=True)
return st.st_size, getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000))
async def add_tags_to_asset( async def add_tags_to_asset(
*, *,
asset_info_id: int, asset_info_id: int,
tags: list[str], tags: list[str],
origin: str = "manual", origin: str = "manual",
added_by: str | None = None, added_by: Optional[str] = None,
) -> schemas_out.TagsAdd: ) -> schemas_out.TagsAdd:
async with await create_session() as session: async with await create_session() as session:
data = await add_tags_to_asset_info( data = await add_tags_to_asset_info(
@ -264,3 +298,24 @@ async def remove_tags_from_asset(
) )
await session.commit() await session.commit()
return schemas_out.TagsRemove(**data) return schemas_out.TagsRemove(**data)
def _safe_sort_field(requested: Optional[str]) -> str:
if not requested:
return "created_at"
v = requested.lower()
if v in {"name", "created_at", "updated_at", "size", "last_access_time"}:
return v
return "created_at"
def _get_size_mtime_ns(path: str) -> tuple[int, int]:
st = os.stat(path, follow_symlinks=True)
return st.st_size, getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000))
def _safe_filename(name: Optional[str] , fallback: str) -> str:
n = os.path.basename((name or "").strip() or fallback)
if n:
return n
return fallback

View File

@ -15,6 +15,20 @@ from .models import Asset, AssetInfo, AssetInfoTag, AssetLocatorState, Tag, Asse
from .timeutil import utcnow from .timeutil import utcnow
async def asset_exists_by_hash(session: AsyncSession, *, asset_hash: str) -> bool:
    """Cheap existence probe: SELECT a literal TRUE for the hash, LIMIT 1.

    Avoids loading the full Asset row when only presence matters.
    """
    stmt = (
        select(sa.literal(True))
        .select_from(Asset)
        .where(Asset.hash == asset_hash)
        .limit(1)
    )
    result = await session.execute(stmt)
    return result.first() is not None
async def get_asset_by_hash(session: AsyncSession, *, asset_hash: str) -> Optional[Asset]:
    """Load the Asset identified by ``asset_hash``, or None when absent.

    Uses ``session.get()``, i.e. a primary-key lookup (the hash is the Asset
    primary key), which may return an already-loaded instance from the
    session's identity map without a round trip.
    """
    return await session.get(Asset, asset_hash)
async def check_fs_asset_exists_quick( async def check_fs_asset_exists_quick(
session, session,
*, *,
@ -393,6 +407,46 @@ async def fetch_asset_info_and_asset(session: AsyncSession, *, asset_info_id: in
return pair[0], pair[1] return pair[0], pair[1]
async def create_asset_info_for_existing_asset(
    session: AsyncSession,
    *,
    asset_hash: str,
    name: str,
    user_metadata: Optional[dict] = None,
    tags: Optional[Sequence[str]] = None,
    tag_origin: str = "manual",
    added_by: Optional[str] = None,
) -> AssetInfo:
    """Create a new AssetInfo referencing an existing Asset (no content write).

    Flushes the session to obtain the new row's id, then optionally writes the
    metadata projection and tag rows. The caller owns the transaction (no
    commit here).
    """
    timestamp = utcnow()
    record = AssetInfo(
        owner_id=None,
        name=name,
        asset_hash=asset_hash,
        preview_hash=None,
        created_at=timestamp,
        updated_at=timestamp,
        last_access_time=timestamp,
    )
    session.add(record)
    # Flush so record.id is assigned before the dependent rows are written.
    await session.flush()

    if user_metadata is not None:
        await replace_asset_info_metadata_projection(
            session, asset_info_id=record.id, user_metadata=user_metadata
        )
    if tags is not None:
        await set_asset_info_tags(
            session,
            asset_info_id=record.id,
            tags=tags,
            origin=tag_origin,
            added_by=added_by,
        )
    return record
async def set_asset_info_tags( async def set_asset_info_tags(
session: AsyncSession, session: AsyncSession,
*, *,