refactor(assets): merge AssetInfo and AssetCacheState into AssetReference

This change solves the basename collision bug by using UNIQUE(file_path) on the
unified asset_references table. Key changes:

Database:
- Migration 0005 merges the asset_cache_state and assets_info tables into asset_references
- AssetReference now contains: cache state fields (file_path, mtime_ns, needs_verify,
  is_missing, enrichment_level) plus info fields (name, owner_id, preview_id, etc.)
- AssetReferenceMeta replaces AssetInfoMeta
- AssetReferenceTag replaces AssetInfoTag
- UNIQUE constraint on file_path prevents duplicate entries for same file

Code:
- New unified query module: asset_reference.py (replaces asset_info.py, cache_state.py)
- Updated scanner, seeder, and services to use AssetReference
- Updated API routes to use reference_id instead of asset_info_id

Tests:
- All 175 unit tests updated and passing
- Integration tests require server environment (not run here)

Amp-Thread-ID: https://ampcode.com/threads/T-019c4fe8-9dcb-75ce-bea8-ea786343a581
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr 2026-02-11 20:03:10 -08:00
parent fd30787e98
commit 6b48144751
36 changed files with 3191 additions and 2327 deletions

View File

@ -0,0 +1,32 @@
"""
Drop unique constraint on assets_info (asset_id, owner_id, name)
Allow multiple files with the same name to reference the same asset.
Revision ID: 0004_drop_asset_info_unique
Revises: 0003_add_enrichment_level
Create Date: 2025-02-11 00:00:00
"""
from alembic import op
import sqlalchemy as sa
# Alembic revision identifiers.
# `revision` is this migration's ID; any later migration that follows it must
# use this exact string as its `down_revision`.
revision = "0004_drop_asset_info_unique"
# ID of the migration this one is applied on top of.
down_revision = "0003_add_enrichment_level"
# No branch labels or cross-branch dependencies are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Remove the (asset_id, owner_id, name) unique constraint from assets_info."""
    constraint_name = "uq_assets_info_asset_owner_name"
    # Batch mode is used so the constraint change goes through Alembic's
    # batch_alter_table context rather than a direct ALTER statement.
    with op.batch_alter_table("assets_info") as batch_op:
        batch_op.drop_constraint(constraint_name, type_="unique")
def downgrade() -> None:
    """Re-create the (asset_id, owner_id, name) unique constraint on assets_info."""
    constraint_name = "uq_assets_info_asset_owner_name"
    constrained_columns = ["asset_id", "owner_id", "name"]
    with op.batch_alter_table("assets_info") as batch_op:
        batch_op.create_unique_constraint(constraint_name, constrained_columns)

View File

@ -0,0 +1,422 @@
"""
Merge AssetInfo and AssetCacheState into unified asset_references table.
This migration:
1. Creates asset_references table with combined columns
2. Creates asset_reference_tags and asset_reference_meta tables
3. Migrates data from assets_info and asset_cache_state, merging where unambiguous
4. Migrates tags and metadata
5. Drops old tables
Revision ID: 0005_merge_to_asset_references
Revises: 0004_drop_asset_info_unique
Create Date: 2025-02-11
"""
# ruff: noqa: E501
import os
import uuid
from datetime import datetime
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
# Alembic revision identifiers.
revision = "0005_merge_to_asset_references"
# Must match the `revision` string declared by the preceding migration
# ("0004_drop_asset_info_unique"); a mismatched ID leaves Alembic unable to
# resolve the revision chain.
down_revision = "0004_drop_asset_info_unique"
# No branch labels or cross-branch dependencies are declared.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Merge ``assets_info`` and ``asset_cache_state`` into ``asset_references``.

    Steps:
      1. Create ``asset_references`` and its indexes.
      2. Create ``asset_reference_tags``.
      3. Create ``asset_reference_meta``.
      4. Copy rows from ``assets_info`` and ``asset_cache_state``. An info row
         absorbs a cache-state row only when the match is unambiguous: the
         asset has exactly one cache state, the basename of its ``file_path``
         equals the info ``name``, and no other info row claims that same
         cache state. Every remaining cache state becomes its own reference.
      5. Copy tags (info IDs are reused as reference IDs).
      6. Copy metadata.
      7. Drop the old tables and their indexes.
    """
    conn = op.get_bind()

    # Step 1: Create asset_references table
    op.create_table(
        "asset_references",
        sa.Column("id", sa.String(length=36), primary_key=True),
        sa.Column(
            "asset_id",
            sa.String(length=36),
            sa.ForeignKey("assets.id", ondelete="CASCADE"),
            nullable=False,
        ),
        # From AssetCacheState
        sa.Column("file_path", sa.Text(), nullable=True),
        sa.Column("mtime_ns", sa.BigInteger(), nullable=True),
        sa.Column(
            "needs_verify",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
        sa.Column(
            "is_missing", sa.Boolean(), nullable=False, server_default=sa.text("false")
        ),
        sa.Column("enrichment_level", sa.Integer(), nullable=False, server_default="0"),
        # From AssetInfo
        sa.Column("owner_id", sa.String(length=128), nullable=False, server_default=""),
        sa.Column("name", sa.String(length=512), nullable=False),
        sa.Column(
            "preview_id",
            sa.String(length=36),
            sa.ForeignKey("assets.id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("user_metadata", sa.JSON(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=False), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=False), nullable=False),
        sa.Column("last_access_time", sa.DateTime(timezone=False), nullable=False),
        # Constraints
        sa.CheckConstraint(
            "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_ar_mtime_nonneg"
        ),
        sa.CheckConstraint(
            "enrichment_level >= 0 AND enrichment_level <= 2",
            name="ck_ar_enrichment_level_range",
        ),
    )

    # Plain (not partial) unique index on file_path. SQLite treats NULLs as
    # distinct in unique indexes, so any number of path-less references can
    # coexist while each non-NULL path appears at most once.
    op.create_index(
        "uq_asset_references_file_path",
        "asset_references",
        ["file_path"],
        unique=True,
    )
    op.create_index("ix_asset_references_asset_id", "asset_references", ["asset_id"])
    op.create_index("ix_asset_references_owner_id", "asset_references", ["owner_id"])
    op.create_index("ix_asset_references_name", "asset_references", ["name"])
    op.create_index(
        "ix_asset_references_is_missing", "asset_references", ["is_missing"]
    )
    op.create_index(
        "ix_asset_references_enrichment_level", "asset_references", ["enrichment_level"]
    )
    op.create_index(
        "ix_asset_references_created_at", "asset_references", ["created_at"]
    )
    op.create_index(
        "ix_asset_references_last_access_time", "asset_references", ["last_access_time"]
    )
    op.create_index(
        "ix_asset_references_owner_name", "asset_references", ["owner_id", "name"]
    )

    # Step 2: Create asset_reference_tags table
    op.create_table(
        "asset_reference_tags",
        sa.Column(
            "asset_reference_id",
            sa.String(length=36),
            sa.ForeignKey("asset_references.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "tag_name",
            sa.String(length=512),
            sa.ForeignKey("tags.name", ondelete="RESTRICT"),
            nullable=False,
        ),
        sa.Column(
            "origin", sa.String(length=32), nullable=False, server_default="manual"
        ),
        sa.Column("added_at", sa.DateTime(timezone=False), nullable=False),
        sa.PrimaryKeyConstraint(
            "asset_reference_id", "tag_name", name="pk_asset_reference_tags"
        ),
    )
    op.create_index(
        "ix_asset_reference_tags_tag_name", "asset_reference_tags", ["tag_name"]
    )
    op.create_index(
        "ix_asset_reference_tags_asset_reference_id",
        "asset_reference_tags",
        ["asset_reference_id"],
    )

    # Step 3: Create asset_reference_meta table
    op.create_table(
        "asset_reference_meta",
        sa.Column(
            "asset_reference_id",
            sa.String(length=36),
            sa.ForeignKey("asset_references.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("key", sa.String(length=256), nullable=False),
        sa.Column("ordinal", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("val_str", sa.String(length=2048), nullable=True),
        sa.Column("val_num", sa.Numeric(38, 10), nullable=True),
        sa.Column("val_bool", sa.Boolean(), nullable=True),
        sa.Column("val_json", sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint(
            "asset_reference_id", "key", "ordinal", name="pk_asset_reference_meta"
        ),
    )
    op.create_index("ix_asset_reference_meta_key", "asset_reference_meta", ["key"])
    op.create_index(
        "ix_asset_reference_meta_key_val_str",
        "asset_reference_meta",
        ["key", "val_str"],
    )
    op.create_index(
        "ix_asset_reference_meta_key_val_num",
        "asset_reference_meta",
        ["key", "val_num"],
    )
    op.create_index(
        "ix_asset_reference_meta_key_val_bool",
        "asset_reference_meta",
        ["key", "val_bool"],
    )

    # Step 4: Migrate data
    # The matching is done in Python for clarity and SQLite compatibility.
    # Timestamp used for references synthesised from cache-state-only rows.
    now = datetime.utcnow().isoformat()

    info_rows = conn.execute(
        text("""
            SELECT id, owner_id, name, asset_id, preview_id, user_metadata,
                   created_at, updated_at, last_access_time
            FROM assets_info
        """)
    ).fetchall()

    cache_rows = conn.execute(
        text("""
            SELECT id, asset_id, file_path, mtime_ns, needs_verify, is_missing, enrichment_level
            FROM asset_cache_state
        """)
    ).fetchall()

    # Build mapping: asset_id -> list of cache_state rows
    cache_by_asset: dict = {}
    for (
        cache_id,
        asset_id,
        file_path,
        mtime_ns,
        needs_verify,
        is_missing,
        enrichment_level,
    ) in cache_rows:
        cache_by_asset.setdefault(asset_id, []).append(
            {
                "cache_id": cache_id,
                "file_path": file_path,
                "mtime_ns": mtime_ns,
                "needs_verify": needs_verify,
                "is_missing": is_missing,
                "enrichment_level": enrichment_level,
            }
        )

    # Collect, per cache_state, every info row that could absorb it:
    # same asset_id, exactly one cache_state for that asset, basename == name.
    claims: dict = {}
    for info_row in info_rows:
        info_id, name, asset_id = info_row[0], info_row[2], info_row[3]
        caches = cache_by_asset.get(asset_id, [])
        if len(caches) != 1:
            continue
        cache = caches[0]
        cache_path = cache["file_path"]
        # A NULL path can never match a name (and basename(None) would raise).
        if cache_path is not None and os.path.basename(cache_path) == name:
            claims.setdefault(cache["cache_id"], []).append((info_id, cache))

    # Merge only when exactly one info row claims the cache_state. If several
    # info rows matched (e.g. different owners using the same name), merging
    # each of them would insert the same file_path more than once and violate
    # the unique index on asset_references.file_path.
    merged_cache_ids: set = set()  # cache_states absorbed into an info row
    info_to_cache: dict = {}  # info_id -> cache data it absorbs
    for claimants in claims.values():
        if len(claimants) == 1:
            info_id, cache = claimants[0]
            merged_cache_ids.add(cache["cache_id"])
            info_to_cache[info_id] = cache

    # One parameterised INSERT serves all three kinds of row.
    insert_reference = text("""
        INSERT INTO asset_references (
            id, asset_id, file_path, mtime_ns, needs_verify, is_missing,
            enrichment_level, owner_id, name, preview_id, user_metadata,
            created_at, updated_at, last_access_time
        ) VALUES (
            :id, :asset_id, :file_path, :mtime_ns, :needs_verify, :is_missing,
            :enrichment_level, :owner_id, :name, :preview_id, :user_metadata,
            :created_at, :updated_at, :last_access_time
        )
    """)

    # Cache-state columns for info rows that did not absorb a cache_state.
    no_cache = {
        "file_path": None,
        "mtime_ns": None,
        "needs_verify": False,
        "is_missing": False,
        "enrichment_level": 0,
    }

    # Insert merged and non-merged assets_info rows, reusing the info ID so
    # tags and metadata can be copied by ID in steps 5 and 6.
    for (
        info_id,
        owner_id,
        name,
        asset_id,
        preview_id,
        user_metadata,
        created_at,
        updated_at,
        last_access,
    ) in info_rows:
        cache = info_to_cache.get(info_id, no_cache)
        conn.execute(
            insert_reference,
            {
                "id": info_id,
                "asset_id": asset_id,
                "file_path": cache["file_path"],
                "mtime_ns": cache["mtime_ns"],
                "needs_verify": cache["needs_verify"],
                "is_missing": cache["is_missing"],
                "enrichment_level": cache["enrichment_level"],
                "owner_id": owner_id or "",
                "name": name,
                "preview_id": preview_id,
                "user_metadata": user_metadata,
                "created_at": created_at,
                "updated_at": updated_at,
                "last_access_time": last_access,
            },
        )

    # Insert remaining (non-merged) cache_state rows as new asset_references
    for (
        cache_id,
        asset_id,
        file_path,
        mtime_ns,
        needs_verify,
        is_missing,
        enrichment_level,
    ) in cache_rows:
        if cache_id in merged_cache_ids:
            continue
        conn.execute(
            insert_reference,
            {
                "id": str(uuid.uuid4()),
                "asset_id": asset_id,
                "file_path": file_path,
                "mtime_ns": mtime_ns,
                "needs_verify": needs_verify,
                "is_missing": is_missing,
                "enrichment_level": enrichment_level,
                "owner_id": "",
                "name": os.path.basename(file_path) if file_path else "unknown",
                "preview_id": None,
                "user_metadata": None,
                "created_at": now,
                "updated_at": now,
                "last_access_time": now,
            },
        )

    # Step 5: Migrate tags (asset_info_id maps directly to asset_reference_id since we reused IDs)
    conn.execute(
        text("""
            INSERT INTO asset_reference_tags (asset_reference_id, tag_name, origin, added_at)
            SELECT asset_info_id, tag_name, origin, added_at
            FROM asset_info_tags
            WHERE asset_info_id IN (SELECT id FROM asset_references)
        """)
    )

    # Step 6: Migrate metadata
    conn.execute(
        text("""
            INSERT INTO asset_reference_meta (asset_reference_id, key, ordinal, val_str, val_num, val_bool, val_json)
            SELECT asset_info_id, key, ordinal, val_str, val_num, val_bool, val_json
            FROM asset_info_meta
            WHERE asset_info_id IN (SELECT id FROM asset_references)
        """)
    )

    # Step 7: Drop old tables (child tables first, then the tables they reference)
    op.drop_index("ix_asset_info_meta_key_val_bool", table_name="asset_info_meta")
    op.drop_index("ix_asset_info_meta_key_val_num", table_name="asset_info_meta")
    op.drop_index("ix_asset_info_meta_key_val_str", table_name="asset_info_meta")
    op.drop_index("ix_asset_info_meta_key", table_name="asset_info_meta")
    op.drop_table("asset_info_meta")

    op.drop_index("ix_asset_info_tags_asset_info_id", table_name="asset_info_tags")
    op.drop_index("ix_asset_info_tags_tag_name", table_name="asset_info_tags")
    op.drop_table("asset_info_tags")

    op.drop_index("ix_asset_cache_state_asset_id", table_name="asset_cache_state")
    op.drop_index("ix_asset_cache_state_file_path", table_name="asset_cache_state")
    op.drop_index("ix_asset_cache_state_is_missing", table_name="asset_cache_state")
    op.drop_index(
        "ix_asset_cache_state_enrichment_level", table_name="asset_cache_state"
    )
    op.drop_table("asset_cache_state")

    op.drop_index("ix_assets_info_owner_name", table_name="assets_info")
    op.drop_index("ix_assets_info_last_access_time", table_name="assets_info")
    op.drop_index("ix_assets_info_created_at", table_name="assets_info")
    op.drop_index("ix_assets_info_name", table_name="assets_info")
    op.drop_index("ix_assets_info_asset_id", table_name="assets_info")
    op.drop_index("ix_assets_info_owner_id", table_name="assets_info")
    op.drop_table("assets_info")
def downgrade() -> None:
    """Refuse to downgrade.

    Reversing the merge would mean splitting the unified rows back into the
    old tables, which is not attempted automatically.

    Raises:
        NotImplementedError: always.
    """
    message = (
        "Downgrade from 0005_merge_to_asset_references is not supported. "
        "Please restore from backup if needed."
    )
    raise NotImplementedError(message)

View File

@ -43,10 +43,10 @@ UUID_RE = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA
def get_query_dict(request: web.Request) -> dict[str, Any]: def get_query_dict(request: web.Request) -> dict[str, Any]:
""" """Gets a dictionary of query parameters from the request.
Gets a dictionary of query parameters from the request.
'request.query' is a MultiMapping[str], needs to be converted to a dictionary to be validated by Pydantic. request.query is a MultiMapping[str], needs to be converted to a dict
to be validated by Pydantic.
""" """
query_dict = { query_dict = {
key: request.query.getall(key) key: request.query.getall(key)
@ -58,7 +58,8 @@ def get_query_dict(request: web.Request) -> dict[str, Any]:
# Note to any custom node developers reading this code: # Note to any custom node developers reading this code:
# The assets system is not yet fully implemented, do not rely on the code in /app/assets remaining the same. # The assets system is not yet fully implemented,
# do not rely on the code in /app/assets remaining the same.
def register_assets_system( def register_assets_system(
@ -80,6 +81,7 @@ def _build_error_response(
def _build_validation_error_response(code: str, ve: ValidationError) -> web.Response: def _build_validation_error_response(code: str, ve: ValidationError) -> web.Response:
import json import json
errors = json.loads(ve.json()) errors = json.loads(ve.json())
return _build_error_response(400, code, "Validation failed.", {"errors": errors}) return _build_error_response(400, code, "Validation failed.", {"errors": errors})
@ -142,15 +144,15 @@ async def list_assets_route(request: web.Request) -> web.Response:
summaries = [ summaries = [
schemas_out.AssetSummary( schemas_out.AssetSummary(
id=item.info.id, id=item.ref.id,
name=item.info.name, name=item.ref.name,
asset_hash=item.asset.hash if item.asset else None, asset_hash=item.asset.hash if item.asset else None,
size=int(item.asset.size_bytes) if item.asset else None, size=int(item.asset.size_bytes) if item.asset else None,
mime_type=item.asset.mime_type if item.asset else None, mime_type=item.asset.mime_type if item.asset else None,
tags=item.tags, tags=item.tags,
created_at=item.info.created_at, created_at=item.ref.created_at,
updated_at=item.info.updated_at, updated_at=item.ref.updated_at,
last_access_time=item.info.last_access_time, last_access_time=item.ref.last_access_time,
) )
for item in result.items for item in result.items
] ]
@ -168,40 +170,40 @@ async def get_asset_route(request: web.Request) -> web.Response:
""" """
GET request to get an asset's info as JSON. GET request to get an asset's info as JSON.
""" """
asset_info_id = str(uuid.UUID(request.match_info["id"])) reference_id = str(uuid.UUID(request.match_info["id"]))
try: try:
result = get_asset_detail( result = get_asset_detail(
asset_info_id=asset_info_id, reference_id=reference_id,
owner_id=USER_MANAGER.get_request_user_id(request), owner_id=USER_MANAGER.get_request_user_id(request),
) )
if not result: if not result:
return _build_error_response( return _build_error_response(
404, 404,
"ASSET_NOT_FOUND", "ASSET_NOT_FOUND",
f"AssetInfo {asset_info_id} not found", f"AssetReference {reference_id} not found",
{"id": asset_info_id}, {"id": reference_id},
) )
payload = schemas_out.AssetDetail( payload = schemas_out.AssetDetail(
id=result.info.id, id=result.ref.id,
name=result.info.name, name=result.ref.name,
asset_hash=result.asset.hash if result.asset else None, asset_hash=result.asset.hash if result.asset else None,
size=int(result.asset.size_bytes) if result.asset else None, size=int(result.asset.size_bytes) if result.asset else None,
mime_type=result.asset.mime_type if result.asset else None, mime_type=result.asset.mime_type if result.asset else None,
tags=result.tags, tags=result.tags,
user_metadata=result.info.user_metadata or {}, user_metadata=result.ref.user_metadata or {},
preview_id=result.info.preview_id, preview_id=result.ref.preview_id,
created_at=result.info.created_at, created_at=result.ref.created_at,
last_access_time=result.info.last_access_time, last_access_time=result.ref.last_access_time,
) )
except ValueError as e: except ValueError as e:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", str(e), {"id": asset_info_id} 404, "ASSET_NOT_FOUND", str(e), {"id": reference_id}
) )
except Exception: except Exception:
logging.exception( logging.exception(
"get_asset failed for asset_info_id=%s, owner_id=%s", "get_asset failed for reference_id=%s, owner_id=%s",
asset_info_id, reference_id,
USER_MANAGER.get_request_user_id(request), USER_MANAGER.get_request_user_id(request),
) )
return _build_error_response(500, "INTERNAL", "Unexpected server error.") return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@ -216,7 +218,7 @@ async def download_asset_content(request: web.Request) -> web.Response:
try: try:
result = resolve_asset_for_download( result = resolve_asset_for_download(
asset_info_id=str(uuid.UUID(request.match_info["id"])), reference_id=str(uuid.UUID(request.match_info["id"])),
owner_id=USER_MANAGER.get_request_user_id(request), owner_id=USER_MANAGER.get_request_user_id(request),
) )
abs_path = result.abs_path abs_path = result.abs_path
@ -232,16 +234,14 @@ async def download_asset_content(request: web.Request) -> web.Response:
) )
quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'") quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{urllib.parse.quote(quoted)}" encoded = urllib.parse.quote(quoted)
cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{encoded}"
file_size = os.path.getsize(abs_path) file_size = os.path.getsize(abs_path)
size_mb = file_size / (1024 * 1024)
logging.info( logging.info(
"download_asset_content: path=%s, size=%d bytes (%.2f MB), content_type=%s, filename=%s", "download_asset_content: path=%s, size=%d bytes (%.2f MB), type=%s, name=%s",
abs_path, abs_path, file_size, size_mb, content_type, filename,
file_size,
file_size / (1024 * 1024),
content_type,
filename,
) )
async def stream_file_chunks(): async def stream_file_chunks():
@ -288,16 +288,16 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response:
) )
payload_out = schemas_out.AssetCreated( payload_out = schemas_out.AssetCreated(
id=result.info.id, id=result.ref.id,
name=result.info.name, name=result.ref.name,
asset_hash=result.asset.hash, asset_hash=result.asset.hash,
size=int(result.asset.size_bytes), size=int(result.asset.size_bytes),
mime_type=result.asset.mime_type, mime_type=result.asset.mime_type,
tags=result.tags, tags=result.tags,
user_metadata=result.info.user_metadata or {}, user_metadata=result.ref.user_metadata or {},
preview_id=result.info.preview_id, preview_id=result.ref.preview_id,
created_at=result.info.created_at, created_at=result.ref.created_at,
last_access_time=result.info.last_access_time, last_access_time=result.ref.last_access_time,
created_new=result.created_new, created_new=result.created_new,
) )
return web.json_response(payload_out.model_dump(mode="json"), status=201) return web.json_response(payload_out.model_dump(mode="json"), status=201)
@ -340,7 +340,7 @@ async def upload_asset(request: web.Request) -> web.Response:
) )
try: try:
# Fast path: if a valid provided hash exists, create AssetInfo without writing anything # Fast path: hash exists, create AssetReference without writing anything
if spec.hash and parsed.provided_hash_exists is True: if spec.hash and parsed.provided_hash_exists is True:
result = create_from_hash( result = create_from_hash(
hash_str=spec.hash, hash_str=spec.hash,
@ -391,16 +391,16 @@ async def upload_asset(request: web.Request) -> web.Response:
return _build_error_response(500, "INTERNAL", "Unexpected server error.") return _build_error_response(500, "INTERNAL", "Unexpected server error.")
payload = schemas_out.AssetCreated( payload = schemas_out.AssetCreated(
id=result.info.id, id=result.ref.id,
name=result.info.name, name=result.ref.name,
asset_hash=result.asset.hash, asset_hash=result.asset.hash,
size=int(result.asset.size_bytes), size=int(result.asset.size_bytes),
mime_type=result.asset.mime_type, mime_type=result.asset.mime_type,
tags=result.tags, tags=result.tags,
user_metadata=result.info.user_metadata or {}, user_metadata=result.ref.user_metadata or {},
preview_id=result.info.preview_id, preview_id=result.ref.preview_id,
created_at=result.info.created_at, created_at=result.ref.created_at,
last_access_time=result.info.last_access_time, last_access_time=result.ref.last_access_time,
created_new=result.created_new, created_new=result.created_new,
) )
status = 201 if result.created_new else 200 status = 201 if result.created_new else 200
@ -409,7 +409,7 @@ async def upload_asset(request: web.Request) -> web.Response:
@ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}") @ROUTES.put(f"/api/assets/{{id:{UUID_RE}}}")
async def update_asset_route(request: web.Request) -> web.Response: async def update_asset_route(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"])) reference_id = str(uuid.UUID(request.match_info["id"]))
try: try:
body = schemas_in.UpdateAssetBody.model_validate(await request.json()) body = schemas_in.UpdateAssetBody.model_validate(await request.json())
except ValidationError as ve: except ValidationError as ve:
@ -421,27 +421,27 @@ async def update_asset_route(request: web.Request) -> web.Response:
try: try:
result = update_asset_metadata( result = update_asset_metadata(
asset_info_id=asset_info_id, reference_id=reference_id,
name=body.name, name=body.name,
user_metadata=body.user_metadata, user_metadata=body.user_metadata,
owner_id=USER_MANAGER.get_request_user_id(request), owner_id=USER_MANAGER.get_request_user_id(request),
) )
payload = schemas_out.AssetUpdated( payload = schemas_out.AssetUpdated(
id=result.info.id, id=result.ref.id,
name=result.info.name, name=result.ref.name,
asset_hash=result.asset.hash if result.asset else None, asset_hash=result.asset.hash if result.asset else None,
tags=result.tags, tags=result.tags,
user_metadata=result.info.user_metadata or {}, user_metadata=result.ref.user_metadata or {},
updated_at=result.info.updated_at, updated_at=result.ref.updated_at,
) )
except (ValueError, PermissionError) as ve: except (ValueError, PermissionError) as ve:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id} 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
) )
except Exception: except Exception:
logging.exception( logging.exception(
"update_asset failed for asset_info_id=%s, owner_id=%s", "update_asset failed for reference_id=%s, owner_id=%s",
asset_info_id, reference_id,
USER_MANAGER.get_request_user_id(request), USER_MANAGER.get_request_user_id(request),
) )
return _build_error_response(500, "INTERNAL", "Unexpected server error.") return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@ -450,7 +450,7 @@ async def update_asset_route(request: web.Request) -> web.Response:
@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}") @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}")
async def delete_asset_route(request: web.Request) -> web.Response: async def delete_asset_route(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"])) reference_id = str(uuid.UUID(request.match_info["id"]))
delete_content_param = request.query.get("delete_content") delete_content_param = request.query.get("delete_content")
delete_content = ( delete_content = (
True True
@ -460,21 +460,21 @@ async def delete_asset_route(request: web.Request) -> web.Response:
try: try:
deleted = delete_asset_reference( deleted = delete_asset_reference(
asset_info_id=asset_info_id, reference_id=reference_id,
owner_id=USER_MANAGER.get_request_user_id(request), owner_id=USER_MANAGER.get_request_user_id(request),
delete_content_if_orphan=delete_content, delete_content_if_orphan=delete_content,
) )
except Exception: except Exception:
logging.exception( logging.exception(
"delete_asset_reference failed for asset_info_id=%s, owner_id=%s", "delete_asset_reference failed for reference_id=%s, owner_id=%s",
asset_info_id, reference_id,
USER_MANAGER.get_request_user_id(request), USER_MANAGER.get_request_user_id(request),
) )
return _build_error_response(500, "INTERNAL", "Unexpected server error.") return _build_error_response(500, "INTERNAL", "Unexpected server error.")
if not deleted: if not deleted:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", f"AssetInfo {asset_info_id} not found." 404, "ASSET_NOT_FOUND", f"AssetReference {reference_id} not found."
) )
return web.Response(status=204) return web.Response(status=204)
@ -490,8 +490,12 @@ async def get_tags(request: web.Request) -> web.Response:
query = schemas_in.TagsListQuery.model_validate(query_map) query = schemas_in.TagsListQuery.model_validate(query_map)
except ValidationError as e: except ValidationError as e:
import json import json
return _build_error_response( return _build_error_response(
400, "INVALID_QUERY", "Invalid query parameters", {"errors": json.loads(e.json())} 400,
"INVALID_QUERY",
"Invalid query parameters",
{"errors": json.loads(e.json())},
) )
rows, total = list_tags( rows, total = list_tags(
@ -515,7 +519,7 @@ async def get_tags(request: web.Request) -> web.Response:
@ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags") @ROUTES.post(f"/api/assets/{{id:{UUID_RE}}}/tags")
async def add_asset_tags(request: web.Request) -> web.Response: async def add_asset_tags(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"])) reference_id = str(uuid.UUID(request.match_info["id"]))
try: try:
json_payload = await request.json() json_payload = await request.json()
data = schemas_in.TagsAdd.model_validate(json_payload) data = schemas_in.TagsAdd.model_validate(json_payload)
@ -533,7 +537,7 @@ async def add_asset_tags(request: web.Request) -> web.Response:
try: try:
result = apply_tags( result = apply_tags(
asset_info_id=asset_info_id, reference_id=reference_id,
tags=data.tags, tags=data.tags,
origin="manual", origin="manual",
owner_id=USER_MANAGER.get_request_user_id(request), owner_id=USER_MANAGER.get_request_user_id(request),
@ -545,12 +549,12 @@ async def add_asset_tags(request: web.Request) -> web.Response:
) )
except (ValueError, PermissionError) as ve: except (ValueError, PermissionError) as ve:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id} 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
) )
except Exception: except Exception:
logging.exception( logging.exception(
"add_tags_to_asset failed for asset_info_id=%s, owner_id=%s", "add_tags_to_asset failed for reference_id=%s, owner_id=%s",
asset_info_id, reference_id,
USER_MANAGER.get_request_user_id(request), USER_MANAGER.get_request_user_id(request),
) )
return _build_error_response(500, "INTERNAL", "Unexpected server error.") return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@ -560,7 +564,7 @@ async def add_asset_tags(request: web.Request) -> web.Response:
@ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags") @ROUTES.delete(f"/api/assets/{{id:{UUID_RE}}}/tags")
async def delete_asset_tags(request: web.Request) -> web.Response: async def delete_asset_tags(request: web.Request) -> web.Response:
asset_info_id = str(uuid.UUID(request.match_info["id"])) reference_id = str(uuid.UUID(request.match_info["id"]))
try: try:
json_payload = await request.json() json_payload = await request.json()
data = schemas_in.TagsRemove.model_validate(json_payload) data = schemas_in.TagsRemove.model_validate(json_payload)
@ -578,7 +582,7 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
try: try:
result = remove_tags( result = remove_tags(
asset_info_id=asset_info_id, reference_id=reference_id,
tags=data.tags, tags=data.tags,
owner_id=USER_MANAGER.get_request_user_id(request), owner_id=USER_MANAGER.get_request_user_id(request),
) )
@ -589,12 +593,12 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
) )
except ValueError as ve: except ValueError as ve:
return _build_error_response( return _build_error_response(
404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id} 404, "ASSET_NOT_FOUND", str(ve), {"id": reference_id}
) )
except Exception: except Exception:
logging.exception( logging.exception(
"remove_tags_from_asset failed for asset_info_id=%s, owner_id=%s", "remove_tags_from_asset failed for reference_id=%s, owner_id=%s",
asset_info_id, reference_id,
USER_MANAGER.get_request_user_id(request), USER_MANAGER.get_request_user_id(request),
) )
return _build_error_response(500, "INTERNAL", "Unexpected server error.") return _build_error_response(500, "INTERNAL", "Unexpected server error.")
@ -683,11 +687,11 @@ async def cancel_seed(request: web.Request) -> web.Response:
@ROUTES.post("/api/assets/prune") @ROUTES.post("/api/assets/prune")
async def mark_missing_assets(request: web.Request) -> web.Response: async def mark_missing_assets(request: web.Request) -> web.Response:
"""Mark assets as missing when their cache states point to files outside all known root prefixes. """Mark assets as missing when outside all known root prefixes.
This is a non-destructive soft-delete operation. Assets and their metadata This is a non-destructive soft-delete operation. Assets and metadata
are preserved, but cache states are flagged as missing. They can be restored are preserved, but references are flagged as missing. They can be
if the file reappears in a future scan. restored if the file reappears in a future scan.
Returns: Returns:
200 OK with count of marked assets 200 OK with count of marked assets

View File

@ -13,7 +13,7 @@ from pydantic import (
class UploadError(Exception): class UploadError(Exception):
"""Error during upload parsing with HTTP status and code (used in HTTP layer only).""" """Error during upload parsing with HTTP status and code."""
def __init__(self, status: int, code: str, message: str): def __init__(self, status: int, code: str, message: str):
super().__init__(message) super().__init__(message)
@ -216,14 +216,14 @@ class TagsRemove(TagsAdd):
class UploadAssetSpec(BaseModel): class UploadAssetSpec(BaseModel):
"""Upload Asset operation. """Upload Asset operation.
- tags: ordered; first is root ('models'|'input'|'output'); - tags: ordered; first is root ('models'|'input'|'output');
if root == 'models', second must be a valid category from folder_paths.folder_names_and_paths if root == 'models', second must be a valid category
- name: display name - name: display name
- user_metadata: arbitrary JSON object (optional) - user_metadata: arbitrary JSON object (optional)
- hash: optional canonical 'blake3:<hex>' provided by the client for validation / fast-path - hash: optional canonical 'blake3:<hex>' for validation / fast-path
Files created via this endpoint are stored on disk using the **content hash** as the filename stem Files are stored using the content hash as filename stem.
and the original extension is preserved when available.
""" """
model_config = ConfigDict(extra="ignore", str_strip_whitespace=True) model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

View File

@ -95,7 +95,7 @@ async def parse_multipart_upload(
file_client_name = (field.filename or "").strip() file_client_name = (field.filename or "").strip()
if provided_hash and provided_hash_exists is True: if provided_hash and provided_hash_exists is True:
# If client supplied a hash that we know exists, drain but do not write to disk # Hash exists - drain file but don't write to disk
try: try:
while True: while True:
chunk = await field.read_chunk(8 * 1024 * 1024) chunk = await field.read_chunk(8 * 1024 * 1024)

View File

@ -16,7 +16,6 @@ from sqlalchemy import (
Numeric, Numeric,
String, String,
Text, Text,
UniqueConstraint,
) )
from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship
@ -37,29 +36,23 @@ class Asset(Base):
DateTime(timezone=False), nullable=False, default=get_utc_now DateTime(timezone=False), nullable=False, default=get_utc_now
) )
infos: Mapped[list[AssetInfo]] = relationship( references: Mapped[list[AssetReference]] = relationship(
"AssetInfo", "AssetReference",
back_populates="asset", back_populates="asset",
primaryjoin=lambda: Asset.id == foreign(AssetInfo.asset_id), primaryjoin=lambda: Asset.id == foreign(AssetReference.asset_id),
foreign_keys=lambda: [AssetInfo.asset_id], foreign_keys=lambda: [AssetReference.asset_id],
cascade="all,delete-orphan", cascade="all,delete-orphan",
passive_deletes=True, passive_deletes=True,
) )
preview_of: Mapped[list[AssetInfo]] = relationship( preview_of: Mapped[list[AssetReference]] = relationship(
"AssetInfo", "AssetReference",
back_populates="preview_asset", back_populates="preview_asset",
primaryjoin=lambda: Asset.id == foreign(AssetInfo.preview_id), primaryjoin=lambda: Asset.id == foreign(AssetReference.preview_id),
foreign_keys=lambda: [AssetInfo.preview_id], foreign_keys=lambda: [AssetReference.preview_id],
viewonly=True, viewonly=True,
) )
cache_states: Mapped[list[AssetCacheState]] = relationship(
back_populates="asset",
cascade="all, delete-orphan",
passive_deletes=True,
)
__table_args__ = ( __table_args__ = (
Index("uq_assets_hash", "hash", unique=True), Index("uq_assets_hash", "hash", unique=True),
Index("ix_assets_mime_type", "mime_type"), Index("ix_assets_mime_type", "mime_type"),
@ -73,54 +66,33 @@ class Asset(Base):
return f"<Asset id={self.id} hash={(self.hash or '')[:12]}>" return f"<Asset id={self.id} hash={(self.hash or '')[:12]}>"
class AssetCacheState(Base): class AssetReference(Base):
__tablename__ = "asset_cache_state" """Unified model combining file cache state and user-facing metadata.
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) Each row represents either:
- A filesystem reference (file_path is set) with cache state
- An API-created reference (file_path is NULL) without cache state
"""
__tablename__ = "asset_references"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid.uuid4())
)
asset_id: Mapped[str] = mapped_column( asset_id: Mapped[str] = mapped_column(
String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False
) )
file_path: Mapped[str] = mapped_column(Text, nullable=False)
# Cache state fields (from former AssetCacheState)
file_path: Mapped[str | None] = mapped_column(Text, nullable=True)
mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True) mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
is_missing: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) is_missing: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
enrichment_level: Mapped[int] = mapped_column(Integer, nullable=False, default=0) enrichment_level: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
asset: Mapped[Asset] = relationship(back_populates="cache_states") # Info fields (from former AssetInfo)
__table_args__ = (
Index("ix_asset_cache_state_file_path", "file_path"),
Index("ix_asset_cache_state_asset_id", "asset_id"),
Index("ix_asset_cache_state_is_missing", "is_missing"),
Index("ix_asset_cache_state_enrichment_level", "enrichment_level"),
CheckConstraint(
"(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"
),
CheckConstraint(
"enrichment_level >= 0 AND enrichment_level <= 2",
name="ck_acs_enrichment_level_range",
),
UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"),
)
def to_dict(self, include_none: bool = False) -> dict[str, Any]:
return to_dict(self, include_none=include_none)
def __repr__(self) -> str:
return f"<AssetCacheState id={self.id} asset_id={self.asset_id} path={self.file_path!r}>"
class AssetInfo(Base):
__tablename__ = "assets_info"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid.uuid4())
)
owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="") owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="")
name: Mapped[str] = mapped_column(String(512), nullable=False) name: Mapped[str] = mapped_column(String(512), nullable=False)
asset_id: Mapped[str] = mapped_column(
String(36), ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False
)
preview_id: Mapped[str | None] = mapped_column( preview_id: Mapped[str | None] = mapped_column(
String(36), ForeignKey("assets.id", ondelete="SET NULL") String(36), ForeignKey("assets.id", ondelete="SET NULL")
) )
@ -139,7 +111,7 @@ class AssetInfo(Base):
asset: Mapped[Asset] = relationship( asset: Mapped[Asset] = relationship(
"Asset", "Asset",
back_populates="infos", back_populates="references",
foreign_keys=[asset_id], foreign_keys=[asset_id],
lazy="selectin", lazy="selectin",
) )
@ -149,37 +121,44 @@ class AssetInfo(Base):
foreign_keys=[preview_id], foreign_keys=[preview_id],
) )
metadata_entries: Mapped[list[AssetInfoMeta]] = relationship( metadata_entries: Mapped[list[AssetReferenceMeta]] = relationship(
back_populates="asset_info", back_populates="asset_reference",
cascade="all,delete-orphan", cascade="all,delete-orphan",
passive_deletes=True, passive_deletes=True,
) )
tag_links: Mapped[list[AssetInfoTag]] = relationship( tag_links: Mapped[list[AssetReferenceTag]] = relationship(
back_populates="asset_info", back_populates="asset_reference",
cascade="all,delete-orphan", cascade="all,delete-orphan",
passive_deletes=True, passive_deletes=True,
overlaps="tags,asset_infos", overlaps="tags,asset_references",
) )
tags: Mapped[list[Tag]] = relationship( tags: Mapped[list[Tag]] = relationship(
secondary="asset_info_tags", secondary="asset_reference_tags",
back_populates="asset_infos", back_populates="asset_references",
lazy="selectin", lazy="selectin",
viewonly=True, viewonly=True,
overlaps="tag_links,asset_info_links,asset_infos,tag", overlaps="tag_links,asset_reference_links,asset_references,tag",
) )
__table_args__ = ( __table_args__ = (
UniqueConstraint( Index("uq_asset_references_file_path", "file_path", unique=True),
"asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name" Index("ix_asset_references_asset_id", "asset_id"),
Index("ix_asset_references_owner_id", "owner_id"),
Index("ix_asset_references_name", "name"),
Index("ix_asset_references_is_missing", "is_missing"),
Index("ix_asset_references_enrichment_level", "enrichment_level"),
Index("ix_asset_references_created_at", "created_at"),
Index("ix_asset_references_last_access_time", "last_access_time"),
Index("ix_asset_references_owner_name", "owner_id", "name"),
CheckConstraint(
"(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_ar_mtime_nonneg"
),
CheckConstraint(
"enrichment_level >= 0 AND enrichment_level <= 2",
name="ck_ar_enrichment_level_range",
), ),
Index("ix_assets_info_owner_name", "owner_id", "name"),
Index("ix_assets_info_owner_id", "owner_id"),
Index("ix_assets_info_asset_id", "asset_id"),
Index("ix_assets_info_name", "name"),
Index("ix_assets_info_created_at", "created_at"),
Index("ix_assets_info_last_access_time", "last_access_time"),
) )
def to_dict(self, include_none: bool = False) -> dict[str, Any]: def to_dict(self, include_none: bool = False) -> dict[str, Any]:
@ -188,14 +167,17 @@ class AssetInfo(Base):
return data return data
def __repr__(self) -> str: def __repr__(self) -> str:
return f"<AssetInfo id={self.id} name={self.name!r} asset_id={self.asset_id}>" path_part = f" path={self.file_path!r}" if self.file_path else ""
return f"<AssetReference id={self.id} name={self.name!r}{path_part}>"
class AssetInfoMeta(Base): class AssetReferenceMeta(Base):
__tablename__ = "asset_info_meta" __tablename__ = "asset_reference_meta"
asset_info_id: Mapped[str] = mapped_column( asset_reference_id: Mapped[str] = mapped_column(
String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True String(36),
ForeignKey("asset_references.id", ondelete="CASCADE"),
primary_key=True,
) )
key: Mapped[str] = mapped_column(String(256), primary_key=True) key: Mapped[str] = mapped_column(String(256), primary_key=True)
ordinal: Mapped[int] = mapped_column(Integer, primary_key=True, default=0) ordinal: Mapped[int] = mapped_column(Integer, primary_key=True, default=0)
@ -205,21 +187,25 @@ class AssetInfoMeta(Base):
val_bool: Mapped[bool | None] = mapped_column(Boolean, nullable=True) val_bool: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
val_json: Mapped[Any | None] = mapped_column(JSON(none_as_null=True), nullable=True) val_json: Mapped[Any | None] = mapped_column(JSON(none_as_null=True), nullable=True)
asset_info: Mapped[AssetInfo] = relationship(back_populates="metadata_entries") asset_reference: Mapped[AssetReference] = relationship(
back_populates="metadata_entries"
)
__table_args__ = ( __table_args__ = (
Index("ix_asset_info_meta_key", "key"), Index("ix_asset_reference_meta_key", "key"),
Index("ix_asset_info_meta_key_val_str", "key", "val_str"), Index("ix_asset_reference_meta_key_val_str", "key", "val_str"),
Index("ix_asset_info_meta_key_val_num", "key", "val_num"), Index("ix_asset_reference_meta_key_val_num", "key", "val_num"),
Index("ix_asset_info_meta_key_val_bool", "key", "val_bool"), Index("ix_asset_reference_meta_key_val_bool", "key", "val_bool"),
) )
class AssetInfoTag(Base): class AssetReferenceTag(Base):
__tablename__ = "asset_info_tags" __tablename__ = "asset_reference_tags"
asset_info_id: Mapped[str] = mapped_column( asset_reference_id: Mapped[str] = mapped_column(
String(36), ForeignKey("assets_info.id", ondelete="CASCADE"), primary_key=True String(36),
ForeignKey("asset_references.id", ondelete="CASCADE"),
primary_key=True,
) )
tag_name: Mapped[str] = mapped_column( tag_name: Mapped[str] = mapped_column(
String(512), ForeignKey("tags.name", ondelete="RESTRICT"), primary_key=True String(512), ForeignKey("tags.name", ondelete="RESTRICT"), primary_key=True
@ -229,12 +215,12 @@ class AssetInfoTag(Base):
DateTime(timezone=False), nullable=False, default=get_utc_now DateTime(timezone=False), nullable=False, default=get_utc_now
) )
asset_info: Mapped[AssetInfo] = relationship(back_populates="tag_links") asset_reference: Mapped[AssetReference] = relationship(back_populates="tag_links")
tag: Mapped[Tag] = relationship(back_populates="asset_info_links") tag: Mapped[Tag] = relationship(back_populates="asset_reference_links")
__table_args__ = ( __table_args__ = (
Index("ix_asset_info_tags_tag_name", "tag_name"), Index("ix_asset_reference_tags_tag_name", "tag_name"),
Index("ix_asset_info_tags_asset_info_id", "asset_info_id"), Index("ix_asset_reference_tags_asset_reference_id", "asset_reference_id"),
) )
@ -244,15 +230,15 @@ class Tag(Base):
name: Mapped[str] = mapped_column(String(512), primary_key=True) name: Mapped[str] = mapped_column(String(512), primary_key=True)
tag_type: Mapped[str] = mapped_column(String(32), nullable=False, default="user") tag_type: Mapped[str] = mapped_column(String(32), nullable=False, default="user")
asset_info_links: Mapped[list[AssetInfoTag]] = relationship( asset_reference_links: Mapped[list[AssetReferenceTag]] = relationship(
back_populates="tag", back_populates="tag",
overlaps="asset_infos,tags", overlaps="asset_references,tags",
) )
asset_infos: Mapped[list[AssetInfo]] = relationship( asset_references: Mapped[list[AssetReference]] = relationship(
secondary="asset_info_tags", secondary="asset_reference_tags",
back_populates="tags", back_populates="tags",
viewonly=True, viewonly=True,
overlaps="asset_info_links,tag_links,tags,asset_info", overlaps="asset_reference_links,tag_links,tags,asset_reference",
) )
__table_args__ = (Index("ix_tags_tag_type", "tag_type"),) __table_args__ = (Index("ix_tags_tag_type", "tag_type"),)

View File

@ -3,59 +3,60 @@ from app.assets.database.queries.asset import (
bulk_insert_assets, bulk_insert_assets,
get_asset_by_hash, get_asset_by_hash,
get_existing_asset_ids, get_existing_asset_ids,
reassign_asset_references,
update_asset_hash_and_mime,
upsert_asset, upsert_asset,
) )
from app.assets.database.queries.asset_info import ( from app.assets.database.queries.asset_reference import (
asset_info_exists_for_asset_id,
bulk_insert_asset_infos_ignore_conflicts,
delete_asset_info_by_id,
fetch_asset_info_and_asset,
fetch_asset_info_asset_and_tags,
get_asset_info_by_id,
get_asset_info_ids_by_ids,
get_or_create_asset_info,
insert_asset_info,
list_asset_infos_page,
set_asset_info_metadata,
set_asset_info_preview,
update_asset_info_access_time,
update_asset_info_name,
update_asset_info_timestamps,
update_asset_info_updated_at,
)
from app.assets.database.queries.cache_state import (
CacheStateRow, CacheStateRow,
UnenrichedAssetRow, UnenrichedReferenceRow,
bulk_insert_cache_states_ignore_conflicts, bulk_insert_references_ignore_conflicts,
bulk_update_enrichment_level, bulk_update_enrichment_level,
bulk_update_is_missing, bulk_update_is_missing,
bulk_update_needs_verify, bulk_update_needs_verify,
convert_metadata_to_rows,
delete_assets_by_ids, delete_assets_by_ids,
delete_cache_states_by_ids,
delete_orphaned_seed_asset, delete_orphaned_seed_asset,
get_cache_states_by_paths_and_asset_ids, delete_reference_by_id,
get_cache_states_for_prefixes, delete_references_by_ids,
get_unenriched_cache_states, fetch_reference_and_asset,
fetch_reference_asset_and_tags,
get_or_create_reference,
get_reference_by_file_path,
get_reference_by_id,
get_reference_ids_by_ids,
get_references_by_paths_and_asset_ids,
get_references_for_prefixes,
get_unenriched_references,
get_unreferenced_unhashed_asset_ids, get_unreferenced_unhashed_asset_ids,
list_cache_states_by_asset_id, insert_reference,
mark_cache_states_missing_outside_prefixes, list_references_by_asset_id,
restore_cache_states_by_paths, list_references_page,
mark_references_missing_outside_prefixes,
reference_exists_for_asset_id,
restore_references_by_paths,
set_reference_metadata,
set_reference_preview,
update_enrichment_level, update_enrichment_level,
upsert_cache_state, update_reference_access_time,
update_reference_name,
update_reference_timestamps,
update_reference_updated_at,
upsert_reference,
) )
from app.assets.database.queries.tags import ( from app.assets.database.queries.tags import (
AddTagsDict, AddTagsDict,
RemoveTagsDict, RemoveTagsDict,
SetTagsDict, SetTagsDict,
add_missing_tag_for_asset_id, add_missing_tag_for_asset_id,
add_tags_to_asset_info, add_tags_to_reference,
bulk_insert_tags_and_meta, bulk_insert_tags_and_meta,
ensure_tags_exist, ensure_tags_exist,
get_asset_tags, get_reference_tags,
list_tags_with_usage, list_tags_with_usage,
remove_missing_tag_for_asset_id, remove_missing_tag_for_asset_id,
remove_tags_from_asset_info, remove_tags_from_reference,
set_asset_info_tags, set_reference_tags,
) )
__all__ = [ __all__ = [
@ -63,51 +64,54 @@ __all__ = [
"CacheStateRow", "CacheStateRow",
"RemoveTagsDict", "RemoveTagsDict",
"SetTagsDict", "SetTagsDict",
"UnenrichedAssetRow", "UnenrichedReferenceRow",
"add_missing_tag_for_asset_id", "add_missing_tag_for_asset_id",
"add_tags_to_asset_info", "add_tags_to_reference",
"asset_exists_by_hash", "asset_exists_by_hash",
"asset_info_exists_for_asset_id",
"bulk_insert_asset_infos_ignore_conflicts",
"bulk_insert_assets", "bulk_insert_assets",
"bulk_insert_cache_states_ignore_conflicts", "bulk_insert_references_ignore_conflicts",
"bulk_insert_tags_and_meta", "bulk_insert_tags_and_meta",
"bulk_update_enrichment_level", "bulk_update_enrichment_level",
"bulk_update_is_missing", "bulk_update_is_missing",
"bulk_update_needs_verify", "bulk_update_needs_verify",
"delete_asset_info_by_id", "convert_metadata_to_rows",
"delete_assets_by_ids", "delete_assets_by_ids",
"delete_cache_states_by_ids",
"delete_orphaned_seed_asset", "delete_orphaned_seed_asset",
"delete_reference_by_id",
"delete_references_by_ids",
"ensure_tags_exist", "ensure_tags_exist",
"fetch_asset_info_and_asset", "fetch_reference_and_asset",
"fetch_asset_info_asset_and_tags", "fetch_reference_asset_and_tags",
"get_asset_by_hash", "get_asset_by_hash",
"get_existing_asset_ids", "get_existing_asset_ids",
"get_asset_info_by_id", "get_or_create_reference",
"get_asset_info_ids_by_ids", "get_reference_by_file_path",
"get_asset_tags", "get_reference_by_id",
"get_cache_states_by_paths_and_asset_ids", "get_reference_ids_by_ids",
"get_cache_states_for_prefixes", "get_reference_tags",
"get_or_create_asset_info", "get_references_by_paths_and_asset_ids",
"get_unenriched_cache_states", "get_references_for_prefixes",
"get_unenriched_references",
"get_unreferenced_unhashed_asset_ids", "get_unreferenced_unhashed_asset_ids",
"insert_asset_info", "insert_reference",
"list_asset_infos_page", "list_references_by_asset_id",
"list_cache_states_by_asset_id", "list_references_page",
"list_tags_with_usage", "list_tags_with_usage",
"mark_cache_states_missing_outside_prefixes", "mark_references_missing_outside_prefixes",
"reassign_asset_references",
"reference_exists_for_asset_id",
"remove_missing_tag_for_asset_id", "remove_missing_tag_for_asset_id",
"remove_tags_from_asset_info", "remove_tags_from_reference",
"restore_cache_states_by_paths", "restore_references_by_paths",
"set_asset_info_metadata", "set_reference_metadata",
"set_asset_info_preview", "set_reference_preview",
"set_asset_info_tags", "set_reference_tags",
"update_asset_info_access_time", "update_asset_hash_and_mime",
"update_asset_info_name",
"update_asset_info_timestamps",
"update_asset_info_updated_at",
"update_enrichment_level", "update_enrichment_level",
"update_reference_access_time",
"update_reference_name",
"update_reference_timestamps",
"update_reference_updated_at",
"upsert_asset", "upsert_asset",
"upsert_cache_state", "upsert_reference",
] ]

View File

@ -82,7 +82,7 @@ def bulk_insert_assets(
session: Session, session: Session,
rows: list[dict], rows: list[dict],
) -> None: ) -> None:
"""Bulk insert Asset rows. Each dict should have: id, hash, size_bytes, mime_type, created_at.""" """Bulk insert Asset rows with ON CONFLICT DO NOTHING on hash."""
if not rows: if not rows:
return return
ins = sqlite.insert(Asset).on_conflict_do_nothing(index_elements=[Asset.hash]) ins = sqlite.insert(Asset).on_conflict_do_nothing(index_elements=[Asset.hash])
@ -101,3 +101,39 @@ def get_existing_asset_ids(
select(Asset.id).where(Asset.id.in_(asset_ids)) select(Asset.id).where(Asset.id.in_(asset_ids))
).fetchall() ).fetchall()
return {row[0] for row in rows} return {row[0] for row in rows}
def update_asset_hash_and_mime(
    session: Session,
    asset_id: str,
    asset_hash: str | None = None,
    mime_type: str | None = None,
) -> bool:
    """Write a new hash and/or MIME type onto an existing Asset.

    Arguments left as ``None`` are skipped, so the matching attribute keeps
    its current value.

    Returns:
        True when an asset with ``asset_id`` was found, False otherwise.
    """
    row = session.get(Asset, asset_id)
    if row:
        # Only overwrite the attributes the caller actually supplied.
        for attribute, new_value in (("hash", asset_hash), ("mime_type", mime_type)):
            if new_value is not None:
                setattr(row, attribute, new_value)
        return True
    return False
def reassign_asset_references(
    session: Session,
    from_asset_id: str,
    to_asset_id: str,
    reference_id: str,
) -> None:
    """Point one reference at a different asset.

    Used when merging a stub asset into an existing asset with the same hash.

    ``from_asset_id`` is accepted but not consulted here: the reference is
    looked up by ``reference_id`` alone and, when found, its ``asset_id`` is
    set to ``to_asset_id``.
    """
    # Imported at call time rather than at module level.
    from app.assets.database.models import AssetReference

    target = session.get(AssetReference, reference_id)
    if target:
        target.asset_id = to_asset_id
    session.flush()

View File

@ -1,527 +0,0 @@
from collections import defaultdict
from datetime import datetime
from decimal import Decimal
from typing import Sequence
import sqlalchemy as sa
from sqlalchemy import delete, exists, select
from sqlalchemy.dialects import sqlite
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, contains_eager, noload
from app.assets.database.models import (
Asset,
AssetInfo,
AssetInfoMeta,
AssetInfoTag,
Tag,
)
from app.assets.database.queries.common import (
MAX_BIND_PARAMS,
build_visible_owner_clause,
calculate_rows_per_statement,
iter_chunks,
)
from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags
def _check_is_scalar(v):
    """Return True for None, bool, int, float, Decimal or str values."""
    return v is None or isinstance(v, (bool, int, float, Decimal, str))


def _scalar_to_row(key: str, ordinal: int, value) -> dict:
    """Convert a scalar value to a typed projection row."""
    row = {"key": key, "ordinal": ordinal}
    if value is None:
        # An explicit null carries every value column, all set to None.
        row.update(val_str=None, val_num=None, val_bool=None, val_json=None)
    elif isinstance(value, bool):
        # bool is tested before the numeric branch because bool is an int subclass.
        row["val_bool"] = bool(value)
    elif isinstance(value, (int, float, Decimal)):
        # Non-Decimal numbers go through str() before becoming a Decimal.
        row["val_num"] = value if isinstance(value, Decimal) else Decimal(str(value))
    elif isinstance(value, str):
        row["val_str"] = value
    else:
        row["val_json"] = value
    return row


def convert_metadata_to_rows(key: str, value) -> list[dict]:
    """
    Turn a metadata key/value into typed projection rows.

    Scalars (including None) produce a single row at ordinal 0. A list
    produces one row per element, numbered by position: typed rows when every
    element is a scalar, ``val_json`` rows otherwise. An empty list produces
    no rows. Any other value is stored whole in one ``val_json`` row.

    Returns list[dict] with keys ``key``, ``ordinal`` and one of
    val_str / val_num / val_bool / val_json; a None scalar carries all four
    value keys set to None.
    """
    if _check_is_scalar(value):
        return [_scalar_to_row(key, 0, value)]

    if not isinstance(value, list):
        return [{"key": key, "ordinal": 0, "val_json": value}]

    if all(_check_is_scalar(element) for element in value):
        return [
            _scalar_to_row(key, position, element)
            for position, element in enumerate(value)
        ]

    return [
        {"key": key, "ordinal": position, "val_json": element}
        for position, element in enumerate(value)
    ]
def _apply_tag_filters(
    stmt: sa.sql.Select,
    include_tags: Sequence[str] | None = None,
    exclude_tags: Sequence[str] | None = None,
) -> sa.sql.Select:
    """Restrict ``stmt`` by tag membership.

    include_tags: every tag must be present; exclude_tags: none may be present.
    Both lists are passed through ``normalize_tags`` before use.
    """
    required = normalize_tags(include_tags)
    forbidden = normalize_tags(exclude_tags)

    # One EXISTS per required tag, so all of them must be linked to the row.
    for wanted in required or ():
        stmt = stmt.where(
            exists().where(
                AssetInfoTag.asset_info_id == AssetInfo.id,
                AssetInfoTag.tag_name == wanted,
            )
        )

    # A single NOT EXISTS covers the whole exclusion list.
    if forbidden:
        stmt = stmt.where(
            ~exists().where(
                AssetInfoTag.asset_info_id == AssetInfo.id,
                AssetInfoTag.tag_name.in_(forbidden),
            )
        )

    return stmt
def _apply_metadata_filter(
    stmt: sa.sql.Select,
    metadata_filter: dict | None = None,
) -> sa.sql.Select:
    """Apply filters using asset_info_meta projection table.

    Each ``key: value`` pair in ``metadata_filter`` adds one condition:

    - ``None`` matches when the key has no row at all, or has a row whose
      four value columns are all NULL.
    - bool / number / str values are compared against ``val_bool`` /
      ``val_num`` / ``val_str`` respectively; anything else against
      ``val_json``.
    - a list value matches when any of its elements matches (OR); an empty
      list adds no condition.

    An empty or missing ``metadata_filter`` returns ``stmt`` unchanged.
    """
    if not metadata_filter:
        return stmt

    def _exists_for_pred(key: str, *preds) -> sa.sql.ClauseElement:
        # EXISTS over the meta rows of the current AssetInfo for this key.
        return sa.exists().where(
            AssetInfoMeta.asset_info_id == AssetInfo.id,
            AssetInfoMeta.key == key,
            *preds,
        )

    def _exists_clause_for_value(key: str, value) -> sa.sql.ClauseElement:
        if value is None:
            no_row_for_key = sa.not_(_exists_for_pred(key))
            null_row = _exists_for_pred(
                key,
                AssetInfoMeta.val_json.is_(None),
                AssetInfoMeta.val_str.is_(None),
                AssetInfoMeta.val_num.is_(None),
                AssetInfoMeta.val_bool.is_(None),
            )
            return sa.or_(no_row_for_key, null_row)

        # bool must be checked before the numeric branch: bool is an int subclass.
        if isinstance(value, bool):
            return _exists_for_pred(key, AssetInfoMeta.val_bool == bool(value))

        # Decimal is included so that it is matched against val_num, the
        # column _scalar_to_row stores Decimal values in.
        if isinstance(value, (int, float, Decimal)):
            num = value if isinstance(value, Decimal) else Decimal(str(value))
            return _exists_for_pred(key, AssetInfoMeta.val_num == num)

        if isinstance(value, str):
            return _exists_for_pred(key, AssetInfoMeta.val_str == value)

        return _exists_for_pred(key, AssetInfoMeta.val_json == value)

    for k, v in metadata_filter.items():
        if isinstance(v, list):
            ors = [_exists_clause_for_value(k, elem) for elem in v]
            if ors:
                stmt = stmt.where(sa.or_(*ors))
        else:
            stmt = stmt.where(_exists_clause_for_value(k, v))

    return stmt
def asset_info_exists_for_asset_id(
    session: Session,
    asset_id: str,
) -> bool:
    """Return True when at least one AssetInfo row has this ``asset_id``."""
    # Select a literal and stop at the first row: only existence matters.
    probe = (
        select(sa.literal(True))
        .select_from(AssetInfo)
        .where(AssetInfo.asset_id == asset_id)
        .limit(1)
    )
    first_hit = session.execute(probe).first()
    return first_hit is not None
def get_asset_info_by_id(
    session: Session,
    asset_info_id: str,
) -> AssetInfo | None:
    """Fetch an AssetInfo by primary key via ``session.get``."""
    found = session.get(AssetInfo, asset_info_id)
    return found
def insert_asset_info(
    session: Session,
    asset_id: str,
    owner_id: str,
    name: str,
    preview_id: str | None = None,
) -> AssetInfo | None:
    """Insert a new AssetInfo. Returns None if unique constraint violated.

    ``created_at``, ``updated_at`` and ``last_access_time`` all receive the
    same ``get_utc_now()`` value. The insert runs inside
    ``session.begin_nested()``, and an ``IntegrityError`` raised there is
    turned into a ``None`` return.
    """
    timestamp = get_utc_now()
    try:
        with session.begin_nested():
            record = AssetInfo(
                owner_id=owner_id,
                name=name,
                asset_id=asset_id,
                preview_id=preview_id,
                created_at=timestamp,
                updated_at=timestamp,
                last_access_time=timestamp,
            )
            session.add(record)
            # Flush now so a constraint violation surfaces inside this block.
            session.flush()
    except IntegrityError:
        return None
    return record
def get_or_create_asset_info(
    session: Session,
    asset_id: str,
    owner_id: str,
    name: str,
    preview_id: str | None = None,
) -> tuple[AssetInfo, bool]:
    """Get existing or create new AssetInfo. Returns (info, created).

    Insert first; if ``insert_asset_info`` reports a conflict (returns None),
    look the row up by (asset_id, name, owner_id).

    Raises:
        RuntimeError: the insert failed and no matching row could be found.
    """
    fresh = insert_asset_info(
        session,
        asset_id=asset_id,
        owner_id=owner_id,
        name=name,
        preview_id=preview_id,
    )
    if fresh:
        return fresh, True

    lookup = (
        select(AssetInfo)
        .where(
            AssetInfo.asset_id == asset_id,
            AssetInfo.name == name,
            AssetInfo.owner_id == owner_id,
        )
        .limit(1)
    )
    already_there = session.execute(lookup).unique().scalar_one_or_none()
    if not already_there:
        raise RuntimeError("Failed to find AssetInfo after insert conflict.")
    return already_there, False
def update_asset_info_timestamps(
    session: Session,
    asset_info: AssetInfo,
    preview_id: str | None = None,
) -> None:
    """Update timestamps and optionally preview_id on existing AssetInfo.

    ``updated_at`` is always set to the current ``get_utc_now()`` value.
    ``preview_id`` is replaced only when a truthy, different value is given.
    ``last_access_time`` only moves forward; a missing (None) value is treated
    as older than now, matching ``update_asset_info_access_time``.
    """
    now = get_utc_now()
    if preview_id and asset_info.preview_id != preview_id:
        asset_info.preview_id = preview_id
    asset_info.updated_at = now

    # Guard against None before comparing: None < datetime raises TypeError.
    last_access = asset_info.last_access_time
    if last_access is None or last_access < now:
        asset_info.last_access_time = now
    session.flush()
def list_asset_infos_page(
    session: Session,
    owner_id: str = "",
    include_tags: Sequence[str] | None = None,
    exclude_tags: Sequence[str] | None = None,
    name_contains: str | None = None,
    metadata_filter: dict | None = None,
    limit: int = 20,
    offset: int = 0,
    sort: str = "created_at",
    order: str = "desc",
) -> tuple[list[AssetInfo], dict[str, list[str]], int]:
    """Return one page of AssetInfo rows, their tags, and the total match count.

    The same filters (owner clause from ``build_visible_owner_clause``,
    optional case-insensitive name substring, tag filters, metadata filter)
    are applied to both the page query and the count query.

    ``sort`` may be name, created_at, updated_at, last_access_time or size;
    anything else falls back to created_at. ``order`` is descending only when
    it equals "desc" (case-insensitive); any other value sorts ascending.

    Returns:
        (infos, tag_map, total) where ``tag_map`` maps AssetInfo id to its tag
        names ordered by ``AssetInfoTag.added_at``, and ``total`` ignores
        ``limit``/``offset``.
    """

    def _restrict(query):
        # Shared WHERE clauses for the page query and the count query.
        query = query.where(build_visible_owner_clause(owner_id))
        if name_contains:
            escaped, esc = escape_sql_like_string(name_contains)
            query = query.where(AssetInfo.name.ilike(f"%{escaped}%", escape=esc))
        query = _apply_tag_filters(query, include_tags, exclude_tags)
        return _apply_metadata_filter(query, metadata_filter)

    page_stmt = _restrict(
        select(AssetInfo)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        .options(contains_eager(AssetInfo.asset), noload(AssetInfo.tags))
    )

    sort_key = (sort or "created_at").lower()
    descending = (order or "desc").lower() == "desc"
    sortable_columns = {
        "name": AssetInfo.name,
        "created_at": AssetInfo.created_at,
        "updated_at": AssetInfo.updated_at,
        "last_access_time": AssetInfo.last_access_time,
        "size": Asset.size_bytes,
    }
    column = sortable_columns.get(sort_key, AssetInfo.created_at)
    ordering = column.desc() if descending else column.asc()
    page_stmt = page_stmt.order_by(ordering).limit(limit).offset(offset)

    count_stmt = _restrict(
        select(sa.func.count())
        .select_from(AssetInfo)
        .join(Asset, Asset.id == AssetInfo.asset_id)
    )

    total = int(session.execute(count_stmt).scalar_one() or 0)
    infos = session.execute(page_stmt).unique().scalars().all()

    # Tags are loaded in one follow-up query for the page's ids.
    tag_map: dict[str, list[str]] = defaultdict(list)
    page_ids: list[str] = [info.id for info in infos]
    if page_ids:
        tag_rows = session.execute(
            select(AssetInfoTag.asset_info_id, Tag.name)
            .join(Tag, Tag.name == AssetInfoTag.tag_name)
            .where(AssetInfoTag.asset_info_id.in_(page_ids))
            .order_by(AssetInfoTag.added_at)
        )
        for info_id, tag_name in tag_rows.all():
            tag_map[info_id].append(tag_name)

    return infos, tag_map, total
def fetch_asset_info_asset_and_tags(
    session: Session,
    asset_info_id: str,
    owner_id: str = "",
) -> tuple[AssetInfo, Asset, list[str]] | None:
    """Load an AssetInfo with its Asset and tag names in a single query.

    The row must also satisfy ``build_visible_owner_clause(owner_id)``.

    Returns:
        (info, asset, tags) with ``tags`` de-duplicated and ordered by tag
        name ascending, or None when no row matches.
    """
    query = (
        select(AssetInfo, Asset, Tag.name)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        # Outer joins keep an AssetInfo that has no tags at all.
        .outerjoin(AssetInfoTag, AssetInfoTag.asset_info_id == AssetInfo.id)
        .outerjoin(Tag, Tag.name == AssetInfoTag.tag_name)
        .where(
            AssetInfo.id == asset_info_id,
            build_visible_owner_clause(owner_id),
        )
        .options(noload(AssetInfo.tags))
        .order_by(Tag.name.asc())
    )

    rows = session.execute(query).all()
    if not rows:
        return None

    info, asset, _ = rows[0]
    # dict.fromkeys drops repeats while keeping first-seen order; rows without
    # a tag (NULL from the outer join) are skipped.
    unique_tags = dict.fromkeys(
        tag_name for _info, _asset, tag_name in rows if tag_name
    )
    return info, asset, list(unique_tags)
def fetch_asset_info_and_asset(
    session: Session,
    asset_info_id: str,
    owner_id: str = "",
) -> tuple[AssetInfo, Asset] | None:
    """Load an AssetInfo together with its Asset, without loading tags.

    The row must also satisfy ``build_visible_owner_clause(owner_id)``.
    Returns (info, asset), or None when no row matches.
    """
    query = (
        select(AssetInfo, Asset)
        .join(Asset, Asset.id == AssetInfo.asset_id)
        .where(
            AssetInfo.id == asset_info_id,
            build_visible_owner_clause(owner_id),
        )
        .limit(1)
        .options(noload(AssetInfo.tags))
    )
    match = session.execute(query).first()
    return (match[0], match[1]) if match else None
def update_asset_info_access_time(
    session: Session,
    asset_info_id: str,
    ts: datetime | None = None,
    only_if_newer: bool = True,
) -> None:
    """Set ``last_access_time`` on one AssetInfo.

    ``ts`` defaults to ``get_utc_now()``. With ``only_if_newer`` the update is
    limited to rows whose stored value is NULL or earlier than ``ts``.
    """
    stamp = ts or get_utc_now()
    update_stmt = sa.update(AssetInfo).where(AssetInfo.id == asset_info_id)
    if only_if_newer:
        never_accessed = AssetInfo.last_access_time.is_(None)
        accessed_earlier = AssetInfo.last_access_time < stamp
        update_stmt = update_stmt.where(sa.or_(never_accessed, accessed_earlier))
    session.execute(update_stmt.values(last_access_time=stamp))
def update_asset_info_name(
    session: Session,
    asset_info_id: str,
    name: str,
) -> None:
    """Rename an AssetInfo and set ``updated_at`` to the current time."""
    rename_stmt = (
        sa.update(AssetInfo)
        .where(AssetInfo.id == asset_info_id)
        .values(name=name, updated_at=get_utc_now())
    )
    session.execute(rename_stmt)
def update_asset_info_updated_at(
    session: Session,
    asset_info_id: str,
    ts: datetime | None = None,
) -> None:
    """Set ``updated_at`` on an AssetInfo; ``ts`` defaults to ``get_utc_now()``."""
    stamp = ts or get_utc_now()
    touch_stmt = (
        sa.update(AssetInfo)
        .where(AssetInfo.id == asset_info_id)
        .values(updated_at=stamp)
    )
    session.execute(touch_stmt)
def set_asset_info_metadata(
    session: Session,
    asset_info_id: str,
    user_metadata: dict | None = None,
) -> None:
    """Replace the user metadata of an AssetInfo and rebuild its meta rows.

    The JSON blob on the AssetInfo is overwritten (``{}`` when nothing is
    given), every existing AssetInfoMeta row for the info is deleted, and new
    rows are created from ``convert_metadata_to_rows`` for each key/value pair.

    Raises:
        ValueError: If no AssetInfo with ``asset_info_id`` exists.
    """
    target = session.get(AssetInfo, asset_info_id)
    if not target:
        raise ValueError(f"AssetInfo {asset_info_id} not found")

    target.user_metadata = user_metadata or {}
    target.updated_at = get_utc_now()
    session.flush()

    # Drop the old projection before inserting the new one.
    purge = delete(AssetInfoMeta).where(AssetInfoMeta.asset_info_id == asset_info_id)
    session.execute(purge)
    session.flush()

    if not user_metadata:
        return

    projected: list[AssetInfoMeta] = [
        AssetInfoMeta(
            asset_info_id=asset_info_id,
            key=entry["key"],
            ordinal=int(entry["ordinal"]),
            val_str=entry.get("val_str"),
            val_num=entry.get("val_num"),
            val_bool=entry.get("val_bool"),
            val_json=entry.get("val_json"),
        )
        for meta_key, meta_value in user_metadata.items()
        for entry in convert_metadata_to_rows(meta_key, meta_value)
    ]
    if projected:
        session.add_all(projected)
        session.flush()
def delete_asset_info_by_id(
    session: Session,
    asset_info_id: str,
    owner_id: str,
) -> bool:
    """Delete one AssetInfo, restricted by ``build_visible_owner_clause(owner_id)``.

    Returns:
        True when at least one row was deleted, False otherwise.
    """
    removal = sa.delete(AssetInfo).where(
        AssetInfo.id == asset_info_id,
        build_visible_owner_clause(owner_id),
    )
    affected = session.execute(removal).rowcount
    return int(affected or 0) > 0
def set_asset_info_preview(
    session: Session,
    asset_info_id: str,
    preview_asset_id: str | None = None,
) -> None:
    """Point an AssetInfo at a preview Asset, or clear the preview.

    Passing ``preview_asset_id=None`` clears ``preview_id``. ``updated_at`` is
    bumped in both cases and the session is flushed.

    Raises:
        ValueError: If the AssetInfo does not exist, or if a non-None
            ``preview_asset_id`` does not match an existing Asset.
    """
    info = session.get(AssetInfo, asset_info_id)
    if not info:
        raise ValueError(f"AssetInfo {asset_info_id} not found")

    # Only a real preview ID needs validating; None simply clears the field.
    if preview_asset_id is not None and not session.get(Asset, preview_asset_id):
        raise ValueError(f"Preview Asset {preview_asset_id} not found")

    info.preview_id = preview_asset_id
    info.updated_at = get_utc_now()
    session.flush()
def bulk_insert_asset_infos_ignore_conflicts(
    session: Session,
    rows: list[dict],
) -> None:
    """Insert AssetInfo rows in batches, ignoring conflicting rows.

    Uses ON CONFLICT DO NOTHING on (asset_id, owner_id, name).

    Each dict should have: id, owner_id, name, asset_id, preview_id,
    user_metadata, created_at, updated_at, last_access_time
    """
    if not rows:
        return

    conflict_target = [AssetInfo.asset_id, AssetInfo.owner_id, AssetInfo.name]
    statement = sqlite.insert(AssetInfo).on_conflict_do_nothing(
        index_elements=conflict_target
    )
    # 9 is the number of columns bound per row (see the key list above).
    batch_size = calculate_rows_per_statement(9)
    for batch in iter_chunks(rows, batch_size):
        session.execute(statement, batch)
def get_asset_info_ids_by_ids(
    session: Session,
    info_ids: list[str],
) -> set[str]:
    """Return the subset of ``info_ids`` that exist as AssetInfo rows.

    The lookup is issued in chunks of ``MAX_BIND_PARAMS`` IDs.
    """
    existing: set[str] = set()
    if info_ids:
        for batch in iter_chunks(info_ids, MAX_BIND_PARAMS):
            lookup = select(AssetInfo.id).where(AssetInfo.id.in_(batch))
            existing |= set(session.execute(lookup).scalars().all())
    return existing

File diff suppressed because it is too large Load Diff

View File

@ -1,451 +0,0 @@
import os
from typing import NamedTuple, Sequence
import sqlalchemy as sa
from sqlalchemy import select
from sqlalchemy.dialects import sqlite
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState, AssetInfo
from app.assets.database.queries.common import (
MAX_BIND_PARAMS,
calculate_rows_per_statement,
iter_chunks,
)
from app.assets.helpers import escape_sql_like_string
class CacheStateRow(NamedTuple):
    """Row from cache state query with joined asset data."""

    # Primary key of the AssetCacheState row.
    state_id: int
    # Path stored on the cache state.
    file_path: str
    # Stored modification time in nanoseconds; may be NULL in the database.
    mtime_ns: int | None
    # Value of the cache state's needs_verify flag.
    needs_verify: bool
    # ID of the Asset the cache state points at.
    asset_id: str
    # Hash of the joined Asset; None when the asset has no hash.
    asset_hash: str | None
    # Size of the joined Asset in bytes.
    size_bytes: int
def list_cache_states_by_asset_id(
    session: Session, *, asset_id: str
) -> Sequence[AssetCacheState]:
    """Return every AssetCacheState for ``asset_id``, ordered by ascending id."""
    query = (
        select(AssetCacheState)
        .where(AssetCacheState.asset_id == asset_id)
        .order_by(AssetCacheState.id.asc())
    )
    return session.execute(query).scalars().all()
def upsert_cache_state(
    session: Session,
    asset_id: str,
    file_path: str,
    mtime_ns: int,
) -> tuple[bool, bool]:
    """Insert or refresh the cache state stored for ``file_path``.

    An insert is attempted first, with ON CONFLICT DO NOTHING on ``file_path``.
    If the path already has a row, that row is updated only when its asset_id
    or mtime differs, its mtime is NULL, or it is flagged as missing; the
    update also clears ``is_missing``.

    Returns:
        ``(created, updated)``: ``(True, False)`` when a new row was inserted,
        otherwise ``(False, <whether the existing row was changed>)``.
    """
    mtime = int(mtime_ns)

    insert_stmt = (
        sqlite.insert(AssetCacheState)
        .values(
            asset_id=asset_id,
            file_path=file_path,
            mtime_ns=mtime,
            is_missing=False,
        )
        .on_conflict_do_nothing(index_elements=[AssetCacheState.file_path])
    )
    if int(session.execute(insert_stmt).rowcount or 0) > 0:
        return True, False

    # Skip no-op writes: only touch the row when something actually differs.
    row_is_stale = sa.or_(
        AssetCacheState.asset_id != asset_id,
        AssetCacheState.mtime_ns.is_(None),
        AssetCacheState.mtime_ns != mtime,
        AssetCacheState.is_missing == True,  # noqa: E712
    )
    update_stmt = (
        sa.update(AssetCacheState)
        .where(AssetCacheState.file_path == file_path, row_is_stale)
        .values(asset_id=asset_id, mtime_ns=mtime, is_missing=False)
    )
    changed = int(session.execute(update_stmt).rowcount or 0) > 0
    return False, changed
def mark_cache_states_missing_outside_prefixes(
    session: Session, valid_prefixes: list[str]
) -> int:
    """Flag cache states whose path lies under none of ``valid_prefixes``.

    Rows are soft-deleted by setting ``is_missing=True``; nothing is removed.
    Rows already flagged as missing are left alone.

    Args:
        session: Database session
        valid_prefixes: Directory prefixes considered valid. A trailing
            path separator is appended when absent before matching.

    Returns:
        Number of cache states marked as missing (0 when no prefixes are given)
    """
    if not valid_prefixes:
        return 0

    under_valid_root = []
    for prefix in valid_prefixes:
        root = prefix if prefix.endswith(os.sep) else prefix + os.sep
        pattern, escape_char = escape_sql_like_string(root)
        under_valid_root.append(
            AssetCacheState.file_path.like(pattern + "%", escape=escape_char)
        )

    soft_delete = (
        sa.update(AssetCacheState)
        .where(
            ~sa.or_(*under_valid_root),
            AssetCacheState.is_missing == False,  # noqa: E712
        )
        .values(is_missing=True)
    )
    return session.execute(soft_delete).rowcount
def restore_cache_states_by_paths(session: Session, file_paths: list[str]) -> int:
    """Restore cache states that were previously marked as missing.

    Called when a file path is re-scanned and found to exist.

    The update is issued in chunks of ``MAX_BIND_PARAMS`` paths so that a
    large ``file_paths`` list cannot exceed the database's limit on bound
    parameters per statement.

    Args:
        session: Database session
        file_paths: List of file paths that exist and should be restored

    Returns:
        Number of cache states restored, summed over all chunks
    """
    if not file_paths:
        return 0

    restored = 0
    for chunk in iter_chunks(file_paths, MAX_BIND_PARAMS):
        result = session.execute(
            sa.update(AssetCacheState)
            .where(AssetCacheState.file_path.in_(chunk))
            .where(AssetCacheState.is_missing == True)  # noqa: E712
            .values(is_missing=False)
        )
        restored += int(result.rowcount or 0)
    return restored
def get_unreferenced_unhashed_asset_ids(session: Session) -> list[str]:
    """Find unhashed assets that no longer have a live cache state.

    An asset qualifies when its hash is NULL and there is no AssetCacheState
    row for it with ``is_missing`` false, i.e. it has no cache states at
    all or every one of them is marked missing.

    Returns:
        List of matching asset IDs
    """
    live_state = (
        sa.select(sa.literal(1))
        .where(
            AssetCacheState.asset_id == Asset.id,
            AssetCacheState.is_missing == False,  # noqa: E712
        )
        .correlate(Asset)
        .exists()
    )
    orphans = sa.select(Asset.id).where(Asset.hash.is_(None), ~live_state)
    return [asset_id for (asset_id,) in session.execute(orphans).all()]
def delete_assets_by_ids(session: Session, asset_ids: list[str]) -> int:
    """Delete assets and their AssetInfos by ID.

    Work is done in chunks of ``MAX_BIND_PARAMS`` IDs so that a large
    ``asset_ids`` list cannot exceed the database's limit on bound parameters
    per statement. Within each chunk the AssetInfo rows are deleted before
    the Asset rows they reference.

    Args:
        session: Database session
        asset_ids: List of asset IDs to delete

    Returns:
        Number of assets deleted, summed over all chunks
    """
    if not asset_ids:
        return 0

    deleted = 0
    for chunk in iter_chunks(asset_ids, MAX_BIND_PARAMS):
        session.execute(sa.delete(AssetInfo).where(AssetInfo.asset_id.in_(chunk)))
        result = session.execute(sa.delete(Asset).where(Asset.id.in_(chunk)))
        deleted += int(result.rowcount or 0)
    return deleted
def get_cache_states_for_prefixes(
    session: Session,
    prefixes: list[str],
    *,
    include_missing: bool = False,
) -> list[CacheStateRow]:
    """Fetch cache states located under any of the given directory prefixes.

    Each prefix is made absolute and given a trailing path separator before
    being used as an escaped LIKE prefix on ``file_path``.

    Args:
        session: Database session
        prefixes: Directory prefixes to match
        include_missing: If False (default), rows flagged ``is_missing`` are
            excluded

    Returns:
        Cache state rows joined with their asset's hash and size, ordered by
        asset_id then cache state id. A NULL size is reported as 0.
    """
    if not prefixes:
        return []

    path_filters = []
    for prefix in prefixes:
        root = os.path.abspath(prefix)
        root = root if root.endswith(os.sep) else root + os.sep
        pattern, escape_char = escape_sql_like_string(root)
        path_filters.append(
            AssetCacheState.file_path.like(pattern + "%", escape=escape_char)
        )

    stmt = (
        sa.select(
            AssetCacheState.id,
            AssetCacheState.file_path,
            AssetCacheState.mtime_ns,
            AssetCacheState.needs_verify,
            AssetCacheState.asset_id,
            Asset.hash,
            Asset.size_bytes,
        )
        .join(Asset, Asset.id == AssetCacheState.asset_id)
        .where(sa.or_(*path_filters))
    )
    if not include_missing:
        stmt = stmt.where(AssetCacheState.is_missing == False)  # noqa: E712
    stmt = stmt.order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc())

    return [
        CacheStateRow(
            state_id=state_id,
            file_path=path,
            mtime_ns=mtime,
            needs_verify=verify,
            asset_id=asset_id,
            asset_hash=digest,
            size_bytes=int(size or 0),
        )
        for state_id, path, mtime, verify, asset_id, digest, size in session.execute(
            stmt
        ).all()
    ]
def bulk_update_needs_verify(session: Session, state_ids: list[int], value: bool) -> int:
    """Set needs_verify flag for multiple cache states.

    The update is issued in chunks of ``MAX_BIND_PARAMS`` IDs so that a large
    ``state_ids`` list cannot exceed the database's limit on bound parameters
    per statement.

    Returns: Number of rows updated, summed over all chunks
    """
    if not state_ids:
        return 0

    updated = 0
    for chunk in iter_chunks(state_ids, MAX_BIND_PARAMS):
        result = session.execute(
            sa.update(AssetCacheState)
            .where(AssetCacheState.id.in_(chunk))
            .values(needs_verify=value)
        )
        updated += int(result.rowcount or 0)
    return updated
def bulk_update_is_missing(session: Session, state_ids: list[int], value: bool) -> int:
    """Set is_missing flag for multiple cache states.

    The update is issued in chunks of ``MAX_BIND_PARAMS`` IDs so that a large
    ``state_ids`` list cannot exceed the database's limit on bound parameters
    per statement.

    Returns: Number of rows updated, summed over all chunks
    """
    if not state_ids:
        return 0

    updated = 0
    for chunk in iter_chunks(state_ids, MAX_BIND_PARAMS):
        result = session.execute(
            sa.update(AssetCacheState)
            .where(AssetCacheState.id.in_(chunk))
            .values(is_missing=value)
        )
        updated += int(result.rowcount or 0)
    return updated
def delete_cache_states_by_ids(session: Session, state_ids: list[int]) -> int:
    """Delete cache states by their IDs.

    The delete is issued in chunks of ``MAX_BIND_PARAMS`` IDs so that a large
    ``state_ids`` list cannot exceed the database's limit on bound parameters
    per statement.

    Returns: Number of rows deleted, summed over all chunks
    """
    if not state_ids:
        return 0

    deleted = 0
    for chunk in iter_chunks(state_ids, MAX_BIND_PARAMS):
        result = session.execute(
            sa.delete(AssetCacheState).where(AssetCacheState.id.in_(chunk))
        )
        deleted += int(result.rowcount or 0)
    return deleted
def delete_orphaned_seed_asset(session: Session, asset_id: str) -> bool:
    """Remove an asset and the AssetInfos that point at it.

    The AssetInfo rows for ``asset_id`` are deleted unconditionally; the Asset
    itself is deleted only if it can be loaded.

    NOTE(review): the function is named for seed assets (hash is None) but
    does not itself check the hash; callers are presumably expected to.

    Returns: True if asset was deleted, False if not found
    """
    session.execute(sa.delete(AssetInfo).where(AssetInfo.asset_id == asset_id))

    seed = session.get(Asset, asset_id)
    if not seed:
        return False
    session.delete(seed)
    return True
class UnenrichedAssetRow(NamedTuple):
    """Row for assets needing enrichment."""

    # Primary key of the AssetCacheState row.
    cache_state_id: int
    # ID of the Asset the cache state points at.
    asset_id: str
    # ID of an AssetInfo attached to that asset.
    asset_info_id: str
    # Path stored on the cache state.
    file_path: str
    # Current enrichment level of the cache state.
    enrichment_level: int
def get_unenriched_cache_states(
    session: Session,
    prefixes: list[str],
    max_level: int = 0,
    limit: int = 1000,
) -> list[UnenrichedAssetRow]:
    """Fetch non-missing cache states whose enrichment_level is <= max_level.

    Each prefix is made absolute and given a trailing path separator before
    being used as an escaped LIKE prefix on ``file_path``.

    NOTE(review): the join to AssetInfo is on asset_id only, so an asset with
    several AssetInfos yields one row per (cache state, AssetInfo) pair.

    Args:
        session: Database session
        prefixes: Directory prefixes to scan
        max_level: Maximum enrichment level to include (0=stubs, 1=metadata done)
        limit: Maximum number of rows to return

    Returns:
        Unenriched rows ordered by cache state id; empty when no prefixes
        are given
    """
    if not prefixes:
        return []

    path_filters = []
    for prefix in prefixes:
        root = os.path.abspath(prefix)
        root = root if root.endswith(os.sep) else root + os.sep
        pattern, escape_char = escape_sql_like_string(root)
        path_filters.append(
            AssetCacheState.file_path.like(pattern + "%", escape=escape_char)
        )

    stmt = (
        sa.select(
            AssetCacheState.id,
            AssetCacheState.asset_id,
            AssetInfo.id,
            AssetCacheState.file_path,
            AssetCacheState.enrichment_level,
        )
        .join(Asset, Asset.id == AssetCacheState.asset_id)
        .join(AssetInfo, AssetInfo.asset_id == Asset.id)
        .where(
            sa.or_(*path_filters),
            AssetCacheState.is_missing == False,  # noqa: E712
            AssetCacheState.enrichment_level <= max_level,
        )
        .order_by(AssetCacheState.id.asc())
        .limit(limit)
    )

    return [
        UnenrichedAssetRow(
            cache_state_id=state_id,
            asset_id=asset_id,
            asset_info_id=info_id,
            file_path=path,
            enrichment_level=level,
        )
        for state_id, asset_id, info_id, path, level in session.execute(stmt).all()
    ]
def update_enrichment_level(
    session: Session,
    cache_state_id: int,
    level: int,
) -> None:
    """Store ``level`` as the enrichment_level of a single cache state."""
    stmt = sa.update(AssetCacheState).where(AssetCacheState.id == cache_state_id)
    session.execute(stmt.values(enrichment_level=level))
def bulk_update_enrichment_level(
    session: Session,
    cache_state_ids: list[int],
    level: int,
) -> int:
    """Update enrichment level for multiple cache states.

    The update is issued in chunks of ``MAX_BIND_PARAMS`` IDs so that a large
    ``cache_state_ids`` list cannot exceed the database's limit on bound
    parameters per statement.

    Returns: Number of rows updated, summed over all chunks
    """
    if not cache_state_ids:
        return 0

    updated = 0
    for chunk in iter_chunks(cache_state_ids, MAX_BIND_PARAMS):
        result = session.execute(
            sa.update(AssetCacheState)
            .where(AssetCacheState.id.in_(chunk))
            .values(enrichment_level=level)
        )
        updated += int(result.rowcount or 0)
    return updated
def bulk_insert_cache_states_ignore_conflicts(
    session: Session,
    rows: list[dict],
) -> None:
    """Insert cache state rows in batches, ignoring rows whose file_path exists.

    Uses ON CONFLICT DO NOTHING on file_path.

    Each dict should have: asset_id, file_path, mtime_ns
    ``is_missing`` is forced to False on every inserted row.
    """
    if not rows:
        return

    payload = [dict(row, is_missing=False) for row in rows]
    statement = sqlite.insert(AssetCacheState).on_conflict_do_nothing(
        index_elements=[AssetCacheState.file_path]
    )
    # 4 columns are bound per row: asset_id, file_path, mtime_ns, is_missing.
    batch_size = calculate_rows_per_statement(4)
    for batch in iter_chunks(payload, batch_size):
        session.execute(statement, batch)
def get_cache_states_by_paths_and_asset_ids(
    session: Session,
    path_to_asset: dict[str, str],
) -> set[str]:
    """Query cache states to find paths where our asset_id won the insert.

    Each path is compared against its own expected asset_id. Filtering with
    two independent ``IN`` lists would also accept a path whose row belongs
    to a different asset from the same chunk, and would bind twice as many
    parameters per statement.

    Args:
        session: Database session
        path_to_asset: Mapping of file_path -> asset_id we tried to insert

    Returns:
        Set of file_paths whose stored asset_id equals the one we tried to
        insert
    """
    if not path_to_asset:
        return set()

    winners: set[str] = set()
    for chunk in iter_chunks(list(path_to_asset), MAX_BIND_PARAMS):
        result = session.execute(
            select(AssetCacheState.file_path, AssetCacheState.asset_id).where(
                AssetCacheState.file_path.in_(chunk)
            )
        )
        # Pairwise check: the stored asset must be the one mapped to this path.
        winners.update(
            file_path
            for file_path, stored_asset_id in result.all()
            if path_to_asset.get(file_path) == stored_asset_id
        )
    return winners

View File

@ -4,7 +4,7 @@ from typing import Iterable
import sqlalchemy as sa import sqlalchemy as sa
from app.assets.database.models import AssetInfo from app.assets.database.models import AssetReference
MAX_BIND_PARAMS = 800 MAX_BIND_PARAMS = 800
@ -30,8 +30,11 @@ def iter_row_chunks(rows: list[dict], cols_per_row: int) -> Iterable[list[dict]]
def build_visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement: def build_visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement:
"""Build owner visibility predicate for reads. Owner-less rows are visible to everyone.""" """Build owner visibility predicate for reads.
Owner-less rows are visible to everyone.
"""
owner_id = (owner_id or "").strip() owner_id = (owner_id or "").strip()
if owner_id == "": if owner_id == "":
return AssetInfo.owner_id == "" return AssetReference.owner_id == ""
return AssetInfo.owner_id.in_(["", owner_id]) return AssetReference.owner_id.in_(["", owner_id])

View File

@ -6,7 +6,12 @@ from sqlalchemy.dialects import sqlite
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import AssetInfo, AssetInfoMeta, AssetInfoTag, Tag from app.assets.database.models import (
AssetReference,
AssetReferenceMeta,
AssetReferenceTag,
Tag,
)
from app.assets.database.queries.common import ( from app.assets.database.queries.common import (
build_visible_owner_clause, build_visible_owner_clause,
iter_row_chunks, iter_row_chunks,
@ -47,22 +52,22 @@ def ensure_tags_exist(
session.execute(ins) session.execute(ins)
def get_asset_tags(session: Session, asset_info_id: str) -> list[str]: def get_reference_tags(session: Session, reference_id: str) -> list[str]:
return [ return [
tag_name tag_name
for (tag_name,) in ( for (tag_name,) in (
session.execute( session.execute(
select(AssetInfoTag.tag_name).where( select(AssetReferenceTag.tag_name).where(
AssetInfoTag.asset_info_id == asset_info_id AssetReferenceTag.asset_reference_id == reference_id
) )
) )
).all() ).all()
] ]
def set_asset_info_tags( def set_reference_tags(
session: Session, session: Session,
asset_info_id: str, reference_id: str,
tags: Sequence[str], tags: Sequence[str],
origin: str = "manual", origin: str = "manual",
) -> SetTagsDict: ) -> SetTagsDict:
@ -72,8 +77,8 @@ def set_asset_info_tags(
tag_name tag_name
for (tag_name,) in ( for (tag_name,) in (
session.execute( session.execute(
select(AssetInfoTag.tag_name).where( select(AssetReferenceTag.tag_name).where(
AssetInfoTag.asset_info_id == asset_info_id AssetReferenceTag.asset_reference_id == reference_id
) )
) )
).all() ).all()
@ -86,8 +91,8 @@ def set_asset_info_tags(
ensure_tags_exist(session, to_add, tag_type="user") ensure_tags_exist(session, to_add, tag_type="user")
session.add_all( session.add_all(
[ [
AssetInfoTag( AssetReferenceTag(
asset_info_id=asset_info_id, asset_reference_id=reference_id,
tag_name=t, tag_name=t,
origin=origin, origin=origin,
added_at=get_utc_now(), added_at=get_utc_now(),
@ -99,9 +104,9 @@ def set_asset_info_tags(
if to_remove: if to_remove:
session.execute( session.execute(
delete(AssetInfoTag).where( delete(AssetReferenceTag).where(
AssetInfoTag.asset_info_id == asset_info_id, AssetReferenceTag.asset_reference_id == reference_id,
AssetInfoTag.tag_name.in_(to_remove), AssetReferenceTag.tag_name.in_(to_remove),
) )
) )
session.flush() session.flush()
@ -109,22 +114,22 @@ def set_asset_info_tags(
return {"added": to_add, "removed": to_remove, "total": desired} return {"added": to_add, "removed": to_remove, "total": desired}
def add_tags_to_asset_info( def add_tags_to_reference(
session: Session, session: Session,
asset_info_id: str, reference_id: str,
tags: Sequence[str], tags: Sequence[str],
origin: str = "manual", origin: str = "manual",
create_if_missing: bool = True, create_if_missing: bool = True,
asset_info_row: AssetInfo | None = None, reference_row: AssetReference | None = None,
) -> AddTagsDict: ) -> AddTagsDict:
if not asset_info_row: if not reference_row:
info = session.get(AssetInfo, asset_info_id) ref = session.get(AssetReference, reference_id)
if not info: if not ref:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
norm = normalize_tags(tags) norm = normalize_tags(tags)
if not norm: if not norm:
total = get_asset_tags(session, asset_info_id=asset_info_id) total = get_reference_tags(session, reference_id=reference_id)
return {"added": [], "already_present": [], "total_tags": total} return {"added": [], "already_present": [], "total_tags": total}
if create_if_missing: if create_if_missing:
@ -134,8 +139,8 @@ def add_tags_to_asset_info(
tag_name tag_name
for (tag_name,) in ( for (tag_name,) in (
session.execute( session.execute(
sa.select(AssetInfoTag.tag_name).where( sa.select(AssetReferenceTag.tag_name).where(
AssetInfoTag.asset_info_id == asset_info_id AssetReferenceTag.asset_reference_id == reference_id
) )
) )
).all() ).all()
@ -149,8 +154,8 @@ def add_tags_to_asset_info(
try: try:
session.add_all( session.add_all(
[ [
AssetInfoTag( AssetReferenceTag(
asset_info_id=asset_info_id, asset_reference_id=reference_id,
tag_name=t, tag_name=t,
origin=origin, origin=origin,
added_at=get_utc_now(), added_at=get_utc_now(),
@ -162,7 +167,7 @@ def add_tags_to_asset_info(
except IntegrityError: except IntegrityError:
nested.rollback() nested.rollback()
after = set(get_asset_tags(session, asset_info_id=asset_info_id)) after = set(get_reference_tags(session, reference_id=reference_id))
return { return {
"added": sorted(((after - current) & want)), "added": sorted(((after - current) & want)),
"already_present": sorted(want & current), "already_present": sorted(want & current),
@ -170,26 +175,26 @@ def add_tags_to_asset_info(
} }
def remove_tags_from_asset_info( def remove_tags_from_reference(
session: Session, session: Session,
asset_info_id: str, reference_id: str,
tags: Sequence[str], tags: Sequence[str],
) -> RemoveTagsDict: ) -> RemoveTagsDict:
info = session.get(AssetInfo, asset_info_id) ref = session.get(AssetReference, reference_id)
if not info: if not ref:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
norm = normalize_tags(tags) norm = normalize_tags(tags)
if not norm: if not norm:
total = get_asset_tags(session, asset_info_id=asset_info_id) total = get_reference_tags(session, reference_id=reference_id)
return {"removed": [], "not_present": [], "total_tags": total} return {"removed": [], "not_present": [], "total_tags": total}
existing = { existing = {
tag_name tag_name
for (tag_name,) in ( for (tag_name,) in (
session.execute( session.execute(
sa.select(AssetInfoTag.tag_name).where( sa.select(AssetReferenceTag.tag_name).where(
AssetInfoTag.asset_info_id == asset_info_id AssetReferenceTag.asset_reference_id == reference_id
) )
) )
).all() ).all()
@ -200,14 +205,14 @@ def remove_tags_from_asset_info(
if to_remove: if to_remove:
session.execute( session.execute(
delete(AssetInfoTag).where( delete(AssetReferenceTag).where(
AssetInfoTag.asset_info_id == asset_info_id, AssetReferenceTag.asset_reference_id == reference_id,
AssetInfoTag.tag_name.in_(to_remove), AssetReferenceTag.tag_name.in_(to_remove),
) )
) )
session.flush() session.flush()
total = get_asset_tags(session, asset_info_id=asset_info_id) total = get_reference_tags(session, reference_id=reference_id)
return {"removed": to_remove, "not_present": not_present, "total_tags": total} return {"removed": to_remove, "not_present": not_present, "total_tags": total}
@ -218,29 +223,32 @@ def add_missing_tag_for_asset_id(
) -> None: ) -> None:
select_rows = ( select_rows = (
sa.select( sa.select(
AssetInfo.id.label("asset_info_id"), AssetReference.id.label("asset_reference_id"),
sa.literal("missing").label("tag_name"), sa.literal("missing").label("tag_name"),
sa.literal(origin).label("origin"), sa.literal(origin).label("origin"),
sa.literal(get_utc_now()).label("added_at"), sa.literal(get_utc_now()).label("added_at"),
) )
.where(AssetInfo.asset_id == asset_id) .where(AssetReference.asset_id == asset_id)
.where( .where(
sa.not_( sa.not_(
sa.exists().where( sa.exists().where(
(AssetInfoTag.asset_info_id == AssetInfo.id) (AssetReferenceTag.asset_reference_id == AssetReference.id)
& (AssetInfoTag.tag_name == "missing") & (AssetReferenceTag.tag_name == "missing")
) )
) )
) )
) )
session.execute( session.execute(
sqlite.insert(AssetInfoTag) sqlite.insert(AssetReferenceTag)
.from_select( .from_select(
["asset_info_id", "tag_name", "origin", "added_at"], ["asset_reference_id", "tag_name", "origin", "added_at"],
select_rows, select_rows,
) )
.on_conflict_do_nothing( .on_conflict_do_nothing(
index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name] index_elements=[
AssetReferenceTag.asset_reference_id,
AssetReferenceTag.tag_name,
]
) )
) )
@ -250,11 +258,11 @@ def remove_missing_tag_for_asset_id(
asset_id: str, asset_id: str,
) -> None: ) -> None:
session.execute( session.execute(
sa.delete(AssetInfoTag).where( sa.delete(AssetReferenceTag).where(
AssetInfoTag.asset_info_id.in_( AssetReferenceTag.asset_reference_id.in_(
sa.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id) sa.select(AssetReference.id).where(AssetReference.asset_id == asset_id)
), ),
AssetInfoTag.tag_name == "missing", AssetReferenceTag.tag_name == "missing",
) )
) )
@ -270,13 +278,13 @@ def list_tags_with_usage(
) -> tuple[list[tuple[str, str, int]], int]: ) -> tuple[list[tuple[str, str, int]], int]:
counts_sq = ( counts_sq = (
select( select(
AssetInfoTag.tag_name.label("tag_name"), AssetReferenceTag.tag_name.label("tag_name"),
func.count(AssetInfoTag.asset_info_id).label("cnt"), func.count(AssetReferenceTag.asset_reference_id).label("cnt"),
) )
.select_from(AssetInfoTag) .select_from(AssetReferenceTag)
.join(AssetInfo, AssetInfo.id == AssetInfoTag.asset_info_id) .join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
.where(build_visible_owner_clause(owner_id)) .where(build_visible_owner_clause(owner_id))
.group_by(AssetInfoTag.tag_name) .group_by(AssetReferenceTag.tag_name)
.subquery() .subquery()
) )
@ -308,7 +316,9 @@ def list_tags_with_usage(
total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc)) total_q = total_q.where(Tag.name.like(escaped + "%", escape=esc))
if not include_zero: if not include_zero:
total_q = total_q.where( total_q = total_q.where(
Tag.name.in_(select(AssetInfoTag.tag_name).group_by(AssetInfoTag.tag_name)) Tag.name.in_(
select(AssetReferenceTag.tag_name).group_by(AssetReferenceTag.tag_name)
)
) )
rows = (session.execute(q.limit(limit).offset(offset))).all() rows = (session.execute(q.limit(limit).offset(offset))).all()
@ -323,26 +333,31 @@ def bulk_insert_tags_and_meta(
tag_rows: list[dict], tag_rows: list[dict],
meta_rows: list[dict], meta_rows: list[dict],
) -> None: ) -> None:
"""Batch insert into asset_info_tags and asset_info_meta with ON CONFLICT DO NOTHING. """Batch insert into asset_reference_tags and asset_reference_meta.
Uses ON CONFLICT DO NOTHING.
Args: Args:
session: Database session session: Database session
tag_rows: List of dicts with keys: asset_info_id, tag_name, origin, added_at tag_rows: Dicts with: asset_reference_id, tag_name, origin, added_at
meta_rows: List of dicts with keys: asset_info_id, key, ordinal, val_str, val_num, val_bool, val_json meta_rows: Dicts with: asset_reference_id, key, ordinal, val_*
""" """
if tag_rows: if tag_rows:
ins_tags = sqlite.insert(AssetInfoTag).on_conflict_do_nothing( ins_tags = sqlite.insert(AssetReferenceTag).on_conflict_do_nothing(
index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name] index_elements=[
AssetReferenceTag.asset_reference_id,
AssetReferenceTag.tag_name,
]
) )
for chunk in iter_row_chunks(tag_rows, cols_per_row=4): for chunk in iter_row_chunks(tag_rows, cols_per_row=4):
session.execute(ins_tags, chunk) session.execute(ins_tags, chunk)
if meta_rows: if meta_rows:
ins_meta = sqlite.insert(AssetInfoMeta).on_conflict_do_nothing( ins_meta = sqlite.insert(AssetReferenceMeta).on_conflict_do_nothing(
index_elements=[ index_elements=[
AssetInfoMeta.asset_info_id, AssetReferenceMeta.asset_reference_id,
AssetInfoMeta.key, AssetReferenceMeta.key,
AssetInfoMeta.ordinal, AssetReferenceMeta.ordinal,
] ]
) )
for chunk in iter_row_chunks(meta_rows, cols_per_row=7): for chunk in iter_row_chunks(meta_rows, cols_per_row=7):

View File

@ -31,8 +31,9 @@ ALLOWED_ROOTS: tuple[Literal["models", "input", "output"], ...] = (
def escape_sql_like_string(s: str, escape: str = "!") -> tuple[str, str]: def escape_sql_like_string(s: str, escape: str = "!") -> tuple[str, str]:
"""Escapes %, _ and the escape char itself in a LIKE prefix. """Escapes %, _ and the escape char in a LIKE prefix.
Returns (escaped_prefix, escape_char). Caller should append '%' and pass escape=escape_char to .like().
Returns (escaped_prefix, escape_char).
""" """
s = s.replace(escape, escape + escape) # escape the escape char first s = s.replace(escape, escape + escape) # escape the escape char first
s = s.replace("%", escape + "%").replace("_", escape + "_") # escape LIKE wildcards s = s.replace("%", escape + "%").replace("_", escape + "_") # escape LIKE wildcards

View File

@ -10,13 +10,16 @@ from app.assets.database.queries import (
bulk_update_enrichment_level, bulk_update_enrichment_level,
bulk_update_is_missing, bulk_update_is_missing,
bulk_update_needs_verify, bulk_update_needs_verify,
delete_cache_states_by_ids,
delete_orphaned_seed_asset, delete_orphaned_seed_asset,
delete_references_by_ids,
ensure_tags_exist, ensure_tags_exist,
get_cache_states_for_prefixes, get_asset_by_hash,
get_unenriched_cache_states, get_references_for_prefixes,
get_unenriched_references,
reassign_asset_references,
remove_missing_tag_for_asset_id, remove_missing_tag_for_asset_id,
set_asset_info_metadata, set_reference_metadata,
update_asset_hash_and_mime,
) )
from app.assets.services.bulk_ingest import ( from app.assets.services.bulk_ingest import (
SeedAssetSpec, SeedAssetSpec,
@ -38,8 +41,8 @@ from app.assets.services.path_utils import (
from app.database.db import create_session, dependencies_available from app.database.db import create_session, dependencies_available
class _StateInfo(TypedDict): class _RefInfo(TypedDict):
sid: int ref_id: str
fp: str fp: str
exists: bool exists: bool
fast_ok: bool fast_ok: bool
@ -49,7 +52,7 @@ class _StateInfo(TypedDict):
class _AssetAccumulator(TypedDict): class _AssetAccumulator(TypedDict):
hash: str | None hash: str | None
size_db: int size_db: int
states: list[_StateInfo] refs: list[_RefInfo]
RootType = Literal["models", "input", "output"] RootType = Literal["models", "input", "output"]
@ -97,17 +100,17 @@ def collect_models_files() -> list[str]:
return out return out
def sync_cache_states_with_filesystem( def sync_references_with_filesystem(
session, session,
root: RootType, root: RootType,
collect_existing_paths: bool = False, collect_existing_paths: bool = False,
update_missing_tags: bool = False, update_missing_tags: bool = False,
) -> set[str] | None: ) -> set[str] | None:
"""Reconcile cache states with filesystem for a root. """Reconcile asset references with filesystem for a root.
- Toggle needs_verify per state using fast mtime/size check - Toggle needs_verify per reference using fast mtime/size check
- For hashed assets with at least one fast-ok state in this root: delete stale missing states - For hashed assets with at least one fast-ok ref: delete stale missing refs
- For seed assets with all states missing: delete Asset and its AssetInfos - For seed assets with all refs missing: delete Asset and its references
- Optionally add/remove 'missing' tags based on fast-ok in this root - Optionally add/remove 'missing' tags based on fast-ok in this root
- Optionally return surviving absolute paths - Optionally return surviving absolute paths
@ -124,7 +127,7 @@ def sync_cache_states_with_filesystem(
if not prefixes: if not prefixes:
return set() if collect_existing_paths else None return set() if collect_existing_paths else None
rows = get_cache_states_for_prefixes( rows = get_references_for_prefixes(
session, prefixes, include_missing=update_missing_tags session, prefixes, include_missing=update_missing_tags
) )
@ -132,7 +135,7 @@ def sync_cache_states_with_filesystem(
for row in rows: for row in rows:
acc = by_asset.get(row.asset_id) acc = by_asset.get(row.asset_id)
if acc is None: if acc is None:
acc = {"hash": row.asset_hash, "size_db": row.size_bytes, "states": []} acc = {"hash": row.asset_hash, "size_db": row.size_bytes, "refs": []}
by_asset[row.asset_id] = acc by_asset[row.asset_id] = acc
fast_ok = False fast_ok = False
@ -152,9 +155,9 @@ def sync_cache_states_with_filesystem(
exists = False exists = False
logging.debug("OSError checking %s: %s", row.file_path, e) logging.debug("OSError checking %s: %s", row.file_path, e)
acc["states"].append( acc["refs"].append(
{ {
"sid": row.state_id, "ref_id": row.reference_id,
"fp": row.file_path, "fp": row.file_path,
"exists": exists, "exists": exists,
"fast_ok": fast_ok, "fast_ok": fast_ok,
@ -162,61 +165,63 @@ def sync_cache_states_with_filesystem(
} }
) )
to_set_verify: list[int] = [] to_set_verify: list[str] = []
to_clear_verify: list[int] = [] to_clear_verify: list[str] = []
stale_state_ids: list[int] = [] stale_ref_ids: list[str] = []
to_mark_missing: list[int] = [] to_mark_missing: list[str] = []
to_clear_missing: list[int] = [] to_clear_missing: list[str] = []
survivors: set[str] = set() survivors: set[str] = set()
for aid, acc in by_asset.items(): for aid, acc in by_asset.items():
a_hash = acc["hash"] a_hash = acc["hash"]
states = acc["states"] refs = acc["refs"]
any_fast_ok = any(s["fast_ok"] for s in states) any_fast_ok = any(r["fast_ok"] for r in refs)
all_missing = all(not s["exists"] for s in states) all_missing = all(not r["exists"] for r in refs)
for s in states: for r in refs:
if not s["exists"]: if not r["exists"]:
to_mark_missing.append(s["sid"]) to_mark_missing.append(r["ref_id"])
continue continue
if s["fast_ok"]: if r["fast_ok"]:
to_clear_missing.append(s["sid"]) to_clear_missing.append(r["ref_id"])
if s["needs_verify"]: if r["needs_verify"]:
to_clear_verify.append(s["sid"]) to_clear_verify.append(r["ref_id"])
if not s["fast_ok"] and not s["needs_verify"]: if not r["fast_ok"] and not r["needs_verify"]:
to_set_verify.append(s["sid"]) to_set_verify.append(r["ref_id"])
if a_hash is None: if a_hash is None:
if states and all_missing: if refs and all_missing:
delete_orphaned_seed_asset(session, aid) delete_orphaned_seed_asset(session, aid)
else: else:
for s in states: for r in refs:
if s["exists"]: if r["exists"]:
survivors.add(os.path.abspath(s["fp"])) survivors.add(os.path.abspath(r["fp"]))
continue continue
if any_fast_ok: if any_fast_ok:
for s in states: for r in refs:
if not s["exists"]: if not r["exists"]:
stale_state_ids.append(s["sid"]) stale_ref_ids.append(r["ref_id"])
if update_missing_tags: if update_missing_tags:
try: try:
remove_missing_tag_for_asset_id(session, asset_id=aid) remove_missing_tag_for_asset_id(session, asset_id=aid)
except Exception as e: except Exception as e:
logging.warning("Failed to remove missing tag for asset %s: %s", aid, e) logging.warning(
"Failed to remove missing tag for asset %s: %s", aid, e
)
elif update_missing_tags: elif update_missing_tags:
try: try:
add_missing_tag_for_asset_id(session, asset_id=aid, origin="automatic") add_missing_tag_for_asset_id(session, asset_id=aid, origin="automatic")
except Exception as e: except Exception as e:
logging.warning("Failed to add missing tag for asset %s: %s", aid, e) logging.warning("Failed to add missing tag for asset %s: %s", aid, e)
for s in states: for r in refs:
if s["exists"]: if r["exists"]:
survivors.add(os.path.abspath(s["fp"])) survivors.add(os.path.abspath(r["fp"]))
delete_cache_states_by_ids(session, stale_state_ids) delete_references_by_ids(session, stale_ref_ids)
stale_set = set(stale_state_ids) stale_set = set(stale_ref_ids)
to_mark_missing = [sid for sid in to_mark_missing if sid not in stale_set] to_mark_missing = [ref_id for ref_id in to_mark_missing if ref_id not in stale_set]
bulk_update_is_missing(session, to_mark_missing, value=True) bulk_update_is_missing(session, to_mark_missing, value=True)
bulk_update_is_missing(session, to_clear_missing, value=False) bulk_update_is_missing(session, to_clear_missing, value=False)
bulk_update_needs_verify(session, to_set_verify, value=True) bulk_update_needs_verify(session, to_set_verify, value=True)
@ -226,13 +231,13 @@ def sync_cache_states_with_filesystem(
def sync_root_safely(root: RootType) -> set[str]: def sync_root_safely(root: RootType) -> set[str]:
"""Sync a single root's cache states with the filesystem. """Sync a single root's references with the filesystem.
Returns survivors (existing paths) or empty set on failure. Returns survivors (existing paths) or empty set on failure.
""" """
try: try:
with create_session() as sess: with create_session() as sess:
survivors = sync_cache_states_with_filesystem( survivors = sync_references_with_filesystem(
sess, sess,
root, root,
collect_existing_paths=True, collect_existing_paths=True,
@ -246,7 +251,7 @@ def sync_root_safely(root: RootType) -> set[str]:
def mark_missing_outside_prefixes_safely(prefixes: list[str]) -> int: def mark_missing_outside_prefixes_safely(prefixes: list[str]) -> int:
"""Mark cache states as missing when outside the given prefixes. """Mark references as missing when outside the given prefixes.
This is a non-destructive soft-delete. Returns count marked or 0 on failure. This is a non-destructive soft-delete. Returns count marked or 0 on failure.
""" """
@ -283,8 +288,8 @@ def build_asset_specs(
Args: Args:
paths: List of file paths to process paths: List of file paths to process
existing_paths: Set of paths that already exist in the database existing_paths: Set of paths that already exist in the database
enable_metadata_extraction: If True, extract tier 1 & 2 metadata from files enable_metadata_extraction: If True, extract tier 1 & 2 metadata
compute_hashes: If True, compute blake3 hashes for each file (slow for large files) compute_hashes: If True, compute blake3 hashes (slow for large files)
""" """
specs: list[SeedAssetSpec] = [] specs: list[SeedAssetSpec] = []
tag_pool: set[str] = set() tag_pool: set[str] = set()
@ -398,7 +403,7 @@ def build_stub_specs(
def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int: def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
"""Insert asset specs into database, returning count of created infos.""" """Insert asset specs into database, returning count of created refs."""
if not specs: if not specs:
return 0 return 0
with create_session() as sess: with create_session() as sess:
@ -406,7 +411,7 @@ def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
ensure_tags_exist(sess, tag_pool, tag_type="user") ensure_tags_exist(sess, tag_pool, tag_type="user")
result = batch_insert_seed_assets(sess, specs=specs, owner_id="") result = batch_insert_seed_assets(sess, specs=specs, owner_id="")
sess.commit() sess.commit()
return result.inserted_infos return result.inserted_refs
def seed_assets( def seed_assets(
@ -419,10 +424,10 @@ def seed_assets(
Args: Args:
roots: Tuple of root types to scan (models, input, output) roots: Tuple of root types to scan (models, input, output)
enable_logging: If True, log progress and completion messages enable_logging: If True, log progress and completion messages
compute_hashes: If True, compute blake3 hashes for each file (slow for large files) compute_hashes: If True, compute blake3 hashes (slow for large files)
Note: This function does not mark missing assets. Call mark_missing_outside_prefixes_safely Note: This function does not mark missing assets.
separately if cleanup is needed. Call mark_missing_outside_prefixes_safely separately if cleanup is needed.
""" """
if not dependencies_available(): if not dependencies_available():
if enable_logging: if enable_logging:
@ -443,7 +448,8 @@ def seed_assets(
if enable_logging: if enable_logging:
logging.info( logging.info(
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)", "Assets scan(roots=%s) completed in %.3fs "
"(created=%d, skipped_existing=%d, total_seen=%d)",
roots, roots,
time.perf_counter() - t_start, time.perf_counter() - t_start,
created, created,
@ -471,7 +477,7 @@ def get_unenriched_assets_for_roots(
limit: Maximum number of rows to return limit: Maximum number of rows to return
Returns: Returns:
List of UnenrichedAssetRow List of UnenrichedReferenceRow
""" """
prefixes: list[str] = [] prefixes: list[str] = []
for root in roots: for root in roots:
@ -481,13 +487,15 @@ def get_unenriched_assets_for_roots(
return [] return []
with create_session() as sess: with create_session() as sess:
return get_unenriched_cache_states(sess, prefixes, max_level=max_level, limit=limit) return get_unenriched_references(
sess, prefixes, max_level=max_level, limit=limit
)
def enrich_asset( def enrich_asset(
file_path: str, file_path: str,
cache_state_id: int, reference_id: str,
asset_info_id: str, asset_id: str,
extract_metadata: bool = True, extract_metadata: bool = True,
compute_hash: bool = False, compute_hash: bool = False,
) -> int: ) -> int:
@ -495,8 +503,8 @@ def enrich_asset(
Args: Args:
file_path: Absolute path to the file file_path: Absolute path to the file
cache_state_id: ID of the cache state to update reference_id: ID of the reference to update
asset_info_id: ID of the asset info to update asset_id: ID of the asset to update (for mime_type and hash)
extract_metadata: If True, extract safetensors header and mime type extract_metadata: If True, extract safetensors header and mime type
compute_hash: If True, compute blake3 hash compute_hash: If True, compute blake3 hash
@ -511,30 +519,46 @@ def enrich_asset(
return new_level return new_level
rel_fname = compute_relative_filename(file_path) rel_fname = compute_relative_filename(file_path)
mime_type: str | None = None
if extract_metadata:
metadata = extract_file_metadata(
file_path,
stat_result=stat_p,
enable_safetensors=True,
relative_filename=rel_fname,
)
if metadata:
mime_type = metadata.content_type
new_level = ENRICHMENT_METADATA
full_hash: str | None = None
if compute_hash:
try:
digest = compute_blake3_hash(file_path)
full_hash = f"blake3:{digest}"
new_level = ENRICHMENT_HASHED
except Exception as e:
logging.warning("Failed to hash %s: %s", file_path, e)
with create_session() as sess: with create_session() as sess:
if extract_metadata: if extract_metadata and metadata:
metadata = extract_file_metadata( user_metadata = metadata.to_user_metadata()
file_path, set_reference_metadata(sess, reference_id, user_metadata)
stat_result=stat_p,
enable_safetensors=True,
relative_filename=rel_fname,
)
if metadata:
user_metadata = metadata.to_user_metadata()
set_asset_info_metadata(sess, asset_info_id, user_metadata)
new_level = ENRICHMENT_METADATA
if compute_hash: if full_hash:
try: existing = get_asset_by_hash(sess, full_hash)
digest = compute_blake3_hash(file_path) if existing and existing.id != asset_id:
# TODO: Update asset.hash field reassign_asset_references(sess, asset_id, existing.id, reference_id)
# For now just mark the enrichment level delete_orphaned_seed_asset(sess, asset_id)
new_level = ENRICHMENT_HASHED if mime_type:
except Exception as e: update_asset_hash_and_mime(sess, existing.id, mime_type=mime_type)
logging.warning("Failed to hash %s: %s", file_path, e) else:
update_asset_hash_and_mime(sess, asset_id, full_hash, mime_type)
elif mime_type:
update_asset_hash_and_mime(sess, asset_id, mime_type=mime_type)
bulk_update_enrichment_level(sess, [cache_state_id], new_level) bulk_update_enrichment_level(sess, [reference_id], new_level)
sess.commit() sess.commit()
return new_level return new_level
@ -548,7 +572,7 @@ def enrich_assets_batch(
"""Enrich a batch of assets. """Enrich a batch of assets.
Args: Args:
rows: List of UnenrichedAssetRow from get_unenriched_assets_for_roots rows: List of UnenrichedReferenceRow from get_unenriched_assets_for_roots
extract_metadata: If True, extract metadata for each asset extract_metadata: If True, extract metadata for each asset
compute_hash: If True, compute hash for each asset compute_hash: If True, compute hash for each asset
@ -562,8 +586,8 @@ def enrich_assets_batch(
try: try:
new_level = enrich_asset( new_level = enrich_asset(
file_path=row.file_path, file_path=row.file_path,
cache_state_id=row.cache_state_id, reference_id=row.reference_id,
asset_info_id=row.asset_info_id, asset_id=row.asset_id,
extract_metadata=extract_metadata, extract_metadata=extract_metadata,
compute_hash=compute_hash, compute_hash=compute_hash,
) )

View File

@ -128,7 +128,7 @@ class AssetSeeder:
phase: Scan phase to run (FAST, ENRICH, or FULL for both) phase: Scan phase to run (FAST, ENRICH, or FULL for both)
progress_callback: Optional callback called with progress updates progress_callback: Optional callback called with progress updates
prune_first: If True, prune orphaned assets before scanning prune_first: If True, prune orphaned assets before scanning
compute_hashes: If True, compute blake3 hashes for each file (slow for large files) compute_hashes: If True, compute blake3 hashes (slow)
Returns: Returns:
True if scan was started, False if already running True if scan was started, False if already running
@ -136,7 +136,7 @@ class AssetSeeder:
if self._disabled: if self._disabled:
logging.debug("Asset seeder is disabled, skipping start") logging.debug("Asset seeder is disabled, skipping start")
return False return False
logging.info("Asset seeder start requested (roots=%s, phase=%s)", roots, phase.value) logging.info("Seeder start (roots=%s, phase=%s)", roots, phase.value)
with self._lock: with self._lock:
if self._state != State.IDLE: if self._state != State.IDLE:
logging.info("Asset seeder already running, skipping start") logging.info("Asset seeder already running, skipping start")
@ -295,12 +295,15 @@ class AssetSeeder:
if not self.wait(timeout=timeout): if not self.wait(timeout=timeout):
return False return False
cb = progress_callback if progress_callback is not None else prev_callback
return self.start( return self.start(
roots=roots if roots is not None else prev_roots, roots=roots if roots is not None else prev_roots,
phase=phase if phase is not None else prev_phase, phase=phase if phase is not None else prev_phase,
progress_callback=progress_callback if progress_callback is not None else prev_callback, progress_callback=cb,
prune_first=prune_first if prune_first is not None else prev_prune, prune_first=prune_first if prune_first is not None else prev_prune,
compute_hashes=compute_hashes if compute_hashes is not None else prev_hashes, compute_hashes=(
compute_hashes if compute_hashes is not None else prev_hashes
),
) )
def wait(self, timeout: float | None = None) -> bool: def wait(self, timeout: float | None = None) -> bool:
@ -497,7 +500,7 @@ class AssetSeeder:
all_prefixes = get_all_known_prefixes() all_prefixes = get_all_known_prefixes()
marked = mark_missing_outside_prefixes_safely(all_prefixes) marked = mark_missing_outside_prefixes_safely(all_prefixes)
if marked > 0: if marked > 0:
logging.info("Marked %d cache states as missing before scan", marked) logging.info("Marked %d refs as missing before scan", marked)
if self._check_pause_and_cancel(): if self._check_pause_and_cancel():
logging.info("Asset scan cancelled after pruning phase") logging.info("Asset scan cancelled after pruning phase")
@ -508,7 +511,8 @@ class AssetSeeder:
# Phase 1: Fast scan (stub records) # Phase 1: Fast scan (stub records)
if phase in (ScanPhase.FAST, ScanPhase.FULL): if phase in (ScanPhase.FAST, ScanPhase.FULL):
total_created, skipped_existing, total_paths = self._run_fast_phase(roots) created, skipped, paths = self._run_fast_phase(roots)
total_created, skipped_existing, total_paths = created, skipped, paths
if self._check_pause_and_cancel(): if self._check_pause_and_cancel():
cancelled = True cancelled = True
@ -542,12 +546,8 @@ class AssetSeeder:
elapsed = time.perf_counter() - t_start elapsed = time.perf_counter() - t_start
logging.info( logging.info(
"Asset scan(roots=%s, phase=%s) completed in %.3fs (created=%d, enriched=%d, skipped=%d)", "Scan(%s, %s) done %.3fs: created=%d enriched=%d skipped=%d",
roots, roots, phase.value, elapsed, total_created, total_enriched,
phase.value,
elapsed,
total_created,
total_enriched,
skipped_existing, skipped_existing,
) )
@ -668,7 +668,10 @@ class AssetSeeder:
progress_interval = 1.0 progress_interval = 1.0
# Get the target enrichment level based on compute_hashes # Get the target enrichment level based on compute_hashes
target_max_level = ENRICHMENT_STUB if not self._compute_hashes else ENRICHMENT_METADATA if not self._compute_hashes:
target_max_level = ENRICHMENT_STUB
else:
target_max_level = ENRICHMENT_METADATA
self._emit_event( self._emit_event(
"assets.seed.started", "assets.seed.started",

View File

@ -30,11 +30,11 @@ from app.assets.services.schemas import (
AddTagsResult, AddTagsResult,
AssetData, AssetData,
AssetDetailResult, AssetDetailResult,
AssetInfoData,
AssetSummaryData, AssetSummaryData,
DownloadResolutionResult, DownloadResolutionResult,
IngestResult, IngestResult,
ListAssetsResult, ListAssetsResult,
ReferenceData,
RegisterAssetResult, RegisterAssetResult,
RemoveTagsResult, RemoveTagsResult,
SetTagsResult, SetTagsResult,
@ -52,8 +52,8 @@ __all__ = [
"AddTagsResult", "AddTagsResult",
"AssetData", "AssetData",
"AssetDetailResult", "AssetDetailResult",
"AssetInfoData",
"AssetSummaryData", "AssetSummaryData",
"ReferenceData",
"BulkInsertResult", "BulkInsertResult",
"DependencyMissingError", "DependencyMissingError",
"DownloadResolutionResult", "DownloadResolutionResult",

View File

@ -7,23 +7,23 @@ from typing import Sequence
from app.assets.database.models import Asset from app.assets.database.models import Asset
from app.assets.database.queries import ( from app.assets.database.queries import (
asset_exists_by_hash, asset_exists_by_hash,
asset_info_exists_for_asset_id, reference_exists_for_asset_id,
delete_asset_info_by_id, delete_reference_by_id,
fetch_asset_info_and_asset, fetch_reference_and_asset,
fetch_asset_info_asset_and_tags, fetch_reference_asset_and_tags,
get_asset_by_hash as queries_get_asset_by_hash, get_asset_by_hash as queries_get_asset_by_hash,
get_asset_info_by_id, get_reference_by_id,
list_asset_infos_page, list_references_page,
list_cache_states_by_asset_id, list_references_by_asset_id,
set_asset_info_metadata, set_reference_metadata,
set_asset_info_preview, set_reference_preview,
set_asset_info_tags, set_reference_tags,
update_asset_info_access_time, update_reference_access_time,
update_asset_info_name, update_reference_name,
update_asset_info_updated_at, update_reference_updated_at,
) )
from app.assets.helpers import select_best_live_path from app.assets.helpers import select_best_live_path
from app.assets.services.path_utils import compute_filename_for_asset from app.assets.services.path_utils import compute_filename_for_reference
from app.assets.services.schemas import ( from app.assets.services.schemas import (
AssetData, AssetData,
AssetDetailResult, AssetDetailResult,
@ -32,34 +32,34 @@ from app.assets.services.schemas import (
ListAssetsResult, ListAssetsResult,
UserMetadata, UserMetadata,
extract_asset_data, extract_asset_data,
extract_info_data, extract_reference_data,
) )
from app.database.db import create_session from app.database.db import create_session
def get_asset_detail( def get_asset_detail(
asset_info_id: str, reference_id: str,
owner_id: str = "", owner_id: str = "",
) -> AssetDetailResult | None: ) -> AssetDetailResult | None:
with create_session() as session: with create_session() as session:
result = fetch_asset_info_asset_and_tags( result = fetch_reference_asset_and_tags(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
owner_id=owner_id, owner_id=owner_id,
) )
if not result: if not result:
return None return None
info, asset, tags = result ref, asset, tags = result
return AssetDetailResult( return AssetDetailResult(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(asset), asset=extract_asset_data(asset),
tags=tags, tags=tags,
) )
def update_asset_metadata( def update_asset_metadata(
asset_info_id: str, reference_id: str,
name: str | None = None, name: str | None = None,
tags: Sequence[str] | None = None, tags: Sequence[str] | None = None,
user_metadata: UserMetadata = None, user_metadata: UserMetadata = None,
@ -67,58 +67,58 @@ def update_asset_metadata(
owner_id: str = "", owner_id: str = "",
) -> AssetDetailResult: ) -> AssetDetailResult:
with create_session() as session: with create_session() as session:
info = get_asset_info_by_id(session, asset_info_id=asset_info_id) ref = get_reference_by_id(session, reference_id=reference_id)
if not info: if not ref:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
if info.owner_id and info.owner_id != owner_id: if ref.owner_id and ref.owner_id != owner_id:
raise PermissionError("not owner") raise PermissionError("not owner")
touched = False touched = False
if name is not None and name != info.name: if name is not None and name != ref.name:
update_asset_info_name(session, asset_info_id=asset_info_id, name=name) update_reference_name(session, reference_id=reference_id, name=name)
touched = True touched = True
computed_filename = compute_filename_for_asset(session, info.asset_id) computed_filename = compute_filename_for_reference(session, ref)
new_meta: dict | None = None new_meta: dict | None = None
if user_metadata is not None: if user_metadata is not None:
new_meta = dict(user_metadata) new_meta = dict(user_metadata)
elif computed_filename: elif computed_filename:
current_meta = info.user_metadata or {} current_meta = ref.user_metadata or {}
if current_meta.get("filename") != computed_filename: if current_meta.get("filename") != computed_filename:
new_meta = dict(current_meta) new_meta = dict(current_meta)
if new_meta is not None: if new_meta is not None:
if computed_filename: if computed_filename:
new_meta["filename"] = computed_filename new_meta["filename"] = computed_filename
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id=asset_info_id, user_metadata=new_meta session, reference_id=reference_id, user_metadata=new_meta
) )
touched = True touched = True
if tags is not None: if tags is not None:
set_asset_info_tags( set_reference_tags(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
tags=tags, tags=tags,
origin=tag_origin, origin=tag_origin,
) )
touched = True touched = True
if touched and user_metadata is None: if touched and user_metadata is None:
update_asset_info_updated_at(session, asset_info_id=asset_info_id) update_reference_updated_at(session, reference_id=reference_id)
result = fetch_asset_info_asset_and_tags( result = fetch_reference_asset_and_tags(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
owner_id=owner_id, owner_id=owner_id,
) )
if not result: if not result:
raise RuntimeError("State changed during update") raise RuntimeError("State changed during update")
info, asset, tag_list = result ref, asset, tag_list = result
detail = AssetDetailResult( detail = AssetDetailResult(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(asset), asset=extract_asset_data(asset),
tags=tag_list, tags=tag_list,
) )
@ -128,16 +128,17 @@ def update_asset_metadata(
def delete_asset_reference( def delete_asset_reference(
asset_info_id: str, reference_id: str,
owner_id: str, owner_id: str,
delete_content_if_orphan: bool = True, delete_content_if_orphan: bool = True,
) -> bool: ) -> bool:
with create_session() as session: with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id) ref_row = get_reference_by_id(session, reference_id=reference_id)
asset_id = info_row.asset_id if info_row else None asset_id = ref_row.asset_id if ref_row else None
file_path = ref_row.file_path if ref_row else None
deleted = delete_asset_info_by_id( deleted = delete_reference_by_id(
session, asset_info_id=asset_info_id, owner_id=owner_id session, reference_id=reference_id, owner_id=owner_id
) )
if not deleted: if not deleted:
session.commit() session.commit()
@ -147,16 +148,19 @@ def delete_asset_reference(
session.commit() session.commit()
return True return True
still_exists = asset_info_exists_for_asset_id(session, asset_id=asset_id) still_exists = reference_exists_for_asset_id(session, asset_id=asset_id)
if still_exists: if still_exists:
session.commit() session.commit()
return True return True
# Orphaned asset - delete it and its files # Orphaned asset - delete it and its files
states = list_cache_states_by_asset_id(session, asset_id=asset_id) refs = list_references_by_asset_id(session, asset_id=asset_id)
file_paths = [ file_paths = [
s.file_path for s in (states or []) if getattr(s, "file_path", None) r.file_path for r in (refs or []) if getattr(r, "file_path", None)
] ]
# Also include the just-deleted file path
if file_path:
file_paths.append(file_path)
asset_row = session.get(Asset, asset_id) asset_row = session.get(Asset, asset_id)
if asset_row is not None: if asset_row is not None:
@ -174,32 +178,32 @@ def delete_asset_reference(
def set_asset_preview( def set_asset_preview(
asset_info_id: str, reference_id: str,
preview_asset_id: str | None = None, preview_asset_id: str | None = None,
owner_id: str = "", owner_id: str = "",
) -> AssetDetailResult: ) -> AssetDetailResult:
with create_session() as session: with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id) ref_row = get_reference_by_id(session, reference_id=reference_id)
if not info_row: if not ref_row:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id: if ref_row.owner_id and ref_row.owner_id != owner_id:
raise PermissionError("not owner") raise PermissionError("not owner")
set_asset_info_preview( set_reference_preview(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
preview_asset_id=preview_asset_id, preview_asset_id=preview_asset_id,
) )
result = fetch_asset_info_asset_and_tags( result = fetch_reference_asset_and_tags(
session, asset_info_id=asset_info_id, owner_id=owner_id session, reference_id=reference_id, owner_id=owner_id
) )
if not result: if not result:
raise RuntimeError("State changed during preview update") raise RuntimeError("State changed during preview update")
info, asset, tags = result ref, asset, tags = result
detail = AssetDetailResult( detail = AssetDetailResult(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(asset), asset=extract_asset_data(asset),
tags=tags, tags=tags,
) )
@ -231,7 +235,7 @@ def list_assets_page(
order: str = "desc", order: str = "desc",
) -> ListAssetsResult: ) -> ListAssetsResult:
with create_session() as session: with create_session() as session:
infos, tag_map, total = list_asset_infos_page( refs, tag_map, total = list_references_page(
session, session,
owner_id=owner_id, owner_id=owner_id,
include_tags=include_tags, include_tags=include_tags,
@ -245,12 +249,12 @@ def list_assets_page(
) )
items: list[AssetSummaryData] = [] items: list[AssetSummaryData] = []
for info in infos: for ref in refs:
items.append( items.append(
AssetSummaryData( AssetSummaryData(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(info.asset), asset=extract_asset_data(ref.asset),
tags=tag_map.get(info.id, []), tags=tag_map.get(ref.id, []),
) )
) )
@ -258,33 +262,40 @@ def list_assets_page(
def resolve_asset_for_download( def resolve_asset_for_download(
asset_info_id: str, reference_id: str,
owner_id: str = "", owner_id: str = "",
) -> DownloadResolutionResult: ) -> DownloadResolutionResult:
with create_session() as session: with create_session() as session:
pair = fetch_asset_info_and_asset( pair = fetch_reference_and_asset(
session, asset_info_id=asset_info_id, owner_id=owner_id session, reference_id=reference_id, owner_id=owner_id
) )
if not pair: if not pair:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
info, asset = pair ref, asset = pair
states = list_cache_states_by_asset_id(session, asset_id=asset.id)
abs_path = select_best_live_path(states)
if not abs_path:
raise FileNotFoundError(
f"No live path for AssetInfo {asset_info_id} (asset id={asset.id}, name={info.name})"
)
update_asset_info_access_time(session, asset_info_id=asset_info_id) # For references with file_path, use that directly
if ref.file_path and os.path.isfile(ref.file_path):
abs_path = ref.file_path
else:
# For API-created refs without file_path, find a path from other refs
refs = list_references_by_asset_id(session, asset_id=asset.id)
abs_path = select_best_live_path(refs)
if not abs_path:
raise FileNotFoundError(
f"No live path for AssetReference {reference_id} "
f"(asset id={asset.id}, name={ref.name})"
)
update_reference_access_time(session, reference_id=reference_id)
session.commit() session.commit()
ctype = ( ctype = (
asset.mime_type asset.mime_type
or mimetypes.guess_type(info.name or abs_path)[0] or mimetypes.guess_type(ref.name or abs_path)[0]
or "application/octet-stream" or "application/octet-stream"
) )
download_name = info.name or os.path.basename(abs_path) download_name = ref.name or os.path.basename(abs_path)
return DownloadResolutionResult( return DownloadResolutionResult(
abs_path=abs_path, abs_path=abs_path,
content_type=ctype, content_type=ctype,

View File

@ -1,6 +1,5 @@
from __future__ import annotations from __future__ import annotations
import logging
import os import os
import uuid import uuid
from dataclasses import dataclass from dataclasses import dataclass
@ -10,17 +9,16 @@ from typing import TYPE_CHECKING, Any, TypedDict
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.queries import ( from app.assets.database.queries import (
bulk_insert_asset_infos_ignore_conflicts,
bulk_insert_assets, bulk_insert_assets,
bulk_insert_cache_states_ignore_conflicts, bulk_insert_references_ignore_conflicts,
bulk_insert_tags_and_meta, bulk_insert_tags_and_meta,
delete_assets_by_ids, delete_assets_by_ids,
get_asset_info_ids_by_ids,
get_cache_states_by_paths_and_asset_ids,
get_existing_asset_ids, get_existing_asset_ids,
get_reference_ids_by_ids,
get_references_by_paths_and_asset_ids,
get_unreferenced_unhashed_asset_ids, get_unreferenced_unhashed_asset_ids,
mark_cache_states_missing_outside_prefixes, mark_references_missing_outside_prefixes,
restore_cache_states_by_paths, restore_references_by_paths,
) )
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
@ -52,21 +50,15 @@ class AssetRow(TypedDict):
created_at: datetime created_at: datetime
class CacheStateRow(TypedDict): class ReferenceRow(TypedDict):
"""Row data for inserting a CacheState.""" """Row data for inserting an AssetReference."""
id: str
asset_id: str asset_id: str
file_path: str file_path: str
mtime_ns: int mtime_ns: int
class AssetInfoRow(TypedDict):
"""Row data for inserting an AssetInfo."""
id: str
owner_id: str owner_id: str
name: str name: str
asset_id: str
preview_id: str | None preview_id: str | None
user_metadata: dict[str, Any] | None user_metadata: dict[str, Any] | None
created_at: datetime created_at: datetime
@ -74,27 +66,10 @@ class AssetInfoRow(TypedDict):
last_access_time: datetime last_access_time: datetime
class AssetInfoRowInternal(TypedDict):
"""Internal row data for AssetInfo with extra tracking fields."""
id: str
owner_id: str
name: str
asset_id: str
preview_id: str | None
user_metadata: dict[str, Any] | None
created_at: datetime
updated_at: datetime
last_access_time: datetime
_tags: list[str]
_filename: str
_extracted_metadata: ExtractedMetadata | None
class TagRow(TypedDict): class TagRow(TypedDict):
"""Row data for inserting a Tag.""" """Row data for inserting a Tag."""
asset_info_id: str asset_reference_id: str
tag_name: str tag_name: str
origin: str origin: str
added_at: datetime added_at: datetime
@ -103,7 +78,7 @@ class TagRow(TypedDict):
class MetadataRow(TypedDict): class MetadataRow(TypedDict):
"""Row data for inserting asset metadata.""" """Row data for inserting asset metadata."""
asset_info_id: str asset_reference_id: str
key: str key: str
ordinal: int ordinal: int
val_str: str | None val_str: str | None
@ -116,9 +91,9 @@ class MetadataRow(TypedDict):
class BulkInsertResult: class BulkInsertResult:
"""Result of bulk asset insertion.""" """Result of bulk asset insertion."""
inserted_infos: int inserted_refs: int
won_states: int won_paths: int
lost_states: int lost_paths: int
def batch_insert_seed_assets( def batch_insert_seed_assets(
@ -138,29 +113,28 @@ def batch_insert_seed_assets(
This function orchestrates: This function orchestrates:
1. Insert seed Assets (hash=NULL) 1. Insert seed Assets (hash=NULL)
2. Claim cache states with ON CONFLICT DO NOTHING 2. Claim references with ON CONFLICT DO NOTHING on file_path
3. Query to find winners (paths where our asset_id was inserted) 3. Query to find winners (paths where our asset_id was inserted)
4. Delete Assets for losers (path already claimed by another asset) 4. Delete Assets for losers (path already claimed by another asset)
5. Insert AssetInfo for winners 5. Insert tags and metadata for successfully inserted references
6. Insert tags and metadata for successfully inserted AssetInfos
Returns: Returns:
BulkInsertResult with inserted_infos, won_states, lost_states BulkInsertResult with inserted_refs, won_paths, lost_paths
""" """
if not specs: if not specs:
return BulkInsertResult(inserted_infos=0, won_states=0, lost_states=0) return BulkInsertResult(inserted_refs=0, won_paths=0, lost_paths=0)
current_time = get_utc_now() current_time = get_utc_now()
asset_rows: list[AssetRow] = [] asset_rows: list[AssetRow] = []
cache_state_rows: list[CacheStateRow] = [] reference_rows: list[ReferenceRow] = []
path_to_asset_id: dict[str, str] = {} path_to_asset_id: dict[str, str] = {}
asset_id_to_info: dict[str, AssetInfoRowInternal] = {} asset_id_to_ref_data: dict[str, dict] = {}
absolute_path_list: list[str] = [] absolute_path_list: list[str] = []
for spec in specs: for spec in specs:
absolute_path = os.path.abspath(spec["abs_path"]) absolute_path = os.path.abspath(spec["abs_path"])
asset_id = str(uuid.uuid4()) asset_id = str(uuid.uuid4())
asset_info_id = str(uuid.uuid4()) reference_id = str(uuid.uuid4())
absolute_path_list.append(absolute_path) absolute_path_list.append(absolute_path)
path_to_asset_id[absolute_path] = asset_id path_to_asset_id[absolute_path] = asset_id
@ -174,13 +148,7 @@ def batch_insert_seed_assets(
"created_at": current_time, "created_at": current_time,
} }
) )
cache_state_rows.append(
{
"asset_id": asset_id,
"file_path": absolute_path,
"mtime_ns": spec["mtime_ns"],
}
)
# Build user_metadata from extracted metadata or fallback to filename # Build user_metadata from extracted metadata or fallback to filename
extracted_metadata = spec.get("metadata") extracted_metadata = spec.get("metadata")
if extracted_metadata: if extracted_metadata:
@ -190,35 +158,43 @@ def batch_insert_seed_assets(
else: else:
user_metadata = None user_metadata = None
asset_id_to_info[asset_id] = { reference_rows.append(
"id": asset_info_id, {
"owner_id": owner_id, "id": reference_id,
"name": spec["info_name"], "asset_id": asset_id,
"asset_id": asset_id, "file_path": absolute_path,
"preview_id": None, "mtime_ns": spec["mtime_ns"],
"user_metadata": user_metadata, "owner_id": owner_id,
"created_at": current_time, "name": spec["info_name"],
"updated_at": current_time, "preview_id": None,
"last_access_time": current_time, "user_metadata": user_metadata,
"_tags": spec["tags"], "created_at": current_time,
"_filename": spec["fname"], "updated_at": current_time,
"_extracted_metadata": extracted_metadata, "last_access_time": current_time,
}
)
asset_id_to_ref_data[asset_id] = {
"reference_id": reference_id,
"tags": spec["tags"],
"filename": spec["fname"],
"extracted_metadata": extracted_metadata,
} }
bulk_insert_assets(session, asset_rows) bulk_insert_assets(session, asset_rows)
# Filter cache states to only those whose assets were actually inserted # Filter reference rows to only those whose assets were actually inserted
# (assets with duplicate hashes are silently dropped by ON CONFLICT DO NOTHING) # (assets with duplicate hashes are silently dropped by ON CONFLICT DO NOTHING)
inserted_asset_ids = get_existing_asset_ids( inserted_asset_ids = get_existing_asset_ids(
session, [r["asset_id"] for r in cache_state_rows] session, [r["asset_id"] for r in reference_rows]
) )
cache_state_rows = [ reference_rows = [
r for r in cache_state_rows if r["asset_id"] in inserted_asset_ids r for r in reference_rows if r["asset_id"] in inserted_asset_ids
] ]
bulk_insert_cache_states_ignore_conflicts(session, cache_state_rows) bulk_insert_references_ignore_conflicts(session, reference_rows)
restore_cache_states_by_paths(session, absolute_path_list) restore_references_by_paths(session, absolute_path_list)
winning_paths = get_cache_states_by_paths_and_asset_ids(session, path_to_asset_id) winning_paths = get_references_by_paths_and_asset_ids(session, path_to_asset_id)
all_paths_set = set(absolute_path_list) all_paths_set = set(absolute_path_list)
losing_paths = all_paths_set - winning_paths losing_paths = all_paths_set - winning_paths
@ -229,44 +205,34 @@ def batch_insert_seed_assets(
if not winning_paths: if not winning_paths:
return BulkInsertResult( return BulkInsertResult(
inserted_infos=0, inserted_refs=0,
won_states=0, won_paths=0,
lost_states=len(losing_paths), lost_paths=len(losing_paths),
) )
winner_info_rows = [ # Get reference IDs for winners
asset_id_to_info[path_to_asset_id[path]] for path in winning_paths winning_ref_ids = [
asset_id_to_ref_data[path_to_asset_id[path]]["reference_id"]
for path in winning_paths
] ]
database_info_rows: list[AssetInfoRow] = [ inserted_ref_ids = get_reference_ids_by_ids(session, winning_ref_ids)
{
"id": info_row["id"],
"owner_id": info_row["owner_id"],
"name": info_row["name"],
"asset_id": info_row["asset_id"],
"preview_id": info_row["preview_id"],
"user_metadata": info_row["user_metadata"],
"created_at": info_row["created_at"],
"updated_at": info_row["updated_at"],
"last_access_time": info_row["last_access_time"],
}
for info_row in winner_info_rows
]
bulk_insert_asset_infos_ignore_conflicts(session, database_info_rows)
all_info_ids = [info_row["id"] for info_row in winner_info_rows]
inserted_info_ids = get_asset_info_ids_by_ids(session, all_info_ids)
tag_rows: list[TagRow] = [] tag_rows: list[TagRow] = []
metadata_rows: list[MetadataRow] = [] metadata_rows: list[MetadataRow] = []
if inserted_info_ids:
for info_row in winner_info_rows: if inserted_ref_ids:
info_id = info_row["id"] for path in winning_paths:
if info_id not in inserted_info_ids: asset_id = path_to_asset_id[path]
ref_data = asset_id_to_ref_data[asset_id]
ref_id = ref_data["reference_id"]
if ref_id not in inserted_ref_ids:
continue continue
for tag in info_row["_tags"]:
for tag in ref_data["tags"]:
tag_rows.append( tag_rows.append(
{ {
"asset_info_id": info_id, "asset_reference_id": ref_id,
"tag_name": tag, "tag_name": tag,
"origin": "automatic", "origin": "automatic",
"added_at": current_time, "added_at": current_time,
@ -274,17 +240,17 @@ def batch_insert_seed_assets(
) )
# Use extracted metadata for meta rows if available # Use extracted metadata for meta rows if available
extracted_metadata = info_row.get("_extracted_metadata") extracted_metadata = ref_data.get("extracted_metadata")
if extracted_metadata: if extracted_metadata:
metadata_rows.extend(extracted_metadata.to_meta_rows(info_id)) metadata_rows.extend(extracted_metadata.to_meta_rows(ref_id))
elif info_row["_filename"]: elif ref_data["filename"]:
# Fallback: just store filename # Fallback: just store filename
metadata_rows.append( metadata_rows.append(
{ {
"asset_info_id": info_id, "asset_reference_id": ref_id,
"key": "filename", "key": "filename",
"ordinal": 0, "ordinal": 0,
"val_str": info_row["_filename"], "val_str": ref_data["filename"],
"val_num": None, "val_num": None,
"val_bool": None, "val_bool": None,
"val_json": None, "val_json": None,
@ -294,40 +260,36 @@ def batch_insert_seed_assets(
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=metadata_rows) bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=metadata_rows)
return BulkInsertResult( return BulkInsertResult(
inserted_infos=len(inserted_info_ids), inserted_refs=len(inserted_ref_ids),
won_states=len(winning_paths), won_paths=len(winning_paths),
lost_states=len(losing_paths), lost_paths=len(losing_paths),
) )
def mark_assets_missing_outside_prefixes(
    session: Session, valid_prefixes: list[str]
) -> int:
    """Mark references as missing when outside valid prefixes.

    This is a non-destructive operation that soft-deletes references
    by setting is_missing=True. User metadata is preserved and assets
    can be restored if the file reappears in a future scan.

    Args:
        session: Database session
        valid_prefixes: List of absolute directory prefixes that are valid

    Returns:
        Number of references marked as missing
    """
    # Thin service-layer wrapper: all of the work happens in the query helper.
    return mark_references_missing_outside_prefixes(session, valid_prefixes)
def cleanup_unreferenced_assets(session: Session) -> int: def cleanup_unreferenced_assets(session: Session) -> int:
"""Hard-delete unhashed assets with no active cache states. """Hard-delete unhashed assets with no active references.
This is a destructive operation intended for explicit cleanup. This is a destructive operation intended for explicit cleanup.
Only deletes assets where hash=None and all cache states are missing. Only deletes assets where hash=None and all references are missing.
Returns: Returns:
Number of assets deleted Number of assets deleted

View File

@ -8,24 +8,23 @@ from sqlalchemy import select
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
import app.assets.services.hashing as hashing import app.assets.services.hashing as hashing
from app.assets.database.models import Asset, AssetInfo, Tag from app.assets.database.models import Asset, AssetReference, Tag
from app.assets.database.queries import ( from app.assets.database.queries import (
add_tags_to_asset_info, add_tags_to_reference,
fetch_asset_info_and_asset, fetch_reference_and_asset,
get_asset_by_hash, get_asset_by_hash,
get_asset_tags, get_reference_tags,
get_or_create_asset_info, get_or_create_reference,
remove_missing_tag_for_asset_id, remove_missing_tag_for_asset_id,
set_asset_info_metadata, set_reference_metadata,
set_asset_info_tags, set_reference_tags,
update_asset_info_timestamps,
upsert_asset, upsert_asset,
upsert_cache_state, upsert_reference,
) )
from app.assets.helpers import normalize_tags from app.assets.helpers import normalize_tags
from app.assets.services.file_utils import get_size_and_mtime_ns from app.assets.services.file_utils import get_size_and_mtime_ns
from app.assets.services.path_utils import ( from app.assets.services.path_utils import (
compute_filename_for_asset, compute_filename_for_reference,
resolve_destination_from_tags, resolve_destination_from_tags,
validate_path_within_base, validate_path_within_base,
) )
@ -35,7 +34,7 @@ from app.assets.services.schemas import (
UploadResult, UploadResult,
UserMetadata, UserMetadata,
extract_asset_data, extract_asset_data,
extract_info_data, extract_reference_data,
) )
from app.database.db import create_session from app.database.db import create_session
@ -58,9 +57,9 @@ def _ingest_file_from_path(
asset_created = False asset_created = False
asset_updated = False asset_updated = False
state_created = False ref_created = False
state_updated = False ref_updated = False
asset_info_id: str | None = None reference_id: str | None = None
with create_session() as session: with create_session() as session:
if preview_id: if preview_id:
@ -74,49 +73,42 @@ def _ingest_file_from_path(
mime_type=mime_type, mime_type=mime_type,
) )
state_created, state_updated = upsert_cache_state( ref_created, ref_updated = upsert_reference(
session, session,
asset_id=asset.id, asset_id=asset.id,
file_path=locator, file_path=locator,
name=info_name or os.path.basename(locator),
mtime_ns=mtime_ns, mtime_ns=mtime_ns,
owner_id=owner_id,
) )
if info_name: # Get the reference we just created/updated
info, info_created = get_or_create_asset_info( from app.assets.database.queries import get_reference_by_file_path
session, ref = get_reference_by_file_path(session, locator)
asset_id=asset.id, if ref:
owner_id=owner_id, reference_id = ref.id
name=info_name,
preview_id=preview_id, if preview_id and ref.preview_id != preview_id:
) ref.preview_id = preview_id
if info_created:
asset_info_id = info.id
else:
update_asset_info_timestamps(
session, asset_info=info, preview_id=preview_id
)
asset_info_id = info.id
norm = normalize_tags(list(tags)) norm = normalize_tags(list(tags))
if norm and asset_info_id: if norm:
if require_existing_tags: if require_existing_tags:
_validate_tags_exist(session, norm) _validate_tags_exist(session, norm)
add_tags_to_asset_info( add_tags_to_reference(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
tags=norm, tags=norm,
origin=tag_origin, origin=tag_origin,
create_if_missing=not require_existing_tags, create_if_missing=not require_existing_tags,
) )
if asset_info_id: _update_metadata_with_filename(
_update_metadata_with_filename( session,
session, reference_id=reference_id,
asset_info_id=asset_info_id, ref=ref,
asset_id=asset.id, user_metadata=user_metadata,
info=info, )
user_metadata=user_metadata,
)
try: try:
remove_missing_tag_for_asset_id(session, asset_id=asset.id) remove_missing_tag_for_asset_id(session, asset_id=asset.id)
@ -128,9 +120,9 @@ def _ingest_file_from_path(
return IngestResult( return IngestResult(
asset_created=asset_created, asset_created=asset_created,
asset_updated=asset_updated, asset_updated=asset_updated,
state_created=state_created, ref_created=ref_created,
state_updated=state_updated, ref_updated=ref_updated,
asset_info_id=asset_info_id, reference_id=reference_id,
) )
@ -147,18 +139,17 @@ def _register_existing_asset(
if not asset: if not asset:
raise ValueError(f"No asset with hash {asset_hash}") raise ValueError(f"No asset with hash {asset_hash}")
info, info_created = get_or_create_asset_info( ref, ref_created = get_or_create_reference(
session, session,
asset_id=asset.id, asset_id=asset.id,
owner_id=owner_id, owner_id=owner_id,
name=name, name=name,
preview_id=None,
) )
if not info_created: if not ref_created:
tag_names = get_asset_tags(session, asset_info_id=info.id) tag_names = get_reference_tags(session, reference_id=ref.id)
result = RegisterAssetResult( result = RegisterAssetResult(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(asset), asset=extract_asset_data(asset),
tags=tag_names, tags=tag_names,
created=False, created=False,
@ -167,29 +158,29 @@ def _register_existing_asset(
return result return result
new_meta = dict(user_metadata or {}) new_meta = dict(user_metadata or {})
computed_filename = compute_filename_for_asset(session, asset.id) computed_filename = compute_filename_for_reference(session, ref)
if computed_filename: if computed_filename:
new_meta["filename"] = computed_filename new_meta["filename"] = computed_filename
if new_meta: if new_meta:
set_asset_info_metadata( set_reference_metadata(
session, session,
asset_info_id=info.id, reference_id=ref.id,
user_metadata=new_meta, user_metadata=new_meta,
) )
if tags is not None: if tags is not None:
set_asset_info_tags( set_reference_tags(
session, session,
asset_info_id=info.id, reference_id=ref.id,
tags=tags, tags=tags,
origin=tag_origin, origin=tag_origin,
) )
tag_names = get_asset_tags(session, asset_info_id=info.id) tag_names = get_reference_tags(session, reference_id=ref.id)
session.refresh(info) session.refresh(ref)
result = RegisterAssetResult( result = RegisterAssetResult(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(asset), asset=extract_asset_data(asset),
tags=tag_names, tags=tag_names,
created=True, created=True,
@ -211,14 +202,13 @@ def _validate_tags_exist(session: Session, tags: list[str]) -> None:
def _update_metadata_with_filename( def _update_metadata_with_filename(
session: Session, session: Session,
asset_info_id: str, reference_id: str,
asset_id: str, ref: AssetReference,
info: AssetInfo,
user_metadata: UserMetadata, user_metadata: UserMetadata,
) -> None: ) -> None:
computed_filename = compute_filename_for_asset(session, asset_id) computed_filename = compute_filename_for_reference(session, ref)
current_meta = info.user_metadata or {} current_meta = ref.user_metadata or {}
new_meta = dict(current_meta) new_meta = dict(current_meta)
if user_metadata: if user_metadata:
for k, v in user_metadata.items(): for k, v in user_metadata.items():
@ -227,9 +217,9 @@ def _update_metadata_with_filename(
new_meta["filename"] = computed_filename new_meta["filename"] = computed_filename
if new_meta != current_meta: if new_meta != current_meta:
set_asset_info_metadata( set_reference_metadata(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
user_metadata=new_meta, user_metadata=new_meta,
) )
@ -287,7 +277,7 @@ def upload_from_temp_path(
owner_id=owner_id, owner_id=owner_id,
) )
return UploadResult( return UploadResult(
info=result.info, ref=result.ref,
asset=result.asset, asset=result.asset,
tags=result.tags, tags=result.tags,
created_new=False, created_new=False,
@ -334,21 +324,21 @@ def upload_from_temp_path(
tag_origin="manual", tag_origin="manual",
require_existing_tags=False, require_existing_tags=False,
) )
info_id = ingest_result.asset_info_id reference_id = ingest_result.reference_id
if not info_id: if not reference_id:
raise RuntimeError("failed to create asset metadata") raise RuntimeError("failed to create asset reference")
with create_session() as session: with create_session() as session:
pair = fetch_asset_info_and_asset( pair = fetch_reference_and_asset(
session, asset_info_id=info_id, owner_id=owner_id session, reference_id=reference_id, owner_id=owner_id
) )
if not pair: if not pair:
raise RuntimeError("inconsistent DB state after ingest") raise RuntimeError("inconsistent DB state after ingest")
info, asset = pair ref, asset = pair
tag_names = get_asset_tags(session, asset_info_id=info.id) tag_names = get_reference_tags(session, reference_id=ref.id)
return UploadResult( return UploadResult(
info=extract_info_data(info), ref=extract_reference_data(ref),
asset=extract_asset_data(asset), asset=extract_asset_data(asset),
tags=tag_names, tags=tag_names,
created_new=ingest_result.asset_created, created_new=ingest_result.asset_created,
@ -381,7 +371,7 @@ def create_from_hash(
) )
return UploadResult( return UploadResult(
info=result.info, ref=result.ref,
asset=result.asset, asset=result.asset,
tags=result.tags, tags=result.tags,
created_new=False, created_new=False,

View File

@ -52,6 +52,7 @@ class ExtractedMetadata:
# Tier 1: Filesystem (always available) # Tier 1: Filesystem (always available)
filename: str = "" filename: str = ""
file_path: str = "" # Full absolute path to the file
content_length: int = 0 content_length: int = 0
content_type: str | None = None content_type: str | None = None
format: str = "" # file extension without dot format: str = "" # file extension without dot
@ -76,12 +77,14 @@ class ExtractedMetadata:
resolve_url: str | None = None resolve_url: str | None = None
def to_user_metadata(self) -> dict[str, Any]: def to_user_metadata(self) -> dict[str, Any]:
"""Convert to user_metadata dict for AssetInfo.user_metadata JSON field.""" """Convert to user_metadata dict for AssetReference.user_metadata JSON field."""
data: dict[str, Any] = { data: dict[str, Any] = {
"filename": self.filename, "filename": self.filename,
"content_length": self.content_length, "content_length": self.content_length,
"format": self.format, "format": self.format,
} }
if self.file_path:
data["file_path"] = self.file_path
if self.content_type: if self.content_type:
data["content_type"] = self.content_type data["content_type"] = self.content_type
@ -119,14 +122,14 @@ class ExtractedMetadata:
return data return data
def to_meta_rows(self, asset_info_id: str) -> list[dict]: def to_meta_rows(self, reference_id: str) -> list[dict]:
"""Convert to asset_info_meta rows for typed/indexed querying.""" """Convert to asset_reference_meta rows for typed/indexed querying."""
rows: list[dict] = [] rows: list[dict] = []
def add_str(key: str, val: str | None, ordinal: int = 0) -> None: def add_str(key: str, val: str | None, ordinal: int = 0) -> None:
if val: if val:
rows.append({ rows.append({
"asset_info_id": asset_info_id, "asset_reference_id": reference_id,
"key": key, "key": key,
"ordinal": ordinal, "ordinal": ordinal,
"val_str": val[:2048] if len(val) > 2048 else val, "val_str": val[:2048] if len(val) > 2048 else val,
@ -138,7 +141,7 @@ class ExtractedMetadata:
def add_num(key: str, val: int | float | None) -> None: def add_num(key: str, val: int | float | None) -> None:
if val is not None: if val is not None:
rows.append({ rows.append({
"asset_info_id": asset_info_id, "asset_reference_id": reference_id,
"key": key, "key": key,
"ordinal": 0, "ordinal": 0,
"val_str": None, "val_str": None,
@ -150,7 +153,7 @@ class ExtractedMetadata:
def add_bool(key: str, val: bool | None) -> None: def add_bool(key: str, val: bool | None) -> None:
if val is not None: if val is not None:
rows.append({ rows.append({
"asset_info_id": asset_info_id, "asset_reference_id": reference_id,
"key": key, "key": key,
"ordinal": 0, "ordinal": 0,
"val_str": None, "val_str": None,
@ -168,7 +171,8 @@ class ExtractedMetadata:
# Tier 2 # Tier 2
add_str("base_model", self.base_model) add_str("base_model", self.base_model)
add_str("air", self.air) add_str("air", self.air)
add_bool("has_preview_images", self.has_preview_images if self.has_preview_images else None) has_previews = self.has_preview_images if self.has_preview_images else None
add_bool("has_preview_images", has_previews)
# trained_words as multiple rows with ordinals # trained_words as multiple rows with ordinals
if self.trained_words: if self.trained_words:
@ -191,7 +195,9 @@ class ExtractedMetadata:
return rows return rows
def _read_safetensors_header(path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE) -> dict[str, Any] | None: def _read_safetensors_header(
path: str, max_size: int = MAX_SAFETENSORS_HEADER_SIZE
) -> dict[str, Any] | None:
"""Read only the JSON header from a safetensors file. """Read only the JSON header from a safetensors file.
This is very fast - reads 8 bytes for header length, then the JSON header. This is very fast - reads 8 bytes for header length, then the JSON header.
@ -220,7 +226,9 @@ def _read_safetensors_header(path: str, max_size: int = MAX_SAFETENSORS_HEADER_S
return None return None
def _extract_safetensors_metadata(header: dict[str, Any], meta: ExtractedMetadata) -> None: def _extract_safetensors_metadata(
header: dict[str, Any], meta: ExtractedMetadata
) -> None:
"""Extract metadata from safetensors header __metadata__ section. """Extract metadata from safetensors header __metadata__ section.
Modifies meta in-place. Modifies meta in-place.
@ -230,7 +238,11 @@ def _extract_safetensors_metadata(header: dict[str, Any], meta: ExtractedMetadat
return return
# Common model metadata # Common model metadata
meta.base_model = st_meta.get("ss_base_model_version") or st_meta.get("modelspec.base_model") or st_meta.get("base_model") meta.base_model = (
st_meta.get("ss_base_model_version")
or st_meta.get("modelspec.base_model")
or st_meta.get("base_model")
)
# Trained words / trigger words # Trained words / trigger words
trained_words = st_meta.get("ss_tag_frequency") trained_words = st_meta.get("ss_tag_frequency")
@ -304,8 +316,8 @@ def extract_file_metadata(
meta = ExtractedMetadata() meta = ExtractedMetadata()
# Tier 1: Filesystem metadata # Tier 1: Filesystem metadata
# Use relative_filename if provided (for backward compatibility with existing behavior) meta.filename = relative_filename or os.path.basename(abs_path)
meta.filename = relative_filename if relative_filename else os.path.basename(abs_path) meta.file_path = abs_path
_, ext = os.path.splitext(abs_path) _, ext = os.path.splitext(abs_path)
meta.format = ext.lstrip(".").lower() if ext else "" meta.format = ext.lstrip(".").lower() if ext else ""
@ -333,6 +345,6 @@ def extract_file_metadata(
try: try:
_extract_safetensors_metadata(header, meta) _extract_safetensors_metadata(header, meta)
except Exception as e: except Exception as e:
logging.debug("Failed to extract safetensors metadata from %s: %s", abs_path, e) logging.debug("Safetensors meta extract failed %s: %s", abs_path, e)
return meta return meta

View File

@ -7,18 +7,15 @@ from app.assets.helpers import normalize_tags
def get_comfy_models_folders() -> list[tuple[str, list[str]]]:
    """Build list of (folder_name, base_paths[]) for model locations.

    Includes a category if any of its base paths lies under models_dir.

    Returns:
        One ``(folder_name, base_paths)`` tuple for every entry of
        ``folder_paths.folder_names_and_paths`` that qualifies, in that
        mapping's iteration order.
    """
    # Computed once: the trailing separator keeps a sibling directory such as
    # "<models_dir>_backup" from matching as if it were inside models_dir.
    models_prefix = os.path.abspath(folder_paths.models_dir) + os.sep

    targets: list[tuple[str, list[str]]] = []
    for name, values in folder_paths.folder_names_and_paths.items():
        # Index rather than tuple-unpack so that entries altered by nodepacks
        # that modify folder_paths do not break this loop.
        paths, _exts = values[0], values[1]
        if any(os.path.abspath(p).startswith(models_prefix) for p in paths):
            targets.append((name, paths))
    return targets
@ -70,7 +67,6 @@ def compute_relative_filename(file_path: str) -> str | None:
/.../models/text_encoders/clip_g.safetensors -> "clip_g.safetensors" /.../models/text_encoders/clip_g.safetensors -> "clip_g.safetensors"
For non-model paths, returns None. For non-model paths, returns None.
NOTE: this is a temporary helper, used only for initializing metadata["filename"] field.
""" """
try: try:
root_category, rel_path = get_asset_category_and_relative_path(file_path) root_category, rel_path = get_asset_category_and_relative_path(file_path)
@ -92,18 +88,18 @@ def compute_relative_filename(file_path: str) -> str | None:
def get_asset_category_and_relative_path( def get_asset_category_and_relative_path(
file_path: str, file_path: str,
) -> tuple[Literal["input", "output", "models"], str]: ) -> tuple[Literal["input", "output", "models"], str]:
"""Given an absolute or relative file path, determine which root category the path belongs to: """Determine which root category a file path belongs to.
- 'input' if the file resides under `folder_paths.get_input_directory()`
- 'output' if the file resides under `folder_paths.get_output_directory()` Categories:
- 'models' if the file resides under any base path of categories returned by `get_comfy_models_folders()` - 'input': under folder_paths.get_input_directory()
- 'output': under folder_paths.get_output_directory()
- 'models': under any base path from get_comfy_models_folders()
Returns: Returns:
(root_category, relative_path_inside_that_root) (root_category, relative_path_inside_that_root)
For 'models', the relative path is prefixed with the category name:
e.g. ('models', 'vae/test/sub/ae.safetensors')
Raises: Raises:
ValueError: if the path does not belong to input, output, or configured model bases. ValueError: path does not belong to any known root.
""" """
fp_abs = os.path.abspath(file_path) fp_abs = os.path.abspath(file_path)
@ -149,32 +145,35 @@ def get_asset_category_and_relative_path(
) )
def compute_filename_for_reference(session, ref) -> str | None:
"""Compute the relative filename for an asset reference.
Uses the file_path from the reference if available.
"""
if ref.file_path:
return compute_relative_filename(ref.file_path)
return None
def compute_filename_for_asset(session, asset_id: str) -> str | None:
    """Compute the relative filename for an asset from its best live reference path.

    Args:
        session: Database session passed through to the reference query.
        asset_id: ID of the asset whose references are examined.

    Returns:
        The result of ``compute_relative_filename`` for the path chosen by
        ``select_best_live_path``, or None when no path is chosen.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from app.assets.database.queries import list_references_by_asset_id
    from app.assets.helpers import select_best_live_path

    primary_path = select_best_live_path(
        list_references_by_asset_id(session, asset_id=asset_id)
    )
    return compute_relative_filename(primary_path) if primary_path else None
def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
"""Return a tuple (name, tags) derived from a filesystem path. """Return (name, tags) derived from a filesystem path.
Semantics: - name: base filename with extension
- Root category is determined by `get_asset_category_and_relative_path`. - tags: [root_category] + parent folder names in order
- The returned `name` is the base filename with extension from the relative path.
- The returned `tags` are:
[root_category] + parent folders of the relative path (in order)
For 'models', this means:
file '/.../ModelsDir/vae/test_tag/ae.safetensors'
-> root_category='models', some_path='vae/test_tag/ae.safetensors'
-> name='ae.safetensors', tags=['models', 'vae', 'test_tag']
Raises: Raises:
ValueError: if the path does not belong to input, output, or configured model bases. ValueError: path does not belong to any known root.
""" """
root_category, some_path = get_asset_category_and_relative_path(file_path) root_category, some_path = get_asset_category_and_relative_path(file_path)
p = Path(some_path) p = Path(some_path)

View File

@ -2,7 +2,7 @@ from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from typing import Any, NamedTuple from typing import Any, NamedTuple
from app.assets.database.models import Asset, AssetInfo from app.assets.database.models import Asset, AssetReference
UserMetadata = dict[str, Any] | None UserMetadata = dict[str, Any] | None
@ -15,9 +15,12 @@ class AssetData:
@dataclass(frozen=True) @dataclass(frozen=True)
class AssetInfoData: class ReferenceData:
"""Data transfer object for AssetReference."""
id: str id: str
name: str name: str
file_path: str | None
user_metadata: UserMetadata user_metadata: UserMetadata
preview_id: str | None preview_id: str | None
created_at: datetime created_at: datetime
@ -27,14 +30,14 @@ class AssetInfoData:
@dataclass(frozen=True)
class AssetDetailResult:
    """Immutable bundle of a reference, its optional asset, and its tags."""

    ref: ReferenceData
    asset: AssetData | None
    tags: list[str]
@dataclass(frozen=True) @dataclass(frozen=True)
class RegisterAssetResult: class RegisterAssetResult:
info: AssetInfoData ref: ReferenceData
asset: AssetData asset: AssetData
tags: list[str] tags: list[str]
created: bool created: bool
@ -44,9 +47,9 @@ class RegisterAssetResult:
class IngestResult: class IngestResult:
asset_created: bool asset_created: bool
asset_updated: bool asset_updated: bool
state_created: bool ref_created: bool
state_updated: bool ref_updated: bool
asset_info_id: str | None reference_id: str | None
@dataclass(frozen=True) @dataclass(frozen=True)
@ -78,7 +81,7 @@ class TagUsage(NamedTuple):
@dataclass(frozen=True) @dataclass(frozen=True)
class AssetSummaryData: class AssetSummaryData:
info: AssetInfoData ref: ReferenceData
asset: AssetData | None asset: AssetData | None
tags: list[str] tags: list[str]
@ -98,21 +101,22 @@ class DownloadResolutionResult:
@dataclass(frozen=True)
class UploadResult:
    """Immutable bundle of a reference, its asset, its tags, and a created-new flag."""

    ref: ReferenceData
    asset: AssetData
    tags: list[str]
    created_new: bool
def extract_reference_data(ref: AssetReference) -> ReferenceData:
    """Copy the fields of an AssetReference into a ReferenceData object.

    Args:
        ref: The AssetReference to read from.

    Returns:
        A ReferenceData carrying the reference's id, name, file_path,
        user_metadata, preview_id and its three timestamps.
    """
    return ReferenceData(
        id=ref.id,
        name=ref.name,
        file_path=ref.file_path,
        user_metadata=ref.user_metadata,
        preview_id=ref.preview_id,
        created_at=ref.created_at,
        updated_at=ref.updated_at,
        last_access_time=ref.last_access_time,
    )

View File

@ -1,33 +1,33 @@
from app.assets.database.queries import ( from app.assets.database.queries import (
add_tags_to_asset_info, add_tags_to_reference,
get_asset_info_by_id, get_reference_by_id,
list_tags_with_usage, list_tags_with_usage,
remove_tags_from_asset_info, remove_tags_from_reference,
) )
from app.assets.services.schemas import AddTagsResult, RemoveTagsResult, TagUsage from app.assets.services.schemas import AddTagsResult, RemoveTagsResult, TagUsage
from app.database.db import create_session from app.database.db import create_session
def apply_tags( def apply_tags(
asset_info_id: str, reference_id: str,
tags: list[str], tags: list[str],
origin: str = "manual", origin: str = "manual",
owner_id: str = "", owner_id: str = "",
) -> AddTagsResult: ) -> AddTagsResult:
with create_session() as session: with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id) ref_row = get_reference_by_id(session, reference_id=reference_id)
if not info_row: if not ref_row:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id: if ref_row.owner_id and ref_row.owner_id != owner_id:
raise PermissionError("not owner") raise PermissionError("not owner")
data = add_tags_to_asset_info( data = add_tags_to_reference(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
tags=tags, tags=tags,
origin=origin, origin=origin,
create_if_missing=True, create_if_missing=True,
asset_info_row=info_row, reference_row=ref_row,
) )
session.commit() session.commit()
@ -39,20 +39,20 @@ def apply_tags(
def remove_tags( def remove_tags(
asset_info_id: str, reference_id: str,
tags: list[str], tags: list[str],
owner_id: str = "", owner_id: str = "",
) -> RemoveTagsResult: ) -> RemoveTagsResult:
with create_session() as session: with create_session() as session:
info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id) ref_row = get_reference_by_id(session, reference_id=reference_id)
if not info_row: if not ref_row:
raise ValueError(f"AssetInfo {asset_info_id} not found") raise ValueError(f"AssetReference {reference_id} not found")
if info_row.owner_id and info_row.owner_id != owner_id: if ref_row.owner_id and ref_row.owner_id != owner_id:
raise PermissionError("not owner") raise PermissionError("not owner")
data = remove_tags_from_asset_info( data = remove_tags_from_reference(
session, session,
asset_info_id=asset_info_id, reference_id=reference_id,
tags=tags, tags=tags,
) )
session.commit() session.commit()

View File

@ -3,24 +3,24 @@ import uuid
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, AssetInfoMeta from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
from app.assets.database.queries import ( from app.assets.database.queries import (
asset_info_exists_for_asset_id, reference_exists_for_asset_id,
get_asset_info_by_id, get_reference_by_id,
insert_asset_info, insert_reference,
get_or_create_asset_info, get_or_create_reference,
update_asset_info_timestamps, update_reference_timestamps,
list_asset_infos_page, list_references_page,
fetch_asset_info_asset_and_tags, fetch_reference_asset_and_tags,
fetch_asset_info_and_asset, fetch_reference_and_asset,
update_asset_info_access_time, update_reference_access_time,
set_asset_info_metadata, set_reference_metadata,
delete_asset_info_by_id, delete_reference_by_id,
set_asset_info_preview, set_reference_preview,
bulk_insert_asset_infos_ignore_conflicts, bulk_insert_references_ignore_conflicts,
get_asset_info_ids_by_ids, get_reference_ids_by_ids,
ensure_tags_exist, ensure_tags_exist,
add_tags_to_asset_info, add_tags_to_reference,
) )
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
@ -32,14 +32,14 @@ def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024)
return asset return asset
def _make_asset_info( def _make_reference(
session: Session, session: Session,
asset: Asset, asset: Asset,
name: str = "test", name: str = "test",
owner_id: str = "", owner_id: str = "",
) -> AssetInfo: ) -> AssetReference:
now = get_utc_now() now = get_utc_now()
info = AssetInfo( ref = AssetReference(
owner_id=owner_id, owner_id=owner_id,
name=name, name=name,
asset_id=asset.id, asset_id=asset.id,
@ -47,381 +47,386 @@ def _make_asset_info(
updated_at=now, updated_at=now,
last_access_time=now, last_access_time=now,
) )
session.add(info) session.add(ref)
session.flush() session.flush()
return info return ref
class TestAssetInfoExistsForAssetId: class TestReferenceExistsForAssetId:
def test_returns_false_when_no_info(self, session: Session): def test_returns_false_when_no_reference(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
assert asset_info_exists_for_asset_id(session, asset_id=asset.id) is False assert reference_exists_for_asset_id(session, asset_id=asset.id) is False
def test_returns_true_when_info_exists(self, session: Session): def test_returns_true_when_reference_exists(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset) _make_reference(session, asset)
assert asset_info_exists_for_asset_id(session, asset_id=asset.id) is True assert reference_exists_for_asset_id(session, asset_id=asset.id) is True
class TestGetAssetInfoById: class TestGetReferenceById:
def test_returns_none_for_nonexistent(self, session: Session): def test_returns_none_for_nonexistent(self, session: Session):
assert get_asset_info_by_id(session, asset_info_id="nonexistent") is None assert get_reference_by_id(session, reference_id="nonexistent") is None
def test_returns_info(self, session: Session): def test_returns_reference(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, name="myfile.txt") ref = _make_reference(session, asset, name="myfile.txt")
result = get_asset_info_by_id(session, asset_info_id=info.id) result = get_reference_by_id(session, reference_id=ref.id)
assert result is not None assert result is not None
assert result.name == "myfile.txt" assert result.name == "myfile.txt"
class TestListAssetInfosPage: class TestListReferencesPage:
def test_empty_db(self, session: Session): def test_empty_db(self, session: Session):
infos, tag_map, total = list_asset_infos_page(session) refs, tag_map, total = list_references_page(session)
assert infos == [] assert refs == []
assert tag_map == {} assert tag_map == {}
assert total == 0 assert total == 0
def test_returns_infos_with_tags(self, session: Session): def test_returns_references_with_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, name="test.bin") ref = _make_reference(session, asset, name="test.bin")
ensure_tags_exist(session, ["alpha", "beta"]) ensure_tags_exist(session, ["alpha", "beta"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"]) add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
session.commit() session.commit()
infos, tag_map, total = list_asset_infos_page(session) refs, tag_map, total = list_references_page(session)
assert len(infos) == 1 assert len(refs) == 1
assert infos[0].id == info.id assert refs[0].id == ref.id
assert set(tag_map[info.id]) == {"alpha", "beta"} assert set(tag_map[ref.id]) == {"alpha", "beta"}
assert total == 1 assert total == 1
def test_name_contains_filter(self, session: Session): def test_name_contains_filter(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="model_v1.safetensors") _make_reference(session, asset, name="model_v1.safetensors")
_make_asset_info(session, asset, name="config.json") _make_reference(session, asset, name="config.json")
session.commit() session.commit()
infos, _, total = list_asset_infos_page(session, name_contains="model") refs, _, total = list_references_page(session, name_contains="model")
assert total == 1 assert total == 1
assert infos[0].name == "model_v1.safetensors" assert refs[0].name == "model_v1.safetensors"
def test_owner_visibility(self, session: Session): def test_owner_visibility(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="public", owner_id="") _make_reference(session, asset, name="public", owner_id="")
_make_asset_info(session, asset, name="private", owner_id="user1") _make_reference(session, asset, name="private", owner_id="user1")
session.commit() session.commit()
# Empty owner sees only public # Empty owner sees only public
infos, _, total = list_asset_infos_page(session, owner_id="") refs, _, total = list_references_page(session, owner_id="")
assert total == 1 assert total == 1
assert infos[0].name == "public" assert refs[0].name == "public"
# Owner sees both # Owner sees both
infos, _, total = list_asset_infos_page(session, owner_id="user1") refs, _, total = list_references_page(session, owner_id="user1")
assert total == 2 assert total == 2
def test_include_tags_filter(self, session: Session): def test_include_tags_filter(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info1 = _make_asset_info(session, asset, name="tagged") ref1 = _make_reference(session, asset, name="tagged")
_make_asset_info(session, asset, name="untagged") _make_reference(session, asset, name="untagged")
ensure_tags_exist(session, ["wanted"]) ensure_tags_exist(session, ["wanted"])
add_tags_to_asset_info(session, asset_info_id=info1.id, tags=["wanted"]) add_tags_to_reference(session, reference_id=ref1.id, tags=["wanted"])
session.commit() session.commit()
infos, _, total = list_asset_infos_page(session, include_tags=["wanted"]) refs, _, total = list_references_page(session, include_tags=["wanted"])
assert total == 1 assert total == 1
assert infos[0].name == "tagged" assert refs[0].name == "tagged"
def test_exclude_tags_filter(self, session: Session): def test_exclude_tags_filter(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="keep") _make_reference(session, asset, name="keep")
info_exclude = _make_asset_info(session, asset, name="exclude") ref_exclude = _make_reference(session, asset, name="exclude")
ensure_tags_exist(session, ["bad"]) ensure_tags_exist(session, ["bad"])
add_tags_to_asset_info(session, asset_info_id=info_exclude.id, tags=["bad"]) add_tags_to_reference(session, reference_id=ref_exclude.id, tags=["bad"])
session.commit() session.commit()
infos, _, total = list_asset_infos_page(session, exclude_tags=["bad"]) refs, _, total = list_references_page(session, exclude_tags=["bad"])
assert total == 1 assert total == 1
assert infos[0].name == "keep" assert refs[0].name == "keep"
def test_sorting(self, session: Session): def test_sorting(self, session: Session):
asset = _make_asset(session, "hash1", size=100) asset = _make_asset(session, "hash1", size=100)
asset2 = _make_asset(session, "hash2", size=500) asset2 = _make_asset(session, "hash2", size=500)
_make_asset_info(session, asset, name="small") _make_reference(session, asset, name="small")
_make_asset_info(session, asset2, name="large") _make_reference(session, asset2, name="large")
session.commit() session.commit()
infos, _, _ = list_asset_infos_page(session, sort="size", order="desc") refs, _, _ = list_references_page(session, sort="size", order="desc")
assert infos[0].name == "large" assert refs[0].name == "large"
infos, _, _ = list_asset_infos_page(session, sort="name", order="asc") refs, _, _ = list_references_page(session, sort="name", order="asc")
assert infos[0].name == "large" assert refs[0].name == "large"
class TestFetchAssetInfoAssetAndTags: class TestFetchReferenceAssetAndTags:
def test_returns_none_for_nonexistent(self, session: Session): def test_returns_none_for_nonexistent(self, session: Session):
result = fetch_asset_info_asset_and_tags(session, "nonexistent") result = fetch_reference_asset_and_tags(session, "nonexistent")
assert result is None assert result is None
def test_returns_tuple(self, session: Session): def test_returns_tuple(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, name="test.bin") ref = _make_reference(session, asset, name="test.bin")
ensure_tags_exist(session, ["tag1"]) ensure_tags_exist(session, ["tag1"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["tag1"]) add_tags_to_reference(session, reference_id=ref.id, tags=["tag1"])
session.commit() session.commit()
result = fetch_asset_info_asset_and_tags(session, info.id) result = fetch_reference_asset_and_tags(session, ref.id)
assert result is not None assert result is not None
ret_info, ret_asset, ret_tags = result ret_ref, ret_asset, ret_tags = result
assert ret_info.id == info.id assert ret_ref.id == ref.id
assert ret_asset.id == asset.id assert ret_asset.id == asset.id
assert ret_tags == ["tag1"] assert ret_tags == ["tag1"]
class TestFetchAssetInfoAndAsset: class TestFetchReferenceAndAsset:
def test_returns_none_for_nonexistent(self, session: Session): def test_returns_none_for_nonexistent(self, session: Session):
result = fetch_asset_info_and_asset(session, asset_info_id="nonexistent") result = fetch_reference_and_asset(session, reference_id="nonexistent")
assert result is None assert result is None
def test_returns_tuple(self, session: Session): def test_returns_tuple(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
result = fetch_asset_info_and_asset(session, asset_info_id=info.id) result = fetch_reference_and_asset(session, reference_id=ref.id)
assert result is not None assert result is not None
ret_info, ret_asset = result ret_ref, ret_asset = result
assert ret_info.id == info.id assert ret_ref.id == ref.id
assert ret_asset.id == asset.id assert ret_asset.id == asset.id
class TestUpdateAssetInfoAccessTime: class TestUpdateReferenceAccessTime:
def test_updates_last_access_time(self, session: Session): def test_updates_last_access_time(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
original_time = info.last_access_time original_time = ref.last_access_time
session.commit() session.commit()
import time import time
time.sleep(0.01) time.sleep(0.01)
update_asset_info_access_time(session, asset_info_id=info.id) update_reference_access_time(session, reference_id=ref.id)
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.last_access_time > original_time assert ref.last_access_time > original_time
class TestDeleteAssetInfoById: class TestDeleteReferenceById:
def test_deletes_existing(self, session: Session): def test_deletes_existing(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
result = delete_asset_info_by_id(session, asset_info_id=info.id, owner_id="") result = delete_reference_by_id(session, reference_id=ref.id, owner_id="")
assert result is True assert result is True
assert get_asset_info_by_id(session, asset_info_id=info.id) is None assert get_reference_by_id(session, reference_id=ref.id) is None
def test_returns_false_for_nonexistent(self, session: Session): def test_returns_false_for_nonexistent(self, session: Session):
result = delete_asset_info_by_id(session, asset_info_id="nonexistent", owner_id="") result = delete_reference_by_id(session, reference_id="nonexistent", owner_id="")
assert result is False assert result is False
def test_respects_owner_visibility(self, session: Session): def test_respects_owner_visibility(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
session.commit() session.commit()
result = delete_asset_info_by_id(session, asset_info_id=info.id, owner_id="user2") result = delete_reference_by_id(session, reference_id=ref.id, owner_id="user2")
assert result is False assert result is False
assert get_asset_info_by_id(session, asset_info_id=info.id) is not None assert get_reference_by_id(session, reference_id=ref.id) is not None
class TestSetAssetInfoPreview: class TestSetReferencePreview:
def test_sets_preview(self, session: Session): def test_sets_preview(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
preview_asset = _make_asset(session, "preview_hash") preview_asset = _make_asset(session, "preview_hash")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id=preview_asset.id) set_reference_preview(session, reference_id=ref.id, preview_asset_id=preview_asset.id)
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.preview_id == preview_asset.id assert ref.preview_id == preview_asset.id
def test_clears_preview(self, session: Session): def test_clears_preview(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
preview_asset = _make_asset(session, "preview_hash") preview_asset = _make_asset(session, "preview_hash")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
info.preview_id = preview_asset.id ref.preview_id = preview_asset.id
session.commit() session.commit()
set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id=None) set_reference_preview(session, reference_id=ref.id, preview_asset_id=None)
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.preview_id is None assert ref.preview_id is None
def test_raises_for_nonexistent_info(self, session: Session): def test_raises_for_nonexistent_reference(self, session: Session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
set_asset_info_preview(session, asset_info_id="nonexistent", preview_asset_id=None) set_reference_preview(session, reference_id="nonexistent", preview_asset_id=None)
def test_raises_for_nonexistent_preview(self, session: Session): def test_raises_for_nonexistent_preview(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
with pytest.raises(ValueError, match="Preview Asset"): with pytest.raises(ValueError, match="Preview Asset"):
set_asset_info_preview(session, asset_info_id=info.id, preview_asset_id="nonexistent") set_reference_preview(session, reference_id=ref.id, preview_asset_id="nonexistent")
class TestInsertAssetInfo: class TestInsertReference:
def test_creates_new_info(self, session: Session): def test_creates_new_reference(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = insert_asset_info( ref = insert_reference(
session, asset_id=asset.id, owner_id="user1", name="test.bin" session, asset_id=asset.id, owner_id="user1", name="test.bin"
) )
session.commit() session.commit()
assert info is not None assert ref is not None
assert info.name == "test.bin" assert ref.name == "test.bin"
assert info.owner_id == "user1" assert ref.owner_id == "user1"
def test_returns_none_on_conflict(self, session: Session): def test_allows_duplicate_names(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
insert_asset_info(session, asset_id=asset.id, owner_id="user1", name="dup.bin") ref1 = insert_reference(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
session.commit() session.commit()
# Attempt duplicate with same (asset_id, owner_id, name) # Duplicate names are now allowed
result = insert_asset_info( ref2 = insert_reference(
session, asset_id=asset.id, owner_id="user1", name="dup.bin" session, asset_id=asset.id, owner_id="user1", name="dup.bin"
) )
assert result is None session.commit()
assert ref1 is not None
assert ref2 is not None
assert ref1.id != ref2.id
class TestGetOrCreateAssetInfo: class TestGetOrCreateReference:
def test_creates_new_info(self, session: Session): def test_creates_new_reference(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info, created = get_or_create_asset_info( ref, created = get_or_create_reference(
session, asset_id=asset.id, owner_id="user1", name="new.bin" session, asset_id=asset.id, owner_id="user1", name="new.bin"
) )
session.commit() session.commit()
assert created is True assert created is True
assert info.name == "new.bin" assert ref.name == "new.bin"
def test_returns_existing_info(self, session: Session): def test_always_creates_new_reference(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info1, created1 = get_or_create_asset_info( ref1, created1 = get_or_create_reference(
session, asset_id=asset.id, owner_id="user1", name="existing.bin" session, asset_id=asset.id, owner_id="user1", name="existing.bin"
) )
session.commit() session.commit()
info2, created2 = get_or_create_asset_info( # Duplicate names are allowed, so always creates new
ref2, created2 = get_or_create_reference(
session, asset_id=asset.id, owner_id="user1", name="existing.bin" session, asset_id=asset.id, owner_id="user1", name="existing.bin"
) )
session.commit() session.commit()
assert created1 is True assert created1 is True
assert created2 is False assert created2 is True
assert info1.id == info2.id assert ref1.id != ref2.id
class TestUpdateAssetInfoTimestamps: class TestUpdateReferenceTimestamps:
def test_updates_timestamps(self, session: Session): def test_updates_timestamps(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
original_updated_at = info.updated_at original_updated_at = ref.updated_at
session.commit() session.commit()
time.sleep(0.01) time.sleep(0.01)
update_asset_info_timestamps(session, info) update_reference_timestamps(session, ref)
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.updated_at > original_updated_at assert ref.updated_at > original_updated_at
def test_updates_preview_id(self, session: Session): def test_updates_preview_id(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
preview_asset = _make_asset(session, "preview_hash") preview_asset = _make_asset(session, "preview_hash")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
update_asset_info_timestamps(session, info, preview_id=preview_asset.id) update_reference_timestamps(session, ref, preview_id=preview_asset.id)
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.preview_id == preview_asset.id assert ref.preview_id == preview_asset.id
class TestSetAssetInfoMetadata: class TestSetReferenceMetadata:
def test_sets_metadata(self, session: Session): def test_sets_metadata(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id=info.id, user_metadata={"key": "value"} session, reference_id=ref.id, user_metadata={"key": "value"}
) )
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.user_metadata == {"key": "value"} assert ref.user_metadata == {"key": "value"}
# Check metadata table # Check metadata table
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all() meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 1 assert len(meta) == 1
assert meta[0].key == "key" assert meta[0].key == "key"
assert meta[0].val_str == "value" assert meta[0].val_str == "value"
def test_replaces_existing_metadata(self, session: Session): def test_replaces_existing_metadata(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id=info.id, user_metadata={"old": "data"} session, reference_id=ref.id, user_metadata={"old": "data"}
) )
session.commit() session.commit()
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id=info.id, user_metadata={"new": "data"} session, reference_id=ref.id, user_metadata={"new": "data"}
) )
session.commit() session.commit()
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all() meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 1 assert len(meta) == 1
assert meta[0].key == "new" assert meta[0].key == "new"
def test_clears_metadata_with_empty_dict(self, session: Session): def test_clears_metadata_with_empty_dict(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id=info.id, user_metadata={"key": "value"} session, reference_id=ref.id, user_metadata={"key": "value"}
) )
session.commit() session.commit()
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id=info.id, user_metadata={} session, reference_id=ref.id, user_metadata={}
) )
session.commit() session.commit()
session.refresh(info) session.refresh(ref)
assert info.user_metadata == {} assert ref.user_metadata == {}
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all() meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 0 assert len(meta) == 0
def test_raises_for_nonexistent(self, session: Session): def test_raises_for_nonexistent(self, session: Session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
set_asset_info_metadata( set_reference_metadata(
session, asset_info_id="nonexistent", user_metadata={"key": "value"} session, reference_id="nonexistent", user_metadata={"key": "value"}
) )
class TestBulkInsertAssetInfosIgnoreConflicts: class TestBulkInsertReferencesIgnoreConflicts:
def test_inserts_multiple_infos(self, session: Session): def test_inserts_multiple_references(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
now = get_utc_now() now = get_utc_now()
rows = [ rows = [
@ -448,15 +453,15 @@ class TestBulkInsertAssetInfosIgnoreConflicts:
"last_access_time": now, "last_access_time": now,
}, },
] ]
bulk_insert_asset_infos_ignore_conflicts(session, rows) bulk_insert_references_ignore_conflicts(session, rows)
session.commit() session.commit()
infos = session.query(AssetInfo).all() refs = session.query(AssetReference).all()
assert len(infos) == 2 assert len(refs) == 2
def test_ignores_conflicts(self, session: Session): def test_allows_duplicate_names(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, name="existing.bin", owner_id="") _make_reference(session, asset, name="existing.bin", owner_id="")
session.commit() session.commit()
now = get_utc_now() now = get_utc_now()
@ -484,28 +489,29 @@ class TestBulkInsertAssetInfosIgnoreConflicts:
"last_access_time": now, "last_access_time": now,
}, },
] ]
bulk_insert_asset_infos_ignore_conflicts(session, rows) bulk_insert_references_ignore_conflicts(session, rows)
session.commit() session.commit()
infos = session.query(AssetInfo).all() # Duplicate names allowed, so all 3 rows exist
assert len(infos) == 2 # existing + new, not 3 refs = session.query(AssetReference).all()
assert len(refs) == 3
def test_empty_list_is_noop(self, session: Session): def test_empty_list_is_noop(self, session: Session):
bulk_insert_asset_infos_ignore_conflicts(session, []) bulk_insert_references_ignore_conflicts(session, [])
assert session.query(AssetInfo).count() == 0 assert session.query(AssetReference).count() == 0
class TestGetAssetInfoIdsByIds: class TestGetReferenceIdsByIds:
def test_returns_existing_ids(self, session: Session): def test_returns_existing_ids(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info1 = _make_asset_info(session, asset, name="a.bin") ref1 = _make_reference(session, asset, name="a.bin")
info2 = _make_asset_info(session, asset, name="b.bin") ref2 = _make_reference(session, asset, name="b.bin")
session.commit() session.commit()
found = get_asset_info_ids_by_ids(session, [info1.id, info2.id, "nonexistent"]) found = get_reference_ids_by_ids(session, [ref1.id, ref2.id, "nonexistent"])
assert found == {info1.id, info2.id} assert found == {ref1.id, ref2.id}
def test_empty_list_returns_empty(self, session: Session): def test_empty_list_returns_empty(self, session: Session):
found = get_asset_info_ids_by_ids(session, []) found = get_reference_ids_by_ids(session, [])
assert found == set() assert found == set()

View File

@ -1,21 +1,21 @@
"""Tests for cache_state query functions.""" """Tests for cache_state (AssetReference file path) query functions."""
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState, AssetInfo from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ( from app.assets.database.queries import (
list_cache_states_by_asset_id, list_references_by_asset_id,
upsert_cache_state, upsert_reference,
get_unreferenced_unhashed_asset_ids, get_unreferenced_unhashed_asset_ids,
delete_assets_by_ids, delete_assets_by_ids,
get_cache_states_for_prefixes, get_references_for_prefixes,
bulk_update_needs_verify, bulk_update_needs_verify,
delete_cache_states_by_ids, delete_references_by_ids,
delete_orphaned_seed_asset, delete_orphaned_seed_asset,
bulk_insert_cache_states_ignore_conflicts, bulk_insert_references_ignore_conflicts,
get_cache_states_by_paths_and_asset_ids, get_references_by_paths_and_asset_ids,
mark_cache_states_missing_outside_prefixes, mark_references_missing_outside_prefixes,
restore_cache_states_by_paths, restore_references_by_paths,
) )
from app.assets.helpers import select_best_live_path, get_utc_now from app.assets.helpers import select_best_live_path, get_utc_now
@ -27,49 +27,55 @@ def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024)
return asset return asset
def _make_cache_state( def _make_reference(
session: Session, session: Session,
asset: Asset, asset: Asset,
file_path: str, file_path: str,
name: str = "test",
mtime_ns: int | None = None, mtime_ns: int | None = None,
needs_verify: bool = False, needs_verify: bool = False,
) -> AssetCacheState: ) -> AssetReference:
state = AssetCacheState( now = get_utc_now()
ref = AssetReference(
asset_id=asset.id, asset_id=asset.id,
file_path=file_path, file_path=file_path,
name=name,
mtime_ns=mtime_ns, mtime_ns=mtime_ns,
needs_verify=needs_verify, needs_verify=needs_verify,
created_at=now,
updated_at=now,
last_access_time=now,
) )
session.add(state) session.add(ref)
session.flush() session.flush()
return state return ref
class TestListCacheStatesByAssetId: class TestListReferencesByAssetId:
def test_returns_empty_for_no_states(self, session: Session): def test_returns_empty_for_no_references(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
states = list_cache_states_by_asset_id(session, asset_id=asset.id) refs = list_references_by_asset_id(session, asset_id=asset.id)
assert list(states) == [] assert list(refs) == []
def test_returns_states_for_asset(self, session: Session): def test_returns_references_for_asset(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/path/a.bin") _make_reference(session, asset, "/path/a.bin", name="a")
_make_cache_state(session, asset, "/path/b.bin") _make_reference(session, asset, "/path/b.bin", name="b")
session.commit() session.commit()
states = list_cache_states_by_asset_id(session, asset_id=asset.id) refs = list_references_by_asset_id(session, asset_id=asset.id)
paths = [s.file_path for s in states] paths = [r.file_path for r in refs]
assert set(paths) == {"/path/a.bin", "/path/b.bin"} assert set(paths) == {"/path/a.bin", "/path/b.bin"}
def test_does_not_return_other_assets_states(self, session: Session): def test_does_not_return_other_assets_references(self, session: Session):
asset1 = _make_asset(session, "hash1") asset1 = _make_asset(session, "hash1")
asset2 = _make_asset(session, "hash2") asset2 = _make_asset(session, "hash2")
_make_cache_state(session, asset1, "/path/asset1.bin") _make_reference(session, asset1, "/path/asset1.bin", name="a1")
_make_cache_state(session, asset2, "/path/asset2.bin") _make_reference(session, asset2, "/path/asset2.bin", name="a2")
session.commit() session.commit()
states = list_cache_states_by_asset_id(session, asset_id=asset1.id) refs = list_references_by_asset_id(session, asset_id=asset1.id)
paths = [s.file_path for s in states] paths = [r.file_path for r in refs]
assert paths == ["/path/asset1.bin"] assert paths == ["/path/asset1.bin"]
@ -80,10 +86,10 @@ class TestSelectBestLivePath:
def test_returns_empty_when_no_files_exist(self, session: Session): def test_returns_empty_when_no_files_exist(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
state = _make_cache_state(session, asset, "/nonexistent/path.bin") ref = _make_reference(session, asset, "/nonexistent/path.bin")
session.commit() session.commit()
result = select_best_live_path([state]) result = select_best_live_path([ref])
assert result == "" assert result == ""
def test_prefers_verified_path(self, session: Session, tmp_path): def test_prefers_verified_path(self, session: Session, tmp_path):
@ -96,124 +102,125 @@ class TestSelectBestLivePath:
unverified_file = tmp_path / "unverified.bin" unverified_file = tmp_path / "unverified.bin"
unverified_file.write_bytes(b"data") unverified_file.write_bytes(b"data")
state_verified = _make_cache_state( ref_verified = _make_reference(
session, asset, str(verified_file), needs_verify=False session, asset, str(verified_file), name="verified", needs_verify=False
) )
state_unverified = _make_cache_state( ref_unverified = _make_reference(
session, asset, str(unverified_file), needs_verify=True session, asset, str(unverified_file), name="unverified", needs_verify=True
) )
session.commit() session.commit()
states = [state_unverified, state_verified] refs = [ref_unverified, ref_verified]
result = select_best_live_path(states) result = select_best_live_path(refs)
assert result == str(verified_file) assert result == str(verified_file)
def test_falls_back_to_existing_unverified(self, session: Session, tmp_path): def test_falls_back_to_existing_unverified(self, session: Session, tmp_path):
"""If all states need verification, return first existing path.""" """If all references need verification, return first existing path."""
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
existing_file = tmp_path / "exists.bin" existing_file = tmp_path / "exists.bin"
existing_file.write_bytes(b"data") existing_file.write_bytes(b"data")
state = _make_cache_state(session, asset, str(existing_file), needs_verify=True) ref = _make_reference(session, asset, str(existing_file), needs_verify=True)
session.commit() session.commit()
result = select_best_live_path([state]) result = select_best_live_path([ref])
assert result == str(existing_file) assert result == str(existing_file)
class TestSelectBestLivePathWithMocking: class TestSelectBestLivePathWithMocking:
def test_handles_missing_file_path_attr(self): def test_handles_missing_file_path_attr(self):
"""Gracefully handle states with None file_path.""" """Gracefully handle references with None file_path."""
class MockState: class MockRef:
file_path = None file_path = None
needs_verify = False needs_verify = False
result = select_best_live_path([MockState()]) result = select_best_live_path([MockRef()])
assert result == "" assert result == ""
class TestUpsertCacheState: class TestUpsertReference:
@pytest.mark.parametrize( @pytest.mark.parametrize(
"initial_mtime,second_mtime,expect_created,expect_updated,final_mtime", "initial_mtime,second_mtime,expect_created,expect_updated,final_mtime",
[ [
# New state creation # New reference creation
(None, 12345, True, False, 12345), (None, 12345, True, False, 12345),
# Existing state, same mtime - no update # Existing reference, same mtime - no update
(100, 100, False, False, 100), (100, 100, False, False, 100),
# Existing state, different mtime - update # Existing reference, different mtime - update
(100, 200, False, True, 200), (100, 200, False, True, 200),
], ],
ids=["new_state", "existing_no_change", "existing_update_mtime"], ids=["new_reference", "existing_no_change", "existing_update_mtime"],
) )
def test_upsert_scenarios( def test_upsert_scenarios(
self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime
): ):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
file_path = f"/path_{initial_mtime}_{second_mtime}.bin" file_path = f"/path_{initial_mtime}_{second_mtime}.bin"
name = f"file_{initial_mtime}_{second_mtime}"
# Create initial state if needed # Create initial reference if needed
if initial_mtime is not None: if initial_mtime is not None:
upsert_cache_state(session, asset_id=asset.id, file_path=file_path, mtime_ns=initial_mtime) upsert_reference(session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=initial_mtime)
session.commit() session.commit()
# The upsert call we're testing # The upsert call we're testing
created, updated = upsert_cache_state( created, updated = upsert_reference(
session, asset_id=asset.id, file_path=file_path, mtime_ns=second_mtime session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=second_mtime
) )
session.commit() session.commit()
assert created is expect_created assert created is expect_created
assert updated is expect_updated assert updated is expect_updated
state = session.query(AssetCacheState).filter_by(file_path=file_path).one() ref = session.query(AssetReference).filter_by(file_path=file_path).one()
assert state.mtime_ns == final_mtime assert ref.mtime_ns == final_mtime
def test_upsert_restores_missing_state(self, session: Session): def test_upsert_restores_missing_reference(self, session: Session):
"""Upserting a cache state that was marked missing should restore it.""" """Upserting a reference that was marked missing should restore it."""
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
file_path = "/restored/file.bin" file_path = "/restored/file.bin"
state = _make_cache_state(session, asset, file_path, mtime_ns=100) ref = _make_reference(session, asset, file_path, mtime_ns=100)
state.is_missing = True ref.is_missing = True
session.commit() session.commit()
created, updated = upsert_cache_state( created, updated = upsert_reference(
session, asset_id=asset.id, file_path=file_path, mtime_ns=100 session, asset_id=asset.id, file_path=file_path, name="restored", mtime_ns=100
) )
session.commit() session.commit()
assert created is False assert created is False
assert updated is True assert updated is True
restored_state = session.query(AssetCacheState).filter_by(file_path=file_path).one() restored_ref = session.query(AssetReference).filter_by(file_path=file_path).one()
assert restored_state.is_missing is False assert restored_ref.is_missing is False
class TestRestoreCacheStatesByPaths: class TestRestoreReferencesByPaths:
def test_restores_missing_states(self, session: Session): def test_restores_missing_references(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
missing_path = "/missing/file.bin" missing_path = "/missing/file.bin"
active_path = "/active/file.bin" active_path = "/active/file.bin"
missing_state = _make_cache_state(session, asset, missing_path) missing_ref = _make_reference(session, asset, missing_path, name="missing")
missing_state.is_missing = True missing_ref.is_missing = True
_make_cache_state(session, asset, active_path) _make_reference(session, asset, active_path, name="active")
session.commit() session.commit()
restored = restore_cache_states_by_paths(session, [missing_path]) restored = restore_references_by_paths(session, [missing_path])
session.commit() session.commit()
assert restored == 1 assert restored == 1
state = session.query(AssetCacheState).filter_by(file_path=missing_path).one() ref = session.query(AssetReference).filter_by(file_path=missing_path).one()
assert state.is_missing is False assert ref.is_missing is False
def test_empty_list_restores_nothing(self, session: Session): def test_empty_list_restores_nothing(self, session: Session):
restored = restore_cache_states_by_paths(session, []) restored = restore_references_by_paths(session, [])
assert restored == 0 assert restored == 0
class TestMarkCacheStatesMissingOutsidePrefixes: class TestMarkReferencesMissingOutsidePrefixes:
def test_marks_states_missing_outside_prefixes(self, session: Session, tmp_path): def test_marks_references_missing_outside_prefixes(self, session: Session, tmp_path):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
valid_dir = tmp_path / "valid" valid_dir = tmp_path / "valid"
valid_dir.mkdir() valid_dir.mkdir()
@ -223,63 +230,58 @@ class TestMarkCacheStatesMissingOutsidePrefixes:
valid_path = str(valid_dir / "file.bin") valid_path = str(valid_dir / "file.bin")
invalid_path = str(invalid_dir / "file.bin") invalid_path = str(invalid_dir / "file.bin")
_make_cache_state(session, asset, valid_path) _make_reference(session, asset, valid_path, name="valid")
_make_cache_state(session, asset, invalid_path) _make_reference(session, asset, invalid_path, name="invalid")
session.commit() session.commit()
marked = mark_cache_states_missing_outside_prefixes(session, [str(valid_dir)]) marked = mark_references_missing_outside_prefixes(session, [str(valid_dir)])
session.commit() session.commit()
assert marked == 1 assert marked == 1
all_states = session.query(AssetCacheState).all() all_refs = session.query(AssetReference).all()
assert len(all_states) == 2 assert len(all_refs) == 2
valid_state = next(s for s in all_states if s.file_path == valid_path) valid_ref = next(r for r in all_refs if r.file_path == valid_path)
invalid_state = next(s for s in all_states if s.file_path == invalid_path) invalid_ref = next(r for r in all_refs if r.file_path == invalid_path)
assert valid_state.is_missing is False assert valid_ref.is_missing is False
assert invalid_state.is_missing is True assert invalid_ref.is_missing is True
def test_empty_prefixes_marks_nothing(self, session: Session): def test_empty_prefixes_marks_nothing(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/some/path.bin") _make_reference(session, asset, "/some/path.bin")
session.commit() session.commit()
marked = mark_cache_states_missing_outside_prefixes(session, []) marked = mark_references_missing_outside_prefixes(session, [])
assert marked == 0 assert marked == 0
class TestGetUnreferencedUnhashedAssetIds: class TestGetUnreferencedUnhashedAssetIds:
def test_returns_unreferenced_unhashed_assets(self, session: Session): def test_returns_unreferenced_unhashed_assets(self, session: Session):
# Unhashed asset (hash=None) with no cache states # Unhashed asset (hash=None) with no references (no file_path)
no_states = _make_asset(session, hash_val=None) no_refs = _make_asset(session, hash_val=None)
# Unhashed asset with active cache state (not unreferenced) # Unhashed asset with active reference (not unreferenced)
with_active_state = _make_asset(session, hash_val=None) with_active_ref = _make_asset(session, hash_val=None)
_make_cache_state(session, with_active_state, "/has/state.bin") _make_reference(session, with_active_ref, "/has/ref.bin", name="has_ref")
# Unhashed asset with only missing cache state (should be unreferenced) # Unhashed asset with only missing reference (should be unreferenced)
with_missing_state = _make_asset(session, hash_val=None) with_missing_ref = _make_asset(session, hash_val=None)
missing_state = _make_cache_state(session, with_missing_state, "/missing/state.bin") missing_ref = _make_reference(session, with_missing_ref, "/missing/ref.bin", name="missing_ref")
missing_state.is_missing = True missing_ref.is_missing = True
# Regular asset (hash not None) - should not be returned # Regular asset (hash not None) - should not be returned
_make_asset(session, hash_val="blake3:regular") _make_asset(session, hash_val="blake3:regular")
session.commit() session.commit()
unreferenced = get_unreferenced_unhashed_asset_ids(session) unreferenced = get_unreferenced_unhashed_asset_ids(session)
assert no_states.id in unreferenced assert no_refs.id in unreferenced
assert with_missing_state.id in unreferenced assert with_missing_ref.id in unreferenced
assert with_active_state.id not in unreferenced assert with_active_ref.id not in unreferenced
class TestDeleteAssetsByIds: class TestDeleteAssetsByIds:
def test_deletes_assets_and_infos(self, session: Session): def test_deletes_assets_and_references(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
now = get_utc_now() _make_reference(session, asset, "/test/path.bin", name="test")
info = AssetInfo(
owner_id="", name="test", asset_id=asset.id,
created_at=now, updated_at=now, last_access_time=now
)
session.add(info)
session.commit() session.commit()
deleted = delete_assets_by_ids(session, [asset.id]) deleted = delete_assets_by_ids(session, [asset.id])
@ -287,7 +289,7 @@ class TestDeleteAssetsByIds:
assert deleted == 1 assert deleted == 1
assert session.query(Asset).count() == 0 assert session.query(Asset).count() == 0
assert session.query(AssetInfo).count() == 0 assert session.query(AssetReference).count() == 0
def test_empty_list_deletes_nothing(self, session: Session): def test_empty_list_deletes_nothing(self, session: Session):
_make_asset(session, "hash1") _make_asset(session, "hash1")
@ -299,8 +301,8 @@ class TestDeleteAssetsByIds:
assert session.query(Asset).count() == 1 assert session.query(Asset).count() == 1
class TestGetCacheStatesForPrefixes: class TestGetReferencesForPrefixes:
def test_returns_states_matching_prefix(self, session: Session, tmp_path): def test_returns_references_matching_prefix(self, session: Session, tmp_path):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
dir1 = tmp_path / "dir1" dir1 = tmp_path / "dir1"
dir1.mkdir() dir1.mkdir()
@ -310,21 +312,21 @@ class TestGetCacheStatesForPrefixes:
path1 = str(dir1 / "file.bin") path1 = str(dir1 / "file.bin")
path2 = str(dir2 / "file.bin") path2 = str(dir2 / "file.bin")
_make_cache_state(session, asset, path1, mtime_ns=100) _make_reference(session, asset, path1, name="file1", mtime_ns=100)
_make_cache_state(session, asset, path2, mtime_ns=200) _make_reference(session, asset, path2, name="file2", mtime_ns=200)
session.commit() session.commit()
rows = get_cache_states_for_prefixes(session, [str(dir1)]) rows = get_references_for_prefixes(session, [str(dir1)])
assert len(rows) == 1 assert len(rows) == 1
assert rows[0].file_path == path1 assert rows[0].file_path == path1
def test_empty_prefixes_returns_empty(self, session: Session): def test_empty_prefixes_returns_empty(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/some/path.bin") _make_reference(session, asset, "/some/path.bin")
session.commit() session.commit()
rows = get_cache_states_for_prefixes(session, []) rows = get_references_for_prefixes(session, [])
assert rows == [] assert rows == []
@ -332,39 +334,39 @@ class TestGetCacheStatesForPrefixes:
class TestBulkSetNeedsVerify: class TestBulkSetNeedsVerify:
def test_sets_needs_verify_flag(self, session: Session): def test_sets_needs_verify_flag(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
state1 = _make_cache_state(session, asset, "/path1.bin", needs_verify=False) ref1 = _make_reference(session, asset, "/path1.bin", needs_verify=False)
state2 = _make_cache_state(session, asset, "/path2.bin", needs_verify=False) ref2 = _make_reference(session, asset, "/path2.bin", needs_verify=False)
session.commit() session.commit()
updated = bulk_update_needs_verify(session, [state1.id, state2.id], True) updated = bulk_update_needs_verify(session, [ref1.id, ref2.id], True)
session.commit() session.commit()
assert updated == 2 assert updated == 2
session.refresh(state1) session.refresh(ref1)
session.refresh(state2) session.refresh(ref2)
assert state1.needs_verify is True assert ref1.needs_verify is True
assert state2.needs_verify is True assert ref2.needs_verify is True
def test_empty_list_updates_nothing(self, session: Session): def test_empty_list_updates_nothing(self, session: Session):
updated = bulk_update_needs_verify(session, [], True) updated = bulk_update_needs_verify(session, [], True)
assert updated == 0 assert updated == 0
class TestDeleteCacheStatesByIds: class TestDeleteReferencesByIds:
def test_deletes_states_by_id(self, session: Session): def test_deletes_references_by_id(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
state1 = _make_cache_state(session, asset, "/path1.bin") ref1 = _make_reference(session, asset, "/path1.bin")
_make_cache_state(session, asset, "/path2.bin") _make_reference(session, asset, "/path2.bin")
session.commit() session.commit()
deleted = delete_cache_states_by_ids(session, [state1.id]) deleted = delete_references_by_ids(session, [ref1.id])
session.commit() session.commit()
assert deleted == 1 assert deleted == 1
assert session.query(AssetCacheState).count() == 1 assert session.query(AssetReference).count() == 1
def test_empty_list_deletes_nothing(self, session: Session): def test_empty_list_deletes_nothing(self, session: Session):
deleted = delete_cache_states_by_ids(session, []) deleted = delete_references_by_ids(session, [])
assert deleted == 0 assert deleted == 0
@ -384,12 +386,7 @@ class TestDeleteOrphanedSeedAsset:
if create_asset: if create_asset:
asset = _make_asset(session, hash_val=None) asset = _make_asset(session, hash_val=None)
asset_id = asset.id asset_id = asset.id
now = get_utc_now() _make_reference(session, asset, "/test/path.bin", name="test")
info = AssetInfo(
owner_id="", name="test", asset_id=asset.id,
created_at=now, updated_at=now, last_access_time=now
)
session.add(info)
session.commit() session.commit()
deleted = delete_orphaned_seed_asset(session, asset_id) deleted = delete_orphaned_seed_asset(session, asset_id)
@ -400,53 +397,87 @@ class TestDeleteOrphanedSeedAsset:
assert session.query(Asset).count() == expected_count assert session.query(Asset).count() == expected_count
class TestBulkInsertCacheStatesIgnoreConflicts: class TestBulkInsertReferencesIgnoreConflicts:
def test_inserts_multiple_states(self, session: Session): def test_inserts_multiple_references(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
now = get_utc_now()
rows = [ rows = [
{"asset_id": asset.id, "file_path": "/bulk1.bin", "mtime_ns": 100}, {
{"asset_id": asset.id, "file_path": "/bulk2.bin", "mtime_ns": 200}, "asset_id": asset.id,
"file_path": "/bulk1.bin",
"name": "bulk1",
"mtime_ns": 100,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
{
"asset_id": asset.id,
"file_path": "/bulk2.bin",
"name": "bulk2",
"mtime_ns": 200,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
] ]
bulk_insert_cache_states_ignore_conflicts(session, rows) bulk_insert_references_ignore_conflicts(session, rows)
session.commit() session.commit()
assert session.query(AssetCacheState).count() == 2 assert session.query(AssetReference).count() == 2
def test_ignores_conflicts(self, session: Session): def test_ignores_conflicts(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_cache_state(session, asset, "/existing.bin", mtime_ns=100) _make_reference(session, asset, "/existing.bin", mtime_ns=100)
session.commit() session.commit()
now = get_utc_now()
rows = [ rows = [
{"asset_id": asset.id, "file_path": "/existing.bin", "mtime_ns": 999}, {
{"asset_id": asset.id, "file_path": "/new.bin", "mtime_ns": 200}, "asset_id": asset.id,
"file_path": "/existing.bin",
"name": "existing",
"mtime_ns": 999,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
{
"asset_id": asset.id,
"file_path": "/new.bin",
"name": "new",
"mtime_ns": 200,
"created_at": now,
"updated_at": now,
"last_access_time": now,
},
] ]
bulk_insert_cache_states_ignore_conflicts(session, rows) bulk_insert_references_ignore_conflicts(session, rows)
session.commit() session.commit()
assert session.query(AssetCacheState).count() == 2 assert session.query(AssetReference).count() == 2
existing = session.query(AssetCacheState).filter_by(file_path="/existing.bin").one() existing = session.query(AssetReference).filter_by(file_path="/existing.bin").one()
assert existing.mtime_ns == 100 # Original value preserved assert existing.mtime_ns == 100 # Original value preserved
def test_empty_list_is_noop(self, session: Session): def test_empty_list_is_noop(self, session: Session):
bulk_insert_cache_states_ignore_conflicts(session, []) bulk_insert_references_ignore_conflicts(session, [])
assert session.query(AssetCacheState).count() == 0 assert session.query(AssetReference).count() == 0
class TestGetCacheStatesByPathsAndAssetIds: class TestGetReferencesByPathsAndAssetIds:
def test_returns_matching_paths(self, session: Session): def test_returns_matching_paths(self, session: Session):
asset1 = _make_asset(session, "hash1") asset1 = _make_asset(session, "hash1")
asset2 = _make_asset(session, "hash2") asset2 = _make_asset(session, "hash2")
_make_cache_state(session, asset1, "/path1.bin") _make_reference(session, asset1, "/path1.bin")
_make_cache_state(session, asset2, "/path2.bin") _make_reference(session, asset2, "/path2.bin")
session.commit() session.commit()
path_to_asset = { path_to_asset = {
"/path1.bin": asset1.id, "/path1.bin": asset1.id,
"/path2.bin": asset2.id, "/path2.bin": asset2.id,
} }
winners = get_cache_states_by_paths_and_asset_ids(session, path_to_asset) winners = get_references_by_paths_and_asset_ids(session, path_to_asset)
assert winners == {"/path1.bin", "/path2.bin"} assert winners == {"/path1.bin", "/path2.bin"}
@ -454,15 +485,15 @@ class TestGetCacheStatesByPathsAndAssetIds:
asset1 = _make_asset(session, "hash1") asset1 = _make_asset(session, "hash1")
asset2 = _make_asset(session, "hash2") asset2 = _make_asset(session, "hash2")
_make_cache_state(session, asset1, "/path1.bin") _make_reference(session, asset1, "/path1.bin")
session.commit() session.commit()
# Path exists but with different asset_id # Path exists but with different asset_id
path_to_asset = {"/path1.bin": asset2.id} path_to_asset = {"/path1.bin": asset2.id}
winners = get_cache_states_by_paths_and_asset_ids(session, path_to_asset) winners = get_references_by_paths_and_asset_ids(session, path_to_asset)
assert winners == set() assert winners == set()
def test_empty_dict_returns_empty(self, session: Session): def test_empty_dict_returns_empty(self, session: Session):
winners = get_cache_states_by_paths_and_asset_ids(session, {}) winners = get_references_by_paths_and_asset_ids(session, {})
assert winners == set() assert winners == set()

View File

@ -1,10 +1,10 @@
"""Tests for metadata filtering logic in asset_info queries.""" """Tests for metadata filtering logic in asset_reference queries."""
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, AssetInfoMeta from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
from app.assets.database.queries import list_asset_infos_page from app.assets.database.queries import list_references_page
from app.assets.database.queries.asset_info import convert_metadata_to_rows from app.assets.database.queries.asset_reference import convert_metadata_to_rows
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
@ -15,14 +15,14 @@ def _make_asset(session: Session, hash_val: str) -> Asset:
return asset return asset
def _make_asset_info( def _make_reference(
session: Session, session: Session,
asset: Asset, asset: Asset,
name: str, name: str,
metadata: dict | None = None, metadata: dict | None = None,
) -> AssetInfo: ) -> AssetReference:
now = get_utc_now() now = get_utc_now()
info = AssetInfo( ref = AssetReference(
owner_id="", owner_id="",
name=name, name=name,
asset_id=asset.id, asset_id=asset.id,
@ -31,14 +31,14 @@ def _make_asset_info(
updated_at=now, updated_at=now,
last_access_time=now, last_access_time=now,
) )
session.add(info) session.add(ref)
session.flush() session.flush()
if metadata: if metadata:
for key, val in metadata.items(): for key, val in metadata.items():
for row in convert_metadata_to_rows(key, val): for row in convert_metadata_to_rows(key, val):
meta_row = AssetInfoMeta( meta_row = AssetReferenceMeta(
asset_info_id=info.id, asset_reference_id=ref.id,
key=row["key"], key=row["key"],
ordinal=row.get("ordinal", 0), ordinal=row.get("ordinal", 0),
val_str=row.get("val_str"), val_str=row.get("val_str"),
@ -49,7 +49,7 @@ def _make_asset_info(
session.add(meta_row) session.add(meta_row)
session.flush() session.flush()
return info return ref
class TestMetadataFilterByType: class TestMetadataFilterByType:
@ -75,15 +75,15 @@ class TestMetadataFilterByType:
self, session: Session, match_meta, nomatch_meta, filter_key, filter_val self, session: Session, match_meta, nomatch_meta, filter_key, filter_val
): ):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "match", match_meta) _make_reference(session, asset, "match", match_meta)
_make_asset_info(session, asset, "nomatch", nomatch_meta) _make_reference(session, asset, "nomatch", nomatch_meta)
session.commit() session.commit()
infos, _, total = list_asset_infos_page( refs, _, total = list_references_page(
session, metadata_filter={filter_key: filter_val} session, metadata_filter={filter_key: filter_val}
) )
assert total == 1 assert total == 1
assert infos[0].name == "match" assert refs[0].name == "match"
@pytest.mark.parametrize( @pytest.mark.parametrize(
"stored_meta,filter_key,filter_val", "stored_meta,filter_key,filter_val",
@ -101,10 +101,10 @@ class TestMetadataFilterByType:
self, session: Session, stored_meta, filter_key, filter_val self, session: Session, stored_meta, filter_key, filter_val
): ):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "item", stored_meta) _make_reference(session, asset, "item", stored_meta)
session.commit() session.commit()
infos, _, total = list_asset_infos_page( refs, _, total = list_references_page(
session, metadata_filter={filter_key: filter_val} session, metadata_filter={filter_key: filter_val}
) )
assert total == 0 assert total == 0
@ -127,13 +127,13 @@ class TestMetadataFilterNull:
self, session: Session, match_name, match_meta, nomatch_name, nomatch_meta, filter_key self, session: Session, match_name, match_meta, nomatch_name, nomatch_meta, filter_key
): ):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, match_name, match_meta) _make_reference(session, asset, match_name, match_meta)
_make_asset_info(session, asset, nomatch_name, nomatch_meta) _make_reference(session, asset, nomatch_name, nomatch_meta)
session.commit() session.commit()
infos, _, total = list_asset_infos_page(session, metadata_filter={filter_key: None}) refs, _, total = list_references_page(session, metadata_filter={filter_key: None})
assert total == 1 assert total == 1
assert infos[0].name == match_name assert refs[0].name == match_name
class TestMetadataFilterList: class TestMetadataFilterList:
@ -142,14 +142,14 @@ class TestMetadataFilterList:
def test_filter_by_list_matches_any(self, session: Session): def test_filter_by_list_matches_any(self, session: Session):
"""List values should match ANY of the values (OR).""" """List values should match ANY of the values (OR)."""
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "cat_a", {"category": "a"}) _make_reference(session, asset, "cat_a", {"category": "a"})
_make_asset_info(session, asset, "cat_b", {"category": "b"}) _make_reference(session, asset, "cat_b", {"category": "b"})
_make_asset_info(session, asset, "cat_c", {"category": "c"}) _make_reference(session, asset, "cat_c", {"category": "c"})
session.commit() session.commit()
infos, _, total = list_asset_infos_page(session, metadata_filter={"category": ["a", "b"]}) refs, _, total = list_references_page(session, metadata_filter={"category": ["a", "b"]})
assert total == 2 assert total == 2
names = {i.name for i in infos} names = {r.name for r in refs}
assert names == {"cat_a", "cat_b"} assert names == {"cat_a", "cat_b"}
@ -159,16 +159,16 @@ class TestMetadataFilterMultipleKeys:
def test_multiple_keys_must_all_match(self, session: Session): def test_multiple_keys_must_all_match(self, session: Session):
"""Multiple keys should ALL match (AND).""" """Multiple keys should ALL match (AND)."""
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "match", {"type": "model", "version": 2}) _make_reference(session, asset, "match", {"type": "model", "version": 2})
_make_asset_info(session, asset, "wrong_type", {"type": "config", "version": 2}) _make_reference(session, asset, "wrong_type", {"type": "config", "version": 2})
_make_asset_info(session, asset, "wrong_version", {"type": "model", "version": 1}) _make_reference(session, asset, "wrong_version", {"type": "model", "version": 1})
session.commit() session.commit()
infos, _, total = list_asset_infos_page( refs, _, total = list_references_page(
session, metadata_filter={"type": "model", "version": 2} session, metadata_filter={"type": "model", "version": 2}
) )
assert total == 1 assert total == 1
assert infos[0].name == "match" assert refs[0].name == "match"
class TestMetadataFilterEmptyDict: class TestMetadataFilterEmptyDict:
@ -176,9 +176,9 @@ class TestMetadataFilterEmptyDict:
def test_empty_filter_returns_all(self, session: Session): def test_empty_filter_returns_all(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
_make_asset_info(session, asset, "a", {"key": "val"}) _make_reference(session, asset, "a", {"key": "val"})
_make_asset_info(session, asset, "b", {}) _make_reference(session, asset, "b", {})
session.commit() session.commit()
infos, _, total = list_asset_infos_page(session, metadata_filter={}) refs, _, total = list_references_page(session, metadata_filter={})
assert total == 2 assert total == 2

View File

@ -1,13 +1,13 @@
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, AssetInfoTag, AssetInfoMeta, Tag from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, AssetReferenceMeta, Tag
from app.assets.database.queries import ( from app.assets.database.queries import (
ensure_tags_exist, ensure_tags_exist,
get_asset_tags, get_reference_tags,
set_asset_info_tags, set_reference_tags,
add_tags_to_asset_info, add_tags_to_reference,
remove_tags_from_asset_info, remove_tags_from_reference,
add_missing_tag_for_asset_id, add_missing_tag_for_asset_id,
remove_missing_tag_for_asset_id, remove_missing_tag_for_asset_id,
list_tags_with_usage, list_tags_with_usage,
@ -23,9 +23,9 @@ def _make_asset(session: Session, hash_val: str | None = None) -> Asset:
return asset return asset
def _make_asset_info(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetInfo: def _make_reference(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetReference:
now = get_utc_now() now = get_utc_now()
info = AssetInfo( ref = AssetReference(
owner_id=owner_id, owner_id=owner_id,
name=name, name=name,
asset_id=asset.id, asset_id=asset.id,
@ -33,9 +33,9 @@ def _make_asset_info(session: Session, asset: Asset, name: str = "test", owner_i
updated_at=now, updated_at=now,
last_access_time=now, last_access_time=now,
) )
session.add(info) session.add(ref)
session.flush() session.flush()
return info return ref
class TestEnsureTagsExist: class TestEnsureTagsExist:
@ -73,35 +73,35 @@ class TestEnsureTagsExist:
assert tag.tag_type == "system" assert tag.tag_type == "system"
class TestGetAssetTags: class TestGetReferenceTags:
def test_returns_empty_for_no_tags(self, session: Session): def test_returns_empty_for_no_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
tags = get_asset_tags(session, asset_info_id=info.id) tags = get_reference_tags(session, reference_id=ref.id)
assert tags == [] assert tags == []
def test_returns_tags_for_asset(self, session: Session): def test_returns_tags_for_reference(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["tag1", "tag2"]) ensure_tags_exist(session, ["tag1", "tag2"])
session.add_all([ session.add_all([
AssetInfoTag(asset_info_id=info.id, tag_name="tag1", origin="manual", added_at=get_utc_now()), AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag1", origin="manual", added_at=get_utc_now()),
AssetInfoTag(asset_info_id=info.id, tag_name="tag2", origin="manual", added_at=get_utc_now()), AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag2", origin="manual", added_at=get_utc_now()),
]) ])
session.flush() session.flush()
tags = get_asset_tags(session, asset_info_id=info.id) tags = get_reference_tags(session, reference_id=ref.id)
assert set(tags) == {"tag1", "tag2"} assert set(tags) == {"tag1", "tag2"}
class TestSetAssetInfoTags: class TestSetReferenceTags:
def test_adds_new_tags(self, session: Session): def test_adds_new_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
result = set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b"]) result = set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
session.commit() session.commit()
assert set(result["added"]) == {"a", "b"} assert set(result["added"]) == {"a", "b"}
@ -110,10 +110,10 @@ class TestSetAssetInfoTags:
def test_removes_old_tags(self, session: Session): def test_removes_old_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b", "c"]) set_reference_tags(session, reference_id=ref.id, tags=["a", "b", "c"])
result = set_asset_info_tags(session, asset_info_id=info.id, tags=["a"]) result = set_reference_tags(session, reference_id=ref.id, tags=["a"])
session.commit() session.commit()
assert result["added"] == [] assert result["added"] == []
@ -122,10 +122,10 @@ class TestSetAssetInfoTags:
def test_replaces_tags(self, session: Session): def test_replaces_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
set_asset_info_tags(session, asset_info_id=info.id, tags=["a", "b"]) set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
result = set_asset_info_tags(session, asset_info_id=info.id, tags=["b", "c"]) result = set_reference_tags(session, reference_id=ref.id, tags=["b", "c"])
session.commit() session.commit()
assert result["added"] == ["c"] assert result["added"] == ["c"]
@ -133,12 +133,12 @@ class TestSetAssetInfoTags:
assert set(result["total"]) == {"b", "c"} assert set(result["total"]) == {"b", "c"}
class TestAddTagsToAssetInfo: class TestAddTagsToReference:
def test_adds_tags(self, session: Session): def test_adds_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
result = add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x", "y"]) result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
session.commit() session.commit()
assert set(result["added"]) == {"x", "y"} assert set(result["added"]) == {"x", "y"}
@ -146,27 +146,27 @@ class TestAddTagsToAssetInfo:
def test_reports_already_present(self, session: Session): def test_reports_already_present(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x"]) add_tags_to_reference(session, reference_id=ref.id, tags=["x"])
result = add_tags_to_asset_info(session, asset_info_id=info.id, tags=["x", "y"]) result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
session.commit() session.commit()
assert result["added"] == ["y"] assert result["added"] == ["y"]
assert result["already_present"] == ["x"] assert result["already_present"] == ["x"]
def test_raises_for_missing_asset_info(self, session: Session): def test_raises_for_missing_reference(self, session: Session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
add_tags_to_asset_info(session, asset_info_id="nonexistent", tags=["x"]) add_tags_to_reference(session, reference_id="nonexistent", tags=["x"])
class TestRemoveTagsFromAssetInfo: class TestRemoveTagsFromReference:
def test_removes_tags(self, session: Session): def test_removes_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"]) add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
result = remove_tags_from_asset_info(session, asset_info_id=info.id, tags=["a", "b"]) result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "b"])
session.commit() session.commit()
assert set(result["removed"]) == {"a", "b"} assert set(result["removed"]) == {"a", "b"}
@ -175,54 +175,54 @@ class TestRemoveTagsFromAssetInfo:
def test_reports_not_present(self, session: Session): def test_reports_not_present(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a"]) add_tags_to_reference(session, reference_id=ref.id, tags=["a"])
result = remove_tags_from_asset_info(session, asset_info_id=info.id, tags=["a", "x"]) result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "x"])
session.commit() session.commit()
assert result["removed"] == ["a"] assert result["removed"] == ["a"]
assert result["not_present"] == ["x"] assert result["not_present"] == ["x"]
def test_raises_for_missing_asset_info(self, session: Session): def test_raises_for_missing_reference(self, session: Session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
remove_tags_from_asset_info(session, asset_info_id="nonexistent", tags=["x"]) remove_tags_from_reference(session, reference_id="nonexistent", tags=["x"])
class TestMissingTagFunctions: class TestMissingTagFunctions:
def test_add_missing_tag_for_asset_id(self, session: Session): def test_add_missing_tag_for_asset_id(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["missing"], tag_type="system") ensure_tags_exist(session, ["missing"], tag_type="system")
add_missing_tag_for_asset_id(session, asset_id=asset.id) add_missing_tag_for_asset_id(session, asset_id=asset.id)
session.commit() session.commit()
tags = get_asset_tags(session, asset_info_id=info.id) tags = get_reference_tags(session, reference_id=ref.id)
assert "missing" in tags assert "missing" in tags
def test_add_missing_tag_is_idempotent(self, session: Session): def test_add_missing_tag_is_idempotent(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["missing"], tag_type="system") ensure_tags_exist(session, ["missing"], tag_type="system")
add_missing_tag_for_asset_id(session, asset_id=asset.id) add_missing_tag_for_asset_id(session, asset_id=asset.id)
add_missing_tag_for_asset_id(session, asset_id=asset.id) add_missing_tag_for_asset_id(session, asset_id=asset.id)
session.commit() session.commit()
links = session.query(AssetInfoTag).filter_by(asset_info_id=info.id, tag_name="missing").all() links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="missing").all()
assert len(links) == 1 assert len(links) == 1
def test_remove_missing_tag_for_asset_id(self, session: Session): def test_remove_missing_tag_for_asset_id(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["missing"], tag_type="system") ensure_tags_exist(session, ["missing"], tag_type="system")
add_missing_tag_for_asset_id(session, asset_id=asset.id) add_missing_tag_for_asset_id(session, asset_id=asset.id)
remove_missing_tag_for_asset_id(session, asset_id=asset.id) remove_missing_tag_for_asset_id(session, asset_id=asset.id)
session.commit() session.commit()
tags = get_asset_tags(session, asset_info_id=info.id) tags = get_reference_tags(session, reference_id=ref.id)
assert "missing" not in tags assert "missing" not in tags
@ -231,8 +231,8 @@ class TestListTagsWithUsage:
ensure_tags_exist(session, ["used", "unused"]) ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"]) add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit() session.commit()
rows, total = list_tags_with_usage(session) rows, total = list_tags_with_usage(session)
@ -246,8 +246,8 @@ class TestListTagsWithUsage:
ensure_tags_exist(session, ["used", "unused"]) ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"]) add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit() session.commit()
rows, total = list_tags_with_usage(session, include_zero=False) rows, total = list_tags_with_usage(session, include_zero=False)
@ -278,11 +278,11 @@ class TestListTagsWithUsage:
ensure_tags_exist(session, ["shared-tag", "owner-tag"]) ensure_tags_exist(session, ["shared-tag", "owner-tag"])
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
shared_info = _make_asset_info(session, asset, name="shared", owner_id="") shared_ref = _make_reference(session, asset, name="shared", owner_id="")
owner_info = _make_asset_info(session, asset, name="owned", owner_id="user1") owner_ref = _make_reference(session, asset, name="owned", owner_id="user1")
add_tags_to_asset_info(session, asset_info_id=shared_info.id, tags=["shared-tag"]) add_tags_to_reference(session, reference_id=shared_ref.id, tags=["shared-tag"])
add_tags_to_asset_info(session, asset_info_id=owner_info.id, tags=["owner-tag"]) add_tags_to_reference(session, reference_id=owner_ref.id, tags=["owner-tag"])
session.commit() session.commit()
# Empty owner sees only shared # Empty owner sees only shared
@ -301,29 +301,29 @@ class TestListTagsWithUsage:
class TestBulkInsertTagsAndMeta: class TestBulkInsertTagsAndMeta:
def test_inserts_tags(self, session: Session): def test_inserts_tags(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["bulk-tag1", "bulk-tag2"]) ensure_tags_exist(session, ["bulk-tag1", "bulk-tag2"])
session.commit() session.commit()
now = get_utc_now() now = get_utc_now()
tag_rows = [ tag_rows = [
{"asset_info_id": info.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now}, {"asset_reference_id": ref.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now},
{"asset_info_id": info.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now}, {"asset_reference_id": ref.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now},
] ]
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[]) bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
session.commit() session.commit()
tags = get_asset_tags(session, asset_info_id=info.id) tags = get_reference_tags(session, reference_id=ref.id)
assert set(tags) == {"bulk-tag1", "bulk-tag2"} assert set(tags) == {"bulk-tag1", "bulk-tag2"}
def test_inserts_meta(self, session: Session): def test_inserts_meta(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
meta_rows = [ meta_rows = [
{ {
"asset_info_id": info.id, "asset_reference_id": ref.id,
"key": "meta-key", "key": "meta-key",
"ordinal": 0, "ordinal": 0,
"val_str": "meta-value", "val_str": "meta-value",
@ -335,32 +335,32 @@ class TestBulkInsertTagsAndMeta:
bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=meta_rows) bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=meta_rows)
session.commit() session.commit()
meta = session.query(AssetInfoMeta).filter_by(asset_info_id=info.id).all() meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
assert len(meta) == 1 assert len(meta) == 1
assert meta[0].key == "meta-key" assert meta[0].key == "meta-key"
assert meta[0].val_str == "meta-value" assert meta[0].val_str == "meta-value"
def test_ignores_conflicts(self, session: Session): def test_ignores_conflicts(self, session: Session):
asset = _make_asset(session, "hash1") asset = _make_asset(session, "hash1")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["existing-tag"]) ensure_tags_exist(session, ["existing-tag"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing-tag"]) add_tags_to_reference(session, reference_id=ref.id, tags=["existing-tag"])
session.commit() session.commit()
now = get_utc_now() now = get_utc_now()
tag_rows = [ tag_rows = [
{"asset_info_id": info.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now}, {"asset_reference_id": ref.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now},
] ]
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[]) bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
session.commit() session.commit()
# Should still have only one tag link # Should still have only one tag link
links = session.query(AssetInfoTag).filter_by(asset_info_id=info.id, tag_name="existing-tag").all() links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="existing-tag").all()
assert len(links) == 1 assert len(links) == 1
# Origin should be original, not overwritten # Origin should be original, not overwritten
assert links[0].origin == "manual" assert links[0].origin == "manual"
def test_empty_lists_is_noop(self, session: Session): def test_empty_lists_is_noop(self, session: Session):
bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[]) bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[])
assert session.query(AssetInfoTag).count() == 0 assert session.query(AssetReferenceTag).count() == 0
assert session.query(AssetInfoMeta).count() == 0 assert session.query(AssetReferenceMeta).count() == 0

View File

@ -9,6 +9,12 @@ from sqlalchemy.orm import Session
from app.assets.database.models import Base from app.assets.database.models import Base
@pytest.fixture(autouse=True)
def autoclean_unit_test_assets():
    """Override parent autouse fixture - service unit tests don't need server cleanup.

    Yields immediately and performs no setup or teardown of its own.
    """
    yield
@pytest.fixture @pytest.fixture
def db_engine(): def db_engine():
"""In-memory SQLite engine for fast unit tests.""" """In-memory SQLite engine for fast unit tests."""

View File

@ -2,8 +2,8 @@
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
from app.assets.services import ( from app.assets.services import (
get_asset_detail, get_asset_detail,
@ -20,14 +20,14 @@ def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 102
return asset return asset
def _make_asset_info( def _make_reference(
session: Session, session: Session,
asset: Asset, asset: Asset,
name: str = "test", name: str = "test",
owner_id: str = "", owner_id: str = "",
) -> AssetInfo: ) -> AssetReference:
now = get_utc_now() now = get_utc_now()
info = AssetInfo( ref = AssetReference(
owner_id=owner_id, owner_id=owner_id,
name=name, name=name,
asset_id=asset.id, asset_id=asset.id,
@ -35,70 +35,70 @@ def _make_asset_info(
updated_at=now, updated_at=now,
last_access_time=now, last_access_time=now,
) )
session.add(info) session.add(ref)
session.flush() session.flush()
return info return ref
class TestGetAssetDetail: class TestGetAssetDetail:
def test_returns_none_for_nonexistent(self, mock_create_session): def test_returns_none_for_nonexistent(self, mock_create_session):
result = get_asset_detail(asset_info_id="nonexistent") result = get_asset_detail(reference_id="nonexistent")
assert result is None assert result is None
def test_returns_asset_with_tags(self, mock_create_session, session: Session): def test_returns_asset_with_tags(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, name="test.bin") ref = _make_reference(session, asset, name="test.bin")
ensure_tags_exist(session, ["alpha", "beta"]) ensure_tags_exist(session, ["alpha", "beta"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"]) add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
session.commit() session.commit()
result = get_asset_detail(asset_info_id=info.id) result = get_asset_detail(reference_id=ref.id)
assert result is not None assert result is not None
assert result.info.id == info.id assert result.ref.id == ref.id
assert result.asset.hash == asset.hash assert result.asset.hash == asset.hash
assert set(result.tags) == {"alpha", "beta"} assert set(result.tags) == {"alpha", "beta"}
def test_respects_owner_visibility(self, mock_create_session, session: Session): def test_respects_owner_visibility(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
session.commit() session.commit()
# Wrong owner cannot see # Wrong owner cannot see
result = get_asset_detail(asset_info_id=info.id, owner_id="user2") result = get_asset_detail(reference_id=ref.id, owner_id="user2")
assert result is None assert result is None
# Correct owner can see # Correct owner can see
result = get_asset_detail(asset_info_id=info.id, owner_id="user1") result = get_asset_detail(reference_id=ref.id, owner_id="user1")
assert result is not None assert result is not None
class TestUpdateAssetMetadata: class TestUpdateAssetMetadata:
def test_updates_name(self, mock_create_session, session: Session): def test_updates_name(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, name="old_name.bin") ref = _make_reference(session, asset, name="old_name.bin")
info_id = info.id ref_id = ref.id
session.commit() session.commit()
update_asset_metadata( update_asset_metadata(
asset_info_id=info_id, reference_id=ref_id,
name="new_name.bin", name="new_name.bin",
) )
# Verify by re-fetching from DB # Verify by re-fetching from DB
session.expire_all() session.expire_all()
updated_info = session.get(AssetInfo, info_id) updated_ref = session.get(AssetReference, ref_id)
assert updated_info.name == "new_name.bin" assert updated_ref.name == "new_name.bin"
def test_updates_tags(self, mock_create_session, session: Session): def test_updates_tags(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["old"]) ensure_tags_exist(session, ["old"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["old"]) add_tags_to_reference(session, reference_id=ref.id, tags=["old"])
session.commit() session.commit()
result = update_asset_metadata( result = update_asset_metadata(
asset_info_id=info.id, reference_id=ref.id,
tags=["new1", "new2"], tags=["new1", "new2"],
) )
@ -107,84 +107,84 @@ class TestUpdateAssetMetadata:
def test_updates_user_metadata(self, mock_create_session, session: Session): def test_updates_user_metadata(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
info_id = info.id ref_id = ref.id
session.commit() session.commit()
update_asset_metadata( update_asset_metadata(
asset_info_id=info_id, reference_id=ref_id,
user_metadata={"key": "value", "num": 42}, user_metadata={"key": "value", "num": 42},
) )
# Verify by re-fetching from DB # Verify by re-fetching from DB
session.expire_all() session.expire_all()
updated_info = session.get(AssetInfo, info_id) updated_ref = session.get(AssetReference, ref_id)
assert updated_info.user_metadata["key"] == "value" assert updated_ref.user_metadata["key"] == "value"
assert updated_info.user_metadata["num"] == 42 assert updated_ref.user_metadata["num"] == 42
def test_raises_for_nonexistent(self, mock_create_session): def test_raises_for_nonexistent(self, mock_create_session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
update_asset_metadata(asset_info_id="nonexistent", name="fail") update_asset_metadata(reference_id="nonexistent", name="fail")
def test_raises_for_wrong_owner(self, mock_create_session, session: Session): def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
session.commit() session.commit()
with pytest.raises(PermissionError, match="not owner"): with pytest.raises(PermissionError, match="not owner"):
update_asset_metadata( update_asset_metadata(
asset_info_id=info.id, reference_id=ref.id,
name="new", name="new",
owner_id="user2", owner_id="user2",
) )
class TestDeleteAssetReference: class TestDeleteAssetReference:
def test_deletes_asset_info(self, mock_create_session, session: Session): def test_deletes_reference(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
info_id = info.id ref_id = ref.id
session.commit() session.commit()
result = delete_asset_reference( result = delete_asset_reference(
asset_info_id=info_id, reference_id=ref_id,
owner_id="", owner_id="",
delete_content_if_orphan=False, delete_content_if_orphan=False,
) )
assert result is True assert result is True
assert session.get(AssetInfo, info_id) is None assert session.get(AssetReference, ref_id) is None
def test_returns_false_for_nonexistent(self, mock_create_session): def test_returns_false_for_nonexistent(self, mock_create_session):
result = delete_asset_reference( result = delete_asset_reference(
asset_info_id="nonexistent", reference_id="nonexistent",
owner_id="", owner_id="",
) )
assert result is False assert result is False
def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session): def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
info_id = info.id ref_id = ref.id
session.commit() session.commit()
result = delete_asset_reference( result = delete_asset_reference(
asset_info_id=info_id, reference_id=ref_id,
owner_id="user2", owner_id="user2",
) )
assert result is False assert result is False
assert session.get(AssetInfo, info_id) is not None assert session.get(AssetReference, ref_id) is not None
def test_keeps_asset_if_other_infos_exist(self, mock_create_session, session: Session): def test_keeps_asset_if_other_references_exist(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info1 = _make_asset_info(session, asset, name="info1") ref1 = _make_reference(session, asset, name="ref1")
_make_asset_info(session, asset, name="info2") # Second info keeps asset alive _make_reference(session, asset, name="ref2") # Second ref keeps asset alive
asset_id = asset.id asset_id = asset.id
session.commit() session.commit()
delete_asset_reference( delete_asset_reference(
asset_info_id=info1.id, reference_id=ref1.id,
owner_id="", owner_id="",
delete_content_if_orphan=True, delete_content_if_orphan=True,
) )
@ -194,19 +194,19 @@ class TestDeleteAssetReference:
def test_deletes_orphaned_asset(self, mock_create_session, session: Session): def test_deletes_orphaned_asset(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
asset_id = asset.id asset_id = asset.id
info_id = info.id ref_id = ref.id
session.commit() session.commit()
delete_asset_reference( delete_asset_reference(
asset_info_id=info_id, reference_id=ref_id,
owner_id="", owner_id="",
delete_content_if_orphan=True, delete_content_if_orphan=True,
) )
# Both info and asset should be gone # Both ref and asset should be gone
assert session.get(AssetInfo, info_id) is None assert session.get(AssetReference, ref_id) is None
assert session.get(Asset, asset_id) is None assert session.get(Asset, asset_id) is None
@ -214,51 +214,51 @@ class TestSetAssetPreview:
def test_sets_preview(self, mock_create_session, session: Session): def test_sets_preview(self, mock_create_session, session: Session):
asset = _make_asset(session, hash_val="blake3:main") asset = _make_asset(session, hash_val="blake3:main")
preview_asset = _make_asset(session, hash_val="blake3:preview") preview_asset = _make_asset(session, hash_val="blake3:preview")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
info_id = info.id ref_id = ref.id
preview_id = preview_asset.id preview_id = preview_asset.id
session.commit() session.commit()
set_asset_preview( set_asset_preview(
asset_info_id=info_id, reference_id=ref_id,
preview_asset_id=preview_id, preview_asset_id=preview_id,
) )
# Verify by re-fetching from DB # Verify by re-fetching from DB
session.expire_all() session.expire_all()
updated_info = session.get(AssetInfo, info_id) updated_ref = session.get(AssetReference, ref_id)
assert updated_info.preview_id == preview_id assert updated_ref.preview_id == preview_id
def test_clears_preview(self, mock_create_session, session: Session): def test_clears_preview(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
preview_asset = _make_asset(session, hash_val="blake3:preview") preview_asset = _make_asset(session, hash_val="blake3:preview")
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
info.preview_id = preview_asset.id ref.preview_id = preview_asset.id
info_id = info.id ref_id = ref.id
session.commit() session.commit()
set_asset_preview( set_asset_preview(
asset_info_id=info_id, reference_id=ref_id,
preview_asset_id=None, preview_asset_id=None,
) )
# Verify by re-fetching from DB # Verify by re-fetching from DB
session.expire_all() session.expire_all()
updated_info = session.get(AssetInfo, info_id) updated_ref = session.get(AssetReference, ref_id)
assert updated_info.preview_id is None assert updated_ref.preview_id is None
def test_raises_for_nonexistent_info(self, mock_create_session): def test_raises_for_nonexistent_ref(self, mock_create_session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
set_asset_preview(asset_info_id="nonexistent") set_asset_preview(reference_id="nonexistent")
def test_raises_for_wrong_owner(self, mock_create_session, session: Session): def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
session.commit() session.commit()
with pytest.raises(PermissionError, match="not owner"): with pytest.raises(PermissionError, match="not owner"):
set_asset_preview( set_asset_preview(
asset_info_id=info.id, reference_id=ref.id,
preview_asset_id=None, preview_asset_id=None,
owner_id="user2", owner_id="user2",
) )

View File

@ -4,7 +4,7 @@ from pathlib import Path
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset from app.assets.database.models import Asset, AssetReference
from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets
@ -30,7 +30,7 @@ class TestBatchInsertSeedAssets:
result = batch_insert_seed_assets(session, specs=specs, owner_id="") result = batch_insert_seed_assets(session, specs=specs, owner_id="")
assert result.inserted_infos == 1 assert result.inserted_refs == 1
# Verify Asset has mime_type populated # Verify Asset has mime_type populated
assets = session.query(Asset).all() assets = session.query(Asset).all()
@ -58,7 +58,7 @@ class TestBatchInsertSeedAssets:
result = batch_insert_seed_assets(session, specs=specs, owner_id="") result = batch_insert_seed_assets(session, specs=specs, owner_id="")
assert result.inserted_infos == 1 assert result.inserted_refs == 1
assets = session.query(Asset).all() assets = session.query(Asset).all()
assert len(assets) == 1 assert len(assets) == 1
@ -93,13 +93,12 @@ class TestBatchInsertSeedAssets:
result = batch_insert_seed_assets(session, specs=specs, owner_id="") result = batch_insert_seed_assets(session, specs=specs, owner_id="")
assert result.inserted_infos == len(test_cases) assert result.inserted_refs == len(test_cases)
for filename, expected_mime in test_cases: for filename, expected_mime in test_cases:
from app.assets.database.models import AssetInfo ref = session.query(AssetReference).filter_by(name=filename).first()
info = session.query(AssetInfo).filter_by(name=filename).first() assert ref is not None
assert info is not None asset = session.query(Asset).filter_by(id=ref.asset_id).first()
asset = session.query(Asset).filter_by(id=info.asset_id).first()
assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}" assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}"

View File

@ -0,0 +1,253 @@
"""Tests for asset enrichment (mime_type and hash population)."""
from pathlib import Path
from unittest.mock import patch
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetReference
from app.assets.scanner import (
ENRICHMENT_HASHED,
ENRICHMENT_METADATA,
ENRICHMENT_STUB,
enrich_asset,
)
def _create_stub_asset(
    session: Session,
    file_path: str,
    asset_id: str = "test-asset-id",
    reference_id: str = "test-ref-id",
    name: str | None = None,
) -> tuple[Asset, AssetReference]:
    """Add an unenriched Asset and one AssetReference to it, then return both.

    The Asset is created with ``hash=None`` and ``mime_type=None``; the
    reference is created at ``ENRICHMENT_STUB`` level for ``file_path``.
    When ``name`` is falsy the reference is named ``test-asset-<asset_id>``.
    Both rows are flushed to the session but not committed.

    Returns:
        The ``(asset, reference)`` pair that was just added.
    """
    stub = Asset(id=asset_id, hash=None, size_bytes=100, mime_type=None)
    session.add(stub)
    # The asset row is flushed before the reference pointing at it is added.
    session.flush()

    display_name = name if name else f"test-asset-{asset_id}"
    reference = AssetReference(
        id=reference_id,
        asset_id=asset_id,
        name=display_name,
        owner_id="system",
        file_path=file_path,
        mtime_ns=1234567890000000000,
        enrichment_level=ENRICHMENT_STUB,
    )
    session.add(reference)
    session.flush()

    return stub, reference
class TestEnrichAsset:
    """Tests for ``enrich_asset`` run against stub assets in the test database."""

    @staticmethod
    def _enrich(
        db_engine,
        file_path: Path,
        asset: Asset,
        ref: AssetReference,
        *,
        compute_hash: bool,
    ):
        """Call ``enrich_asset`` with the scanner's ``create_session`` patched.

        ``app.assets.scanner.create_session`` is replaced for the duration of
        the call by a context manager that opens a fresh ``Session`` bound to
        ``db_engine``. Metadata extraction is always requested.

        Args:
            db_engine: Engine the patched sessions are bound to.
            file_path: Path of the file to enrich (passed on as a string).
            asset: Stub asset whose id is passed as ``asset_id``.
            ref: Stub reference whose id is passed as ``reference_id``.
            compute_hash: Forwarded to ``enrich_asset``.

        Returns:
            Whatever ``enrich_asset`` returns (compared against the
            ``ENRICHMENT_*`` constants by the tests).
        """
        # Imported locally so the helper does not depend on a module-level
        # import being present.
        from contextlib import contextmanager

        @contextmanager
        def _create_session():
            with Session(db_engine) as sess:
                yield sess

        with patch("app.assets.scanner.create_session") as mock_cs:
            mock_cs.side_effect = _create_session
            return enrich_asset(
                file_path=str(file_path),
                reference_id=ref.id,
                asset_id=asset.id,
                extract_metadata=True,
                compute_hash=compute_hash,
            )

    def test_extracts_mime_type_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify mime_type is written to the Asset table during enrichment."""
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"\x00" * 100)
        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-1", "ref-1"
        )
        session.commit()

        new_level = self._enrich(
            db_engine, file_path, asset, ref, compute_hash=False
        )
        assert new_level == ENRICHMENT_METADATA

        # Drop cached state so the assertions read what enrichment persisted.
        session.expire_all()
        updated_asset = session.get(Asset, "asset-1")
        assert updated_asset is not None
        assert updated_asset.mime_type == "application/safetensors"

    def test_computes_hash_and_updates_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify hash is written to the Asset table during enrichment."""
        file_path = temp_dir / "data.bin"
        file_path.write_bytes(b"test content for hashing")
        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-2", "ref-2"
        )
        session.commit()

        new_level = self._enrich(
            db_engine, file_path, asset, ref, compute_hash=True
        )
        assert new_level == ENRICHMENT_HASHED

        session.expire_all()
        updated_asset = session.get(Asset, "asset-2")
        assert updated_asset is not None
        assert updated_asset.hash is not None
        assert updated_asset.hash.startswith("blake3:")

    def test_enrichment_updates_both_mime_and_hash(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify both mime_type and hash are set when full enrichment runs."""
        file_path = temp_dir / "model.safetensors"
        file_path.write_bytes(b"\x00" * 50)
        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-3", "ref-3"
        )
        session.commit()

        self._enrich(db_engine, file_path, asset, ref, compute_hash=True)

        session.expire_all()
        updated_asset = session.get(Asset, "asset-3")
        assert updated_asset is not None
        assert updated_asset.mime_type == "application/safetensors"
        assert updated_asset.hash is not None
        assert updated_asset.hash.startswith("blake3:")

    def test_missing_file_returns_stub_level(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify missing files don't cause errors and return STUB level."""
        # The path is never written, so the file does not exist on disk.
        file_path = temp_dir / "nonexistent.bin"
        asset, ref = _create_stub_asset(
            session, str(file_path), "asset-4", "ref-4"
        )
        session.commit()

        new_level = self._enrich(
            db_engine, file_path, asset, ref, compute_hash=True
        )
        assert new_level == ENRICHMENT_STUB

        session.expire_all()
        updated_asset = session.get(Asset, "asset-4")
        # Guard first so a missing row fails as an assertion, not an
        # AttributeError on None.
        assert updated_asset is not None
        assert updated_asset.mime_type is None
        assert updated_asset.hash is None

    def test_duplicate_hash_merges_into_existing_asset(
        self, db_engine, temp_dir: Path, session: Session
    ):
        """Verify duplicate files merge into existing asset instead of failing."""
        file_path_1 = temp_dir / "file1.bin"
        file_path_2 = temp_dir / "file2.bin"
        content = b"identical content"
        file_path_1.write_bytes(content)
        file_path_2.write_bytes(content)
        asset1, ref1 = _create_stub_asset(
            session, str(file_path_1), "asset-dup-1", "ref-dup-1"
        )
        asset2, ref2 = _create_stub_asset(
            session, str(file_path_2), "asset-dup-2", "ref-dup-2"
        )
        session.commit()

        # Enrich the first file, then the byte-identical second one.
        self._enrich(db_engine, file_path_1, asset1, ref1, compute_hash=True)
        self._enrich(db_engine, file_path_2, asset2, ref2, compute_hash=True)

        session.expire_all()
        updated_asset1 = session.get(Asset, "asset-dup-1")
        assert updated_asset1 is not None
        assert updated_asset1.hash is not None

        # The second stub asset is expected to be gone, with its reference
        # re-pointed at the first asset.
        updated_asset2 = session.get(Asset, "asset-dup-2")
        assert updated_asset2 is None
        updated_ref2 = session.get(AssetReference, "ref-dup-2")
        assert updated_ref2 is not None
        assert updated_ref2.asset_id == "asset-dup-1"

View File

@ -4,13 +4,13 @@ from pathlib import Path
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState, AssetInfo, Tag from app.assets.database.models import Asset, AssetReference, Tag
from app.assets.database.queries import get_asset_tags from app.assets.database.queries import get_reference_tags
from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset
class TestIngestFileFromPath: class TestIngestFileFromPath:
def test_creates_asset_and_cache_state(self, mock_create_session, temp_dir: Path, session: Session): def test_creates_asset_and_reference(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "test_file.bin" file_path = temp_dir / "test_file.bin"
file_path.write_bytes(b"test content") file_path.write_bytes(b"test content")
@ -23,19 +23,19 @@ class TestIngestFileFromPath:
) )
assert result.asset_created is True assert result.asset_created is True
assert result.state_created is True assert result.ref_created is True
assert result.asset_info_id is None # no info_name provided assert result.reference_id is not None
# Verify DB state # Verify DB state
assets = session.query(Asset).all() assets = session.query(Asset).all()
assert len(assets) == 1 assert len(assets) == 1
assert assets[0].hash == "blake3:abc123" assert assets[0].hash == "blake3:abc123"
states = session.query(AssetCacheState).all() refs = session.query(AssetReference).all()
assert len(states) == 1 assert len(refs) == 1
assert states[0].file_path == str(file_path) assert refs[0].file_path == str(file_path)
def test_creates_asset_info_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session): def test_creates_reference_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "model.safetensors" file_path = temp_dir / "model.safetensors"
file_path.write_bytes(b"model data") file_path.write_bytes(b"model data")
@ -50,12 +50,12 @@ class TestIngestFileFromPath:
) )
assert result.asset_created is True assert result.asset_created is True
assert result.asset_info_id is not None assert result.reference_id is not None
info = session.query(AssetInfo).first() ref = session.query(AssetReference).first()
assert info is not None assert ref is not None
assert info.name == "My Model" assert ref.name == "My Model"
assert info.owner_id == "user1" assert ref.owner_id == "user1"
def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session): def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "tagged.bin" file_path = temp_dir / "tagged.bin"
@ -70,7 +70,7 @@ class TestIngestFileFromPath:
tags=["models", "checkpoints"], tags=["models", "checkpoints"],
) )
assert result.asset_info_id is not None assert result.reference_id is not None
# Verify tags were created and linked # Verify tags were created and linked
tags = session.query(Tag).all() tags = session.query(Tag).all()
@ -78,8 +78,8 @@ class TestIngestFileFromPath:
assert "models" in tag_names assert "models" in tag_names
assert "checkpoints" in tag_names assert "checkpoints" in tag_names
asset_tags = get_asset_tags(session, asset_info_id=result.asset_info_id) ref_tags = get_reference_tags(session, reference_id=result.reference_id)
assert set(asset_tags) == {"models", "checkpoints"} assert set(ref_tags) == {"models", "checkpoints"}
def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session): def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "dup.bin" file_path = temp_dir / "dup.bin"
@ -102,7 +102,7 @@ class TestIngestFileFromPath:
mtime_ns=1234567890000000001, # different mtime mtime_ns=1234567890000000001, # different mtime
) )
assert r2.asset_created is False assert r2.asset_created is False
assert r2.state_updated is True or r2.state_created is False assert r2.ref_updated is True or r2.ref_created is False
# Still only one asset # Still only one asset
assets = session.query(Asset).all() assets = session.query(Asset).all()
@ -127,9 +127,9 @@ class TestIngestFileFromPath:
preview_id=preview_id, preview_id=preview_id,
) )
assert result.asset_info_id is not None assert result.reference_id is not None
info = session.query(AssetInfo).filter_by(id=result.asset_info_id).first() ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
assert info.preview_id == preview_id assert ref.preview_id == preview_id
def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session): def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "bad_preview.bin" file_path = temp_dir / "bad_preview.bin"
@ -144,13 +144,13 @@ class TestIngestFileFromPath:
preview_id="nonexistent-uuid", preview_id="nonexistent-uuid",
) )
assert result.asset_info_id is not None assert result.reference_id is not None
info = session.query(AssetInfo).filter_by(id=result.asset_info_id).first() ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
assert info.preview_id is None assert ref.preview_id is None
class TestRegisterExistingAsset: class TestRegisterExistingAsset:
def test_creates_info_for_existing_asset(self, mock_create_session, session: Session): def test_creates_reference_for_existing_asset(self, mock_create_session, session: Session):
# Create existing asset # Create existing asset
asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png") asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
session.add(asset) session.add(asset)
@ -168,42 +168,43 @@ class TestRegisterExistingAsset:
# Verify by re-fetching from DB # Verify by re-fetching from DB
session.expire_all() session.expire_all()
infos = session.query(AssetInfo).filter_by(name="Registered Asset").all() refs = session.query(AssetReference).filter_by(name="Registered Asset").all()
assert len(infos) == 1 assert len(refs) == 1
def test_returns_existing_info(self, mock_create_session, session: Session): def test_creates_new_reference_even_with_same_name(self, mock_create_session, session: Session):
# Create asset and info # Create asset and reference
asset = Asset(hash="blake3:withinfo", size_bytes=512) asset = Asset(hash="blake3:withref", size_bytes=512)
session.add(asset) session.add(asset)
session.flush() session.flush()
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
info = AssetInfo( ref = AssetReference(
owner_id="", owner_id="",
name="Existing Info", name="Existing Ref",
asset_id=asset.id, asset_id=asset.id,
created_at=get_utc_now(), created_at=get_utc_now(),
updated_at=get_utc_now(), updated_at=get_utc_now(),
last_access_time=get_utc_now(), last_access_time=get_utc_now(),
) )
session.add(info) session.add(ref)
session.flush() # Flush to get the ID session.flush()
info_id = info.id ref_id = ref.id
session.commit() session.commit()
result = _register_existing_asset( result = _register_existing_asset(
asset_hash="blake3:withinfo", asset_hash="blake3:withref",
name="Existing Info", name="Existing Ref",
owner_id="", owner_id="",
) )
assert result.created is False # Multiple files with same name are allowed
assert result.created is True
# Verify only one AssetInfo exists for this name # Verify two AssetReferences exist for this name
session.expire_all() session.expire_all()
infos = session.query(AssetInfo).filter_by(name="Existing Info").all() refs = session.query(AssetReference).filter_by(name="Existing Ref").all()
assert len(infos) == 1 assert len(refs) == 2
assert infos[0].id == info_id assert ref_id in [r.id for r in refs]
def test_raises_for_nonexistent_hash(self, mock_create_session): def test_raises_for_nonexistent_hash(self, mock_create_session):
with pytest.raises(ValueError, match="No asset with hash"): with pytest.raises(ValueError, match="No asset with hash"):
@ -212,14 +213,14 @@ class TestRegisterExistingAsset:
name="Fail", name="Fail",
) )
def test_applies_tags_to_new_info(self, mock_create_session, session: Session): def test_applies_tags_to_new_reference(self, mock_create_session, session: Session):
asset = Asset(hash="blake3:tagged", size_bytes=256) asset = Asset(hash="blake3:tagged", size_bytes=256)
session.add(asset) session.add(asset)
session.commit() session.commit()
result = _register_existing_asset( result = _register_existing_asset(
asset_hash="blake3:tagged", asset_hash="blake3:tagged",
name="Tagged Info", name="Tagged Ref",
tags=["alpha", "beta"], tags=["alpha", "beta"],
) )

View File

@ -2,8 +2,8 @@
import pytest import pytest
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo from app.assets.database.models import Asset, AssetReference
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
from app.assets.helpers import get_utc_now from app.assets.helpers import get_utc_now
from app.assets.services import apply_tags, remove_tags, list_tags from app.assets.services import apply_tags, remove_tags, list_tags
@ -15,14 +15,14 @@ def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset:
return asset return asset
def _make_asset_info( def _make_reference(
session: Session, session: Session,
asset: Asset, asset: Asset,
name: str = "test", name: str = "test",
owner_id: str = "", owner_id: str = "",
) -> AssetInfo: ) -> AssetReference:
now = get_utc_now() now = get_utc_now()
info = AssetInfo( ref = AssetReference(
owner_id=owner_id, owner_id=owner_id,
name=name, name=name,
asset_id=asset.id, asset_id=asset.id,
@ -30,19 +30,19 @@ def _make_asset_info(
updated_at=now, updated_at=now,
last_access_time=now, last_access_time=now,
) )
session.add(info) session.add(ref)
session.flush() session.flush()
return info return ref
class TestApplyTags: class TestApplyTags:
def test_adds_new_tags(self, mock_create_session, session: Session): def test_adds_new_tags(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
session.commit() session.commit()
result = apply_tags( result = apply_tags(
asset_info_id=info.id, reference_id=ref.id,
tags=["alpha", "beta"], tags=["alpha", "beta"],
) )
@ -52,31 +52,31 @@ class TestApplyTags:
def test_reports_already_present(self, mock_create_session, session: Session): def test_reports_already_present(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["existing"]) ensure_tags_exist(session, ["existing"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing"]) add_tags_to_reference(session, reference_id=ref.id, tags=["existing"])
session.commit() session.commit()
result = apply_tags( result = apply_tags(
asset_info_id=info.id, reference_id=ref.id,
tags=["existing", "new"], tags=["existing", "new"],
) )
assert result.added == ["new"] assert result.added == ["new"]
assert result.already_present == ["existing"] assert result.already_present == ["existing"]
def test_raises_for_nonexistent_info(self, mock_create_session): def test_raises_for_nonexistent_ref(self, mock_create_session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
apply_tags(asset_info_id="nonexistent", tags=["x"]) apply_tags(reference_id="nonexistent", tags=["x"])
def test_raises_for_wrong_owner(self, mock_create_session, session: Session): def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
session.commit() session.commit()
with pytest.raises(PermissionError, match="not owner"): with pytest.raises(PermissionError, match="not owner"):
apply_tags( apply_tags(
asset_info_id=info.id, reference_id=ref.id,
tags=["new"], tags=["new"],
owner_id="user2", owner_id="user2",
) )
@ -85,13 +85,13 @@ class TestApplyTags:
class TestRemoveTags: class TestRemoveTags:
def test_removes_tags(self, mock_create_session, session: Session): def test_removes_tags(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["a", "b", "c"]) ensure_tags_exist(session, ["a", "b", "c"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"]) add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
session.commit() session.commit()
result = remove_tags( result = remove_tags(
asset_info_id=info.id, reference_id=ref.id,
tags=["a", "b"], tags=["a", "b"],
) )
@ -101,31 +101,31 @@ class TestRemoveTags:
def test_reports_not_present(self, mock_create_session, session: Session): def test_reports_not_present(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
ensure_tags_exist(session, ["present"]) ensure_tags_exist(session, ["present"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["present"]) add_tags_to_reference(session, reference_id=ref.id, tags=["present"])
session.commit() session.commit()
result = remove_tags( result = remove_tags(
asset_info_id=info.id, reference_id=ref.id,
tags=["present", "absent"], tags=["present", "absent"],
) )
assert result.removed == ["present"] assert result.removed == ["present"]
assert result.not_present == ["absent"] assert result.not_present == ["absent"]
def test_raises_for_nonexistent_info(self, mock_create_session): def test_raises_for_nonexistent_ref(self, mock_create_session):
with pytest.raises(ValueError, match="not found"): with pytest.raises(ValueError, match="not found"):
remove_tags(asset_info_id="nonexistent", tags=["x"]) remove_tags(reference_id="nonexistent", tags=["x"])
def test_raises_for_wrong_owner(self, mock_create_session, session: Session): def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1") ref = _make_reference(session, asset, owner_id="user1")
session.commit() session.commit()
with pytest.raises(PermissionError, match="not owner"): with pytest.raises(PermissionError, match="not owner"):
remove_tags( remove_tags(
asset_info_id=info.id, reference_id=ref.id,
tags=["x"], tags=["x"],
owner_id="user2", owner_id="user2",
) )
@ -135,8 +135,8 @@ class TestListTags:
def test_returns_tags_with_counts(self, mock_create_session, session: Session): def test_returns_tags_with_counts(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["used", "unused"]) ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"]) add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit() session.commit()
rows, total = list_tags() rows, total = list_tags()
@ -149,8 +149,8 @@ class TestListTags:
def test_excludes_zero_counts(self, mock_create_session, session: Session): def test_excludes_zero_counts(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["used", "unused"]) ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session) asset = _make_asset(session)
info = _make_asset_info(session, asset) ref = _make_reference(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"]) add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
session.commit() session.commit()
rows, total = list_tags(include_zero=False) rows, total = list_tags(include_zero=False)

View File

@ -24,11 +24,11 @@ def test_create_from_hash_success(
assert b1["created_new"] is False assert b1["created_new"] is False
aid = b1["id"] aid = b1["id"]
# Calling again with the same name should return the same AssetInfo id # Calling again with the same name creates a new AssetInfo (duplicates allowed)
r2 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120) r2 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
b2 = r2.json() b2 = r2.json()
assert r2.status_code == 201, b2 assert r2.status_code == 201, b2
assert b2["id"] == aid assert b2["id"] != aid # new reference, not the same one
def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asset: dict): def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asset: dict):

View File

@ -18,25 +18,24 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma
assert r1.status_code == 201, a1 assert r1.status_code == 201, a1
assert a1["created_new"] is True assert a1["created_new"] is True
# Second upload with the same data and name should return created_new == False and the same asset # Second upload with the same data and name creates a new AssetInfo (duplicates allowed)
# Returns 200 because Asset already exists, but a new AssetReference is created
files = {"file": (name, data, "application/octet-stream")} files = {"file": (name, data, "application/octet-stream")}
form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)} form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120) r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
a2 = r2.json() a2 = r2.json()
assert r2.status_code == 200, a2 assert r2.status_code in (200, 201), a2
assert a2["created_new"] is False
assert a2["asset_hash"] == a1["asset_hash"] assert a2["asset_hash"] == a1["asset_hash"]
assert a2["id"] == a1["id"] # old reference assert a2["id"] != a1["id"] # new reference with same content
# Third upload with the same data but new name should return created_new == False and the new AssetReference # Third upload with the same data but different name also creates new AssetInfo
files = {"file": (name, data, "application/octet-stream")} files = {"file": (name, data, "application/octet-stream")}
form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)} form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)}
r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120) r3 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
a3 = r2.json() a3 = r3.json()
assert r2.status_code == 200, a3 assert r3.status_code in (200, 201), a3
assert a3["created_new"] is False
assert a3["asset_hash"] == a1["asset_hash"] assert a3["asset_hash"] == a1["asset_hash"]
assert a3["id"] != a1["id"] # old reference assert a3["id"] != a1["id"]
def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str): def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str):