refactor(assets): consolidate duplicated query utilities and remove unused code

- Extract shared helpers to database/queries/common.py:
  - MAX_BIND_PARAMS, calculate_rows_per_statement, iter_chunks, iter_row_chunks
  - build_visible_owner_clause

- Remove duplicate _compute_filename_for_asset, consolidate in path_utils.py

- Remove unused get_asset_info_with_tags (duplicate of get_asset_detail)

- Remove redundant __all__ from cache_state.py

- Make internal helpers private (_check_is_scalar)

Amp-Thread-ID: https://ampcode.com/threads/T-019c2ad9-9432-7451-94a8-79287dbbb19e
Co-authored-by: Amp <amp@ampcode.com>
Luke Mino-Altherr 2026-02-04 15:04:30 -08:00
parent adf6eb73fd
commit b23302f372
15 changed files with 514 additions and 309 deletions
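
The consolidation leaves a single copy of the chunked-insert arithmetic for all three query modules. A minimal sketch of the resulting call pattern, assuming the import path introduced in the new common.py below; rows and ins stand in for a caller's row dicts and INSERT statement, as in bulk_insert_assets, whose Asset rows bind 5 columns:

    from app.assets.database.queries.common import (
        MAX_BIND_PARAMS,
        calculate_rows_per_statement,
        iter_chunks,
    )

    # With MAX_BIND_PARAMS = 800 and 5 bound columns per row,
    # calculate_rows_per_statement(5) == 160, so each executemany
    # batch stays under the bind-parameter ceiling.
    for chunk in iter_chunks(rows, calculate_rows_per_statement(5)):
        session.execute(ins, chunk)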

View File

@@ -38,6 +38,7 @@ USER_MANAGER: user_manager.UserManager | None = None
 # UUID regex (canonical hyphenated form, case-insensitive)
 UUID_RE = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
+
 def get_query_dict(request: web.Request) -> dict[str, Any]:
     """
     Gets a dictionary of query parameters from the request.
@@ -45,21 +46,33 @@ def get_query_dict(request: web.Request) -> dict[str, Any]:
     'request.query' is a MultiMapping[str], needs to be converted to a dictionary to be validated by Pydantic.
     """
     query_dict = {
-        key: request.query.getall(key) if len(request.query.getall(key)) > 1 else request.query.get(key)
+        key: request.query.getall(key)
+        if len(request.query.getall(key)) > 1
+        else request.query.get(key)
         for key in request.query.keys()
     }
     return query_dict

 # Note to any custom node developers reading this code:
 # The assets system is not yet fully implemented, do not rely on the code in /app/assets remaining the same.
-def register_assets_system(app: web.Application, user_manager_instance: user_manager.UserManager) -> None:
+def register_assets_system(
+    app: web.Application, user_manager_instance: user_manager.UserManager
+) -> None:
     global USER_MANAGER
     USER_MANAGER = user_manager_instance
     app.add_routes(ROUTES)

-def _build_error_response(status: int, code: str, message: str, details: dict | None = None) -> web.Response:
-    return web.json_response({"error": {"code": code, "message": message, "details": details or {}}}, status=status)
+def _build_error_response(
+    status: int, code: str, message: str, details: dict | None = None
+) -> web.Response:
+    return web.json_response(
+        {"error": {"code": code, "message": message, "details": details or {}}},
+        status=status,
+    )

 def _build_validation_error_response(code: str, ve: ValidationError) -> web.Response:
@@ -79,10 +92,18 @@ def _validate_sort_field(requested: str | None) -> str:
 async def head_asset_by_hash(request: web.Request) -> web.Response:
     hash_str = request.match_info.get("hash", "").strip().lower()
     if not hash_str or ":" not in hash_str:
-        return _build_error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
+        return _build_error_response(
+            400, "INVALID_HASH", "hash must be like 'blake3:<hex>'"
+        )
     algo, digest = hash_str.split(":", 1)
-    if algo != "blake3" or not digest or any(c for c in digest if c not in "0123456789abcdef"):
-        return _build_error_response(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
+    if (
+        algo != "blake3"
+        or not digest
+        or any(c for c in digest if c not in "0123456789abcdef")
+    ):
+        return _build_error_response(
+            400, "INVALID_HASH", "hash must be like 'blake3:<hex>'"
+        )
     exists = asset_exists(hash_str)
     return web.Response(status=200 if exists else 404)
@@ -99,7 +120,11 @@ async def list_assets_route(request: web.Request) -> web.Response:
         return _build_validation_error_response("INVALID_QUERY", ve)

     sort = _validate_sort_field(q.sort)
-    order = "desc" if (q.order or "desc").lower() not in {"asc", "desc"} else q.order.lower()
+    order = (
+        "desc"
+        if (q.order or "desc").lower() not in {"asc", "desc"}
+        else q.order.lower()
+    )

     result = list_assets_page(
         owner_id=USER_MANAGER.get_request_user_id(request),
@@ -118,7 +143,9 @@ async def list_assets_route(request: web.Request) -> web.Response:
             id=item.info.id,
             name=item.info.name,
             asset_hash=item.asset.hash if item.asset else None,
-            size=int(item.asset.size_bytes) if item.asset and item.asset.size_bytes else None,
+            size=int(item.asset.size_bytes)
+            if item.asset and item.asset.size_bytes
+            else None,
             mime_type=item.asset.mime_type if item.asset else None,
             tags=item.tags,
             created_at=item.info.created_at,
@@ -148,13 +175,20 @@ async def get_asset_route(request: web.Request) -> web.Response:
             owner_id=USER_MANAGER.get_request_user_id(request),
         )
         if not result:
-            return _build_error_response(404, "ASSET_NOT_FOUND", f"AssetInfo {asset_info_id} not found", {"id": asset_info_id})
+            return _build_error_response(
+                404,
+                "ASSET_NOT_FOUND",
+                f"AssetInfo {asset_info_id} not found",
+                {"id": asset_info_id},
+            )

         payload = schemas_out.AssetDetail(
             id=result.info.id,
             name=result.info.name,
             asset_hash=result.asset.hash if result.asset else None,
-            size=int(result.asset.size_bytes) if result.asset and result.asset.size_bytes is not None else None,
+            size=int(result.asset.size_bytes)
+            if result.asset and result.asset.size_bytes is not None
+            else None,
             mime_type=result.asset.mime_type if result.asset else None,
             tags=result.tags,
             user_metadata=result.info.user_metadata or {},
@@ -163,7 +197,9 @@ async def get_asset_route(request: web.Request) -> web.Response:
             last_access_time=result.info.last_access_time,
         )
     except ValueError as e:
-        return _build_error_response(404, "ASSET_NOT_FOUND", str(e), {"id": asset_info_id})
+        return _build_error_response(
+            404, "ASSET_NOT_FOUND", str(e), {"id": asset_info_id}
+        )
     except Exception:
         logging.exception(
             "get_asset failed for asset_info_id=%s, owner_id=%s",
@@ -193,10 +229,12 @@ async def download_asset_content(request: web.Request) -> web.Response:
     except NotImplementedError as nie:
         return _build_error_response(501, "BACKEND_UNSUPPORTED", str(nie))
     except FileNotFoundError:
-        return _build_error_response(404, "FILE_NOT_FOUND", "Underlying file not found on disk.")
+        return _build_error_response(
+            404, "FILE_NOT_FOUND", "Underlying file not found on disk."
+        )

     quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
-    cd = f'{disposition}; filename="{quoted}"; filename*=UTF-8\'\'{urllib.parse.quote(filename)}'
+    cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{urllib.parse.quote(filename)}"

     file_size = os.path.getsize(abs_path)
     logging.info(
@@ -235,7 +273,9 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response:
     except ValidationError as ve:
         return _build_validation_error_response("INVALID_BODY", ve)
     except Exception:
-        return _build_error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
+        return _build_error_response(
+            400, "INVALID_JSON", "Request body must be valid JSON."
+        )

     result = create_from_hash(
         hash_str=body.hash,
@@ -245,7 +285,9 @@ async def create_asset_from_hash_route(request: web.Request) -> web.Response:
         owner_id=USER_MANAGER.get_request_user_id(request),
     )
     if result is None:
-        return _build_error_response(404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist")
+        return _build_error_response(
+            404, "ASSET_NOT_FOUND", f"Asset content {body.hash} does not exist"
+        )

     payload_out = schemas_out.AssetCreated(
         id=result.info.id,
@@ -282,21 +324,30 @@ async def upload_asset(request: web.Request) -> web.Response:
     owner_id = USER_MANAGER.get_request_user_id(request)

     try:
-        spec = schemas_in.UploadAssetSpec.model_validate({
-            "tags": parsed.tags_raw,
-            "name": parsed.provided_name,
-            "user_metadata": parsed.user_metadata_raw,
-            "hash": parsed.provided_hash,
-        })
+        spec = schemas_in.UploadAssetSpec.model_validate(
+            {
+                "tags": parsed.tags_raw,
+                "name": parsed.provided_name,
+                "user_metadata": parsed.user_metadata_raw,
+                "hash": parsed.provided_hash,
+            }
+        )
     except ValidationError as ve:
         _delete_temp_file_if_exists(parsed.tmp_path)
-        return _build_error_response(400, "INVALID_BODY", f"Validation failed: {ve.json()}")
+        return _build_error_response(
+            400, "INVALID_BODY", f"Validation failed: {ve.json()}"
+        )

     if spec.tags and spec.tags[0] == "models":
-        if len(spec.tags) < 2 or spec.tags[1] not in folder_paths.folder_names_and_paths:
+        if (
+            len(spec.tags) < 2
+            or spec.tags[1] not in folder_paths.folder_names_and_paths
+        ):
             _delete_temp_file_if_exists(parsed.tmp_path)
             category = spec.tags[1] if len(spec.tags) >= 2 else ""
-            return _build_error_response(400, "INVALID_BODY", f"unknown models category '{category}'")
+            return _build_error_response(
+                400, "INVALID_BODY", f"unknown models category '{category}'"
+            )

     try:
         # Fast path: if a valid provided hash exists, create AssetInfo without writing anything
@@ -310,12 +361,18 @@ async def upload_asset(request: web.Request) -> web.Response:
             )
             if result is None:
                 _delete_temp_file_if_exists(parsed.tmp_path)
-                return _build_error_response(404, "ASSET_NOT_FOUND", f"Asset content {spec.hash} does not exist")
+                return _build_error_response(
+                    404, "ASSET_NOT_FOUND", f"Asset content {spec.hash} does not exist"
+                )
             _delete_temp_file_if_exists(parsed.tmp_path)
         else:
             # Otherwise, we must have a temp file path to ingest
             if not parsed.tmp_path or not os.path.exists(parsed.tmp_path):
-                return _build_error_response(404, "ASSET_NOT_FOUND", "Provided hash not found and no file uploaded.")
+                return _build_error_response(
+                    404,
+                    "ASSET_NOT_FOUND",
+                    "Provided hash not found and no file uploaded.",
+                )

             result = upload_from_temp_path(
                 temp_path=parsed.tmp_path,
@@ -365,7 +422,9 @@ async def update_asset_route(request: web.Request) -> web.Response:
     except ValidationError as ve:
         return _build_validation_error_response("INVALID_BODY", ve)
     except Exception:
-        return _build_error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
+        return _build_error_response(
+            400, "INVALID_JSON", "Request body must be valid JSON."
+        )

     try:
         result = update_asset_metadata(
@@ -383,7 +442,9 @@ async def update_asset_route(request: web.Request) -> web.Response:
             updated_at=result.info.updated_at,
         )
     except (ValueError, PermissionError) as ve:
-        return _build_error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
+        return _build_error_response(
+            404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id}
+        )
     except Exception:
         logging.exception(
             "update_asset failed for asset_info_id=%s, owner_id=%s",
@@ -398,7 +459,11 @@ async def update_asset_route(request: web.Request) -> web.Response:
 async def delete_asset_route(request: web.Request) -> web.Response:
     asset_info_id = str(uuid.UUID(request.match_info["id"]))
     delete_content_param = request.query.get("delete_content")
-    delete_content = True if delete_content_param is None else delete_content_param.lower() not in {"0", "false", "no"}
+    delete_content = (
+        True
+        if delete_content_param is None
+        else delete_content_param.lower() not in {"0", "false", "no"}
+    )

     try:
         deleted = delete_asset_reference(
@@ -415,7 +480,9 @@ async def delete_asset_route(request: web.Request) -> web.Response:
         return _build_error_response(500, "INTERNAL", "Unexpected server error.")

     if not deleted:
-        return _build_error_response(404, "ASSET_NOT_FOUND", f"AssetInfo {asset_info_id} not found.")
+        return _build_error_response(
+            404, "ASSET_NOT_FOUND", f"AssetInfo {asset_info_id} not found."
+        )

     return web.Response(status=204)
@@ -430,7 +497,13 @@ async def get_tags(request: web.Request) -> web.Response:
         query = schemas_in.TagsListQuery.model_validate(query_map)
     except ValidationError as e:
         return web.json_response(
-            {"error": {"code": "INVALID_QUERY", "message": "Invalid query parameters", "details": e.errors()}},
+            {
+                "error": {
+                    "code": "INVALID_QUERY",
+                    "message": "Invalid query parameters",
+                    "details": e.errors(),
+                }
+            },
             status=400,
         )
@@ -443,8 +516,13 @@ async def get_tags(request: web.Request) -> web.Response:
         owner_id=USER_MANAGER.get_request_user_id(request),
     )

-    tags = [schemas_out.TagUsage(name=name, count=count, type=tag_type) for (name, tag_type, count) in rows]
-    payload = schemas_out.TagsList(tags=tags, total=total, has_more=(query.offset + len(tags)) < total)
+    tags = [
+        schemas_out.TagUsage(name=name, count=count, type=tag_type)
+        for (name, tag_type, count) in rows
+    ]
+    payload = schemas_out.TagsList(
+        tags=tags, total=total, has_more=(query.offset + len(tags)) < total
+    )
     return web.json_response(payload.model_dump(mode="json"))
@@ -455,9 +533,16 @@ async def add_asset_tags(request: web.Request) -> web.Response:
         json_payload = await request.json()
         data = schemas_in.TagsAdd.model_validate(json_payload)
     except ValidationError as ve:
-        return _build_error_response(400, "INVALID_BODY", "Invalid JSON body for tags add.", {"errors": ve.errors()})
+        return _build_error_response(
+            400,
+            "INVALID_BODY",
+            "Invalid JSON body for tags add.",
+            {"errors": ve.errors()},
+        )
     except Exception:
-        return _build_error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
+        return _build_error_response(
+            400, "INVALID_JSON", "Request body must be valid JSON."
+        )

     try:
         result = apply_tags(
@@ -472,7 +557,9 @@ async def add_asset_tags(request: web.Request) -> web.Response:
             total_tags=result.total_tags,
         )
     except (ValueError, PermissionError) as ve:
-        return _build_error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
+        return _build_error_response(
+            404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id}
+        )
     except Exception:
         logging.exception(
             "add_tags_to_asset failed for asset_info_id=%s, owner_id=%s",
@@ -491,9 +578,16 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
         json_payload = await request.json()
         data = schemas_in.TagsRemove.model_validate(json_payload)
     except ValidationError as ve:
-        return _build_error_response(400, "INVALID_BODY", "Invalid JSON body for tags remove.", {"errors": ve.errors()})
+        return _build_error_response(
+            400,
+            "INVALID_BODY",
+            "Invalid JSON body for tags remove.",
+            {"errors": ve.errors()},
+        )
     except Exception:
-        return _build_error_response(400, "INVALID_JSON", "Request body must be valid JSON.")
+        return _build_error_response(
+            400, "INVALID_JSON", "Request body must be valid JSON."
+        )

     try:
         result = remove_tags(
@@ -507,7 +601,9 @@ async def delete_asset_tags(request: web.Request) -> web.Response:
             total_tags=result.total_tags,
         )
     except ValueError as ve:
-        return _build_error_response(404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id})
+        return _build_error_response(
+            404, "ASSET_NOT_FOUND", str(ve), {"id": asset_info_id}
+        )
     except Exception:
         logging.exception(
             "remove_tags_from_asset failed for asset_info_id=%s, owner_id=%s",

View File

@@ -65,6 +65,7 @@ class ParsedUpload:
     provided_hash: str | None
     provided_hash_exists: bool | None

+
 class ListAssetsQuery(BaseModel):
     include_tags: list[str] = Field(default_factory=list)
     exclude_tags: list[str] = Field(default_factory=list)
@@ -76,7 +77,9 @@ class ListAssetsQuery(BaseModel):
     limit: conint(ge=1, le=500) = 20
     offset: conint(ge=0) = 0
-    sort: Literal["name", "created_at", "updated_at", "size", "last_access_time"] = "created_at"
+    sort: Literal["name", "created_at", "updated_at", "size", "last_access_time"] = (
+        "created_at"
+    )
     order: Literal["asc", "desc"] = "desc"

     @field_validator("include_tags", "exclude_tags", mode="before")
@@ -218,6 +221,7 @@ class UploadAssetSpec(BaseModel):
     Files created via this endpoint are stored on disk using the **content hash** as the filename stem
     and the original extension is preserved when available.
     """
+
     model_config = ConfigDict(extra="ignore", str_strip_whitespace=True)

     tags: list[str] = Field(..., min_length=1)
@@ -315,5 +319,7 @@ class UploadAssetSpec(BaseModel):
             raise ValueError("first tag must be one of: models, input, output")
         if root == "models":
             if len(self.tags) < 2:
-                raise ValueError("models uploads require a category tag as the second tag")
+                raise ValueError(
+                    "models uploads require a category tag as the second tag"
+                )
         return self

View File

@@ -19,7 +19,11 @@ def normalize_and_validate_hash(s: str) -> str:
     if ":" not in s:
         raise UploadError(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
     algo, digest = s.split(":", 1)
-    if algo != "blake3" or not digest or any(c for c in digest if c not in "0123456789abcdef"):
+    if (
+        algo != "blake3"
+        or not digest
+        or any(c for c in digest if c not in "0123456789abcdef")
+    ):
         raise UploadError(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
     return f"{algo}:{digest}"
@@ -42,7 +46,9 @@ async def parse_multipart_upload(
         UploadError: On validation or I/O errors
     """
     if not (request.content_type or "").lower().startswith("multipart/"):
-        raise UploadError(415, "UNSUPPORTED_MEDIA_TYPE", "Use multipart/form-data for uploads.")
+        raise UploadError(
+            415, "UNSUPPORTED_MEDIA_TYPE", "Use multipart/form-data for uploads."
+        )

     reader = await request.multipart()
@@ -68,7 +74,9 @@ async def parse_multipart_upload(
             try:
                 s = ((await field.text()) or "").strip().lower()
             except Exception:
-                raise UploadError(400, "INVALID_HASH", "hash must be like 'blake3:<hex>'")
+                raise UploadError(
+                    400, "INVALID_HASH", "hash must be like 'blake3:<hex>'"
+                )
             if s:
                 provided_hash = normalize_and_validate_hash(s)
@@ -90,7 +98,9 @@ async def parse_multipart_upload(
                         break
                     file_written += len(chunk)
                 except Exception:
-                    raise UploadError(500, "UPLOAD_IO_ERROR", "Failed to receive uploaded file.")
+                    raise UploadError(
+                        500, "UPLOAD_IO_ERROR", "Failed to receive uploaded file."
+                    )
                 continue

             uploads_root = os.path.join(folder_paths.get_temp_directory(), "uploads")
@@ -108,7 +118,9 @@ async def parse_multipart_upload(
                         file_written += len(chunk)
             except Exception:
                 _delete_temp_file_if_exists(tmp_path)
-                raise UploadError(500, "UPLOAD_IO_ERROR", "Failed to receive and store uploaded file.")
+                raise UploadError(
+                    500, "UPLOAD_IO_ERROR", "Failed to receive and store uploaded file."
+                )

         elif fname == "tags":
             tags_raw.append((await field.text()) or "")
@@ -118,9 +130,15 @@ async def parse_multipart_upload(
             user_metadata_raw = (await field.text()) or None

     if not file_present and not (provided_hash and provided_hash_exists):
-        raise UploadError(400, "MISSING_FILE", "Form must include a 'file' part or a known 'hash'.")
+        raise UploadError(
+            400, "MISSING_FILE", "Form must include a 'file' part or a known 'hash'."
+        )

-    if file_present and file_written == 0 and not (provided_hash and provided_hash_exists):
+    if (
+        file_present
+        and file_written == 0
+        and not (provided_hash and provided_hash_exists)
+    ):
         _delete_temp_file_if_exists(tmp_path)
         raise UploadError(400, "EMPTY_UPLOAD", "Uploaded file is empty.")

View File

@@ -27,7 +27,9 @@ from app.database.models import Base, to_dict
 class Asset(Base):
     __tablename__ = "assets"

-    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
+    id: Mapped[str] = mapped_column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4())
+    )
     hash: Mapped[str | None] = mapped_column(String(256), nullable=True)
     size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
     mime_type: Mapped[str | None] = mapped_column(String(255))
@@ -75,7 +77,9 @@ class AssetCacheState(Base):
     __tablename__ = "asset_cache_state"

     id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
-    asset_id: Mapped[str] = mapped_column(String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False)
+    asset_id: Mapped[str] = mapped_column(
+        String(36), ForeignKey("assets.id", ondelete="CASCADE"), nullable=False
+    )
     file_path: Mapped[str] = mapped_column(Text, nullable=False)
     mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
     needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
@@ -85,7 +89,9 @@ class AssetCacheState(Base):
     __table_args__ = (
         Index("ix_asset_cache_state_file_path", "file_path"),
         Index("ix_asset_cache_state_asset_id", "asset_id"),
-        CheckConstraint("(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"),
+        CheckConstraint(
+            "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg"
+        ),
         UniqueConstraint("file_path", name="uq_asset_cache_state_file_path"),
     )
@@ -99,15 +105,29 @@ class AssetCacheState(Base):
 class AssetInfo(Base):
     __tablename__ = "assets_info"

-    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
+    id: Mapped[str] = mapped_column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4())
+    )
     owner_id: Mapped[str] = mapped_column(String(128), nullable=False, default="")
     name: Mapped[str] = mapped_column(String(512), nullable=False)
-    asset_id: Mapped[str] = mapped_column(String(36), ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False)
-    preview_id: Mapped[str | None] = mapped_column(String(36), ForeignKey("assets.id", ondelete="SET NULL"))
-    user_metadata: Mapped[dict[str, Any] | None] = mapped_column(JSON(none_as_null=True))
-    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=get_utc_now)
-    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=get_utc_now)
-    last_access_time: Mapped[datetime] = mapped_column(DateTime(timezone=False), nullable=False, default=get_utc_now)
+    asset_id: Mapped[str] = mapped_column(
+        String(36), ForeignKey("assets.id", ondelete="RESTRICT"), nullable=False
+    )
+    preview_id: Mapped[str | None] = mapped_column(
+        String(36), ForeignKey("assets.id", ondelete="SET NULL")
+    )
+    user_metadata: Mapped[dict[str, Any] | None] = mapped_column(
+        JSON(none_as_null=True)
+    )
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=False), nullable=False, default=get_utc_now
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=False), nullable=False, default=get_utc_now
+    )
+    last_access_time: Mapped[datetime] = mapped_column(
+        DateTime(timezone=False), nullable=False, default=get_utc_now
+    )

     asset: Mapped[Asset] = relationship(
         "Asset",
@@ -143,7 +163,9 @@ class AssetInfo(Base):
     )

     __table_args__ = (
-        UniqueConstraint("asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"),
+        UniqueConstraint(
+            "asset_id", "owner_id", "name", name="uq_assets_info_asset_owner_name"
+        ),
         Index("ix_assets_info_owner_name", "owner_id", "name"),
         Index("ix_assets_info_owner_id", "owner_id"),
         Index("ix_assets_info_asset_id", "asset_id"),
@@ -225,9 +247,7 @@ class Tag(Base):
         overlaps="asset_info_links,tag_links,tags,asset_info",
     )

-    __table_args__ = (
-        Index("ix_tags_tag_type", "tag_type"),
-    )
+    __table_args__ = (Index("ix_tags_tag_type", "tag_type"),)

     def __repr__(self) -> str:
         return f"<Tag {self.name}>"

View File

@@ -4,17 +4,7 @@ from sqlalchemy.dialects import sqlite
 from sqlalchemy.orm import Session

 from app.assets.database.models import Asset
-
-MAX_BIND_PARAMS = 800
-
-
-def _calculate_rows_per_statement(cols: int) -> int:
-    return max(1, MAX_BIND_PARAMS // max(1, cols))
-
-
-def _iter_chunks(seq, n: int):
-    for i in range(0, len(seq), n):
-        yield seq[i : i + n]
+from app.assets.database.queries.common import calculate_rows_per_statement, iter_chunks
@@ -26,7 +16,10 @@ def asset_exists_by_hash(
     """
     row = (
         session.execute(
-            select(sa.literal(True)).select_from(Asset).where(Asset.hash == asset_hash).limit(1)
+            select(sa.literal(True))
+            .select_from(Asset)
+            .where(Asset.hash == asset_hash)
+            .limit(1)
         )
     ).first()
     return row is not None
@@ -37,8 +30,10 @@ def get_asset_by_hash(
     asset_hash: str,
 ) -> Asset | None:
     return (
-        session.execute(select(Asset).where(Asset.hash == asset_hash).limit(1))
-    ).scalars().first()
+        (session.execute(select(Asset).where(Asset.hash == asset_hash).limit(1)))
+        .scalars()
+        .first()
+    )
@@ -60,9 +55,11 @@ def upsert_asset(
     res = session.execute(ins)
     created = int(res.rowcount or 0) > 0

-    asset = session.execute(
-        select(Asset).where(Asset.hash == asset_hash).limit(1)
-    ).scalars().first()
+    asset = (
+        session.execute(select(Asset).where(Asset.hash == asset_hash).limit(1))
+        .scalars()
+        .first()
+    )
     if not asset:
         raise RuntimeError("Asset row not found after upsert.")
@@ -89,5 +86,5 @@ def bulk_insert_assets(
     if not rows:
         return
     ins = sqlite.insert(Asset)
-    for chunk in _iter_chunks(rows, _calculate_rows_per_statement(5)):
+    for chunk in iter_chunks(rows, calculate_rows_per_statement(5)):
         session.execute(ins, chunk)

View File

@@ -16,10 +16,16 @@ from app.assets.database.models import (
     AssetInfoTag,
     Tag,
 )
+from app.assets.database.queries.common import (
+    MAX_BIND_PARAMS,
+    build_visible_owner_clause,
+    calculate_rows_per_statement,
+    iter_chunks,
+)
 from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags

-def check_is_scalar(v):
+def _check_is_scalar(v):
     if v is None:
         return True
     if isinstance(v, bool):
@@ -33,8 +39,12 @@ def _scalar_to_row(key: str, ordinal: int, value) -> dict:
     """Convert a scalar value to a typed projection row."""
     if value is None:
         return {
-            "key": key, "ordinal": ordinal,
-            "val_str": None, "val_num": None, "val_bool": None, "val_json": None
+            "key": key,
+            "ordinal": ordinal,
+            "val_str": None,
+            "val_num": None,
+            "val_bool": None,
+            "val_json": None,
         }
     if isinstance(value, bool):
         return {"key": key, "ordinal": ordinal, "val_bool": bool(value)}
@@ -55,35 +65,16 @@ def convert_metadata_to_rows(key: str, value) -> list[dict]:
     if value is None:
         return [_scalar_to_row(key, 0, None)]
-    if check_is_scalar(value):
+    if _check_is_scalar(value):
         return [_scalar_to_row(key, 0, value)]
     if isinstance(value, list):
-        if all(check_is_scalar(x) for x in value):
+        if all(_check_is_scalar(x) for x in value):
             return [_scalar_to_row(key, i, x) for i, x in enumerate(value)]
         return [{"key": key, "ordinal": i, "val_json": x} for i, x in enumerate(value)]
     return [{"key": key, "ordinal": 0, "val_json": value}]

-MAX_BIND_PARAMS = 800
-
-
-def _calculate_rows_per_statement(cols: int) -> int:
-    return max(1, MAX_BIND_PARAMS // max(1, cols))
-
-
-def _iter_chunks(seq, n: int):
-    for i in range(0, len(seq), n):
-        yield seq[i : i + n]
-
-
-def _build_visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement:
-    """Build owner visibility predicate for reads. Owner-less rows are visible to everyone."""
-    owner_id = (owner_id or "").strip()
-    if owner_id == "":
-        return AssetInfo.owner_id == ""
-    return AssetInfo.owner_id.in_(["", owner_id])
-

 def _apply_tag_filters(
     stmt: sa.sql.Select,
@@ -229,15 +220,19 @@ def get_or_create_asset_info(
     if info:
         return info, True

-    existing = session.execute(
-        select(AssetInfo)
-        .where(
-            AssetInfo.asset_id == asset_id,
-            AssetInfo.name == name,
-            AssetInfo.owner_id == owner_id,
-        )
-        .limit(1)
-    ).unique().scalar_one_or_none()
+    existing = (
+        session.execute(
+            select(AssetInfo)
+            .where(
+                AssetInfo.asset_id == asset_id,
+                AssetInfo.name == name,
+                AssetInfo.owner_id == owner_id,
+            )
+            .limit(1)
+        )
+        .unique()
+        .scalar_one_or_none()
+    )
     if not existing:
         raise RuntimeError("Failed to find AssetInfo after insert conflict.")
     return existing, False
@@ -274,7 +269,7 @@ def list_asset_infos_page(
         select(AssetInfo)
         .join(Asset, Asset.id == AssetInfo.asset_id)
         .options(contains_eager(AssetInfo.asset), noload(AssetInfo.tags))
-        .where(_build_visible_owner_clause(owner_id))
+        .where(build_visible_owner_clause(owner_id))
     )

     if name_contains:
@@ -302,7 +297,7 @@ def list_asset_infos_page(
         select(sa.func.count())
         .select_from(AssetInfo)
        .join(Asset, Asset.id == AssetInfo.asset_id)
-        .where(_build_visible_owner_clause(owner_id))
+        .where(build_visible_owner_clause(owner_id))
     )
     if name_contains:
         escaped, esc = escape_sql_like_string(name_contains)
@@ -341,7 +336,7 @@ def fetch_asset_info_asset_and_tags(
         .join(Tag, Tag.name == AssetInfoTag.tag_name, isouter=True)
         .where(
             AssetInfo.id == asset_info_id,
-            _build_visible_owner_clause(owner_id),
+            build_visible_owner_clause(owner_id),
         )
         .options(noload(AssetInfo.tags))
         .order_by(Tag.name.asc())
@@ -371,7 +366,7 @@ def fetch_asset_info_and_asset(
         .join(Asset, Asset.id == AssetInfo.asset_id)
         .where(
             AssetInfo.id == asset_info_id,
-            _build_visible_owner_clause(owner_id),
+            build_visible_owner_clause(owner_id),
         )
         .limit(1)
         .options(noload(AssetInfo.tags))
@@ -393,7 +388,9 @@ def update_asset_info_access_time(
     stmt = sa.update(AssetInfo).where(AssetInfo.id == asset_info_id)
     if only_if_newer:
         stmt = stmt.where(
-            sa.or_(AssetInfo.last_access_time.is_(None), AssetInfo.last_access_time < ts)
+            sa.or_(
+                AssetInfo.last_access_time.is_(None), AssetInfo.last_access_time < ts
+            )
         )
     session.execute(stmt.values(last_access_time=ts))
@@ -420,9 +417,7 @@ def update_asset_info_updated_at(
     """Update the updated_at timestamp of an AssetInfo."""
     ts = ts or get_utc_now()
     session.execute(
-        sa.update(AssetInfo)
-        .where(AssetInfo.id == asset_info_id)
-        .values(updated_at=ts)
+        sa.update(AssetInfo).where(AssetInfo.id == asset_info_id).values(updated_at=ts)
     )
@@ -439,7 +434,9 @@ def set_asset_info_metadata(
     info.updated_at = get_utc_now()
     session.flush()

-    session.execute(delete(AssetInfoMeta).where(AssetInfoMeta.asset_info_id == asset_info_id))
+    session.execute(
+        delete(AssetInfoMeta).where(AssetInfoMeta.asset_info_id == asset_info_id)
+    )
     session.flush()

     if not user_metadata:
@@ -471,7 +468,7 @@ def delete_asset_info_by_id(
 ) -> bool:
     stmt = sa.delete(AssetInfo).where(
         AssetInfo.id == asset_info_id,
-        _build_visible_owner_clause(owner_id),
+        build_visible_owner_clause(owner_id),
     )
     return int((session.execute(stmt)).rowcount or 0) > 0
@@ -511,7 +508,7 @@ def bulk_insert_asset_infos_ignore_conflicts(
     ins = sqlite.insert(AssetInfo).on_conflict_do_nothing(
         index_elements=[AssetInfo.asset_id, AssetInfo.owner_id, AssetInfo.name]
     )
-    for chunk in _iter_chunks(rows, _calculate_rows_per_statement(9)):
+    for chunk in iter_chunks(rows, calculate_rows_per_statement(9)):
         session.execute(ins, chunk)
@@ -524,9 +521,7 @@ def get_asset_info_ids_by_ids(
         return set()
     found: set[str] = set()
-    for chunk in _iter_chunks(info_ids, MAX_BIND_PARAMS):
-        result = session.execute(
-            select(AssetInfo.id).where(AssetInfo.id.in_(chunk))
-        )
+    for chunk in iter_chunks(info_ids, MAX_BIND_PARAMS):
+        result = session.execute(select(AssetInfo.id).where(AssetInfo.id.in_(chunk)))
         found.update(result.scalars().all())
     return found

View File

@@ -7,34 +7,13 @@ from sqlalchemy.dialects import sqlite
 from sqlalchemy.orm import Session

 from app.assets.database.models import Asset, AssetCacheState, AssetInfo
+from app.assets.database.queries.common import (
+    MAX_BIND_PARAMS,
+    calculate_rows_per_statement,
+    iter_chunks,
+)
 from app.assets.helpers import escape_sql_like_string

-MAX_BIND_PARAMS = 800
-
-__all__ = [
-    "CacheStateRow",
-    "list_cache_states_by_asset_id",
-    "upsert_cache_state",
-    "delete_cache_states_outside_prefixes",
-    "get_orphaned_seed_asset_ids",
-    "delete_assets_by_ids",
-    "get_cache_states_for_prefixes",
-    "bulk_set_needs_verify",
-    "delete_cache_states_by_ids",
-    "delete_orphaned_seed_asset",
-    "bulk_insert_cache_states_ignore_conflicts",
-    "get_cache_states_by_paths_and_asset_ids",
-]
-
-
-def _calculate_rows_per_statement(cols: int) -> int:
-    return max(1, MAX_BIND_PARAMS // max(1, cols))
-
-
-def _iter_chunks(seq, n: int):
-    for i in range(0, len(seq), n):
-        yield seq[i : i + n]
-

 class CacheStateRow(NamedTuple):
     """Row from cache state query with joined asset data."""
@@ -52,12 +31,16 @@ def list_cache_states_by_asset_id(
     session: Session, *, asset_id: str
 ) -> Sequence[AssetCacheState]:
     return (
-        session.execute(
-            select(AssetCacheState)
-            .where(AssetCacheState.asset_id == asset_id)
-            .order_by(AssetCacheState.id.asc())
-        )
-    ).scalars().all()
+        (
+            session.execute(
+                select(AssetCacheState)
+                .where(AssetCacheState.asset_id == asset_id)
+                .order_by(AssetCacheState.id.asc())
+            )
+        )
+        .scalars()
+        .all()
+    )
@@ -100,7 +83,9 @@ def upsert_cache_state(
     return False, updated

-def delete_cache_states_outside_prefixes(session: Session, valid_prefixes: list[str]) -> int:
+def delete_cache_states_outside_prefixes(
+    session: Session, valid_prefixes: list[str]
+) -> int:
     """Delete cache states with file_path not matching any of the valid prefixes.

     Args:
@@ -261,7 +246,7 @@ def bulk_insert_cache_states_ignore_conflicts(
     ins = sqlite.insert(AssetCacheState).on_conflict_do_nothing(
         index_elements=[AssetCacheState.file_path]
     )
-    for chunk in _iter_chunks(rows, _calculate_rows_per_statement(3)):
+    for chunk in iter_chunks(rows, calculate_rows_per_statement(3)):
         session.execute(ins, chunk)
@@ -283,7 +268,7 @@ def get_cache_states_by_paths_and_asset_ids(
     paths = list(path_to_asset.keys())

     winners: set[str] = set()
-    for chunk in _iter_chunks(paths, MAX_BIND_PARAMS):
+    for chunk in iter_chunks(paths, MAX_BIND_PARAMS):
         result = session.execute(
             select(AssetCacheState.file_path).where(
                 AssetCacheState.file_path.in_(chunk),

View File

@@ -0,0 +1,37 @@
+"""Shared utilities for database query modules."""
+
+from typing import Iterable
+
+import sqlalchemy as sa
+
+from app.assets.database.models import AssetInfo
+
+MAX_BIND_PARAMS = 800
+
+
+def calculate_rows_per_statement(cols: int) -> int:
+    """Calculate how many rows can fit in one statement given column count."""
+    return max(1, MAX_BIND_PARAMS // max(1, cols))
+
+
+def iter_chunks(seq, n: int):
+    """Yield successive n-sized chunks from seq."""
+    for i in range(0, len(seq), n):
+        yield seq[i : i + n]
+
+
+def iter_row_chunks(rows: list[dict], cols_per_row: int) -> Iterable[list[dict]]:
+    """Yield chunks of rows sized to fit within bind param limits."""
+    if not rows:
+        return []
+    rows_per_stmt = max(1, MAX_BIND_PARAMS // max(1, cols_per_row))
+    for i in range(0, len(rows), rows_per_stmt):
+        yield rows[i : i + rows_per_stmt]
+
+
+def build_visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement:
+    """Build owner visibility predicate for reads. Owner-less rows are visible to everyone."""
+    owner_id = (owner_id or "").strip()
+    if owner_id == "":
+        return AssetInfo.owner_id == ""
+    return AssetInfo.owner_id.in_(["", owner_id])
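
For dict rows the module also exposes iter_row_chunks, which folds the width arithmetic into the chunking itself. A small equivalence sketch, assuming the 7-column meta rows that bulk_insert_tags_and_meta passes later in this commit:

    # iter_row_chunks(meta_rows, cols_per_row=7) yields the same chunks as
    # iter_chunks(meta_rows, calculate_rows_per_statement(7)): with
    # MAX_BIND_PARAMS = 800, both cap each INSERT at 800 // 7 == 114 rows.
    for chunk in iter_row_chunks(meta_rows, cols_per_row=7):
        session.execute(ins_meta, chunk)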

View File

@@ -7,6 +7,10 @@ from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session

 from app.assets.database.models import AssetInfo, AssetInfoMeta, AssetInfoTag, Tag
+from app.assets.database.queries.common import (
+    build_visible_owner_clause,
+    iter_row_chunks,
+)
 from app.assets.helpers import escape_sql_like_string, get_utc_now, normalize_tags
@@ -27,30 +31,10 @@ class SetTagsDict(TypedDict):
     removed: list[str]
     total: list[str]

-MAX_BIND_PARAMS = 800
-
-
-def _calculate_rows_per_statement(cols: int) -> int:
-    return max(1, MAX_BIND_PARAMS // max(1, cols))
-
-
-def _iter_row_chunks(rows: list[dict], cols_per_row: int) -> Iterable[list[dict]]:
-    if not rows:
-        return []
-    rows_per_stmt = max(1, MAX_BIND_PARAMS // max(1, cols_per_row))
-    for i in range(0, len(rows), rows_per_stmt):
-        yield rows[i : i + rows_per_stmt]
-
-
-def _build_visible_owner_clause(owner_id: str) -> sa.sql.ClauseElement:
-    """Build owner visibility predicate for reads. Owner-less rows are visible to everyone."""
-    owner_id = (owner_id or "").strip()
-    if owner_id == "":
-        return AssetInfo.owner_id == ""
-    return AssetInfo.owner_id.in_(["", owner_id])
-
-
-def ensure_tags_exist(session: Session, names: Iterable[str], tag_type: str = "user") -> None:
+def ensure_tags_exist(
+    session: Session, names: Iterable[str], tag_type: str = "user"
+) -> None:
     wanted = normalize_tags(list(names))
     if not wanted:
         return
@@ -65,9 +49,12 @@ def ensure_tags_exist(
 def get_asset_tags(session: Session, asset_info_id: str) -> list[str]:
     return [
-        tag_name for (tag_name,) in (
+        tag_name
+        for (tag_name,) in (
             session.execute(
-                select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id)
+                select(AssetInfoTag.tag_name).where(
+                    AssetInfoTag.asset_info_id == asset_info_id
+                )
             )
         ).all()
     ]
@@ -82,8 +69,13 @@ def set_asset_info_tags(
     desired = normalize_tags(tags)

     current = set(
-        tag_name for (tag_name,) in (
-            session.execute(select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id))
+        tag_name
+        for (tag_name,) in (
+            session.execute(
+                select(AssetInfoTag.tag_name).where(
+                    AssetInfoTag.asset_info_id == asset_info_id
+                )
+            )
         ).all()
     )
@@ -92,16 +84,25 @@ def set_asset_info_tags(
     if to_add:
         ensure_tags_exist(session, to_add, tag_type="user")
-        session.add_all([
-            AssetInfoTag(asset_info_id=asset_info_id, tag_name=t, origin=origin, added_at=get_utc_now())
-            for t in to_add
-        ])
+        session.add_all(
+            [
+                AssetInfoTag(
+                    asset_info_id=asset_info_id,
+                    tag_name=t,
+                    origin=origin,
+                    added_at=get_utc_now(),
+                )
+                for t in to_add
+            ]
+        )
         session.flush()

     if to_remove:
         session.execute(
-            delete(AssetInfoTag)
-            .where(AssetInfoTag.asset_info_id == asset_info_id, AssetInfoTag.tag_name.in_(to_remove))
+            delete(AssetInfoTag).where(
+                AssetInfoTag.asset_info_id == asset_info_id,
+                AssetInfoTag.tag_name.in_(to_remove),
+            )
         )
         session.flush()
@@ -133,7 +134,9 @@ def add_tags_to_asset_info(
         tag_name
         for (tag_name,) in (
             session.execute(
-                sa.select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id)
+                sa.select(AssetInfoTag.tag_name).where(
+                    AssetInfoTag.asset_info_id == asset_info_id
+                )
             )
         ).all()
     }
@@ -185,7 +188,9 @@ def remove_tags_from_asset_info(
         tag_name
         for (tag_name,) in (
             session.execute(
-                sa.select(AssetInfoTag.tag_name).where(AssetInfoTag.asset_info_id == asset_info_id)
+                sa.select(AssetInfoTag.tag_name).where(
+                    AssetInfoTag.asset_info_id == asset_info_id
+                )
             )
         ).all()
     }
@@ -195,8 +200,7 @@ def remove_tags_from_asset_info(
     if to_remove:
         session.execute(
-            delete(AssetInfoTag)
-            .where(
+            delete(AssetInfoTag).where(
                 AssetInfoTag.asset_info_id == asset_info_id,
                 AssetInfoTag.tag_name.in_(to_remove),
             )
@@ -222,7 +226,10 @@ def add_missing_tag_for_asset_id(
             .where(AssetInfo.asset_id == asset_id)
             .where(
                 sa.not_(
-                    sa.exists().where((AssetInfoTag.asset_info_id == AssetInfo.id) & (AssetInfoTag.tag_name == "missing"))
+                    sa.exists().where(
+                        (AssetInfoTag.asset_info_id == AssetInfo.id)
+                        & (AssetInfoTag.tag_name == "missing")
+                    )
                 )
             )
         )
@@ -232,7 +239,9 @@ def add_missing_tag_for_asset_id(
             ["asset_info_id", "tag_name", "origin", "added_at"],
             select_rows,
         )
-        .on_conflict_do_nothing(index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name])
+        .on_conflict_do_nothing(
+            index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name]
+        )
     )
@@ -242,7 +251,9 @@ def remove_missing_tag_for_asset_id(
 ) -> None:
     session.execute(
         sa.delete(AssetInfoTag).where(
-            AssetInfoTag.asset_info_id.in_(sa.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id)),
+            AssetInfoTag.asset_info_id.in_(
+                sa.select(AssetInfo.id).where(AssetInfo.asset_id == asset_id)
+            ),
             AssetInfoTag.tag_name == "missing",
         )
     )
@@ -264,7 +275,7 @@ def list_tags_with_usage(
         )
         .select_from(AssetInfoTag)
         .join(AssetInfo, AssetInfo.id == AssetInfoTag.asset_info_id)
-        .where(_build_visible_owner_clause(owner_id))
+        .where(build_visible_owner_clause(owner_id))
         .group_by(AssetInfoTag.tag_name)
         .subquery()
     )
@@ -323,12 +334,16 @@ def bulk_insert_tags_and_meta(
     ins_tags = sqlite.insert(AssetInfoTag).on_conflict_do_nothing(
         index_elements=[AssetInfoTag.asset_info_id, AssetInfoTag.tag_name]
     )
-    for chunk in _iter_row_chunks(tag_rows, cols_per_row=4):
+    for chunk in iter_row_chunks(tag_rows, cols_per_row=4):
         session.execute(ins_tags, chunk)
     if meta_rows:
         ins_meta = sqlite.insert(AssetInfoMeta).on_conflict_do_nothing(
-            index_elements=[AssetInfoMeta.asset_info_id, AssetInfoMeta.key, AssetInfoMeta.ordinal]
+            index_elements=[
+                AssetInfoMeta.asset_info_id,
+                AssetInfoMeta.key,
+                AssetInfoMeta.ordinal,
+            ]
         )
-        for chunk in _iter_row_chunks(meta_rows, cols_per_row=7):
+        for chunk in iter_row_chunks(meta_rows, cols_per_row=7):
             session.execute(ins_meta, chunk)

View File

@@ -10,7 +10,11 @@ def select_best_live_path(states: Sequence) -> str:
     2) Otherwise, pick the first path that exists.
     3) Otherwise return empty string.
     """
-    alive = [s for s in states if getattr(s, "file_path", None) and os.path.isfile(s.file_path)]
+    alive = [
+        s
+        for s in states
+        if getattr(s, "file_path", None) and os.path.isfile(s.file_path)
+    ]
     if not alive:
         return ""
     for s in alive:
@@ -19,7 +23,11 @@ def select_best_live_path(states: Sequence) -> str:
     return alive[0].file_path

-ALLOWED_ROOTS: tuple[Literal["models", "input", "output"], ...] = ("models", "input", "output")
+ALLOWED_ROOTS: tuple[Literal["models", "input", "output"], ...] = (
+    "models",
+    "input",
+    "output",
+)

 def escape_sql_like_string(s: str, escape: str = "!") -> tuple[str, str]:
@@ -43,4 +51,3 @@ def normalize_tags(tags: list[str] | None) -> list[str]:
     - Removing duplicates.
     """
     return [t.strip().lower() for t in (tags or []) if (t or "").strip()]
-

View File

@ -44,7 +44,9 @@ def verify_asset_file_unchanged(
) -> bool: ) -> bool:
if mtime_db is None: if mtime_db is None:
return False return False
actual_mtime_ns = getattr(stat_result, "st_mtime_ns", int(stat_result.st_mtime * 1_000_000_000)) actual_mtime_ns = getattr(
stat_result, "st_mtime_ns", int(stat_result.st_mtime * 1_000_000_000)
)
if int(mtime_db) != int(actual_mtime_ns): if int(mtime_db) != int(actual_mtime_ns):
return False return False
sz = int(size_db or 0) sz = int(size_db or 0)
@ -58,7 +60,9 @@ def list_files_recursively(base_dir: str) -> list[str]:
base_abs = os.path.abspath(base_dir) base_abs = os.path.abspath(base_dir)
if not os.path.isdir(base_abs): if not os.path.isdir(base_abs):
return out return out
for dirpath, _subdirs, filenames in os.walk(base_abs, topdown=True, followlinks=False): for dirpath, _subdirs, filenames in os.walk(
base_abs, topdown=True, followlinks=False
):
for name in filenames: for name in filenames:
out.append(os.path.abspath(os.path.join(dirpath, name))) out.append(os.path.abspath(os.path.join(dirpath, name)))
return out return out
@ -141,18 +145,22 @@ def _batch_insert_assets_from_paths(
path_list.append(ap) path_list.append(ap)
path_to_asset[ap] = aid path_to_asset[ap] = aid
asset_rows.append({ asset_rows.append(
"id": aid, {
"hash": None, "id": aid,
"size_bytes": sp["size_bytes"], "hash": None,
"mime_type": None, "size_bytes": sp["size_bytes"],
"created_at": now, "mime_type": None,
}) "created_at": now,
state_rows.append({ }
"asset_id": aid, )
"file_path": ap, state_rows.append(
"mtime_ns": sp["mtime_ns"], {
}) "asset_id": aid,
"file_path": ap,
"mtime_ns": sp["mtime_ns"],
}
)
asset_to_info[aid] = { asset_to_info[aid] = {
"id": iid, "id": iid,
"owner_id": owner_id, "owner_id": owner_id,
@ -179,7 +187,11 @@ def _batch_insert_assets_from_paths(
delete_assets_by_ids(session, lost_assets) delete_assets_by_ids(session, lost_assets)
if not winners_by_path: if not winners_by_path:
return {"inserted_infos": 0, "won_states": 0, "lost_states": len(losers_by_path)} return {
"inserted_infos": 0,
"won_states": 0,
"lost_states": len(losers_by_path),
}
winner_info_rows = [asset_to_info[path_to_asset[p]] for p in winners_by_path] winner_info_rows = [asset_to_info[path_to_asset[p]] for p in winners_by_path]
db_info_rows = [ db_info_rows = [
@ -209,22 +221,26 @@ def _batch_insert_assets_from_paths(
if iid not in inserted_info_ids: if iid not in inserted_info_ids:
continue continue
for t in row["_tags"]: for t in row["_tags"]:
tag_rows.append({ tag_rows.append(
"asset_info_id": iid, {
"tag_name": t, "asset_info_id": iid,
"origin": "automatic", "tag_name": t,
"added_at": now, "origin": "automatic",
}) "added_at": now,
}
)
if row["_filename"]: if row["_filename"]:
meta_rows.append({ meta_rows.append(
"asset_info_id": iid, {
"key": "filename", "asset_info_id": iid,
"ordinal": 0, "key": "filename",
"val_str": row["_filename"], "ordinal": 0,
"val_num": None, "val_str": row["_filename"],
"val_bool": None, "val_num": None,
"val_json": None, "val_bool": None,
}) "val_json": None,
}
)
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=meta_rows) bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=meta_rows)
@ -299,13 +315,15 @@ def sync_cache_states_with_filesystem(
except OSError: except OSError:
exists = False exists = False
acc["states"].append({ acc["states"].append(
"sid": row.state_id, {
"fp": row.file_path, "sid": row.state_id,
"exists": exists, "fp": row.file_path,
"fast_ok": fast_ok, "exists": exists,
"needs_verify": row.needs_verify, "fast_ok": fast_ok,
}) "needs_verify": row.needs_verify,
}
)
to_set_verify: list[int] = [] to_set_verify: list[int] = []
to_clear_verify: list[int] = [] to_clear_verify: list[int] = []
@@ -425,14 +443,18 @@ def _build_asset_specs(
         if not stat_p.st_size:
             continue
         name, tags = get_name_and_tags_from_asset_path(abs_p)
-        specs.append({
-            "abs_path": abs_p,
-            "size_bytes": stat_p.st_size,
-            "mtime_ns": getattr(stat_p, "st_mtime_ns", int(stat_p.st_mtime * 1_000_000_000)),
-            "info_name": name,
-            "tags": tags,
-            "fname": compute_relative_filename(abs_p),
-        })
+        specs.append(
+            {
+                "abs_path": abs_p,
+                "size_bytes": stat_p.st_size,
+                "mtime_ns": getattr(
+                    stat_p, "st_mtime_ns", int(stat_p.st_mtime * 1_000_000_000)
+                ),
+                "info_name": name,
+                "tags": tags,
+                "fname": compute_relative_filename(abs_p),
+            }
+        )
         tag_pool.update(tags)
 
     return specs, tag_pool, skipped
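
Note: the `mtime_ns` expression wrapped above is a precision fallback: use `st_mtime_ns` when the platform's `stat_result` provides it, otherwise derive nanoseconds from the float `st_mtime`. In isolation:

```python
import os

st = os.stat(__file__)
# Nanosecond mtime when available, else seconds scaled to nanoseconds.
mtime_ns = getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000))
```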
@@ -463,9 +485,7 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None:
     for r in roots:
         existing_paths.update(_sync_root_safely(r))
 
-    all_prefixes = [
-        os.path.abspath(p) for r in roots for p in get_prefixes_for_root(r)
-    ]
+    all_prefixes = [os.path.abspath(p) for r in roots for p in get_prefixes_for_root(r)]
     orphans_pruned = _prune_orphans_safely(all_prefixes)
 
     paths = _collect_paths_for_roots(roots)

View File

@@ -3,7 +3,6 @@ from app.assets.services.asset_management import (
     delete_asset_reference,
     get_asset_by_hash,
     get_asset_detail,
-    get_asset_info_with_tags,
     list_assets_page,
     resolve_asset_for_download,
     set_asset_preview,
@@ -49,7 +48,6 @@ __all__ = [
     "asset_exists",
     "get_asset_by_hash",
     "get_asset_detail",
-    "get_asset_info_with_tags",
    "list_assets_page",
     "resolve_asset_for_download",
     "update_asset_metadata",

View File

@@ -3,7 +3,6 @@ import mimetypes
 import os
 from typing import Sequence
 
-from sqlalchemy.orm import Session
 
 from app.assets.database.models import Asset
 from app.assets.database.queries import (
@@ -14,7 +13,6 @@ from app.assets.database.queries import (
     fetch_asset_info_asset_and_tags,
     get_asset_by_hash as queries_get_asset_by_hash,
     get_asset_info_by_id,
-    get_asset_tags,
     list_asset_infos_page,
     list_cache_states_by_asset_id,
     set_asset_info_metadata,
@@ -25,7 +23,7 @@ from app.assets.database.queries import (
     update_asset_info_updated_at,
 )
 from app.assets.helpers import select_best_live_path
-from app.assets.services.path_utils import compute_relative_filename
+from app.assets.services.path_utils import compute_filename_for_asset
 from app.assets.services.schemas import (
     AssetData,
     AssetDetailResult,
@@ -80,7 +78,7 @@ def update_asset_metadata(
         update_asset_info_name(session, asset_info_id=asset_info_id, name=name)
         touched = True
 
-        computed_filename = _compute_filename_for_asset(session, info.asset_id)
+        computed_filename = compute_filename_for_asset(session, info.asset_id)
 
         new_meta: dict | None = None
         if user_metadata is not None:
@@ -138,7 +136,9 @@ def delete_asset_reference(
         info_row = get_asset_info_by_id(session, asset_info_id=asset_info_id)
         asset_id = info_row.asset_id if info_row else None
 
-        deleted = delete_asset_info_by_id(session, asset_info_id=asset_info_id, owner_id=owner_id)
+        deleted = delete_asset_info_by_id(
+            session, asset_info_id=asset_info_id, owner_id=owner_id
+        )
         if not deleted:
             session.commit()
             return False
@@ -154,7 +154,9 @@ def delete_asset_reference(
 
         # Orphaned asset - delete it and its files
         states = list_cache_states_by_asset_id(session, asset_id=asset_id)
-        file_paths = [s.file_path for s in (states or []) if getattr(s, "file_path", None)]
+        file_paths = [
+            s.file_path for s in (states or []) if getattr(s, "file_path", None)
+        ]
 
         asset_row = session.get(Asset, asset_id)
         if asset_row is not None:
@@ -206,11 +208,6 @@ def set_asset_preview(
         return detail
 
 
-def _compute_filename_for_asset(session: Session, asset_id: str) -> str | None:
-    primary_path = select_best_live_path(list_cache_states_by_asset_id(session, asset_id=asset_id))
-    return compute_relative_filename(primary_path) if primary_path else None
-
-
 def asset_exists(asset_hash: str) -> bool:
     with create_session() as session:
         return asset_exists_by_hash(session, asset_hash=asset_hash)
@@ -265,7 +262,9 @@ def resolve_asset_for_download(
     owner_id: str = "",
 ) -> DownloadResolutionResult:
     with create_session() as session:
-        pair = fetch_asset_info_and_asset(session, asset_info_id=asset_info_id, owner_id=owner_id)
+        pair = fetch_asset_info_and_asset(
+            session, asset_info_id=asset_info_id, owner_id=owner_id
+        )
         if not pair:
             raise ValueError(f"AssetInfo {asset_info_id} not found")
 
@@ -278,27 +277,14 @@ def resolve_asset_for_download(
         update_asset_info_access_time(session, asset_info_id=asset_info_id)
         session.commit()
 
-        ctype = asset.mime_type or mimetypes.guess_type(info.name or abs_path)[0] or "application/octet-stream"
+        ctype = (
+            asset.mime_type
+            or mimetypes.guess_type(info.name or abs_path)[0]
+            or "application/octet-stream"
+        )
         download_name = info.name or os.path.basename(abs_path)
         return DownloadResolutionResult(
             abs_path=abs_path,
             content_type=ctype,
             download_name=download_name,
         )
-
-
-def get_asset_info_with_tags(
-    asset_info_id: str,
-    owner_id: str = "",
-) -> AssetDetailResult | None:
-    with create_session() as session:
-        pair = fetch_asset_info_and_asset(session, asset_info_id=asset_info_id, owner_id=owner_id)
-        if not pair:
-            return None
-        info, asset = pair
-        tags = get_asset_tags(session, asset_info_id=asset_info_id)
-        return AssetDetailResult(
-            info=extract_info_data(info),
-            asset=extract_asset_data(asset),
-            tags=tags,
-        )
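
Note: the `ctype` reflow above makes the three-step fallback easier to read: stored MIME type, then a guess from the display name or path, then a generic binary default. The same chain with only stdlib `mimetypes` (helper name is illustrative):

```python
import mimetypes

def resolve_content_type(stored: str | None, filename: str) -> str:
    # Mirrors the fallback chain in the diff above.
    return (
        stored
        or mimetypes.guess_type(filename)[0]
        or "application/octet-stream"
    )

assert resolve_content_type(None, "scene.json") == "application/json"
assert resolve_content_type("image/png", "anything") == "image/png"
```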

View File

@@ -15,7 +15,6 @@ from app.assets.database.queries import (
     get_asset_by_hash,
     get_asset_tags,
     get_or_create_asset_info,
-    list_cache_states_by_asset_id,
     remove_missing_tag_for_asset_id,
     set_asset_info_metadata,
     set_asset_info_tags,
@@ -23,9 +22,9 @@ from app.assets.database.queries import (
     upsert_asset,
     upsert_cache_state,
 )
-from app.assets.helpers import normalize_tags, select_best_live_path
+from app.assets.helpers import normalize_tags
 from app.assets.services.path_utils import (
-    compute_relative_filename,
+    compute_filename_for_asset,
     resolve_destination_from_tags,
     validate_path_within_base,
 )
@@ -92,7 +91,9 @@ def ingest_file_from_path(
         if info_created:
             asset_info_id = info.id
         else:
-            update_asset_info_timestamps(session, asset_info=info, preview_id=preview_id)
+            update_asset_info_timestamps(
+                session, asset_info=info, preview_id=preview_id
+            )
             asset_info_id = info.id
 
         norm = normalize_tags(list(tags))
@@ -165,7 +166,7 @@ def register_existing_asset(
             return result
 
         new_meta = dict(user_metadata or {})
-        computed_filename = _compute_filename_for_asset(session, asset.id)
+        computed_filename = compute_filename_for_asset(session, asset.id)
         if computed_filename:
             new_meta["filename"] = computed_filename
 
@@ -199,18 +200,14 @@ def register_existing_asset(
 
 def _validate_tags_exist(session: Session, tags: list[str]) -> None:
     existing_tag_names = set(
-        name for (name,) in session.execute(select(Tag.name).where(Tag.name.in_(tags))).all()
+        name
+        for (name,) in session.execute(select(Tag.name).where(Tag.name.in_(tags))).all()
     )
     missing = [t for t in tags if t not in existing_tag_names]
     if missing:
         raise ValueError(f"Unknown tags: {missing}")
 
 
-def _compute_filename_for_asset(session: Session, asset_id: str) -> str | None:
-    primary_path = select_best_live_path(list_cache_states_by_asset_id(session, asset_id=asset_id))
-    return compute_relative_filename(primary_path) if primary_path else None
-
-
 def _update_metadata_with_filename(
     session: Session,
     asset_info_id: str,
@@ -218,7 +215,7 @@ def _update_metadata_with_filename(
     info: AssetInfo,
     user_metadata: UserMetadata,
 ) -> None:
-    computed_filename = _compute_filename_for_asset(session, asset_id)
+    computed_filename = compute_filename_for_asset(session, asset_id)
 
     current_meta = info.user_metadata or {}
     new_meta = dict(current_meta)
@@ -346,7 +343,9 @@ def upload_from_temp_path(
         raise RuntimeError("failed to create asset metadata")
 
     with create_session() as session:
-        pair = fetch_asset_info_and_asset(session, asset_info_id=info_id, owner_id=owner_id)
+        pair = fetch_asset_info_and_asset(
+            session, asset_info_id=info_id, owner_id=owner_id
+        )
         if not pair:
             raise RuntimeError("inconsistent DB state after ingest")
         info, asset = pair
@@ -376,7 +375,9 @@ def create_from_hash(
 
     result = register_existing_asset(
         asset_hash=canonical,
-        name=_sanitize_filename(name, fallback=canonical.split(":", 1)[1] if ":" in canonical else canonical),
+        name=_sanitize_filename(
+            name, fallback=canonical.split(":", 1)[1] if ":" in canonical else canonical
+        ),
        user_metadata=user_metadata or {},
         tags=tags or [],
         tag_origin="manual",

View File

@@ -15,7 +15,10 @@ def get_comfy_models_folders() -> list[tuple[str, list[str]]]:
     targets: list[tuple[str, list[str]]] = []
     models_root = os.path.abspath(folder_paths.models_dir)
     for name, values in folder_paths.folder_names_and_paths.items():
-        paths, _exts = values[0], values[1]  # NOTE: this prevents nodepacks that hackily edit folder_... from breaking ComfyUI
+        paths, _exts = (
+            values[0],
+            values[1],
+        )  # NOTE: this prevents nodepacks that hackily edit folder_... from breaking ComfyUI
         if any(os.path.abspath(p).startswith(models_root + os.sep) for p in paths):
             targets.append((name, paths))
     return targets
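
Note: the `startswith(models_root + os.sep)` test keeps sibling directories such as `models_extra` from being mistaken for paths inside `models`. A POSIX-path illustration (paths are made up):

```python
import os

models_root = "/srv/comfy/models"

def is_under_models(path: str) -> bool:
    # Appending os.sep before the prefix check rules out lookalike siblings.
    return os.path.abspath(path).startswith(models_root + os.sep)

assert is_under_models("/srv/comfy/models/checkpoints/sd.safetensors")
assert not is_under_models("/srv/comfy/models_extra/rogue.bin")
```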
@@ -37,7 +40,9 @@ def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]:
         raw_subdirs = tags[2:]
     else:
         base_dir = os.path.abspath(
-            folder_paths.get_input_directory() if root == "input" else folder_paths.get_output_directory()
+            folder_paths.get_input_directory()
+            if root == "input"
+            else folder_paths.get_output_directory()
         )
         raw_subdirs = tags[1:]
     for i in raw_subdirs:
@@ -84,7 +89,9 @@ def compute_relative_filename(file_path: str) -> str | None:
     return "/".join(parts)  # input/output: keep all parts
 
 
-def get_asset_category_and_relative_path(file_path: str) -> tuple[Literal["input", "output", "models"], str]:
+def get_asset_category_and_relative_path(
+    file_path: str,
+) -> tuple[Literal["input", "output", "models"], str]:
     """Given an absolute or relative file path, determine which root category the path belongs to:
     - 'input' if the file resides under `folder_paths.get_input_directory()`
     - 'output' if the file resides under `folder_paths.get_output_directory()`
@@ -107,7 +114,9 @@ def get_asset_category_and_relative_path(file_path: str) -> tuple[Literal["input
             return False
 
     def _compute_relative(child: str, parent: str) -> str:
-        return os.path.relpath(os.path.join(os.sep, os.path.relpath(child, parent)), os.sep)
+        return os.path.relpath(
+            os.path.join(os.sep, os.path.relpath(child, parent)), os.sep
+        )
 
     # 1) input
     input_base = os.path.abspath(folder_paths.get_input_directory())
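
Note: `_compute_relative` is a double-`relpath` round-trip: relativize `child` against `parent`, re-anchor the result at the filesystem root, and relativize against the root again. The re-anchoring step normalizes away any leading `..` segments, so the returned path cannot climb out of the anchor. Illustration with POSIX paths:

```python
import os

def compute_relative(child: str, parent: str) -> str:
    return os.path.relpath(os.path.join(os.sep, os.path.relpath(child, parent)), os.sep)

print(compute_relative("/data/models/loras/style.safetensors", "/data/models"))
# loras/style.safetensors
print(compute_relative("/data/other/escape.bin", "/data/models"))
# other/escape.bin  (the leading ".." is clamped at the root anchor)
```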
@@ -135,7 +144,20 @@ def get_asset_category_and_relative_path(file_path: str) -> tuple[Literal["input
         combined = os.path.join(bucket, rel_inside)
         return "models", os.path.relpath(os.path.join(os.sep, combined), os.sep)
 
-    raise ValueError(f"Path is not within input, output, or configured model bases: {file_path}")
+    raise ValueError(
+        f"Path is not within input, output, or configured model bases: {file_path}"
+    )
+
+
+def compute_filename_for_asset(session, asset_id: str) -> str | None:
+    """Compute the relative filename for an asset from its best live cache state path."""
+    from app.assets.database.queries import list_cache_states_by_asset_id
+    from app.assets.helpers import select_best_live_path
+
+    primary_path = select_best_live_path(
+        list_cache_states_by_asset_id(session, asset_id=asset_id)
+    )
+    return compute_relative_filename(primary_path) if primary_path else None
 
 
 def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
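
Note: this new `compute_filename_for_asset` is the single home for logic that previously lived as a private `_compute_filename_for_asset` in two service modules (both removed in the hunks above); the function-local imports presumably sidestep a circular import between path_utils and the query layer. A hypothetical call site — `session` and `asset_id` are placeholders, and the metadata update mirrors what the ingest/update paths do:

```python
from app.assets.services.path_utils import compute_filename_for_asset

# `session` is an open SQLAlchemy session, `asset_id` an existing asset's id.
filename = compute_filename_for_asset(session, asset_id)
if filename is not None:
    user_metadata["filename"] = filename
```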
@@ -156,5 +178,7 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
     """
     root_category, some_path = get_asset_category_and_relative_path(file_path)
     p = Path(some_path)
-    parent_parts = [part for part in p.parent.parts if part not in (".", "..", p.anchor)]
+    parent_parts = [
+        part for part in p.parent.parts if part not in (".", "..", p.anchor)
+    ]
     return p.name, list(dict.fromkeys(normalize_tags([root_category, *parent_parts])))
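
Note: combined with `get_asset_category_and_relative_path`, the result shape is: the file's base name, plus tags built from the root category and each parent directory, normalized and de-duplicated in order. A hypothetical example — the actual input directory depends on folder_paths configuration, and lowercasing assumes that is what `normalize_tags` does:

```python
# Suppose the input directory is /comfy/input and the file lives at
# /comfy/input/characters/Hero/pose.png. Then:
name, tags = get_name_and_tags_from_asset_path("/comfy/input/characters/Hero/pose.png")
# name == "pose.png"
# tags == ["input", "characters", "hero"]
```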