Mirror of https://github.com/comfyanonymous/ComfyUI.git
Add optional blake3 hashing during asset scanning
- Make blake3 import lazy in hashing.py (only imported when needed)
- Add compute_hashes parameter to AssetSeeder.start(), build_asset_specs(), and seed_assets()
- Fix missing-tag clearing: include is_missing states in sync when update_missing_tags=True
- Clear the is_missing flag on cache states when files are restored with matching mtime/size
- Fix validation error serialization in routes.py (use json.loads(ve.json()))

Amp-Thread-ID: https://ampcode.com/threads/T-019c3614-56d4-74a8-a717-19922d6dbbee
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
  parent 7a75af59ab
  commit a2d26dece5
@@ -79,7 +79,9 @@ def _build_error_response(


 def _build_validation_error_response(code: str, ve: ValidationError) -> web.Response:
-    return _build_error_response(400, code, "Validation failed.", {"errors": ve.json()})
+    import json
+    errors = json.loads(ve.json())
+    return _build_error_response(400, code, "Validation failed.", {"errors": errors})


 def _validate_sort_field(requested: str | None) -> str:
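
Note: this hunk (and the get_tags hunk below) fix the same double-encoding bug in routes.py: pydantic's ValidationError.json() returns a JSON *string*, so dropping it straight into a response dict serializes it a second time. A minimal sketch of the difference, assuming pydantic v2:

    import json
    from pydantic import BaseModel, ValidationError

    class Query(BaseModel):
        limit: int

    try:
        Query.model_validate({"limit": "oops"})
    except ValidationError as ve:
        raw = ve.json()  # a str, e.g. '[{"type": "int_parsing", ...}]'
        json.dumps({"errors": raw})             # escaped string leaks to the client
        json.dumps({"errors": json.loads(raw)}) # proper JSON array of error objects
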
@@ -123,11 +125,8 @@ async def list_assets_route(request: web.Request) -> web.Response:
         return _build_validation_error_response("INVALID_QUERY", ve)

     sort = _validate_sort_field(q.sort)
-    order = (
-        "desc"
-        if (q.order or "desc").lower() not in {"asc", "desc"}
-        else q.order.lower()
-    )
+    order_candidate = (q.order or "desc").lower()
+    order = order_candidate if order_candidate in {"asc", "desc"} else "desc"

     result = list_assets_page(
         owner_id=USER_MANAGER.get_request_user_id(request),
@@ -233,7 +232,7 @@ async def download_asset_content(request: web.Request) -> web.Response:
     )

     quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
-    cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{urllib.parse.quote(filename)}"
+    cd = f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{urllib.parse.quote(quoted)}"

     file_size = os.path.getsize(abs_path)
     logging.info(
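
Note: the one-character change above closes a small gap: `quoted` already strips CR/LF (header injection) and rewrites double quotes, but the RFC 5987 `filename*` parameter was still built from the raw `filename`. A sketch of the sanitization path, using only the standard library:

    import urllib.parse

    def content_disposition(disposition: str, filename: str) -> str:
        # Strip CR/LF so a crafted filename cannot inject extra response headers.
        quoted = (filename or "").replace("\r", "").replace("\n", "").replace('"', "'")
        return f"{disposition}; filename=\"{quoted}\"; filename*=UTF-8''{urllib.parse.quote(quoted)}"

    content_disposition("attachment", 'evil"\r\nX-Injected: 1.png')
    # -> attachment; filename="evil'X-Injected: 1.png"; filename*=UTF-8''evil%27X-Injected%3A%201.png
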
@@ -490,15 +489,9 @@ async def get_tags(request: web.Request) -> web.Response:
     try:
         query = schemas_in.TagsListQuery.model_validate(query_map)
     except ValidationError as e:
-        return web.json_response(
-            {
-                "error": {
-                    "code": "INVALID_QUERY",
-                    "message": "Invalid query parameters",
-                    "details": e.errors(),
-                }
-            },
-            status=400,
-        )
+        import json
+        return _build_error_response(
+            400, "INVALID_QUERY", "Invalid query parameters", {"errors": json.loads(e.json())}
+        )

     rows, total = list_tags(
@@ -28,6 +28,7 @@ class AssetValidationError(Exception):
     def __init__(self, code: str, message: str):
         super().__init__(message)
         self.code = code
+        self.message = message


 class AssetNotFoundError(Exception):
@@ -35,12 +36,15 @@ class AssetNotFoundError(Exception):

     def __init__(self, message: str):
         super().__init__(message)
+        self.message = message


 class HashMismatchError(Exception):
     """Uploaded file hash does not match provided hash."""

-    pass
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.message = message


 class DependencyMissingError(Exception):
@@ -1,3 +1,4 @@
+import logging
 import os
 import uuid
 from typing import Callable
@@ -83,7 +84,10 @@ async def parse_multipart_upload(
                 provided_hash = normalize_and_validate_hash(s)
                 try:
                     provided_hash_exists = check_hash_exists(provided_hash)
-                except Exception:
+                except Exception as e:
+                    logging.warning(
+                        "check_hash_exists failed for hash=%s: %s", provided_hash, e
+                    )
                     provided_hash_exists = None  # do not fail the whole request here

         elif fname == "file":
@@ -162,5 +166,5 @@ def delete_temp_file_if_exists(tmp_path: str | None) -> None:
     try:
         if os.path.exists(tmp_path):
             os.remove(tmp_path)
-    except Exception:
-        pass
+    except OSError as e:
+        logging.debug("Failed to delete temp file %s: %s", tmp_path, e)
@@ -85,6 +85,6 @@ def bulk_insert_assets(
     """Bulk insert Asset rows. Each dict should have: id, hash, size_bytes, mime_type, created_at."""
     if not rows:
         return
-    ins = sqlite.insert(Asset)
+    ins = sqlite.insert(Asset).on_conflict_do_nothing(index_elements=[Asset.hash])
    for chunk in iter_chunks(rows, calculate_rows_per_statement(5)):
        session.execute(ins, chunk)
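
Note: on_conflict_do_nothing(index_elements=[Asset.hash]) makes the SQLite dialect emit INSERT ... ON CONFLICT (hash) DO NOTHING, so re-seeding a file whose hash is already recorded becomes a silent no-op instead of raising IntegrityError. A self-contained sketch with assumed table and column names:

    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.dialects import sqlite
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()

    class Asset(Base):
        __tablename__ = "assets"
        id = Column(Integer, primary_key=True)
        hash = Column(String, unique=True)  # conflict target needs a unique index

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        ins = sqlite.insert(Asset).on_conflict_do_nothing(index_elements=[Asset.hash])
        session.execute(ins, [{"id": 1, "hash": "blake3:abc"}])
        session.execute(ins, [{"id": 2, "hash": "blake3:abc"}])  # conflict: skipped
        session.commit()
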
@@ -23,8 +23,8 @@ def iter_chunks(seq, n: int):
 def iter_row_chunks(rows: list[dict], cols_per_row: int) -> Iterable[list[dict]]:
     """Yield chunks of rows sized to fit within bind param limits."""
     if not rows:
-        return []
-    rows_per_stmt = max(1, MAX_BIND_PARAMS // max(1, cols_per_row))
+        return
+    rows_per_stmt = calculate_rows_per_statement(cols_per_row)
     for i in range(0, len(rows), rows_per_stmt):
         yield rows[i : i + rows_per_stmt]

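
Note: the `return []` fix is about generator semantics. Because iter_row_chunks contains a `yield`, any `return` value is stashed on StopIteration and never seen by ordinary iteration, so `return []` only suggested, wrongly, that a list was produced. Tiny illustration:

    def chunks_empty():
        return []  # value rides on StopIteration; plain iteration never sees it
        yield      # unreachable, but its presence makes this a generator

    list(chunks_empty())  # [] either way; a bare "return" states the intent honestly
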
@@ -25,6 +25,7 @@ from app.assets.services.file_utils import (
     list_files_recursively,
     verify_file_unchanged,
 )
+from app.assets.services.hashing import compute_blake3_hash
 from app.assets.services.metadata_extract import extract_file_metadata
 from app.assets.services.path_utils import (
     compute_relative_filename,
@@ -84,7 +85,7 @@ def collect_models_files() -> list[str]:
         allowed = False
         for b in bases:
             base_abs = os.path.abspath(b)
-            with contextlib.suppress(Exception):
+            with contextlib.suppress(ValueError):
                 if os.path.commonpath([abs_path, base_abs]) == base_abs:
                     allowed = True
                     break
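
Note: narrowing contextlib.suppress(Exception) to ValueError is deliberate: os.path.commonpath raises ValueError when the inputs mix absolute and relative paths (or, on Windows, sit on different drives), and that is the only expected failure here. Anything else now surfaces instead of being swallowed:

    import contextlib
    import os

    with contextlib.suppress(ValueError):
        os.path.commonpath(["/abs/base", "relative/path"])  # ValueError, suppressed
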
@@ -120,7 +121,9 @@ def sync_cache_states_with_filesystem(
     if not prefixes:
         return set() if collect_existing_paths else None

-    rows = get_cache_states_for_prefixes(session, prefixes)
+    rows = get_cache_states_for_prefixes(
+        session, prefixes, include_missing=update_missing_tags
+    )

     by_asset: dict[str, _AssetAccumulator] = {}
     for row in rows:
@@ -139,8 +142,12 @@ def sync_cache_states_with_filesystem(
             )
         except FileNotFoundError:
             exists = False
-        except OSError:
+        except PermissionError:
+            exists = True
+            logging.debug("Permission denied accessing %s", row.file_path)
+        except OSError as e:
             exists = False
+            logging.debug("OSError checking %s: %s", row.file_path, e)

         acc["states"].append(
             {
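
Note: the except order matters here: PermissionError is a subclass of OSError, so it must appear before the general OSError clause to be reachable. The behavior change is that a file we cannot stat due to permissions is treated as present rather than missing, so the asset is not falsely tagged. Shape of the handler, with a hypothetical path:

    import os

    try:
        os.stat("/models/protected.safetensors")  # hypothetical path
        exists = True
    except FileNotFoundError:
        exists = False
    except PermissionError:
        exists = True   # present but unreadable: do not mark missing
    except OSError:
        exists = False  # other I/O failures count as missing
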
@@ -156,6 +163,7 @@ def sync_cache_states_with_filesystem(
     to_clear_verify: list[int] = []
     stale_state_ids: list[int] = []
     to_mark_missing: list[int] = []
+    to_clear_missing: list[int] = []
     survivors: set[str] = set()

     for aid, acc in by_asset.items():
@@ -168,8 +176,10 @@ def sync_cache_states_with_filesystem(
             if not s["exists"]:
                 to_mark_missing.append(s["sid"])
                 continue
-            if s["fast_ok"] and s["needs_verify"]:
-                to_clear_verify.append(s["sid"])
+            if s["fast_ok"]:
+                to_clear_missing.append(s["sid"])
+                if s["needs_verify"]:
+                    to_clear_verify.append(s["sid"])
             if not s["fast_ok"] and not s["needs_verify"]:
                 to_set_verify.append(s["sid"])

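
Note: the restructured branch separates "file is intact" from "file needs re-verification", which is what lets restored files shed their is_missing flag. A compact restatement of the per-state outcomes, assuming the same dict shape used above:

    def classify(s: dict) -> list[str]:
        if not s["exists"]:
            return ["mark_missing"]
        actions = []
        if s["fast_ok"]:
            actions.append("clear_missing")          # restored file: drop is_missing
            if s["needs_verify"]:
                actions.append("clear_needs_verify")
        if not s["fast_ok"] and not s["needs_verify"]:
            actions.append("set_needs_verify")       # mtime/size changed: re-check content
        return actions
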
@@ -187,11 +197,15 @@ def sync_cache_states_with_filesystem(
             if not s["exists"]:
                 stale_state_ids.append(s["sid"])
                 if update_missing_tags:
-                    with contextlib.suppress(Exception):
+                    try:
                         remove_missing_tag_for_asset_id(session, asset_id=aid)
+                    except Exception as e:
+                        logging.warning("Failed to remove missing tag for asset %s: %s", aid, e)
             elif update_missing_tags:
-                with contextlib.suppress(Exception):
+                try:
                     add_missing_tag_for_asset_id(session, asset_id=aid, origin="automatic")
+                except Exception as e:
+                    logging.warning("Failed to add missing tag for asset %s: %s", aid, e)

         for s in states:
             if s["exists"]:
@@ -201,6 +215,7 @@ def sync_cache_states_with_filesystem(
     stale_set = set(stale_state_ids)
     to_mark_missing = [sid for sid in to_mark_missing if sid not in stale_set]
     bulk_update_is_missing(session, to_mark_missing, value=True)
+    bulk_update_is_missing(session, to_clear_missing, value=False)
     bulk_update_needs_verify(session, to_set_verify, value=True)
     bulk_update_needs_verify(session, to_clear_verify, value=False)

@@ -258,6 +273,7 @@ def build_asset_specs(
     paths: list[str],
     existing_paths: set[str],
     enable_metadata_extraction: bool = True,
+    compute_hashes: bool = False,
 ) -> tuple[list[SeedAssetSpec], set[str], int]:
     """Build asset specs from paths, returning (specs, tag_pool, skipped_count).

@@ -265,6 +281,7 @@ def build_asset_specs(
         paths: List of file paths to process
         existing_paths: Set of paths that already exist in the database
         enable_metadata_extraction: If True, extract tier 1 & 2 metadata from files
+        compute_hashes: If True, compute blake3 hashes for each file (slow for large files)
     """
     specs: list[SeedAssetSpec] = []
     tag_pool: set[str] = set()
@@ -294,6 +311,15 @@ def build_asset_specs(
             relative_filename=rel_fname,
         )

+        # Compute hash if requested
+        asset_hash: str | None = None
+        if compute_hashes:
+            try:
+                digest = compute_blake3_hash(abs_p)
+                asset_hash = "blake3:" + digest
+            except Exception as e:
+                logging.warning("Failed to hash %s: %s", abs_p, e)
+
         specs.append(
             {
                 "abs_path": abs_p,
@@ -303,6 +329,7 @@ def build_asset_specs(
                 "tags": tags,
                 "fname": rel_fname,
                 "metadata": metadata,
+                "hash": asset_hash,
             }
         )
         tag_pool.update(tags)
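
Note: hashing failures are deliberately non-fatal during a scan: a file that vanishes or becomes unreadable mid-scan is still seeded, just with hash left as None, and successful digests are namespaced with a "blake3:" prefix so the algorithm is recorded alongside the value. Isolated sketch, hypothetical path, names from this diff:

    asset_hash: str | None = None
    try:
        asset_hash = "blake3:" + compute_blake3_hash("/models/sd15.ckpt")
    except Exception as e:
        print("Failed to hash:", e)  # seeding continues; the spec gets hash=None
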
@@ -322,9 +349,18 @@ def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
     return result.inserted_infos


-def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None:
+def seed_assets(
+    roots: tuple[RootType, ...],
+    enable_logging: bool = False,
+    compute_hashes: bool = False,
+) -> None:
     """Scan the given roots and seed the assets into the database.

+    Args:
+        roots: Tuple of root types to scan (models, input, output)
+        enable_logging: If True, log progress and completion messages
+        compute_hashes: If True, compute blake3 hashes for each file (slow for large files)
+
     Note: This function does not mark missing assets. Call mark_missing_outside_prefixes_safely
     separately if cleanup is needed.
     """
@@ -340,7 +376,9 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None:
         existing_paths.update(sync_root_safely(r))

     paths = collect_paths_for_roots(roots)
-    specs, tag_pool, skipped_existing = build_asset_specs(paths, existing_paths)
+    specs, tag_pool, skipped_existing = build_asset_specs(
+        paths, existing_paths, compute_hashes=compute_hashes
+    )
     created = insert_asset_specs(specs, tag_pool)

     if enable_logging:
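
Note: with these signatures, hashing stays opt-in at both entry points; a blake3 pass over multi-gigabyte model files can dominate scan time. Hypothetical call sites, using only names introduced in this diff:

    # One-shot seeding with hashes:
    seed_assets(("models",), enable_logging=True, compute_hashes=True)

    # Background scan via the seeder (see the AssetSeeder hunks below):
    seeder = AssetSeeder()
    seeder.start(roots=("models", "input"), compute_hashes=True)
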
@@ -82,6 +82,7 @@ class AssetSeeder:
         self._thread: threading.Thread | None = None
         self._cancel_event = threading.Event()
         self._roots: tuple[RootType, ...] = ()
+        self._compute_hashes: bool = False
         self._progress_callback: ProgressCallback | None = None

     def start(
@@ -89,6 +90,7 @@ class AssetSeeder:
         roots: tuple[RootType, ...] = ("models", "input", "output"),
         progress_callback: ProgressCallback | None = None,
         prune_first: bool = False,
+        compute_hashes: bool = False,
     ) -> bool:
         """Start a background scan for the given roots.

@@ -96,6 +98,7 @@ class AssetSeeder:
             roots: Tuple of root types to scan (models, input, output)
             progress_callback: Optional callback called with progress updates
             prune_first: If True, prune orphaned assets before scanning
+            compute_hashes: If True, compute blake3 hashes for each file (slow for large files)

         Returns:
             True if scan was started, False if already running
@@ -108,6 +111,7 @@ class AssetSeeder:
             self._errors = []
             self._roots = roots
             self._prune_first = prune_first
+            self._compute_hashes = compute_hashes
             self._progress_callback = progress_callback
             self._cancel_event.clear()
             self._thread = threading.Thread(
@@ -237,6 +241,9 @@ class AssetSeeder:
         skipped: int | None = None,
     ) -> None:
         """Update progress counters (thread-safe)."""
+        callback: ProgressCallback | None = None
+        progress: Progress | None = None
+
         with self._lock:
             if self._progress is None:
                 return
@@ -249,17 +256,19 @@ class AssetSeeder:
             if skipped is not None:
                 self._progress.skipped = skipped
             if self._progress_callback:
-                try:
-                    self._progress_callback(
-                        Progress(
-                            scanned=self._progress.scanned,
-                            total=self._progress.total,
-                            created=self._progress.created,
-                            skipped=self._progress.skipped,
-                        )
-                    )
-                except Exception:
-                    pass
+                callback = self._progress_callback
+                progress = Progress(
+                    scanned=self._progress.scanned,
+                    total=self._progress.total,
+                    created=self._progress.created,
+                    skipped=self._progress.skipped,
+                )
+
+        if callback and progress:
+            try:
+                callback(progress)
+            except Exception:
+                pass

     def _add_error(self, message: str) -> None:
         """Add an error message (thread-safe)."""
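
Note: besides the cleanup, this rewrite moves the callback invocation out of the locked region: the Progress snapshot is built while holding self._lock, but the user-supplied callback runs after the with block, so a slow or re-entrant callback can no longer stall other threads updating progress. The pattern in miniature:

    import threading

    lock = threading.Lock()
    state = {"scanned": 0}

    def update(callback):
        with lock:
            state["scanned"] += 1
            snapshot = dict(state)   # copy taken under the lock
        if callback:
            try:
                callback(snapshot)   # invoked without the lock held
            except Exception:
                pass                 # a misbehaving callback must not kill the scan
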
@@ -334,7 +343,9 @@ class AssetSeeder:
             {"roots": list(roots), "total": total_paths},
         )

-        specs, tag_pool, skipped_existing = build_asset_specs(paths, existing_paths)
+        specs, tag_pool, skipped_existing = build_asset_specs(
+            paths, existing_paths, compute_hashes=self._compute_hashes
+        )
         self._update_progress(skipped=skipped_existing)

         if self._is_cancelled():
@@ -272,7 +272,9 @@ def resolve_asset_for_download(
     states = list_cache_states_by_asset_id(session, asset_id=asset.id)
     abs_path = select_best_live_path(states)
     if not abs_path:
-        raise FileNotFoundError
+        raise FileNotFoundError(
+            f"No live path for AssetInfo {asset_info_id} (asset id={asset.id}, name={info.name})"
+        )

     update_asset_info_access_time(session, asset_info_id=asset_info_id)
     session.commit()
@@ -36,6 +36,7 @@ class SeedAssetSpec(TypedDict):
     tags: list[str]
     fname: str
     metadata: ExtractedMetadata | None
+    hash: str | None


 class AssetRow(TypedDict):
@@ -163,7 +164,7 @@ def batch_insert_seed_assets(
         asset_rows.append(
             {
                 "id": asset_id,
-                "hash": None,
+                "hash": spec.get("hash"),
                 "size_bytes": spec["size_bytes"],
                 "mime_type": None,
                 "created_at": current_time,
@@ -23,15 +23,16 @@ def verify_file_unchanged(

     Returns True if the file's mtime and size match the database values.
     Returns False if mtime_db is None or values don't match.

+    size_db=None means don't check size; 0 is a valid recorded size.
     """
     if mtime_db is None:
         return False
     actual_mtime_ns = get_mtime_ns(stat_result)
     if int(mtime_db) != int(actual_mtime_ns):
         return False
-    sz = int(size_db or 0)
-    if sz > 0:
-        return int(stat_result.st_size) == sz
+    if size_db is not None:
+        return int(stat_result.st_size) == int(size_db)
     return True

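
Note: the subtle bug here was `int(size_db or 0)`: a recorded size of 0 is falsy, so zero-byte files skipped the size comparison and always passed the fast check. Distinguishing None ("size never recorded") from 0 ("file really is empty") closes that:

    size_db = 0  # a genuinely empty file, recorded as such
    int(size_db or 0) > 0   # False: old code skipped the size comparison entirely
    size_db is not None     # True: new code compares st_size against 0 as intended
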
@@ -2,10 +2,23 @@ import asyncio
 import os
 from typing import IO

-from blake3 import blake3
-
 DEFAULT_CHUNK = 8 * 1024 * 1024

+_blake3 = None
+
+
+def _get_blake3():
+    global _blake3
+    if _blake3 is None:
+        try:
+            from blake3 import blake3 as _b3
+            _blake3 = _b3
+        except ImportError:
+            raise ImportError(
+                "blake3 is required for asset hashing. Install with: pip install blake3"
+            )
+    return _blake3
+
+
 def compute_blake3_hash(
     fp: str | IO[bytes],
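
Note: the lazy-import pattern above lets hashing.py be imported even when the optional blake3 wheel is absent; the ImportError is deferred until the first hash is actually requested, with an actionable message. Since `_get_blake3()` returns the blake3 class itself, call sites construct a hasher with a double call, `_get_blake3()()`, as in the next hunk. A sketch of a caller under those assumptions:

    def hash_path(path: str, chunk_size: int = 8 * 1024 * 1024) -> str:
        h = _get_blake3()()  # raises ImportError here, not at module import
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()
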
@@ -42,7 +55,7 @@ def _hash_file_obj(file_obj: IO, chunk_size: int = DEFAULT_CHUNK) -> str:
     if orig_pos != 0:
         file_obj.seek(0)

-    h = blake3()
+    h = _get_blake3()()
     while True:
         chunk = file_obj.read(chunk_size)
         if not chunk: