mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-03-06 01:37:45 +08:00
- Add deleted_at column to AssetReference model and migration
- soft_delete_reference_by_id sets deleted_at instead of removing rows
- DELETE /api/assets/{id} defaults to soft-delete; delete_content=true
for hard-delete
- Add deleted_at IS NULL filters to read queries, tag queries, and
scanner queries so soft-deleted refs are invisible
- restore_references_by_paths skips soft-deleted refs
- upsert_reference clears deleted_at on explicit re-ingest
- Add tests for soft-delete API behavior, scanner persistence, bulk
insert, enrichment exclusion, and seed asset garbage collection
Amp-Thread-ID: https://ampcode.com/threads/T-019cb6fc-c05c-761f-b855-6d5d1c9defa2
Co-authored-by: Amp <amp@ampcode.com>
483 lines
16 KiB
Python
483 lines
16 KiB
Python
"""Tests for sync_references_with_filesystem in scanner.py."""
|
|
|
|
import os
|
|
import tempfile
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.assets.database.models import (
|
|
Asset,
|
|
AssetReference,
|
|
AssetReferenceTag,
|
|
Base,
|
|
Tag,
|
|
)
|
|
from app.assets.database.queries.asset_reference import (
|
|
bulk_insert_references_ignore_conflicts,
|
|
get_references_for_prefixes,
|
|
get_unenriched_references,
|
|
restore_references_by_paths,
|
|
)
|
|
from app.assets.scanner import sync_references_with_filesystem
|
|
from app.assets.services.file_utils import get_mtime_ns
|
|
|
|
|
|
@pytest.fixture
def db_engine():
    """Provide a fresh in-memory SQLite engine with all model tables created.

    The engine is yielded rather than returned so that it can be disposed
    once the requesting test finishes; otherwise its pooled connections
    would stay open until garbage collection.
    """
    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    try:
        yield engine
    finally:
        # Release the connection pool owned by this per-test engine.
        engine.dispose()
|
|
|
|
|
|
@pytest.fixture
def session(db_engine):
    """Yield an ORM session bound to ``db_engine``; it is closed on teardown."""
    with Session(db_engine) as db_session:
        yield db_session
|
|
|
|
|
|
@pytest.fixture
def temp_dir():
    """Yield a temporary directory as a ``Path``; it is removed on teardown."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield Path(scratch_dir)
|
|
|
|
|
|
def _create_file(temp_dir: Path, name: str, content: bytes = b"\x00" * 100) -> str:
    """Write ``content`` to ``temp_dir / name`` and return its absolute path.

    Missing parent directories are created first. The returned path comes
    from ``os.path.abspath``, so symlinks are not resolved.
    """
    target = temp_dir / name
    # ``name`` may contain sub-directories, so make sure they exist.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(content)
    absolute = os.path.abspath(str(target))
    return absolute
|
|
|
|
|
|
def _stat_mtime_ns(path: str) -> int:
    """Return ``get_mtime_ns`` applied to the ``os.stat`` result for ``path``.

    Symlinks are followed when the stat snapshot is taken.
    """
    stat_result = os.stat(path, follow_symlinks=True)
    return get_mtime_ns(stat_result)
|
|
|
|
|
|
def _make_asset(
    session: Session,
    asset_id: str,
    file_path: str,
    ref_id: str,
    *,
    asset_hash: str | None = None,
    size_bytes: int = 100,
    mtime_ns: int | None = None,
    needs_verify: bool = False,
    is_missing: bool = False,
) -> tuple[Asset, AssetReference]:
    """Insert an AssetReference (and its Asset if absent) and flush.

    When an Asset with ``asset_id`` already exists it is reused as-is, so
    ``asset_hash`` and ``size_bytes`` only apply to a newly created Asset.
    The reference is always created with owner ``"system"`` and the name
    ``test-<ref_id>``.

    Returns:
        The ``(asset, reference)`` pair.
    """
    existing = session.get(Asset, asset_id)
    if existing is not None:
        asset = existing
    else:
        asset = Asset(id=asset_id, hash=asset_hash, size_bytes=size_bytes)
        session.add(asset)
        session.flush()

    reference = AssetReference(
        id=ref_id,
        asset_id=asset_id,
        name=f"test-{ref_id}",
        owner_id="system",
        file_path=file_path,
        mtime_ns=mtime_ns,
        needs_verify=needs_verify,
        is_missing=is_missing,
    )
    session.add(reference)
    session.flush()
    return asset, reference
|
|
|
|
|
|
def _ensure_missing_tag(session: Session):
    """Create the 'missing' system tag unless it is already present."""
    if session.get(Tag, "missing"):
        # Already there; nothing to insert.
        return
    missing_tag = Tag(name="missing", tag_type="system")
    session.add(missing_tag)
    session.flush()
|
|
|
|
|
|
class _VerifyCase:
    """One parametrized scenario for ``test_needs_verify_toggling``.

    Holds the scenario label, the state of the reference before the sync
    and the ``needs_verify`` value expected afterwards.
    """

    # ``id`` shadows the builtin, but callers pass it by keyword, so the
    # parameter name is kept for compatibility.
    def __init__(self, id, stat_unchanged, needs_verify_before, expect_needs_verify):
        # Label used as the pytest parametrize id.
        self.id = id
        # True when the stored mtime should equal the file's real mtime.
        self.stat_unchanged = stat_unchanged
        # Value of needs_verify stored on the reference before the sync.
        self.needs_verify_before = needs_verify_before
        # Value of needs_verify expected after the sync.
        self.expect_needs_verify = expect_needs_verify

    def __repr__(self):
        """Return a readable form so failing test output identifies the case."""
        return (
            f"{type(self).__name__}("
            f"id={self.id!r}, "
            f"stat_unchanged={self.stat_unchanged!r}, "
            f"needs_verify_before={self.needs_verify_before!r}, "
            f"expect_needs_verify={self.expect_needs_verify!r})"
        )
|
|
|
|
|
|
# Scenario table for test_needs_verify_toggling.
# Columns: id, stat_unchanged, needs_verify_before, expect_needs_verify.
VERIFY_CASES = [
    _VerifyCase(
        id=label,
        stat_unchanged=unchanged,
        needs_verify_before=before,
        expect_needs_verify=expected,
    )
    for label, unchanged, before, expected in (
        ("unchanged_clears_verify", True, True, False),
        ("unchanged_keeps_clear", True, False, False),
        ("changed_sets_verify", False, False, True),
        ("changed_keeps_verify", False, True, True),
    )
]
|
|
|
|
|
|
@pytest.mark.parametrize("case", VERIFY_CASES, ids=lambda c: c.id)
def test_needs_verify_toggling(session, temp_dir, case):
    """needs_verify is set or cleared based on whether mtime+size still match."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)

    # Storing an mtime that is off by one makes the reference look changed.
    stored_mtime = disk_mtime + 1 if not case.stat_unchanged else disk_mtime
    _make_asset(
        session,
        "a1",
        path,
        "r1",
        asset_hash="blake3:abc",
        mtime_ns=stored_mtime,
        needs_verify=case.needs_verify_before,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    # Expire cached ORM state so the assertion reads the persisted value.
    session.expire_all()
    synced_ref = session.get(AssetReference, "r1")
    assert synced_ref.needs_verify is case.expect_needs_verify
|
|
|
|
|
|
class _MissingCase:
    """One parametrized scenario for ``test_is_missing_flag``.

    Holds the scenario label, whether the file is created on disk and the
    ``is_missing`` value expected after the sync.
    """

    # ``id`` shadows the builtin, but callers pass it by keyword, so the
    # parameter name is kept for compatibility.
    def __init__(self, id, file_exists, expect_is_missing):
        # Label used as the pytest parametrize id.
        self.id = id
        # True when the test should create the referenced file on disk.
        self.file_exists = file_exists
        # Value of is_missing expected after the sync.
        self.expect_is_missing = expect_is_missing

    def __repr__(self):
        """Return a readable form so failing test output identifies the case."""
        return (
            f"{type(self).__name__}("
            f"id={self.id!r}, "
            f"file_exists={self.file_exists!r}, "
            f"expect_is_missing={self.expect_is_missing!r})"
        )
|
|
|
|
|
|
# Scenario table for test_is_missing_flag.
# Columns: id, file_exists, expect_is_missing.
MISSING_CASES = [
    _MissingCase(id=label, file_exists=on_disk, expect_is_missing=expected)
    for label, on_disk, expected in (
        ("existing_file_not_missing", True, False),
        ("missing_file_marked_missing", False, True),
    )
]
|
|
|
|
|
|
@pytest.mark.parametrize("case", MISSING_CASES, ids=lambda c: c.id)
def test_is_missing_flag(session, temp_dir, case):
    """is_missing mirrors whether the referenced file is present on disk."""
    if not case.file_exists:
        # The file is never created; the stored mtime is just a placeholder.
        path = str(temp_dir / "gone.bin")
        stored_mtime = 999
    else:
        path = _create_file(temp_dir, "model.bin")
        stored_mtime = _stat_mtime_ns(path)

    _make_asset(
        session, "a1", path, "r1",
        asset_hash="blake3:abc",
        mtime_ns=stored_mtime,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    session.expire_all()
    synced_ref = session.get(AssetReference, "r1")
    assert synced_ref.is_missing is case.expect_is_missing
|
|
|
|
|
|
def test_seed_asset_all_missing_deletes_asset(session, temp_dir):
    """A seed (hash-less) asset whose refs are all missing is deleted entirely."""
    absent_path = str(temp_dir / "gone.bin")  # never created on disk
    _make_asset(
        session, "seed1", absent_path, "r1",
        asset_hash=None,
        mtime_ns=999,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    # Both the asset row and its only reference must be gone.
    assert session.get(Asset, "seed1") is None
    assert session.get(AssetReference, "r1") is None
|
|
|
|
|
|
def test_seed_asset_some_exist_returns_survivors(session, temp_dir):
    """A seed asset with an existing ref is kept and its path is returned."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "seed1", path, "r1",
        asset_hash=None,
        mtime_ns=disk_mtime,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        survivors = sync_references_with_filesystem(
            session,
            "models",
            collect_existing_paths=True,
        )
        session.commit()

    assert session.get(Asset, "seed1") is not None
    expected_path = os.path.abspath(path)
    assert expected_path in survivors
|
|
|
|
|
|
def test_hashed_asset_prunes_missing_refs_when_one_is_ok(session, temp_dir):
    """A hashed asset with one stat-unchanged ref has its missing refs deleted."""
    present_path = _create_file(temp_dir, "good.bin")
    absent_path = str(temp_dir / "gone.bin")
    present_mtime = _stat_mtime_ns(present_path)

    _make_asset(
        session, "h1", present_path, "r_ok",
        asset_hash="blake3:aaa",
        mtime_ns=present_mtime,
    )
    # Attach a second reference to the same asset whose file does not exist.
    session.add(
        AssetReference(
            id="r_gone",
            asset_id="h1",
            name="gone",
            owner_id="system",
            file_path=absent_path,
            mtime_ns=999,
        )
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    session.expire_all()
    assert session.get(AssetReference, "r_ok") is not None
    assert session.get(AssetReference, "r_gone") is None
|
|
|
|
|
|
def test_hashed_asset_all_missing_keeps_refs(session, temp_dir):
    """A hashed asset whose refs are all missing keeps them (no pruning)."""
    absent_path = str(temp_dir / "gone.bin")
    _make_asset(
        session, "h1", absent_path, "r1",
        asset_hash="blake3:aaa",
        mtime_ns=999,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    session.expire_all()
    kept_ref = session.get(AssetReference, "r1")
    # The row survives the sync but is flagged as missing.
    assert kept_ref is not None
    assert kept_ref.is_missing is True
|
|
|
|
|
|
def test_missing_tag_added_when_all_refs_gone(session, temp_dir):
    """The 'missing' tag is added to a hashed asset's ref when all refs are missing."""
    _ensure_missing_tag(session)
    absent_path = str(temp_dir / "gone.bin")
    _make_asset(
        session, "h1", absent_path, "r1",
        asset_hash="blake3:aaa",
        mtime_ns=999,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(
            session,
            "models",
            update_missing_tags=True,
        )
        session.commit()

    session.expire_all()
    # The tag link is looked up by its (reference id, tag name) key.
    missing_link = session.get(AssetReferenceTag, ("r1", "missing"))
    assert missing_link is not None
|
|
|
|
|
|
def test_missing_tag_removed_when_ref_ok(session, temp_dir):
    """A stale 'missing' tag is removed once a hashed asset's ref is stat-unchanged."""
    _ensure_missing_tag(session)
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "h1", path, "r1",
        asset_hash="blake3:aaa",
        mtime_ns=disk_mtime,
    )
    # Seed a leftover 'missing' tag that the sync is expected to clear.
    stale_link = AssetReferenceTag(
        asset_reference_id="r1",
        tag_name="missing",
        origin="automatic",
    )
    session.add(stale_link)
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(
            session,
            "models",
            update_missing_tags=True,
        )
        session.commit()

    session.expire_all()
    missing_link = session.get(AssetReferenceTag, ("r1", "missing"))
    assert missing_link is None
|
|
|
|
|
|
def test_missing_tags_not_touched_when_flag_false(session, temp_dir):
    """With update_missing_tags=False the sync leaves 'missing' tags alone."""
    _ensure_missing_tag(session)
    absent_path = str(temp_dir / "gone.bin")
    _make_asset(
        session, "h1", absent_path, "r1",
        asset_hash="blake3:aaa",
        mtime_ns=999,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(
            session,
            "models",
            update_missing_tags=False,
        )
        session.commit()

    missing_link = session.get(AssetReferenceTag, ("r1", "missing"))
    # No tag existed beforehand, so none must exist now.
    assert missing_link is None
|
|
|
|
|
|
def test_returns_none_when_collect_false(session, temp_dir):
    """The sync returns None when collect_existing_paths=False."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "a1", path, "r1",
        asset_hash="blake3:abc",
        mtime_ns=disk_mtime,
    )
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        outcome = sync_references_with_filesystem(
            session,
            "models",
            collect_existing_paths=False,
        )

    assert outcome is None
|
|
|
|
|
|
def test_returns_empty_set_for_no_prefixes(session):
    """With no prefixes for the root, collecting paths yields an empty set."""
    no_prefixes = []
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=no_prefixes):
        outcome = sync_references_with_filesystem(
            session,
            "models",
            collect_existing_paths=True,
        )

    assert outcome == set()
|
|
|
|
|
|
def test_no_references_is_noop(session, temp_dir):
    """With no references stored, the sync runs cleanly and reports no paths."""
    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        survivors = sync_references_with_filesystem(
            session,
            "models",
            collect_existing_paths=True,
        )
        session.commit()

    assert survivors == set()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Soft-delete persistence across scanner operations
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _soft_delete_ref(session: Session, ref_id: str) -> None:
    """Soft-delete a reference by stamping ``deleted_at`` (mimics API DELETE)."""
    target = session.get(AssetReference, ref_id)
    # Any fixed timestamp will do; the tests only check that it is set.
    deletion_time = datetime(2025, 1, 1)
    target.deleted_at = deletion_time
    session.flush()
|
|
|
|
|
|
def test_soft_deleted_ref_excluded_from_get_references_for_prefixes(session, temp_dir):
    """get_references_for_prefixes returns no rows for a soft-deleted reference."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "a1", path, "r1",
        asset_hash="blake3:abc",
        mtime_ns=disk_mtime,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    found = get_references_for_prefixes(
        session,
        [str(temp_dir)],
        include_missing=True,
    )
    assert len(found) == 0
|
|
|
|
|
|
def test_sync_does_not_resurrect_soft_deleted_ref(session, temp_dir):
    """A soft-deleted ref stays deleted after a sync, even if its file exists."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "a1", path, "r1",
        asset_hash="blake3:abc",
        mtime_ns=disk_mtime,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    session.expire_all()
    deleted_ref = session.get(AssetReference, "r1")
    assert deleted_ref.deleted_at is not None, "soft-deleted ref must stay deleted after sync"
|
|
|
|
|
|
def test_bulk_insert_does_not_overwrite_soft_deleted_ref(session, temp_dir):
    """bulk_insert_references_ignore_conflicts leaves a soft-deleted row in place."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "a1", path, "r1",
        asset_hash="blake3:abc",
        mtime_ns=disk_mtime,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    timestamp = datetime.now(tz=None)
    # A new row that targets the same file_path as the soft-deleted one.
    competing_row = {
        "id": "r_new",
        "asset_id": "a1",
        "file_path": path,
        "name": "model.bin",
        "owner_id": "",
        "mtime_ns": disk_mtime,
        "preview_id": None,
        "user_metadata": None,
        "created_at": timestamp,
        "updated_at": timestamp,
        "last_access_time": timestamp,
    }
    bulk_insert_references_ignore_conflicts(session, [competing_row])
    session.commit()

    session.expire_all()
    # The original row is still present and still soft-deleted.
    original_ref = session.get(AssetReference, "r1")
    assert original_ref is not None
    assert original_ref.deleted_at is not None
    # The new row was not inserted (conflict on file_path).
    assert session.get(AssetReference, "r_new") is None
|
|
|
|
|
|
def test_restore_references_by_paths_skips_soft_deleted(session, temp_dir):
    """restore_references_by_paths leaves is_missing set on a soft-deleted ref."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session,
        "a1",
        path,
        "r1",
        asset_hash="blake3:abc",
        mtime_ns=disk_mtime,
        is_missing=True,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    restored_count = restore_references_by_paths(session, [path])
    session.commit()

    assert restored_count == 0

    session.expire_all()
    untouched_ref = session.get(AssetReference, "r1")
    assert untouched_ref.is_missing is True, "is_missing must not be cleared on soft-deleted ref"
    assert untouched_ref.deleted_at is not None
|
|
|
|
|
|
def test_get_unenriched_references_excludes_soft_deleted(session, temp_dir):
    """get_unenriched_references returns no rows for a soft-deleted reference."""
    path = _create_file(temp_dir, "model.bin")
    disk_mtime = _stat_mtime_ns(path)
    _make_asset(
        session, "a1", path, "r1",
        asset_hash="blake3:abc",
        mtime_ns=disk_mtime,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    candidates = get_unenriched_references(
        session,
        [str(temp_dir)],
        max_level=2,
    )
    assert len(candidates) == 0
|
|
|
|
|
|
def test_sync_ignores_soft_deleted_seed_asset(session, temp_dir):
    """A soft-deleted seed ref is not garbage-collected even if its file is missing."""
    absent_path = str(temp_dir / "gone.bin")  # intentionally never created
    _make_asset(
        session, "seed1", absent_path, "r1",
        asset_hash=None,
        mtime_ns=999,
    )
    _soft_delete_ref(session, "r1")
    session.commit()

    scan_prefixes = [str(temp_dir)]
    with patch("app.assets.scanner.get_prefixes_for_root", return_value=scan_prefixes):
        sync_references_with_filesystem(session, "models")
        session.commit()

    session.expire_all()
    # Both rows must survive: the sync is expected to skip soft-deleted refs.
    surviving_asset = session.get(Asset, "seed1")
    surviving_ref = session.get(AssetReference, "r1")
    assert surviving_asset is not None
    assert surviving_ref is not None
|