mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-01-31 00:30:21 +08:00
Add pruning of Assets not reachable through the current configs (#12168)
* Not sure about this one, but try removing assets from old sessions.
* Simplify _prune_orphaned_assets: merge functions, use list comprehensions
  Amp-Thread-ID: https://ampcode.com/threads/T-019c0917-0dc3-75ab-870d-a32b3fdc1927
  Co-authored-by: Amp <amp@ampcode.com>
* Refactor _prune_orphaned_assets for readability
  Amp-Thread-ID: https://ampcode.com/threads/T-019c0917-0dc3-75ab-870d-a32b3fdc1927
  Co-authored-by: Amp <amp@ampcode.com>
* Add unit tests for pruning
* Add unit tests for _prune_orphaned_assets
  Tests cover:
  - Orphaned seed assets pruned when file removed
  - Seed assets with valid files survive
  - Hashed assets not pruned even without file
  - Multi-root pruning
  - SQL LIKE escape handling for %, _, spaces
  Amp-Thread-ID: https://ampcode.com/threads/T-019c0c7a-5c8a-7548-b6c3-823e9829ce74
  Co-authored-by: Amp <amp@ampcode.com>
* Ruff fix
---------
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
parent
a999cbcfbc
commit
942b2a6526
@ -27,6 +27,7 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
|
||||
t_start = time.perf_counter()
|
||||
created = 0
|
||||
skipped_existing = 0
|
||||
orphans_pruned = 0
|
||||
paths: list[str] = []
|
||||
try:
|
||||
existing_paths: set[str] = set()
|
||||
@ -38,6 +39,11 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
|
||||
except Exception as e:
|
||||
logging.exception("fast DB scan failed for %s: %s", r, e)
|
||||
|
||||
try:
|
||||
orphans_pruned = _prune_orphaned_assets(roots)
|
||||
except Exception as e:
|
||||
logging.exception("orphan pruning failed: %s", e)
|
||||
|
||||
if "models" in roots:
|
||||
paths.extend(collect_models_files())
|
||||
if "input" in roots:
|
||||
@ -85,15 +91,43 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
|
||||
finally:
|
||||
if enable_logging:
|
||||
logging.info(
|
||||
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)",
|
||||
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)",
|
||||
roots,
|
||||
time.perf_counter() - t_start,
|
||||
created,
|
||||
skipped_existing,
|
||||
orphans_pruned,
|
||||
len(paths),
|
||||
)
|
||||
|
||||
|
||||
def _prune_orphaned_assets(roots: tuple[RootType, ...]) -> int:
    """Drop cache states outside the given roots, then remove orphaned seed assets.

    Everything happens in a single session with a single commit:

    1. Delete every ``AssetCacheState`` whose ``file_path`` is not located
       under any prefix that ``prefixes_for_root`` yields for ``roots``.
    2. Delete the ``AssetInfo`` rows, then the ``Asset`` rows, of assets that
       have no hash and no cache state left. Assets with a hash are never
       selected by that query.

    Args:
        roots: Roots whose path prefixes count as valid locations.

    Returns:
        The ``rowcount`` reported for the ``Asset`` delete, or ``0`` (without
        opening a session) when the roots resolve to no prefixes at all.
    """
    valid_prefixes: list[str] = []
    for root in roots:
        for raw_prefix in prefixes_for_root(root):
            valid_prefixes.append(os.path.abspath(raw_prefix))
    if len(valid_prefixes) == 0:
        return 0

    # NOTE(review): only prefixes of the roots passed in are treated as valid,
    # so cache states that live under any other root are deleted as well --
    # confirm callers that pass a subset of roots expect this.
    under_valid_prefix = []
    for prefix in valid_prefixes:
        # Anchor on a trailing separator so the pattern only matches paths
        # below the directory itself, not siblings sharing the same leading text.
        directory = prefix if prefix.endswith(os.sep) else prefix + os.sep
        # escape_like_prefix is expected to hand back the escaped text plus the
        # escape character to pass to LIKE -- verify against its definition.
        pattern, escape_char = escape_like_prefix(directory)
        under_valid_prefix.append(
            AssetCacheState.file_path.like(pattern + "%", escape=escape_char)
        )
    outside_all_prefixes = ~sqlalchemy.or_(*under_valid_prefix)

    # Ids of assets with no hash that have no cache state row joined to them.
    orphan_ids = sqlalchemy.select(Asset.id)
    orphan_ids = orphan_ids.outerjoin(AssetCacheState, AssetCacheState.asset_id == Asset.id)
    orphan_ids = orphan_ids.where(Asset.hash.is_(None), AssetCacheState.id.is_(None))
    orphan_ids = orphan_ids.scalar_subquery()

    with create_session() as session:
        # Order matters: cache states go first so that assets which just lost
        # their last state are picked up by the orphan query below.
        session.execute(sqlalchemy.delete(AssetCacheState).where(outside_all_prefixes))
        session.execute(sqlalchemy.delete(AssetInfo).where(AssetInfo.asset_id.in_(orphan_ids)))
        deleted_assets = session.execute(sqlalchemy.delete(Asset).where(Asset.id.in_(orphan_ids)))
        session.commit()
        return deleted_assets.rowcount
|
||||
|
||||
|
||||
def _fast_db_consistency_pass(
|
||||
root: RootType,
|
||||
*,
|
||||
|
||||
141
tests-assets/test_prune_orphaned_assets.py
Normal file
141
tests-assets/test_prune_orphaned_assets.py
Normal file
@ -0,0 +1,141 @@
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from conftest import get_asset_filename, trigger_sync_seed_assets
|
||||
|
||||
|
||||
@pytest.fixture
def create_seed_file(comfy_tmp_base_dir: Path):
    """Yield a factory that writes files meant to be picked up as seed assets.

    The factory writes ``data`` to
    ``<comfy_tmp_base_dir>/<root>/unit-tests/<scope>/<name>``, creating parent
    directories as needed, and returns the resulting path. When ``name`` is
    omitted (or empty) a random ``seed_<8 hex chars>.bin`` name is used.

    On teardown every file created through the factory is unlinked; files that
    are already gone are ignored.
    """
    written: list[Path] = []

    def _write_seed(root: str, scope: str, name: str | None = None, data: bytes = b"TEST") -> Path:
        if not name:
            name = f"seed_{uuid.uuid4().hex[:8]}.bin"
        target = comfy_tmp_base_dir.joinpath(root, "unit-tests", scope, name)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(data)
        written.append(target)
        return target

    yield _write_seed

    # Teardown: tests may already have removed some of these files themselves.
    for leftover in written:
        leftover.unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.fixture
def find_asset(http: requests.Session, api_base: str):
    """Return a helper that lists assets through ``GET {api_base}/api/assets``.

    The helper filters by the tags ``unit-tests`` and ``scope``. When ``name``
    is given it is also sent as ``name_contains`` and the response is narrowed
    to entries whose ``name`` equals it exactly. The helper asserts that the
    request returns HTTP 200.
    """

    def _lookup(scope: str, name: str | None = None) -> list[dict]:
        query = {"include_tags": f"unit-tests,{scope}"}
        if name:
            query["name_contains"] = name

        response = http.get(f"{api_base}/api/assets", params=query, timeout=120)
        assert response.status_code == 200

        found = response.json().get("assets", [])
        if not name:
            return found
        # name_contains is presumably a substring filter on the server side,
        # so keep only exact name matches here.
        return [asset for asset in found if asset.get("name") == name]

    return _lookup
|
||||
|
||||
|
||||
@pytest.mark.parametrize("root", ["input", "output"])
def test_orphaned_seed_asset_is_pruned(
    root: str,
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
):
    """A seed asset is listed after sync and gone once its file is deleted and sync reruns."""
    scope = f"prune-{uuid.uuid4().hex[:6]}"
    seed_path = create_seed_file(root, scope)
    seed_name = seed_path.name

    # First sync: the file exists, so the asset must be listed.
    trigger_sync_seed_assets(http, api_base)
    listed_before = find_asset(scope, seed_name)
    assert listed_before, "Seed asset should exist"

    # Remove the backing file, then sync again: the asset must disappear.
    seed_path.unlink()
    trigger_sync_seed_assets(http, api_base)
    listed_after = find_asset(scope, seed_name)
    assert not listed_after, "Orphaned seed should be pruned"
|
||||
|
||||
|
||||
def test_seed_asset_with_file_survives_prune(
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
):
    """A seed asset whose file stays on disk is still listed after two syncs."""
    scope = f"keep-{uuid.uuid4().hex[:6]}"
    seed_path = create_seed_file("input", scope)

    # Sync twice: once to seed the asset, once more so a prune pass sees it.
    for _ in range(2):
        trigger_sync_seed_assets(http, api_base)

    still_listed = find_asset(scope, seed_path.name)
    assert still_listed, "Seed with valid file should survive"
|
||||
|
||||
|
||||
def test_hashed_asset_not_pruned_when_file_missing(
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
    make_asset_bytes,
):
    """An asset created via ``asset_factory`` stays retrievable after its file is deleted and a sync runs."""
    scope = f"hashed-{uuid.uuid4().hex[:6]}"
    payload = make_asset_bytes("test", 2048)
    asset = asset_factory("test.bin", ["input", "unit-tests", scope], {}, payload)

    # Delete the stored file from disk, located via the asset's hash.
    stored_name = get_asset_filename(asset["asset_hash"], ".bin")
    stored_file = comfy_tmp_base_dir.joinpath("input", "unit-tests", scope, stored_name)
    stored_file.unlink()

    trigger_sync_seed_assets(http, api_base)

    response = http.get(f"{api_base}/api/assets/{asset['id']}", timeout=120)
    assert response.status_code == 200, "Hashed asset should NOT be pruned"
|
||||
|
||||
|
||||
def test_prune_across_multiple_roots(
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
):
    """Deleting the input-root file prunes only that asset; the output-root asset remains."""
    scope = f"multi-{uuid.uuid4().hex[:6]}"
    input_file = create_seed_file("input", scope, "input.bin")
    create_seed_file("output", scope, "output.bin")

    # Both files exist: both assets are listed under the shared scope tag.
    trigger_sync_seed_assets(http, api_base)
    listed_initially = find_asset(scope)
    assert len(listed_initially) == 2

    # Remove only the input-root file and sync again.
    input_file.unlink()
    trigger_sync_seed_assets(http, api_base)

    survivors = find_asset(scope)
    assert len(survivors) == 1
    assert survivors[0]["name"] == "output.bin"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dirname", ["100%_done", "my_folder_name", "has spaces"])
def test_special_chars_in_path_escaped_correctly(
    dirname: str,
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
):
    """A seed file in a directory containing ``%``, ``_`` or spaces is still listed after two syncs."""
    top_scope = f"special-{uuid.uuid4().hex[:6]}"
    # The seed file lives one level deeper, inside the specially named directory.
    scope = f"{top_scope}/{dirname}"
    seed_path = create_seed_file("input", scope)

    for _ in range(2):
        trigger_sync_seed_assets(http, api_base)

    # Look the asset up by the top-level scope segment only.
    still_listed = find_asset(top_scope, seed_path.name)
    assert still_listed, "Asset with special chars should survive"
|
||||
Loading…
Reference in New Issue
Block a user