Add pruning of Assets not reachable through the current configs (#12168)
Some checks are pending
Python Linting / Run Ruff (push) Waiting to run
Python Linting / Run Pylint (push) Waiting to run

* Not sure about this one, but try removing assets from old sessions.

* Simplify _prune_orphaned_assets: merge functions, use list comprehensions

Amp-Thread-ID: https://ampcode.com/threads/T-019c0917-0dc3-75ab-870d-a32b3fdc1927
Co-authored-by: Amp <amp@ampcode.com>

* Refactor _prune_orphaned_assets for readability

Amp-Thread-ID: https://ampcode.com/threads/T-019c0917-0dc3-75ab-870d-a32b3fdc1927
Co-authored-by: Amp <amp@ampcode.com>

* Add unit tests for pruning

* Add unit tests for _prune_orphaned_assets

Tests cover:

- Orphaned seed assets pruned when file removed

- Seed assets with valid files survive

- Hashed assets not pruned even without file

- Multi-root pruning

- SQL LIKE escape handling for %, _, spaces

Amp-Thread-ID: https://ampcode.com/threads/T-019c0c7a-5c8a-7548-b6c3-823e9829ce74
Co-authored-by: Amp <amp@ampcode.com>

* Ruff fix

---------

Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Alexander Brown 2026-01-29 18:34:14 -08:00 committed by GitHub
parent a999cbcfbc
commit 942b2a6526
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 176 additions and 1 deletions

View File

@ -27,6 +27,7 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
t_start = time.perf_counter()
created = 0
skipped_existing = 0
orphans_pruned = 0
paths: list[str] = []
try:
existing_paths: set[str] = set()
@ -38,6 +39,11 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
except Exception as e:
logging.exception("fast DB scan failed for %s: %s", r, e)
try:
orphans_pruned = _prune_orphaned_assets(roots)
except Exception as e:
logging.exception("orphan pruning failed: %s", e)
if "models" in roots:
paths.extend(collect_models_files())
if "input" in roots:
@ -85,15 +91,43 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
finally:
if enable_logging:
logging.info(
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)",
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)",
roots,
time.perf_counter() - t_start,
created,
skipped_existing,
orphans_pruned,
len(paths),
)
def _prune_orphaned_assets(roots: tuple[RootType, ...]) -> int:
    """Drop cache states outside the configured prefixes, then delete orphaned seed assets.

    Works in three steps inside a single session:

    1. Delete every ``AssetCacheState`` row whose ``file_path`` does not start
       with one of the prefixes resolved for ``roots``.
    2. Delete ``AssetInfo`` rows that point at an orphaned asset, i.e. an
       ``Asset`` with no hash and no remaining cache state.
    3. Delete those orphaned ``Asset`` rows themselves.

    Args:
        roots: Roots whose prefixes (via ``prefixes_for_root``) count as valid.

    Returns:
        The ``rowcount`` reported for the ``Asset`` delete, or ``0`` when no
        prefixes are configured for ``roots`` (nothing is touched in that case).
    """
    # NOTE(review): the valid prefixes are derived only from ``roots``. If a
    # caller passes a subset of the roots, cache states living under the other
    # roots' prefixes would be deleted as well — confirm callers pass every root.
    configured_prefixes: list[str] = []
    for root in roots:
        configured_prefixes.extend(os.path.abspath(p) for p in prefixes_for_root(root))
    if not configured_prefixes:
        return 0

    prefix_conditions = []
    for prefix in configured_prefixes:
        # Anchor on a trailing separator so a sibling directory that merely
        # shares the leading characters is not treated as inside the prefix.
        anchored = prefix if prefix.endswith(os.sep) else prefix + os.sep
        pattern, escape_char = escape_like_prefix(anchored)
        prefix_conditions.append(
            AssetCacheState.file_path.like(pattern + "%", escape=escape_char)
        )
    inside_configured_prefix = sqlalchemy.or_(*prefix_conditions)

    # Assets without a hash that no longer have any cache state attached.
    orphaned_asset_ids = (
        sqlalchemy.select(Asset.id)
        .outerjoin(AssetCacheState, AssetCacheState.asset_id == Asset.id)
        .where(Asset.hash.is_(None), AssetCacheState.id.is_(None))
    ).scalar_subquery()

    drop_stale_states = sqlalchemy.delete(AssetCacheState).where(~inside_configured_prefix)
    drop_orphan_infos = sqlalchemy.delete(AssetInfo).where(
        AssetInfo.asset_id.in_(orphaned_asset_ids)
    )
    drop_orphan_assets = sqlalchemy.delete(Asset).where(Asset.id.in_(orphaned_asset_ids))

    with create_session() as sess:
        # Order matters: stale cache states must go first so the orphan
        # subquery sees the assets they used to keep alive.
        sess.execute(drop_stale_states)
        sess.execute(drop_orphan_infos)
        outcome = sess.execute(drop_orphan_assets)
        sess.commit()
        pruned = outcome.rowcount
    return pruned
def _fast_db_consistency_pass(
root: RootType,
*,

View File

@ -0,0 +1,141 @@
import uuid
from pathlib import Path
import pytest
import requests
from conftest import get_asset_filename, trigger_sync_seed_assets
@pytest.fixture
def create_seed_file(comfy_tmp_base_dir: Path):
    """Provide a factory that writes files on disk for seed-asset tests.

    The factory writes ``data`` to
    ``<comfy_tmp_base_dir>/<root>/unit-tests/<scope>/<name>`` (creating parent
    directories as needed) and returns the path. A random ``seed_<hex>.bin``
    name is generated when ``name`` is not given. Every file written through
    the factory is removed at fixture teardown; files that are already gone
    are ignored.
    """
    written: list[Path] = []

    def _write_seed(root: str, scope: str, name: str | None = None, data: bytes = b"TEST") -> Path:
        if not name:
            name = f"seed_{uuid.uuid4().hex[:8]}.bin"
        target = comfy_tmp_base_dir.joinpath(root, "unit-tests", scope, name)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(data)
        written.append(target)
        return target

    yield _write_seed

    # Teardown: tests may have deleted some files themselves, hence missing_ok.
    for leftover in written:
        leftover.unlink(missing_ok=True)
@pytest.fixture
def find_asset(http: requests.Session, api_base: str):
    """Provide a helper that lists assets tagged ``unit-tests`` plus a scope tag.

    The helper calls ``GET {api_base}/api/assets`` and asserts a 200 response.
    When ``name`` is given it is sent as ``name_contains`` and the result is
    additionally narrowed to entries whose ``name`` equals it exactly.
    """

    def _lookup(scope: str, name: str | None = None) -> list[dict]:
        query = {"include_tags": f"unit-tests,{scope}"}
        if name:
            query["name_contains"] = name

        response = http.get(f"{api_base}/api/assets", params=query, timeout=120)
        assert response.status_code == 200

        listed = response.json().get("assets", [])
        if not name:
            return listed
        # ``name_contains`` is a substring filter; keep exact matches only.
        return [entry for entry in listed if entry.get("name") == name]

    return _lookup
@pytest.mark.parametrize("root", ["input", "output"])
def test_orphaned_seed_asset_is_pruned(
    root: str,
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
):
    """A seed asset is listed while its file exists and disappears once the file is deleted."""
    scope_tag = f"prune-{uuid.uuid4().hex[:6]}"
    seed_path = create_seed_file(root, scope_tag)
    seed_name = seed_path.name

    # The first sync registers the file.
    trigger_sync_seed_assets(http, api_base)
    listed_before = find_asset(scope_tag, seed_name)
    assert listed_before, "Seed asset should exist"

    # Remove the backing file; the next sync is expected to prune the asset.
    seed_path.unlink()
    trigger_sync_seed_assets(http, api_base)
    listed_after = find_asset(scope_tag, seed_name)
    assert not listed_after, "Orphaned seed should be pruned"
def test_seed_asset_with_file_survives_prune(
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
):
    """A seed asset whose file stays on disk is still listed after repeated syncs."""
    scope_tag = f"keep-{uuid.uuid4().hex[:6]}"
    seed_path = create_seed_file("input", scope_tag)

    # Sync twice: once to register the asset, once more so pruning runs
    # while the asset already exists.
    for _ in range(2):
        trigger_sync_seed_assets(http, api_base)

    still_listed = find_asset(scope_tag, seed_path.name)
    assert still_listed, "Seed with valid file should survive"
def test_hashed_asset_not_pruned_when_file_missing(
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
    asset_factory,
    make_asset_bytes,
):
    """An asset created through ``asset_factory`` stays retrievable after its file is removed."""
    scope_tag = f"hashed-{uuid.uuid4().hex[:6]}"
    payload = make_asset_bytes("test", 2048)
    asset = asset_factory("test.bin", ["input", "unit-tests", scope_tag], {}, payload)

    # Delete the stored file; unlink() raises if it is not where we expect it.
    stored_name = get_asset_filename(asset["asset_hash"], ".bin")
    stored_path = comfy_tmp_base_dir.joinpath("input", "unit-tests", scope_tag, stored_name)
    stored_path.unlink()

    trigger_sync_seed_assets(http, api_base)

    response = http.get(f"{api_base}/api/assets/{asset['id']}", timeout=120)
    assert response.status_code == 200, "Hashed asset should NOT be pruned"
def test_prune_across_multiple_roots(
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
):
    """Deleting the input-root file prunes only that asset; the output-root asset remains."""
    scope_tag = f"multi-{uuid.uuid4().hex[:6]}"
    input_path = create_seed_file("input", scope_tag, "input.bin")
    create_seed_file("output", scope_tag, "output.bin")

    trigger_sync_seed_assets(http, api_base)
    listed_initially = find_asset(scope_tag)
    assert len(listed_initially) == 2

    # Only the input file goes away; the output file is left in place.
    input_path.unlink()
    trigger_sync_seed_assets(http, api_base)

    survivors = find_asset(scope_tag)
    assert len(survivors) == 1
    assert survivors[0]["name"] == "output.bin"
@pytest.mark.parametrize("dirname", ["100%_done", "my_folder_name", "has spaces"])
def test_special_chars_in_path_escaped_correctly(
    dirname: str,
    create_seed_file,
    find_asset,
    http: requests.Session,
    api_base: str,
    comfy_tmp_base_dir: Path,
):
    """A seed file under a directory containing ``%``, ``_`` or spaces survives repeated syncs."""
    scope_tag = f"special-{uuid.uuid4().hex[:6]}"
    # The special-character directory is nested below the scope directory.
    nested_scope = f"{scope_tag}/{dirname}"
    seed_path = create_seed_file("input", nested_scope)

    for _ in range(2):
        trigger_sync_seed_assets(http, api_base)

    # Look the asset up by the leading scope segment only.
    still_listed = find_asset(scope_tag, seed_path.name)
    assert still_listed, "Asset with special chars should survive"