From 5b7288d700b72a2787e10d45b778d3d651ccb1d8 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Tue, 19 May 2026 14:48:49 -0700 Subject: [PATCH] feat(assets): collapse nested asset path into a single slash-joined tag The /api/assets response previously emitted one tag per parent directory between the root category and the filename. For nested categories like diffusers, this produced ["models", "diffusers", "Kolors", "text_encoder"] where consumers that look up a category via tags[1] would only see the top-level bucket name and miss the model-specific sub-path that uniquely identifies the component. This collapses the parent subpath into a single slash-joined tag so the result is ["models", "diffusers/Kolors/text_encoder"]. Consumers can now read tags[1] as a stable category identifier regardless of how deep the file lives in the bucket. Case is preserved on the subpath so providers keyed on the original-case path (e.g. "diffusers/Kolors/text_encoder") resolve correctly. Same shape applies uniformly: - input/foo.png -> ["input"] - output/00001.png -> ["output"] - models/checkpoints/flux.safetensors -> ["models", "checkpoints"] - models/diffusers/Kolors/text_encoder/m.sft -> ["models", "diffusers/Kolors/text_encoder"] - models/loras/my/custom/path/v1.safetensors -> ["models", "loras/my/custom/path"] Integration tests that filtered by individual subdirectory tags (`include_tags=unit-tests,scope`) updated to use the new slash-joined shape (`include_tags=unit-tests/scope`). Unit tests cover flat input, flat output, flat models, diffusers-style nested, and deep user-subpath cases. 
--- app/assets/services/path_utils.py | 18 ++- .../assets_test/services/test_path_utils.py | 110 +++++++++++++++++- .../assets_test/test_assets_missing_sync.py | 6 +- tests-unit/assets_test/test_crud.py | 8 +- .../assets_test/test_prune_orphaned_assets.py | 10 +- 5 files changed, 143 insertions(+), 9 deletions(-) diff --git a/app/assets/services/path_utils.py b/app/assets/services/path_utils.py index 892140ffb..2363d01f3 100644 --- a/app/assets/services/path_utils.py +++ b/app/assets/services/path_utils.py @@ -160,7 +160,18 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: """Return (name, tags) derived from a filesystem path. - name: base filename with extension - - tags: [root_category] + parent folder names in order + - tags: [root_category] for paths with no parent subdirectories, + [root_category, slash_joined_subpath] otherwise. The parent subpath + (everything between the root category and the filename) is collapsed + into a single tag rather than emitted as one tag per directory, so + consumers can use ``tags[1]`` as a stable category identifier that + survives nested directory layouts (e.g. diffusers components). + + Case is preserved on the subpath so that consumers can look up + providers keyed on the original-case path (e.g. + ``"diffusers/Kolors/text_encoder"``). The root category is always + lowercase by construction in + :func:`get_asset_category_and_relative_path`. Raises: ValueError: path does not belong to any known root. 
@@ -170,4 +181,7 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: parent_parts = [ part for part in p.parent.parts if part not in (".", "..", p.anchor) ] - return p.name, list(dict.fromkeys(normalize_tags([root_category, *parent_parts]))) + tags = [root_category] + if parent_parts: + tags.append("/".join(parent_parts)) + return p.name, list(dict.fromkeys(t.strip() for t in tags if t.strip())) diff --git a/tests-unit/assets_test/services/test_path_utils.py b/tests-unit/assets_test/services/test_path_utils.py index 3fa905f9a..808d15e15 100644 --- a/tests-unit/assets_test/services/test_path_utils.py +++ b/tests-unit/assets_test/services/test_path_utils.py @@ -6,7 +6,10 @@ from unittest.mock import patch import pytest -from app.assets.services.path_utils import get_asset_category_and_relative_path +from app.assets.services.path_utils import ( + get_asset_category_and_relative_path, + get_name_and_tags_from_asset_path, +) @pytest.fixture @@ -38,6 +41,50 @@ def fake_dirs(): } +@pytest.fixture +def fake_dirs_multi_bucket(): + """Variant fixture with multiple model buckets (checkpoints + diffusers + loras).""" + with tempfile.TemporaryDirectory() as root: + root_path = Path(root) + input_dir = root_path / "input" + output_dir = root_path / "output" + temp_dir = root_path / "temp" + checkpoints_dir = root_path / "models" / "checkpoints" + diffusers_dir = root_path / "models" / "diffusers" + loras_dir = root_path / "models" / "loras" + for d in ( + input_dir, + output_dir, + temp_dir, + checkpoints_dir, + diffusers_dir, + loras_dir, + ): + d.mkdir(parents=True) + + with patch("app.assets.services.path_utils.folder_paths") as mock_fp: + mock_fp.get_input_directory.return_value = str(input_dir) + mock_fp.get_output_directory.return_value = str(output_dir) + mock_fp.get_temp_directory.return_value = str(temp_dir) + + with patch( + "app.assets.services.path_utils.get_comfy_models_folders", + return_value=[ + ("checkpoints", 
[str(checkpoints_dir)]), + ("diffusers", [str(diffusers_dir)]), + ("loras", [str(loras_dir)]), + ], + ): + yield { + "input": input_dir, + "output": output_dir, + "temp": temp_dir, + "checkpoints": checkpoints_dir, + "diffusers": diffusers_dir, + "loras": loras_dir, + } + + class TestGetAssetCategoryAndRelativePath: def test_input_file(self, fake_dirs): f = fake_dirs["input"] / "photo.png" @@ -79,3 +126,64 @@ class TestGetAssetCategoryAndRelativePath: def test_unknown_path_raises(self, fake_dirs): with pytest.raises(ValueError, match="not within"): get_asset_category_and_relative_path("/some/random/path.png") + + +class TestGetNameAndTagsFromAssetPath: + """tags collapse the parent subpath into a single slash-joined tag. + + Consumers should be able to read ``tags[1]`` as a stable category + identifier regardless of how deep the file lives in the bucket. + """ + + def test_flat_input(self, fake_dirs_multi_bucket): + f = fake_dirs_multi_bucket["input"] / "photo.png" + f.touch() + name, tags = get_name_and_tags_from_asset_path(str(f)) + assert name == "photo.png" + assert tags == ["input"] + + def test_flat_output(self, fake_dirs_multi_bucket): + f = fake_dirs_multi_bucket["output"] / "result_00001.png" + f.touch() + name, tags = get_name_and_tags_from_asset_path(str(f)) + assert name == "result_00001.png" + assert tags == ["output"] + + def test_flat_models_checkpoint(self, fake_dirs_multi_bucket): + f = fake_dirs_multi_bucket["checkpoints"] / "flux.safetensors" + f.touch() + name, tags = get_name_and_tags_from_asset_path(str(f)) + assert name == "flux.safetensors" + assert tags == ["models", "checkpoints"] + + def test_diffusers_nested_subpath_slash_joined(self, fake_dirs_multi_bucket): + """Diffusers components live in nested directories — the full subpath + must collapse into one tag so consumers can look up the model category + via tags[1] regardless of nesting depth.""" + nested = ( + fake_dirs_multi_bucket["diffusers"] + / "Kolors" + / "text_encoder" + ) + 
nested.mkdir(parents=True) + f = nested / "model.safetensors" + f.touch() + name, tags = get_name_and_tags_from_asset_path(str(f)) + assert name == "model.safetensors" + assert tags == ["models", "diffusers/Kolors/text_encoder"] + + def test_deep_lora_user_subpath_slash_joined(self, fake_dirs_multi_bucket): + """User-created subdirectories under a model bucket also collapse to a + single tag rather than one tag per directory.""" + nested = ( + fake_dirs_multi_bucket["loras"] + / "my" + / "custom" + / "path" + ) + nested.mkdir(parents=True) + f = nested / "v0001.safetensors" + f.touch() + name, tags = get_name_and_tags_from_asset_path(str(f)) + assert name == "v0001.safetensors" + assert tags == ["models", "loras/my/custom/path"] diff --git a/tests-unit/assets_test/test_assets_missing_sync.py b/tests-unit/assets_test/test_assets_missing_sync.py index 47dc130cb..f87846a1c 100644 --- a/tests-unit/assets_test/test_assets_missing_sync.py +++ b/tests-unit/assets_test/test_assets_missing_sync.py @@ -32,7 +32,7 @@ def test_seed_asset_removed_when_file_is_deleted( # Verify it is visible via API and carries no hash (seed) r1 = http.get( api_base + "/api/assets", - params={"include_tags": "unit-tests,syncseed", "name_contains": name}, + params={"include_tags": "unit-tests/syncseed", "name_contains": name}, timeout=120, ) body1 = r1.json() @@ -52,7 +52,7 @@ def test_seed_asset_removed_when_file_is_deleted( # It should disappear (AssetInfo and seed Asset gone) r2 = http.get( api_base + "/api/assets", - params={"include_tags": "unit-tests,syncseed", "name_contains": name}, + params={"include_tags": "unit-tests/syncseed", "name_contains": name}, timeout=120, ) body2 = r2.json() @@ -332,7 +332,7 @@ def test_fastpass_removes_stale_state_row_no_missing( rl = http.get( api_base + "/api/assets", - params={"include_tags": f"unit-tests,{scope}"}, + params={"include_tags": f"unit-tests/{scope}"}, timeout=120, ) bl = rl.json() diff --git a/tests-unit/assets_test/test_crud.py 
b/tests-unit/assets_test/test_crud.py index 07310223e..7fc1139fe 100644 --- a/tests-unit/assets_test/test_crud.py +++ b/tests-unit/assets_test/test_crud.py @@ -280,9 +280,15 @@ def test_metadata_filename_is_set_for_seed_asset_without_hash( trigger_sync_seed_assets(http, api_base) + # Scanner emits tags as ``[root, "<dir>/<subdir>/..."]`` — the second tag + # is the slash-joined parent subpath. For ``<root>/unit-tests/<scope>/a/b/<name>`` + # the second tag is ``"unit-tests/<scope>/a/b"``. r1 = http.get( api_base + "/api/assets", - params={"include_tags": f"unit-tests,{scope}", "name_contains": name}, + params={ + "include_tags": f"unit-tests/{scope}/a/b", + "name_contains": name, + }, timeout=120, ) body = r1.json() diff --git a/tests-unit/assets_test/test_prune_orphaned_assets.py b/tests-unit/assets_test/test_prune_orphaned_assets.py index 1fbd4d4e2..8e26697f3 100644 --- a/tests-unit/assets_test/test_prune_orphaned_assets.py +++ b/tests-unit/assets_test/test_prune_orphaned_assets.py @@ -29,7 +29,10 @@ def create_seed_file(comfy_tmp_base_dir: Path): def find_asset(http: requests.Session, api_base: str): """Query API for assets matching scope and optional name.""" def _find(scope: str, name: str | None = None) -> list[dict]: - params = {"include_tags": f"unit-tests,{scope}"} + # Scanner now emits tags as ``[root, "<dir>/<subdir>/..."]`` rather than + # one tag per directory. For files at ``<root>/unit-tests/<scope>/...`` + # the second tag is exactly ``"unit-tests/<scope>"``. 
+ params = {"include_tags": f"unit-tests/{scope}"} if name: params["name_contains"] = name r = http.get(f"{api_base}/api/assets", params=params, timeout=120) @@ -138,4 +141,7 @@ def test_special_chars_in_path_escaped_correctly( trigger_sync_seed_assets(http, api_base) trigger_sync_seed_assets(http, api_base) - assert find_asset(scope.split("/")[0], fp.name), "Asset with special chars should survive" + # Scanner emits the full parent subpath as a single slash-joined tag, so + # the lookup tag is ``unit-tests/<scope>`` even when <scope> itself + # contains a slash (parent + special-char dirname). + assert find_asset(scope, fp.name), "Asset with special chars should survive"