diff --git a/app/assets/services/bulk_ingest.py b/app/assets/services/bulk_ingest.py index 67aad838f..7c673632f 100644 --- a/app/assets/services/bulk_ingest.py +++ b/app/assets/services/bulk_ingest.py @@ -3,7 +3,7 @@ from __future__ import annotations import os import uuid from dataclasses import dataclass -from datetime import datetime +from datetime import datetime, timedelta from typing import TYPE_CHECKING, Any, TypedDict from sqlalchemy.orm import Session @@ -233,13 +233,19 @@ def batch_insert_seed_assets( if ref_id not in inserted_ref_ids: continue - for tag in ref_data["tags"]: + # Stagger added_at by microsecond per tag within a reference so + # the retrieval ORDER BY added_at preserves the input list order + # (the path-derived root category stays at position 0). Without + # this, every tag in a bulk-insert batch shares current_time and + # the tag_name tiebreaker sorts them alphabetically — putting the + # subpath tag ahead of "models" since "c"/"d"/"l" < "m". + for tag_idx, tag in enumerate(ref_data["tags"]): tag_rows.append( { "asset_reference_id": ref_id, "tag_name": tag, "origin": "automatic", - "added_at": current_time, + "added_at": current_time + timedelta(microseconds=tag_idx), } ) diff --git a/app/assets/services/path_utils.py b/app/assets/services/path_utils.py index b3ded8b47..d1732a250 100644 --- a/app/assets/services/path_utils.py +++ b/app/assets/services/path_utils.py @@ -26,27 +26,47 @@ def get_comfy_models_folders() -> list[tuple[str, list[str]]]: def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]: - """Validates and maps tags -> (base_dir, subdirs_for_fs)""" + """Validates and maps tags -> (base_dir, subdirs_for_fs). + + Accepts both the legacy one-tag-per-directory shape + (``["models", "diffusers", "Kolors", "text_encoder"]``) and the + slash-joined shape emitted by :func:`get_name_and_tags_from_asset_path` + (``["models", "diffusers/Kolors/text_encoder"]``). Either shape is + split into a category (for ``models``) plus subdirs, with the same + safety checks applied to each component. + """ if not tags: raise ValueError("tags must not be empty") root = tags[0].lower() + + # Expand any slash-joined entries into individual path components so + # the rest of the function can treat both tag shapes uniformly. Each + # component is also stripped, so " a / b " behaves like ["a", "b"]. + expanded: list[str] = [] + for t in tags[1:]: + for part in str(t).split("/"): + part = part.strip() + if part: + expanded.append(part) + if root == "models": - if len(tags) < 2: + if not expanded: raise ValueError("at least two tags required for model asset") + category = expanded[0] try: - bases = folder_paths.folder_names_and_paths[tags[1]][0] + bases = folder_paths.folder_names_and_paths[category][0] except KeyError: - raise ValueError(f"unknown model category '{tags[1]}'") + raise ValueError(f"unknown model category '{category}'") if not bases: - raise ValueError(f"no base path configured for category '{tags[1]}'") + raise ValueError(f"no base path configured for category '{category}'") base_dir = os.path.abspath(bases[0]) - raw_subdirs = tags[2:] + raw_subdirs = expanded[1:] elif root == "input": base_dir = os.path.abspath(folder_paths.get_input_directory()) - raw_subdirs = tags[1:] + raw_subdirs = expanded elif root == "output": base_dir = os.path.abspath(folder_paths.get_output_directory()) - raw_subdirs = tags[1:] + raw_subdirs = expanded else: raise ValueError(f"unknown root tag '{tags[0]}'; expected 'models', 'input', or 'output'") _sep_chars = frozenset(("/", "\\", os.sep)) @@ -166,11 +186,12 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: consumers can use ``tags[1]`` as a stable category identifier that survives nested directory layouts (e.g. diffusers components). - Case is preserved on the subpath so that consumers can look up - providers keyed on the original-case path (e.g. - ``"diffusers/Kolors/text_encoder"``). The root category is always - lowercase by construction in - :func:`get_asset_category_and_relative_path`. + Both the root category and the subpath are lowercased to match the + canonicalization applied by :func:`ensure_tags_exist`, otherwise the + ``asset_reference_tags.tag_name`` FK to the lowercased + ``tags.name`` would fail for any path containing uppercase letters. + Consumers that need to look up providers keyed on original-case + paths should normalize their lookup key to lowercase. Raises: ValueError: path does not belong to any known root. @@ -182,5 +203,5 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]: ] tags = [root_category] if parent_parts: - tags.append("/".join(parent_parts)) + tags.append("/".join(parent_parts).lower()) return p.name, list(dict.fromkeys(t.strip() for t in tags if t.strip())) diff --git a/tests-unit/assets_test/services/test_path_utils.py b/tests-unit/assets_test/services/test_path_utils.py index 808d15e15..210aa6230 100644 --- a/tests-unit/assets_test/services/test_path_utils.py +++ b/tests-unit/assets_test/services/test_path_utils.py @@ -9,6 +9,7 @@ import pytest from app.assets.services.path_utils import ( get_asset_category_and_relative_path, get_name_and_tags_from_asset_path, + resolve_destination_from_tags, ) @@ -159,7 +160,13 @@ class TestGetNameAndTagsFromAssetPath: def test_diffusers_nested_subpath_slash_joined(self, fake_dirs_multi_bucket): """Diffusers components live in nested directories — the full subpath must collapse into one tag so consumers can look up the model category - via tags[1] regardless of nesting depth.""" + via tags[1] regardless of nesting depth. + + The subpath is lowercased to match the canonicalization + :func:`ensure_tags_exist` applies on the write side; without that, + the asset_reference_tags.tag_name FK to tags.name would fail for + any path containing uppercase letters. + """ nested = ( fake_dirs_multi_bucket["diffusers"] / "Kolors" @@ -170,7 +177,7 @@ class TestGetNameAndTagsFromAssetPath: f.touch() name, tags = get_name_and_tags_from_asset_path(str(f)) assert name == "model.safetensors" - assert tags == ["models", "diffusers/Kolors/text_encoder"] + assert tags == ["models", "diffusers/kolors/text_encoder"] def test_deep_lora_user_subpath_slash_joined(self, fake_dirs_multi_bucket): """User-created subdirectories under a model bucket also collapse to a @@ -187,3 +194,94 @@ class TestGetNameAndTagsFromAssetPath: name, tags = get_name_and_tags_from_asset_path(str(f)) assert name == "v0001.safetensors" assert tags == ["models", "loras/my/custom/path"] + + +class TestResolveDestinationFromTags: + """resolve_destination_from_tags must accept both the legacy + one-tag-per-directory shape and the new slash-joined shape so that an + upload using the tags it just read back from /api/assets round-trips + to the right on-disk destination. + """ + + @pytest.fixture + def resolve_dirs(self): + with tempfile.TemporaryDirectory() as root: + root_path = Path(root) + input_dir = root_path / "input" + output_dir = root_path / "output" + checkpoints_dir = root_path / "models" / "checkpoints" + diffusers_dir = root_path / "models" / "diffusers" + loras_dir = root_path / "models" / "loras" + for d in (input_dir, output_dir, checkpoints_dir, diffusers_dir, loras_dir): + d.mkdir(parents=True) + with patch("app.assets.services.path_utils.folder_paths") as mock_fp: + mock_fp.get_input_directory.return_value = str(input_dir) + mock_fp.get_output_directory.return_value = str(output_dir) + mock_fp.folder_names_and_paths = { + "checkpoints": ([str(checkpoints_dir)], None), + "diffusers": ([str(diffusers_dir)], None), + "loras": ([str(loras_dir)], None), + } + yield { + "input": input_dir, + "output": output_dir, + "checkpoints": checkpoints_dir, + "diffusers": diffusers_dir, + "loras": loras_dir, + } + + def test_models_flat_category(self, resolve_dirs): + base, subdirs = resolve_destination_from_tags(["models", "checkpoints"]) + assert base == str(resolve_dirs["checkpoints"]) + assert subdirs == [] + + def test_models_slash_joined_new_shape(self, resolve_dirs): + # The shape get_name_and_tags_from_asset_path now emits. + base, subdirs = resolve_destination_from_tags( + ["models", "diffusers/kolors/text_encoder"] + ) + assert base == str(resolve_dirs["diffusers"]) + assert subdirs == ["kolors", "text_encoder"] + + def test_models_legacy_one_tag_per_dir(self, resolve_dirs): + # The legacy shape must still resolve identically. + base, subdirs = resolve_destination_from_tags( + ["models", "diffusers", "kolors", "text_encoder"] + ) + assert base == str(resolve_dirs["diffusers"]) + assert subdirs == ["kolors", "text_encoder"] + + def test_models_loras_slash_joined(self, resolve_dirs): + base, subdirs = resolve_destination_from_tags( + ["models", "loras/my/custom/path"] + ) + assert base == str(resolve_dirs["loras"]) + assert subdirs == ["my", "custom", "path"] + + def test_input_no_subdir(self, resolve_dirs): + base, subdirs = resolve_destination_from_tags(["input"]) + assert base == str(resolve_dirs["input"]) + assert subdirs == [] + + def test_input_slash_joined_subdir(self, resolve_dirs): + base, subdirs = resolve_destination_from_tags(["input", "portraits/2026"]) + assert base == str(resolve_dirs["input"]) + assert subdirs == ["portraits", "2026"] + + def test_output_slash_joined_subdir(self, resolve_dirs): + base, subdirs = resolve_destination_from_tags(["output", "runs/abc"]) + assert base == str(resolve_dirs["output"]) + assert subdirs == ["runs", "abc"] + + def test_unknown_category_rejected(self, resolve_dirs): + with pytest.raises(ValueError, match="unknown model category"): + resolve_destination_from_tags(["models", "not_a_real_category"]) + + def test_unknown_category_via_slash_joined(self, resolve_dirs): + # First segment of a slash-joined tag must still match a registered category. + with pytest.raises(ValueError, match="unknown model category 'bogus'"): + resolve_destination_from_tags(["models", "bogus/sub/path"]) + + def test_traversal_in_subdir_rejected(self, resolve_dirs): + with pytest.raises(ValueError, match="invalid path component"): + resolve_destination_from_tags(["models", "checkpoints/..", "evil"])