fix(assets): lowercase subpath, parse slash-joined upload tags, stagger added_at

Three bugs surfaced by an end-to-end smoke test of the read+write
round-trip; all in this PR's scope.

1. FK violation on uppercase paths
   get_name_and_tags_from_asset_path was preserving case on the
   subpath (e.g. "diffusers/Kolors/text_encoder"). ensure_tags_exist
   lowercases via normalize_tags before inserting into the tags
   table, so the asset_reference_tags.tag_name FK to tags.name
   failed for any path containing uppercase letters — including
   the diffusers case the PR was designed to support.

   Fix: lowercase the slash-joined subpath in
   get_name_and_tags_from_asset_path to match the canonicalization
   ensure_tags_exist applies. Providers keyed on original-case
   subpaths need to normalize their lookup key to lowercase.

2. resolve_destination_from_tags rejected the new tag shape
   The inverse function only accepted the legacy one-tag-per-dir
   shape (["models", "diffusers", "Kolors", "text_encoder"]).
   An upload using the slash-joined shape returned by /api/assets
   raised "unknown model category" or "invalid path component".

   Fix: pre-split every entry after tags[0] on "/" so both shapes
   resolve identically. For models, the first expanded segment is
   the category and the rest are subdirs; for input/output the
   full expansion becomes the subdirs.

3. Within-batch tag order was lost
   bulk_ingest wrote every tag in a single batch with the same
   added_at = current_time. The retrieval ORDER BY added_at, tag_name
   then fell back to the tag_name tiebreaker, sorting the path-derived
   pair alphabetically — putting "checkpoints/..." ahead of "models"
   since "c" < "m". The tags[0] = root contract was lost on bulk-
   ingested rows.

   Fix: stagger added_at by microseconds per tag index within a
   reference so the retrieval order matches the input list order.
   Path-derived tags now consistently land in position-0 = root,
   position-1 = subpath.

Tests
- TestGetNameAndTagsFromAssetPath updated: subpath is now lowercase.
- New TestResolveDestinationFromTags covers both tag shapes, the
  unknown-category case for slash-joined input, traversal rejection,
  and input/output paths.
- Full suite: 333 passed, 10 pre-existing skipped.
This commit is contained in:
Matt Miller 2026-05-19 20:30:04 -07:00
parent 36f9a6fdef
commit 3ffc49aa0e
3 changed files with 144 additions and 19 deletions

View File

@ -3,7 +3,7 @@ from __future__ import annotations
import os import os
import uuid import uuid
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Any, TypedDict from typing import TYPE_CHECKING, Any, TypedDict
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@ -233,13 +233,19 @@ def batch_insert_seed_assets(
if ref_id not in inserted_ref_ids: if ref_id not in inserted_ref_ids:
continue continue
for tag in ref_data["tags"]: # Stagger added_at by microsecond per tag within a reference so
# the retrieval ORDER BY added_at preserves the input list order
# (the path-derived root category stays at position 0). Without
# this, every tag in a bulk-insert batch shares current_time and
# the tag_name tiebreaker sorts them alphabetically — putting the
# subpath tag ahead of "models" since "c"/"d"/"l" < "m".
for tag_idx, tag in enumerate(ref_data["tags"]):
tag_rows.append( tag_rows.append(
{ {
"asset_reference_id": ref_id, "asset_reference_id": ref_id,
"tag_name": tag, "tag_name": tag,
"origin": "automatic", "origin": "automatic",
"added_at": current_time, "added_at": current_time + timedelta(microseconds=tag_idx),
} }
) )

View File

@ -26,27 +26,47 @@ def get_comfy_models_folders() -> list[tuple[str, list[str]]]:
def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]: def resolve_destination_from_tags(tags: list[str]) -> tuple[str, list[str]]:
"""Validates and maps tags -> (base_dir, subdirs_for_fs)""" """Validates and maps tags -> (base_dir, subdirs_for_fs).
Accepts both the legacy one-tag-per-directory shape
(``["models", "diffusers", "Kolors", "text_encoder"]``) and the
slash-joined shape emitted by :func:`get_name_and_tags_from_asset_path`
(``["models", "diffusers/Kolors/text_encoder"]``). Either shape is
split into a category (for ``models``) plus subdirs, with the same
safety checks applied to each component.
"""
if not tags: if not tags:
raise ValueError("tags must not be empty") raise ValueError("tags must not be empty")
root = tags[0].lower() root = tags[0].lower()
# Expand any slash-joined entries into individual path components so
# the rest of the function can treat both tag shapes uniformly. Each
# component is also stripped, so " a / b " behaves like ["a", "b"].
expanded: list[str] = []
for t in tags[1:]:
for part in str(t).split("/"):
part = part.strip()
if part:
expanded.append(part)
if root == "models": if root == "models":
if len(tags) < 2: if not expanded:
raise ValueError("at least two tags required for model asset") raise ValueError("at least two tags required for model asset")
category = expanded[0]
try: try:
bases = folder_paths.folder_names_and_paths[tags[1]][0] bases = folder_paths.folder_names_and_paths[category][0]
except KeyError: except KeyError:
raise ValueError(f"unknown model category '{tags[1]}'") raise ValueError(f"unknown model category '{category}'")
if not bases: if not bases:
raise ValueError(f"no base path configured for category '{tags[1]}'") raise ValueError(f"no base path configured for category '{category}'")
base_dir = os.path.abspath(bases[0]) base_dir = os.path.abspath(bases[0])
raw_subdirs = tags[2:] raw_subdirs = expanded[1:]
elif root == "input": elif root == "input":
base_dir = os.path.abspath(folder_paths.get_input_directory()) base_dir = os.path.abspath(folder_paths.get_input_directory())
raw_subdirs = tags[1:] raw_subdirs = expanded
elif root == "output": elif root == "output":
base_dir = os.path.abspath(folder_paths.get_output_directory()) base_dir = os.path.abspath(folder_paths.get_output_directory())
raw_subdirs = tags[1:] raw_subdirs = expanded
else: else:
raise ValueError(f"unknown root tag '{tags[0]}'; expected 'models', 'input', or 'output'") raise ValueError(f"unknown root tag '{tags[0]}'; expected 'models', 'input', or 'output'")
_sep_chars = frozenset(("/", "\\", os.sep)) _sep_chars = frozenset(("/", "\\", os.sep))
@ -166,11 +186,12 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
consumers can use ``tags[1]`` as a stable category identifier that consumers can use ``tags[1]`` as a stable category identifier that
survives nested directory layouts (e.g. diffusers components). survives nested directory layouts (e.g. diffusers components).
Case is preserved on the subpath so that consumers can look up Both the root category and the subpath are lowercased to match the
providers keyed on the original-case path (e.g. canonicalization applied by :func:`ensure_tags_exist`, otherwise the
``"diffusers/Kolors/text_encoder"``). The root category is always ``asset_reference_tags.tag_name`` FK to the lowercased
lowercase by construction in ``tags.name`` would fail for any path containing uppercase letters.
:func:`get_asset_category_and_relative_path`. Consumers that need to look up providers keyed on original-case
paths should normalize their lookup key to lowercase.
Raises: Raises:
ValueError: path does not belong to any known root. ValueError: path does not belong to any known root.
@ -182,5 +203,5 @@ def get_name_and_tags_from_asset_path(file_path: str) -> tuple[str, list[str]]:
] ]
tags = [root_category] tags = [root_category]
if parent_parts: if parent_parts:
tags.append("/".join(parent_parts)) tags.append("/".join(parent_parts).lower())
return p.name, list(dict.fromkeys(t.strip() for t in tags if t.strip())) return p.name, list(dict.fromkeys(t.strip() for t in tags if t.strip()))

View File

@ -9,6 +9,7 @@ import pytest
from app.assets.services.path_utils import ( from app.assets.services.path_utils import (
get_asset_category_and_relative_path, get_asset_category_and_relative_path,
get_name_and_tags_from_asset_path, get_name_and_tags_from_asset_path,
resolve_destination_from_tags,
) )
@ -159,7 +160,13 @@ class TestGetNameAndTagsFromAssetPath:
def test_diffusers_nested_subpath_slash_joined(self, fake_dirs_multi_bucket): def test_diffusers_nested_subpath_slash_joined(self, fake_dirs_multi_bucket):
"""Diffusers components live in nested directories — the full subpath """Diffusers components live in nested directories — the full subpath
must collapse into one tag so consumers can look up the model category must collapse into one tag so consumers can look up the model category
via tags[1] regardless of nesting depth.""" via tags[1] regardless of nesting depth.
The subpath is lowercased to match the canonicalization
:func:`ensure_tags_exist` applies on the write side; without that,
the asset_reference_tags.tag_name FK to tags.name would fail for
any path containing uppercase letters.
"""
nested = ( nested = (
fake_dirs_multi_bucket["diffusers"] fake_dirs_multi_bucket["diffusers"]
/ "Kolors" / "Kolors"
@ -170,7 +177,7 @@ class TestGetNameAndTagsFromAssetPath:
f.touch() f.touch()
name, tags = get_name_and_tags_from_asset_path(str(f)) name, tags = get_name_and_tags_from_asset_path(str(f))
assert name == "model.safetensors" assert name == "model.safetensors"
assert tags == ["models", "diffusers/Kolors/text_encoder"] assert tags == ["models", "diffusers/kolors/text_encoder"]
def test_deep_lora_user_subpath_slash_joined(self, fake_dirs_multi_bucket): def test_deep_lora_user_subpath_slash_joined(self, fake_dirs_multi_bucket):
"""User-created subdirectories under a model bucket also collapse to a """User-created subdirectories under a model bucket also collapse to a
@ -187,3 +194,94 @@ class TestGetNameAndTagsFromAssetPath:
name, tags = get_name_and_tags_from_asset_path(str(f)) name, tags = get_name_and_tags_from_asset_path(str(f))
assert name == "v0001.safetensors" assert name == "v0001.safetensors"
assert tags == ["models", "loras/my/custom/path"] assert tags == ["models", "loras/my/custom/path"]
class TestResolveDestinationFromTags:
"""resolve_destination_from_tags must accept both the legacy
one-tag-per-directory shape and the new slash-joined shape so that an
upload using the tags it just read back from /api/assets round-trips
to the right on-disk destination.
"""
@pytest.fixture
def resolve_dirs(self):
with tempfile.TemporaryDirectory() as root:
root_path = Path(root)
input_dir = root_path / "input"
output_dir = root_path / "output"
checkpoints_dir = root_path / "models" / "checkpoints"
diffusers_dir = root_path / "models" / "diffusers"
loras_dir = root_path / "models" / "loras"
for d in (input_dir, output_dir, checkpoints_dir, diffusers_dir, loras_dir):
d.mkdir(parents=True)
with patch("app.assets.services.path_utils.folder_paths") as mock_fp:
mock_fp.get_input_directory.return_value = str(input_dir)
mock_fp.get_output_directory.return_value = str(output_dir)
mock_fp.folder_names_and_paths = {
"checkpoints": ([str(checkpoints_dir)], None),
"diffusers": ([str(diffusers_dir)], None),
"loras": ([str(loras_dir)], None),
}
yield {
"input": input_dir,
"output": output_dir,
"checkpoints": checkpoints_dir,
"diffusers": diffusers_dir,
"loras": loras_dir,
}
def test_models_flat_category(self, resolve_dirs):
base, subdirs = resolve_destination_from_tags(["models", "checkpoints"])
assert base == str(resolve_dirs["checkpoints"])
assert subdirs == []
def test_models_slash_joined_new_shape(self, resolve_dirs):
# The shape get_name_and_tags_from_asset_path now emits.
base, subdirs = resolve_destination_from_tags(
["models", "diffusers/kolors/text_encoder"]
)
assert base == str(resolve_dirs["diffusers"])
assert subdirs == ["kolors", "text_encoder"]
def test_models_legacy_one_tag_per_dir(self, resolve_dirs):
# The legacy shape must still resolve identically.
base, subdirs = resolve_destination_from_tags(
["models", "diffusers", "kolors", "text_encoder"]
)
assert base == str(resolve_dirs["diffusers"])
assert subdirs == ["kolors", "text_encoder"]
def test_models_loras_slash_joined(self, resolve_dirs):
base, subdirs = resolve_destination_from_tags(
["models", "loras/my/custom/path"]
)
assert base == str(resolve_dirs["loras"])
assert subdirs == ["my", "custom", "path"]
def test_input_no_subdir(self, resolve_dirs):
base, subdirs = resolve_destination_from_tags(["input"])
assert base == str(resolve_dirs["input"])
assert subdirs == []
def test_input_slash_joined_subdir(self, resolve_dirs):
base, subdirs = resolve_destination_from_tags(["input", "portraits/2026"])
assert base == str(resolve_dirs["input"])
assert subdirs == ["portraits", "2026"]
def test_output_slash_joined_subdir(self, resolve_dirs):
base, subdirs = resolve_destination_from_tags(["output", "runs/abc"])
assert base == str(resolve_dirs["output"])
assert subdirs == ["runs", "abc"]
def test_unknown_category_rejected(self, resolve_dirs):
with pytest.raises(ValueError, match="unknown model category"):
resolve_destination_from_tags(["models", "not_a_real_category"])
def test_unknown_category_via_slash_joined(self, resolve_dirs):
# First segment of a slash-joined tag must still match a registered category.
with pytest.raises(ValueError, match="unknown model category 'bogus'"):
resolve_destination_from_tags(["models", "bogus/sub/path"])
def test_traversal_in_subdir_rejected(self, resolve_dirs):
with pytest.raises(ValueError, match="invalid path component"):
resolve_destination_from_tags(["models", "checkpoints/..", "evil"])