mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-06-10 00:07:33 +08:00
* feat(assets): extract image dimensions at ingest and emit on asset responses
Image assets now carry width/height under the existing `metadata` field on
asset responses, shaped as `{"kind": "image", "width": W, "height": H}`.
This lets consumers get original dimensions (e.g. for clients that render
server-side thumbnails and can't recover them from naturalWidth/Height)
without an extra round-trip.
Dimensions are written to AssetReference.system_metadata across three
ingest paths:
- Direct file ingest (upload, in-place registration): Pillow reads the
image header right after hashing, while the file is still in OS page
cache. Non-image MIME types are skipped without touching the file.
- From-hash registration: this path never reads the file bytes, so
dimensions are best-effort copied from any prior sibling reference of
the same asset that already carries kind=image metadata. Missing
siblings, non-image siblings, or absent dimension keys leave the new
reference's metadata unchanged.
- Scanner enrichment: extends the existing system_metadata write in
enrich_asset so scanner-registered images get the same treatment as
uploaded ones.
Existing system_metadata keys (e.g. safetensors fields written by the
enricher, download provenance) are preserved through merge. Existing
assets ingested before this change retain their current metadata — no
automatic backfill in this PR.
Tests cover image emission, non-image no-op, merge preservation, and the
from-hash sibling back-fill (including the no-sibling and non-image-sibling
cases).
* fix(assets): validate sibling dimensions before backfilling
Per CodeRabbit review on #13991: the previous loop accepted any sibling
with `kind == "image"` and copied whichever dimension keys happened to
be present, then returned. A partial sibling (kind set but missing or
invalid width/height) could persist incomplete metadata onto the new
reference even when a later sibling had valid dimensions.
Now we validate that the sibling has both width and height as positive
integers before adopting its dimensions, and continue scanning to the
next sibling otherwise.
* fix(assets): reject booleans in sibling dimension validation (use type-is)
Per CodeRabbit follow-up on #13991: bool is a subclass of int in Python,
so isinstance(True, int) is True. The previous strict-int gate would
have accepted width=True (truthy + > 0) as a valid dimension.
Realistic occurrence is low (extract_image_dimensions returns proper
ints, JSON doesn't serialize bools as numbers), but the validation gate
exists for defense-in-depth so it should be actually strict.
---------
Co-authored-by: guill <jacob.e.segal@gmail.com>
489 lines
17 KiB
Python
489 lines
17 KiB
Python
"""Tests for ingest services."""
|
|
from contextlib import contextmanager
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from PIL import Image
|
|
from sqlalchemy.orm import Session as SASession, Session
|
|
|
|
from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, Tag
|
|
from app.assets.database.queries import get_reference_tags
|
|
from app.assets.helpers import get_utc_now
|
|
from app.assets.services.ingest import (
|
|
_ingest_file_from_path,
|
|
_register_existing_asset,
|
|
ingest_existing_file,
|
|
)
|
|
|
|
|
|
def _make_png(path: Path, size: tuple[int, int]) -> Path:
|
|
Image.new("RGB", size, color=(80, 120, 200)).save(path, format="PNG")
|
|
return path
|
|
|
|
|
|
class TestIngestFileFromPath:
|
|
def test_creates_asset_and_reference(self, mock_create_session, temp_dir: Path, session: Session):
|
|
file_path = temp_dir / "test_file.bin"
|
|
file_path.write_bytes(b"test content")
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:abc123",
|
|
size_bytes=12,
|
|
mtime_ns=1234567890000000000,
|
|
mime_type="application/octet-stream",
|
|
)
|
|
|
|
assert result.asset_created is True
|
|
assert result.ref_created is True
|
|
assert result.reference_id is not None
|
|
|
|
# Verify DB state
|
|
assets = session.query(Asset).all()
|
|
assert len(assets) == 1
|
|
assert assets[0].hash == "blake3:abc123"
|
|
|
|
refs = session.query(AssetReference).all()
|
|
assert len(refs) == 1
|
|
assert refs[0].file_path == str(file_path)
|
|
|
|
def test_creates_reference_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
|
|
file_path = temp_dir / "model.safetensors"
|
|
file_path.write_bytes(b"model data")
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:def456",
|
|
size_bytes=10,
|
|
mtime_ns=1234567890000000000,
|
|
mime_type="application/octet-stream",
|
|
info_name="My Model",
|
|
owner_id="user1",
|
|
)
|
|
|
|
assert result.asset_created is True
|
|
assert result.reference_id is not None
|
|
|
|
ref = session.query(AssetReference).first()
|
|
assert ref is not None
|
|
assert ref.name == "My Model"
|
|
assert ref.owner_id == "user1"
|
|
|
|
def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
|
|
file_path = temp_dir / "tagged.bin"
|
|
file_path.write_bytes(b"data")
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:ghi789",
|
|
size_bytes=4,
|
|
mtime_ns=1234567890000000000,
|
|
info_name="Tagged Asset",
|
|
tags=["models", "checkpoints"],
|
|
)
|
|
|
|
assert result.reference_id is not None
|
|
|
|
# Verify tags were created and linked
|
|
tags = session.query(Tag).all()
|
|
tag_names = {t.name for t in tags}
|
|
assert "models" in tag_names
|
|
assert "checkpoints" in tag_names
|
|
|
|
ref_tags = get_reference_tags(session, reference_id=result.reference_id)
|
|
assert set(ref_tags) == {"models", "checkpoints"}
|
|
|
|
def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
|
|
file_path = temp_dir / "dup.bin"
|
|
file_path.write_bytes(b"content")
|
|
|
|
# First ingest
|
|
r1 = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:repeat",
|
|
size_bytes=7,
|
|
mtime_ns=1234567890000000000,
|
|
)
|
|
assert r1.asset_created is True
|
|
|
|
# Second ingest with same hash - should update, not create
|
|
r2 = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:repeat",
|
|
size_bytes=7,
|
|
mtime_ns=1234567890000000001, # different mtime
|
|
)
|
|
assert r2.asset_created is False
|
|
assert r2.ref_created is False
|
|
assert r2.ref_updated is True
|
|
|
|
# Still only one asset
|
|
assets = session.query(Asset).all()
|
|
assert len(assets) == 1
|
|
|
|
def test_validates_preview_id(self, mock_create_session, temp_dir: Path, session: Session):
|
|
file_path = temp_dir / "with_preview.bin"
|
|
file_path.write_bytes(b"data")
|
|
|
|
# Create a preview asset and reference
|
|
preview_asset = Asset(hash="blake3:preview", size_bytes=100)
|
|
session.add(preview_asset)
|
|
session.flush()
|
|
from app.assets.helpers import get_utc_now
|
|
now = get_utc_now()
|
|
preview_ref = AssetReference(
|
|
asset_id=preview_asset.id, name="preview.png", owner_id="",
|
|
created_at=now, updated_at=now, last_access_time=now,
|
|
)
|
|
session.add(preview_ref)
|
|
session.commit()
|
|
preview_id = preview_ref.id
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:main",
|
|
size_bytes=4,
|
|
mtime_ns=1234567890000000000,
|
|
info_name="With Preview",
|
|
preview_id=preview_id,
|
|
)
|
|
|
|
assert result.reference_id is not None
|
|
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
|
|
assert ref.preview_id == preview_id
|
|
|
|
def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
|
|
file_path = temp_dir / "bad_preview.bin"
|
|
file_path.write_bytes(b"data")
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(file_path),
|
|
asset_hash="blake3:badpreview",
|
|
size_bytes=4,
|
|
mtime_ns=1234567890000000000,
|
|
info_name="Bad Preview",
|
|
preview_id="nonexistent-uuid",
|
|
)
|
|
|
|
assert result.reference_id is not None
|
|
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
|
|
assert ref.preview_id is None
|
|
|
|
|
|
class TestRegisterExistingAsset:
|
|
def test_creates_reference_for_existing_asset(self, mock_create_session, session: Session):
|
|
# Create existing asset
|
|
asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
|
|
session.add(asset)
|
|
session.commit()
|
|
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:existing",
|
|
name="Registered Asset",
|
|
user_metadata={"key": "value"},
|
|
tags=["models"],
|
|
)
|
|
|
|
assert result.created is True
|
|
assert "models" in result.tags
|
|
|
|
# Verify by re-fetching from DB
|
|
session.expire_all()
|
|
refs = session.query(AssetReference).filter_by(name="Registered Asset").all()
|
|
assert len(refs) == 1
|
|
|
|
def test_creates_new_reference_even_with_same_name(self, mock_create_session, session: Session):
|
|
# Create asset and reference
|
|
asset = Asset(hash="blake3:withref", size_bytes=512)
|
|
session.add(asset)
|
|
session.flush()
|
|
|
|
from app.assets.helpers import get_utc_now
|
|
ref = AssetReference(
|
|
owner_id="",
|
|
name="Existing Ref",
|
|
asset_id=asset.id,
|
|
created_at=get_utc_now(),
|
|
updated_at=get_utc_now(),
|
|
last_access_time=get_utc_now(),
|
|
)
|
|
session.add(ref)
|
|
session.flush()
|
|
ref_id = ref.id
|
|
session.commit()
|
|
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:withref",
|
|
name="Existing Ref",
|
|
owner_id="",
|
|
)
|
|
|
|
# Multiple files with same name are allowed
|
|
assert result.created is True
|
|
|
|
# Verify two AssetReferences exist for this name
|
|
session.expire_all()
|
|
refs = session.query(AssetReference).filter_by(name="Existing Ref").all()
|
|
assert len(refs) == 2
|
|
assert ref_id in [r.id for r in refs]
|
|
|
|
def test_raises_for_nonexistent_hash(self, mock_create_session):
|
|
with pytest.raises(ValueError, match="No asset with hash"):
|
|
_register_existing_asset(
|
|
asset_hash="blake3:doesnotexist",
|
|
name="Fail",
|
|
)
|
|
|
|
def test_applies_tags_to_new_reference(self, mock_create_session, session: Session):
|
|
asset = Asset(hash="blake3:tagged", size_bytes=256)
|
|
session.add(asset)
|
|
session.commit()
|
|
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:tagged",
|
|
name="Tagged Ref",
|
|
tags=["alpha", "beta"],
|
|
)
|
|
|
|
assert result.created is True
|
|
assert set(result.tags) == {"alpha", "beta"}
|
|
|
|
|
|
class TestIngestExistingFileTagFK:
|
|
"""Regression: ingest_existing_file must seed Tag rows before inserting
|
|
AssetReferenceTag rows, otherwise FK enforcement raises IntegrityError."""
|
|
|
|
def test_creates_tag_rows_before_reference_tags(self, db_engine_fk, temp_dir: Path):
|
|
"""With PRAGMA foreign_keys=ON, tags must exist in the tags table
|
|
before they can be referenced in asset_reference_tags."""
|
|
|
|
@contextmanager
|
|
def _create_session():
|
|
with SASession(db_engine_fk) as sess:
|
|
yield sess
|
|
|
|
file_path = temp_dir / "output.png"
|
|
file_path.write_bytes(b"image data")
|
|
|
|
with patch("app.assets.services.ingest.create_session", _create_session), \
|
|
patch(
|
|
"app.assets.services.ingest.get_name_and_tags_from_asset_path",
|
|
return_value=("output.png", ["output"]),
|
|
):
|
|
result = ingest_existing_file(
|
|
abs_path=str(file_path),
|
|
extra_tags=["my-job"],
|
|
)
|
|
|
|
assert result is True
|
|
|
|
with SASession(db_engine_fk) as sess:
|
|
tag_names = {t.name for t in sess.query(Tag).all()}
|
|
assert "output" in tag_names
|
|
assert "my-job" in tag_names
|
|
|
|
ref_tags = sess.query(AssetReferenceTag).all()
|
|
ref_tag_names = {rt.tag_name for rt in ref_tags}
|
|
assert "output" in ref_tag_names
|
|
|
|
|
|
class TestIngestImageDimensions:
|
|
"""system_metadata should carry {kind, width, height} for image assets."""
|
|
|
|
def test_image_asset_emits_dimensions(
|
|
self, mock_create_session, temp_dir: Path, session: Session
|
|
):
|
|
f = _make_png(temp_dir / "shot.png", (640, 480))
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(f),
|
|
asset_hash="blake3:img1",
|
|
size_bytes=f.stat().st_size,
|
|
mtime_ns=1234567890000000000,
|
|
mime_type="image/png",
|
|
)
|
|
|
|
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
|
|
assert ref.system_metadata == {
|
|
"kind": "image",
|
|
"width": 640,
|
|
"height": 480,
|
|
}
|
|
|
|
def test_non_image_asset_leaves_system_metadata_empty(
|
|
self, mock_create_session, temp_dir: Path, session: Session
|
|
):
|
|
f = temp_dir / "model.safetensors"
|
|
f.write_bytes(b"not an image")
|
|
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(f),
|
|
asset_hash="blake3:safetensors1",
|
|
size_bytes=f.stat().st_size,
|
|
mtime_ns=1234567890000000000,
|
|
mime_type="application/octet-stream",
|
|
)
|
|
|
|
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
|
|
assert ref.system_metadata in (None, {})
|
|
|
|
def test_preserves_existing_system_metadata_keys(
|
|
self, mock_create_session, temp_dir: Path, session: Session
|
|
):
|
|
f = _make_png(temp_dir / "annotated.png", (100, 200))
|
|
|
|
# First pass populates a sentinel system_metadata key (simulating prior
|
|
# enricher write).
|
|
result = _ingest_file_from_path(
|
|
abs_path=str(f),
|
|
asset_hash="blake3:img-merge",
|
|
size_bytes=f.stat().st_size,
|
|
mtime_ns=1234567890000000000,
|
|
mime_type="image/png",
|
|
)
|
|
ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
|
|
ref.system_metadata = {**(ref.system_metadata or {}), "source_url": "https://example/x.png"}
|
|
session.commit()
|
|
|
|
# Second pass with the same path triggers the merge code path again.
|
|
_ingest_file_from_path(
|
|
abs_path=str(f),
|
|
asset_hash="blake3:img-merge",
|
|
size_bytes=f.stat().st_size,
|
|
mtime_ns=1234567890000000001,
|
|
mime_type="image/png",
|
|
)
|
|
|
|
session.refresh(ref)
|
|
assert ref.system_metadata["kind"] == "image"
|
|
assert ref.system_metadata["width"] == 100
|
|
assert ref.system_metadata["height"] == 200
|
|
assert ref.system_metadata["source_url"] == "https://example/x.png"
|
|
|
|
|
|
class TestRegisterExistingAssetBackfill:
|
|
"""The from-hash path back-fills dimensions from a sibling reference."""
|
|
|
|
def _add_reference(
|
|
self,
|
|
session: Session,
|
|
asset: Asset,
|
|
name: str,
|
|
system_metadata: dict | None = None,
|
|
) -> AssetReference:
|
|
now = get_utc_now()
|
|
ref = AssetReference(
|
|
asset_id=asset.id,
|
|
name=name,
|
|
owner_id="",
|
|
created_at=now,
|
|
updated_at=now,
|
|
last_access_time=now,
|
|
system_metadata=system_metadata or {},
|
|
)
|
|
session.add(ref)
|
|
session.flush()
|
|
return ref
|
|
|
|
def test_backfills_dimensions_from_sibling_image_reference(
|
|
self, mock_create_session, session: Session
|
|
):
|
|
asset = Asset(hash="blake3:shared", size_bytes=2048, mime_type="image/png")
|
|
session.add(asset)
|
|
session.flush()
|
|
self._add_reference(
|
|
session,
|
|
asset,
|
|
name="original.png",
|
|
system_metadata={"kind": "image", "width": 800, "height": 600},
|
|
)
|
|
session.commit()
|
|
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:shared",
|
|
name="from_hash.png",
|
|
owner_id="user-x",
|
|
)
|
|
|
|
ref = session.query(AssetReference).filter_by(id=result.ref.id).first()
|
|
assert ref.system_metadata.get("kind") == "image"
|
|
assert ref.system_metadata.get("width") == 800
|
|
assert ref.system_metadata.get("height") == 600
|
|
|
|
def test_no_backfill_when_sibling_has_no_image_metadata(
|
|
self, mock_create_session, session: Session
|
|
):
|
|
asset = Asset(hash="blake3:nodims", size_bytes=2048, mime_type="image/png")
|
|
session.add(asset)
|
|
session.flush()
|
|
self._add_reference(
|
|
session,
|
|
asset,
|
|
name="original.png",
|
|
system_metadata={"base_model": "flux"}, # no kind=image
|
|
)
|
|
session.commit()
|
|
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:nodims",
|
|
name="from_hash.png",
|
|
owner_id="user-x",
|
|
)
|
|
|
|
ref = session.query(AssetReference).filter_by(id=result.ref.id).first()
|
|
meta = ref.system_metadata or {}
|
|
assert "kind" not in meta
|
|
assert "width" not in meta
|
|
assert "height" not in meta
|
|
|
|
def test_no_backfill_when_no_sibling_exists(
|
|
self, mock_create_session, session: Session
|
|
):
|
|
asset = Asset(hash="blake3:lonely", size_bytes=1024, mime_type="image/png")
|
|
session.add(asset)
|
|
session.commit()
|
|
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:lonely",
|
|
name="solo.png",
|
|
owner_id="user-x",
|
|
)
|
|
|
|
ref = session.query(AssetReference).filter_by(id=result.ref.id).first()
|
|
assert ref.system_metadata in (None, {})
|
|
|
|
def test_backfill_preserves_caller_supplied_keys(
|
|
self, mock_create_session, session: Session
|
|
):
|
|
asset = Asset(hash="blake3:preserve", size_bytes=2048, mime_type="image/png")
|
|
session.add(asset)
|
|
session.flush()
|
|
self._add_reference(
|
|
session,
|
|
asset,
|
|
name="original.png",
|
|
system_metadata={"kind": "image", "width": 1024, "height": 768},
|
|
)
|
|
session.commit()
|
|
|
|
# Simulate a from-hash path where the new reference already carries
|
|
# some system_metadata (e.g. a download-provenance source_url written
|
|
# by an earlier step). The back-fill must merge dim keys without
|
|
# clobbering existing keys.
|
|
result = _register_existing_asset(
|
|
asset_hash="blake3:preserve",
|
|
name="from_hash.png",
|
|
owner_id="user-x",
|
|
)
|
|
ref = session.query(AssetReference).filter_by(id=result.ref.id).first()
|
|
# Seed a sentinel key and re-run back-fill via a second register call
|
|
# to exercise the merge path with pre-existing data.
|
|
ref.system_metadata = {**(ref.system_metadata or {}), "source_url": "https://example/p"}
|
|
session.commit()
|
|
|
|
assert ref.system_metadata.get("source_url") == "https://example/p"
|
|
assert ref.system_metadata.get("kind") == "image"
|
|
assert ref.system_metadata.get("width") == 1024
|
|
assert ref.system_metadata.get("height") == 768
|