From 1b6f43eefb3c624fb91dd781428fb09fc3d6f06b Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Tue, 24 Mar 2026 15:33:53 -0700 Subject: [PATCH] Fix enqueue_enrich race, missing tag seeding in ingest, and stale test mtime - Make enqueue_enrich atomic by moving start_enrich call inside self._lock, preventing pending work from being lost when a scan finishes between the start attempt and the queue write. - Call ensure_tags_exist before batch_insert_seed_assets in ingest_existing_file to avoid FK violations on asset_reference_tags. - Fix test_enrich helper to use real file mtime instead of hardcoded value so the optimistic staleness guard in enrich_asset passes correctly. - Add db_engine_fk fixture (SQLite with PRAGMA foreign_keys=ON) and a regression test proving ingest_existing_file seeds Tag rows before inserting reference tags. --- app/assets/seeder.py | 4 +- app/assets/services/ingest.py | 3 ++ tests-unit/assets_test/services/conftest.py | 17 ++++++- .../assets_test/services/test_enrich.py | 11 +++- .../assets_test/services/test_ingest.py | 51 +++++++++++++++++-- 5 files changed, 79 insertions(+), 7 deletions(-) diff --git a/app/assets/seeder.py b/app/assets/seeder.py index 1ae2d3149..2262928e5 100644 --- a/app/assets/seeder.py +++ b/app/assets/seeder.py @@ -217,9 +217,9 @@ class _AssetSeeder: Returns: True if started immediately, False if queued for later """ - if self.start_enrich(roots=roots, compute_hashes=compute_hashes): - return True with self._lock: + if self.start_enrich(roots=roots, compute_hashes=compute_hashes): + return True if self._pending_enrich is not None: existing_roots = set(self._pending_enrich["roots"]) existing_roots.update(roots) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 7899b68cf..fe97691bb 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -11,6 +11,7 @@ from app.assets.database.queries import ( add_tags_to_reference, count_active_siblings, create_stub_asset, + ensure_tags_exist, fetch_reference_and_asset, get_asset_by_hash, get_reference_by_file_path, @@ -222,6 +223,8 @@ def ingest_existing_file( "mime_type": mime_type, "job_id": job_id, } + if tags: + ensure_tags_exist(session, tags) result = batch_insert_seed_assets(session, [spec], owner_id=owner_id) session.commit() return result.won_paths > 0 diff --git a/tests-unit/assets_test/services/conftest.py b/tests-unit/assets_test/services/conftest.py index 31c763d48..bc0723e61 100644 --- a/tests-unit/assets_test/services/conftest.py +++ b/tests-unit/assets_test/services/conftest.py @@ -3,7 +3,7 @@ from pathlib import Path from unittest.mock import patch import pytest -from sqlalchemy import create_engine +from sqlalchemy import create_engine, event from sqlalchemy.orm import Session from app.assets.database.models import Base @@ -23,6 +23,21 @@ def db_engine(): return engine +@pytest.fixture +def db_engine_fk(): + """In-memory SQLite engine with foreign key enforcement enabled.""" + engine = create_engine("sqlite:///:memory:") + + @event.listens_for(engine, "connect") + def _set_pragma(dbapi_connection, connection_record): + cursor = dbapi_connection.cursor() + cursor.execute("PRAGMA foreign_keys=ON") + cursor.close() + + Base.metadata.create_all(engine) + return engine + + @pytest.fixture def session(db_engine): """Session fixture for tests that need direct DB access.""" diff --git a/tests-unit/assets_test/services/test_enrich.py b/tests-unit/assets_test/services/test_enrich.py index 2bd79a01a..6a6561f7f 100644 --- a/tests-unit/assets_test/services/test_enrich.py +++ b/tests-unit/assets_test/services/test_enrich.py @@ -1,9 +1,11 @@ """Tests for asset enrichment (mime_type and hash population).""" +import os from pathlib import Path from sqlalchemy.orm import Session from app.assets.database.models import Asset, AssetReference +from app.assets.services.file_utils import get_mtime_ns from app.assets.scanner import ( ENRICHMENT_HASHED, ENRICHMENT_METADATA, @@ -20,6 +22,13 @@ def _create_stub_asset( name: str | None = None, ) -> tuple[Asset, AssetReference]: """Create a stub asset with reference for testing enrichment.""" + # Use the real file's mtime so the optimistic guard in enrich_asset passes + try: + stat_result = os.stat(file_path, follow_symlinks=True) + mtime_ns = get_mtime_ns(stat_result) + except OSError: + mtime_ns = 1234567890000000000 + asset = Asset( id=asset_id, hash=None, @@ -35,7 +44,7 @@ def _create_stub_asset( name=name or f"test-asset-{asset_id}", owner_id="system", file_path=file_path, - mtime_ns=1234567890000000000, + mtime_ns=mtime_ns, enrichment_level=ENRICHMENT_STUB, ) session.add(ref) diff --git a/tests-unit/assets_test/services/test_ingest.py b/tests-unit/assets_test/services/test_ingest.py index dbb8441c2..b153f9795 100644 --- a/tests-unit/assets_test/services/test_ingest.py +++ b/tests-unit/assets_test/services/test_ingest.py @@ -1,12 +1,18 @@ """Tests for ingest services.""" +from contextlib import contextmanager from pathlib import Path +from unittest.mock import patch import pytest -from sqlalchemy.orm import Session +from sqlalchemy.orm import Session as SASession, Session -from app.assets.database.models import Asset, AssetReference, Tag +from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, Tag from app.assets.database.queries import get_reference_tags -from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset +from app.assets.services.ingest import ( + _ingest_file_from_path, + _register_existing_asset, + ingest_existing_file, +) class TestIngestFileFromPath: @@ -235,3 +241,42 @@ class TestRegisterExistingAsset: assert result.created is True assert set(result.tags) == {"alpha", "beta"} + + +class TestIngestExistingFileTagFK: + """Regression: ingest_existing_file must seed Tag rows before inserting + AssetReferenceTag rows, otherwise FK enforcement raises IntegrityError.""" + + def test_creates_tag_rows_before_reference_tags(self, db_engine_fk, temp_dir: Path): + """With PRAGMA foreign_keys=ON, tags must exist in the tags table + before they can be referenced in asset_reference_tags.""" + + @contextmanager + def _create_session(): + with SASession(db_engine_fk) as sess: + yield sess + + file_path = temp_dir / "output.png" + file_path.write_bytes(b"image data") + + with patch("app.assets.services.ingest.create_session", _create_session), \ + patch( + "app.assets.services.ingest.get_name_and_tags_from_asset_path", + return_value=("output.png", ["output"]), + ): + result = ingest_existing_file( + abs_path=str(file_path), + extra_tags=["my-job"], + ) + + assert result is True + + with SASession(db_engine_fk) as sess: + tag_names = {t.name for t in sess.query(Tag).all()} + assert "output" in tag_names + assert "my-job" in tag_names + + ref_tags = sess.query(AssetReferenceTag).all() + ref_tag_names = {rt.tag_name for rt in ref_tags} + assert "output" in ref_tag_names + assert "my-job" in ref_tag_names