Fix enqueue_enrich race, missing tag seeding in ingest, and stale test mtime

- Make enqueue_enrich atomic by moving start_enrich call inside self._lock,
  preventing pending work from being lost when a scan finishes between the
  start attempt and the queue write.
- Call ensure_tags_exist before batch_insert_seed_assets in
  ingest_existing_file to avoid FK violations on asset_reference_tags.
- Fix test_enrich helper to use real file mtime instead of hardcoded value
  so the optimistic staleness guard in enrich_asset passes correctly.
- Add db_engine_fk fixture (SQLite with PRAGMA foreign_keys=ON) and a
  regression test proving ingest_existing_file seeds Tag rows before
  inserting reference tags.
This commit is contained in:
Luke Mino-Altherr 2026-03-24 15:33:53 -07:00
parent 83997d228d
commit 1b6f43eefb
5 changed files with 79 additions and 7 deletions

View File

@ -217,9 +217,9 @@ class _AssetSeeder:
Returns:
True if started immediately, False if queued for later
"""
if self.start_enrich(roots=roots, compute_hashes=compute_hashes):
return True
with self._lock:
if self.start_enrich(roots=roots, compute_hashes=compute_hashes):
return True
if self._pending_enrich is not None:
existing_roots = set(self._pending_enrich["roots"])
existing_roots.update(roots)

View File

@ -11,6 +11,7 @@ from app.assets.database.queries import (
add_tags_to_reference,
count_active_siblings,
create_stub_asset,
ensure_tags_exist,
fetch_reference_and_asset,
get_asset_by_hash,
get_reference_by_file_path,
@ -222,6 +223,8 @@ def ingest_existing_file(
"mime_type": mime_type,
"job_id": job_id,
}
if tags:
ensure_tags_exist(session, tags)
result = batch_insert_seed_assets(session, [spec], owner_id=owner_id)
session.commit()
return result.won_paths > 0

View File

@ -3,7 +3,7 @@ from pathlib import Path
from unittest.mock import patch
import pytest
from sqlalchemy import create_engine
from sqlalchemy import create_engine, event
from sqlalchemy.orm import Session
from app.assets.database.models import Base
@ -23,6 +23,21 @@ def db_engine():
return engine
@pytest.fixture
def db_engine_fk():
"""In-memory SQLite engine with foreign key enforcement enabled."""
engine = create_engine("sqlite:///:memory:")
@event.listens_for(engine, "connect")
def _set_pragma(dbapi_connection, connection_record):
cursor = dbapi_connection.cursor()
cursor.execute("PRAGMA foreign_keys=ON")
cursor.close()
Base.metadata.create_all(engine)
return engine
@pytest.fixture
def session(db_engine):
"""Session fixture for tests that need direct DB access."""

View File

@ -1,9 +1,11 @@
"""Tests for asset enrichment (mime_type and hash population)."""
import os
from pathlib import Path
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetReference
from app.assets.services.file_utils import get_mtime_ns
from app.assets.scanner import (
ENRICHMENT_HASHED,
ENRICHMENT_METADATA,
@ -20,6 +22,13 @@ def _create_stub_asset(
name: str | None = None,
) -> tuple[Asset, AssetReference]:
"""Create a stub asset with reference for testing enrichment."""
# Use the real file's mtime so the optimistic guard in enrich_asset passes
try:
stat_result = os.stat(file_path, follow_symlinks=True)
mtime_ns = get_mtime_ns(stat_result)
except OSError:
mtime_ns = 1234567890000000000
asset = Asset(
id=asset_id,
hash=None,
@ -35,7 +44,7 @@ def _create_stub_asset(
name=name or f"test-asset-{asset_id}",
owner_id="system",
file_path=file_path,
mtime_ns=1234567890000000000,
mtime_ns=mtime_ns,
enrichment_level=ENRICHMENT_STUB,
)
session.add(ref)

View File

@ -1,12 +1,18 @@
"""Tests for ingest services."""
from contextlib import contextmanager
from pathlib import Path
from unittest.mock import patch
import pytest
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session as SASession, Session
from app.assets.database.models import Asset, AssetReference, Tag
from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, Tag
from app.assets.database.queries import get_reference_tags
from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset
from app.assets.services.ingest import (
_ingest_file_from_path,
_register_existing_asset,
ingest_existing_file,
)
class TestIngestFileFromPath:
@ -235,3 +241,42 @@ class TestRegisterExistingAsset:
assert result.created is True
assert set(result.tags) == {"alpha", "beta"}
class TestIngestExistingFileTagFK:
"""Regression: ingest_existing_file must seed Tag rows before inserting
AssetReferenceTag rows, otherwise FK enforcement raises IntegrityError."""
def test_creates_tag_rows_before_reference_tags(self, db_engine_fk, temp_dir: Path):
"""With PRAGMA foreign_keys=ON, tags must exist in the tags table
before they can be referenced in asset_reference_tags."""
@contextmanager
def _create_session():
with SASession(db_engine_fk) as sess:
yield sess
file_path = temp_dir / "output.png"
file_path.write_bytes(b"image data")
with patch("app.assets.services.ingest.create_session", _create_session), \
patch(
"app.assets.services.ingest.get_name_and_tags_from_asset_path",
return_value=("output.png", ["output"]),
):
result = ingest_existing_file(
abs_path=str(file_path),
extra_tags=["my-job"],
)
assert result is True
with SASession(db_engine_fk) as sess:
tag_names = {t.name for t in sess.query(Tag).all()}
assert "output" in tag_names
assert "my-job" in tag_names
ref_tags = sess.query(AssetReferenceTag).all()
ref_tag_names = {rt.tag_name for rt in ref_tags}
assert "output" in ref_tag_names
assert "my-job" in ref_tag_names