optimization: make list_unhashed_candidates_under_prefixes run a single query instead of N+1 queries

This commit is contained in:
bigcat88 2025-09-15 12:46:35 +03:00
parent 025fc49b4e
commit 5f187fe6fb
No known key found for this signature in database
GPG Key ID: 1F0BF0EC3CF22721

View File

@@ -408,9 +408,7 @@ async def compute_hash_and_dedup_for_cache_state(
raise raise
async def list_unhashed_candidates_under_prefixes( async def list_unhashed_candidates_under_prefixes(session: AsyncSession, *, prefixes: list[str]) -> list[int]:
session: AsyncSession, *, prefixes: Sequence[str]
) -> list[int]:
if not prefixes: if not prefixes:
return [] return []
@@ -421,23 +419,25 @@ async def list_unhashed_candidates_under_prefixes(
base += os.sep base += os.sep
conds.append(AssetCacheState.file_path.like(base + "%")) conds.append(AssetCacheState.file_path.like(base + "%"))
rows = ( path_filter = sa.or_(*conds) if len(conds) > 1 else conds[0]
await session.execute( if session.bind.dialect.name == "postgresql":
stmt = (
sa.select(AssetCacheState.id) sa.select(AssetCacheState.id)
.join(Asset, Asset.id == AssetCacheState.asset_id) .join(Asset, Asset.id == AssetCacheState.asset_id)
.where(Asset.hash.is_(None)) .where(Asset.hash.is_(None), path_filter)
.where(sa.or_(*conds))
.order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc()) .order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc())
.distinct(AssetCacheState.asset_id)
) )
).scalars().all() else:
seen = set() first_id = sa.func.min(AssetCacheState.id).label("first_id")
result: list[int] = [] stmt = (
for sid in rows: sa.select(first_id)
st = await session.get(AssetCacheState, sid) .join(Asset, Asset.id == AssetCacheState.asset_id)
if st and st.asset_id not in seen: .where(Asset.hash.is_(None), path_filter)
seen.add(st.asset_id) .group_by(AssetCacheState.asset_id)
result.append(sid) .order_by(first_id.asc())
return result )
return [int(x) for x in (await session.execute(stmt)).scalars().all()]
async def list_verify_candidates_under_prefixes( async def list_verify_candidates_under_prefixes(