Add pruning of Assets not reachable through the current configs (#12168)

* Not sure about this one, but try removing assets from old sessions.

* Simplify _prune_orphaned_assets: merge functions, use list comprehensions

Amp-Thread-ID: https://ampcode.com/threads/T-019c0917-0dc3-75ab-870d-a32b3fdc1927
Co-authored-by: Amp <amp@ampcode.com>

* Refactor _prune_orphaned_assets for readability

Amp-Thread-ID: https://ampcode.com/threads/T-019c0917-0dc3-75ab-870d-a32b3fdc1927
Co-authored-by: Amp <amp@ampcode.com>

* Add unit tests for pruning

* Add unit tests for _prune_orphaned_assets

Tests cover:

- Orphaned seed assets pruned when file removed

- Seed assets with valid files survive

- Hashed assets not pruned even without file

- Multi-root pruning

- SQL LIKE escape handling for %, _, spaces

Amp-Thread-ID: https://ampcode.com/threads/T-019c0c7a-5c8a-7548-b6c3-823e9829ce74
Co-authored-by: Amp <amp@ampcode.com>

* Ruff fix

---------

Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Alexander Brown
2026-01-29 18:34:14 -08:00
committed by GitHub
parent a999cbcfbc
commit 942b2a6526
2 changed files with 176 additions and 1 deletions

View File

@@ -27,6 +27,7 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
t_start = time.perf_counter()
created = 0
skipped_existing = 0
orphans_pruned = 0
paths: list[str] = []
try:
existing_paths: set[str] = set()
@@ -38,6 +39,11 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
except Exception as e:
logging.exception("fast DB scan failed for %s: %s", r, e)
try:
orphans_pruned = _prune_orphaned_assets(roots)
except Exception as e:
logging.exception("orphan pruning failed: %s", e)
if "models" in roots:
paths.extend(collect_models_files())
if "input" in roots:
@@ -85,15 +91,43 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
finally:
if enable_logging:
logging.info(
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)",
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)",
roots,
time.perf_counter() - t_start,
created,
skipped_existing,
orphans_pruned,
len(paths),
)
def _prune_orphaned_assets(roots: tuple[RootType, ...]) -> int:
"""Prune cache states outside configured prefixes, then delete orphaned seed assets."""
all_prefixes = [os.path.abspath(p) for r in roots for p in prefixes_for_root(r)]
if not all_prefixes:
return 0
def make_prefix_condition(prefix: str):
base = prefix if prefix.endswith(os.sep) else prefix + os.sep
escaped, esc = escape_like_prefix(base)
return AssetCacheState.file_path.like(escaped + "%", escape=esc)
matches_valid_prefix = sqlalchemy.or_(*[make_prefix_condition(p) for p in all_prefixes])
orphan_subq = (
sqlalchemy.select(Asset.id)
.outerjoin(AssetCacheState, AssetCacheState.asset_id == Asset.id)
.where(Asset.hash.is_(None), AssetCacheState.id.is_(None))
).scalar_subquery()
with create_session() as sess:
sess.execute(sqlalchemy.delete(AssetCacheState).where(~matches_valid_prefix))
sess.execute(sqlalchemy.delete(AssetInfo).where(AssetInfo.asset_id.in_(orphan_subq)))
result = sess.execute(sqlalchemy.delete(Asset).where(Asset.id.in_(orphan_subq)))
sess.commit()
return result.rowcount
def _fast_db_consistency_pass(
root: RootType,
*,