diff --git a/alembic_db/versions/0002_add_is_missing_to_cache_state.py b/alembic_db/versions/0002_add_is_missing_to_cache_state.py new file mode 100644 index 000000000..04377bebc --- /dev/null +++ b/alembic_db/versions/0002_add_is_missing_to_cache_state.py @@ -0,0 +1,37 @@ +""" +Add is_missing column to asset_cache_state for non-destructive soft-delete + +Revision ID: 0002_add_is_missing +Revises: 0001_assets +Create Date: 2025-02-05 00:00:00 +""" + +from alembic import op +import sqlalchemy as sa + +revision = "0002_add_is_missing" +down_revision = "0001_assets" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "asset_cache_state", + sa.Column( + "is_missing", + sa.Boolean(), + nullable=False, + server_default=sa.text("false"), + ), + ) + op.create_index( + "ix_asset_cache_state_is_missing", + "asset_cache_state", + ["is_missing"], + ) + + +def downgrade() -> None: + op.drop_index("ix_asset_cache_state_is_missing", table_name="asset_cache_state") + op.drop_column("asset_cache_state", "is_missing") diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index af29b5f22..93db534e8 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -698,20 +698,21 @@ async def cancel_seed(request: web.Request) -> web.Response: @ROUTES.post("/api/assets/prune") -async def prune_orphans(request: web.Request) -> web.Response: - """Prune orphaned assets that no longer exist on the filesystem. +async def mark_missing_assets(request: web.Request) -> web.Response: + """Mark assets as missing when their cache states point to files outside all known root prefixes. - This removes assets whose cache states point to files outside all known - root prefixes (models, input, output). + This is a non-destructive soft-delete operation. Assets and their metadata + are preserved, but cache states are flagged as missing. They can be restored + if the file reappears in a future scan. 
Returns: - 200 OK with count of pruned assets + 200 OK with count of marked assets 409 Conflict if a scan is currently running """ - pruned = asset_seeder.prune_orphans() - if pruned == 0 and asset_seeder.get_status().state.value != "IDLE": + marked = asset_seeder.mark_missing_outside_prefixes() + if marked == 0 and asset_seeder.get_status().state.value != "IDLE": return web.json_response( - {"status": "scan_running", "pruned": 0}, + {"status": "scan_running", "marked": 0}, status=409, ) - return web.json_response({"status": "completed", "pruned": pruned}, status=200) + return web.json_response({"status": "completed", "marked": marked}, status=200) diff --git a/app/assets/database/models.py b/app/assets/database/models.py index 67914d9df..3338ce789 100644 --- a/app/assets/database/models.py +++ b/app/assets/database/models.py @@ -83,12 +83,14 @@ class AssetCacheState(Base): file_path: Mapped[str] = mapped_column(Text, nullable=False) mtime_ns: Mapped[int | None] = mapped_column(BigInteger, nullable=True) needs_verify: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + is_missing: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) asset: Mapped[Asset] = relationship(back_populates="cache_states") __table_args__ = ( Index("ix_asset_cache_state_file_path", "file_path"), Index("ix_asset_cache_state_asset_id", "asset_id"), + Index("ix_asset_cache_state_is_missing", "is_missing"), CheckConstraint( "(mtime_ns IS NULL) OR (mtime_ns >= 0)", name="ck_acs_mtime_nonneg" ), diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py index 4edaed318..ea526e549 100644 --- a/app/assets/database/queries/__init__.py +++ b/app/assets/database/queries/__init__.py @@ -28,12 +28,13 @@ from app.assets.database.queries.cache_state import ( bulk_set_needs_verify, delete_assets_by_ids, delete_cache_states_by_ids, - delete_cache_states_outside_prefixes, delete_orphaned_seed_asset, 
get_cache_states_by_paths_and_asset_ids, get_cache_states_for_prefixes, - get_orphaned_seed_asset_ids, + get_unreferenced_unhashed_asset_ids, list_cache_states_by_asset_id, + mark_cache_states_missing_outside_prefixes, + restore_cache_states_by_paths, upsert_cache_state, ) from app.assets.database.queries.tags import ( @@ -68,7 +69,6 @@ __all__ = [ "delete_asset_info_by_id", "delete_assets_by_ids", "delete_cache_states_by_ids", - "delete_cache_states_outside_prefixes", "delete_orphaned_seed_asset", "ensure_tags_exist", "fetch_asset_info_and_asset", @@ -80,13 +80,15 @@ __all__ = [ "get_cache_states_by_paths_and_asset_ids", "get_cache_states_for_prefixes", "get_or_create_asset_info", - "get_orphaned_seed_asset_ids", + "get_unreferenced_unhashed_asset_ids", "insert_asset_info", "list_asset_infos_page", "list_cache_states_by_asset_id", "list_tags_with_usage", + "mark_cache_states_missing_outside_prefixes", "remove_missing_tag_for_asset_id", "remove_tags_from_asset_info", + "restore_cache_states_by_paths", "set_asset_info_metadata", "set_asset_info_preview", "set_asset_info_tags", diff --git a/app/assets/database/queries/cache_state.py b/app/assets/database/queries/cache_state.py index 5a304a641..9ecd8d7f4 100644 --- a/app/assets/database/queries/cache_state.py +++ b/app/assets/database/queries/cache_state.py @@ -49,11 +49,15 @@ def upsert_cache_state( file_path: str, mtime_ns: int, ) -> tuple[bool, bool]: - """Upsert a cache state by file_path. Returns (created, updated).""" + """Upsert a cache state by file_path. Returns (created, updated). + + Also restores cache states that were previously marked as missing. 
+ """ vals = { "asset_id": asset_id, "file_path": file_path, "mtime_ns": int(mtime_ns), + "is_missing": False, } ins = ( sqlite.insert(AssetCacheState) @@ -74,26 +78,30 @@ def upsert_cache_state( AssetCacheState.asset_id != asset_id, AssetCacheState.mtime_ns.is_(None), AssetCacheState.mtime_ns != int(mtime_ns), + AssetCacheState.is_missing == True, # noqa: E712 ) ) - .values(asset_id=asset_id, mtime_ns=int(mtime_ns)) + .values(asset_id=asset_id, mtime_ns=int(mtime_ns), is_missing=False) ) res2 = session.execute(upd) updated = int(res2.rowcount or 0) > 0 return False, updated -def delete_cache_states_outside_prefixes( +def mark_cache_states_missing_outside_prefixes( session: Session, valid_prefixes: list[str] ) -> int: - """Delete cache states with file_path not matching any of the valid prefixes. + """Mark cache states as missing when file_path doesn't match any valid prefix. + + This is a non-destructive soft-delete that preserves user metadata. + Cache states can be restored if the file reappears in a future scan. Args: session: Database session valid_prefixes: List of absolute directory prefixes that are valid Returns: - Number of cache states deleted + Number of cache states marked as missing """ if not valid_prefixes: return 0 @@ -104,22 +112,59 @@ def delete_cache_states_outside_prefixes( return AssetCacheState.file_path.like(escaped + "%", escape=esc) matches_valid_prefix = sa.or_(*[make_prefix_condition(p) for p in valid_prefixes]) - result = session.execute(sa.delete(AssetCacheState).where(~matches_valid_prefix)) + result = session.execute( + sa.update(AssetCacheState) + .where(~matches_valid_prefix) + .where(AssetCacheState.is_missing == False) # noqa: E712 + .values(is_missing=True) + ) return result.rowcount -def get_orphaned_seed_asset_ids(session: Session) -> list[str]: - """Get IDs of seed assets (hash is None) with no remaining cache states. 
+def restore_cache_states_by_paths(session: Session, file_paths: list[str]) -> int: + """Restore cache states that were previously marked as missing. + + Called when a file path is re-scanned and found to exist. + + Args: + session: Database session + file_paths: List of file paths that exist and should be restored Returns: - List of asset IDs that are orphaned + Number of cache states restored """ - orphan_subq = ( - sa.select(Asset.id) - .outerjoin(AssetCacheState, AssetCacheState.asset_id == Asset.id) - .where(Asset.hash.is_(None), AssetCacheState.id.is_(None)) + if not file_paths: + return 0 + + result = session.execute( + sa.update(AssetCacheState) + .where(AssetCacheState.file_path.in_(file_paths)) + .where(AssetCacheState.is_missing == True) # noqa: E712 + .values(is_missing=False) ) - return [row[0] for row in session.execute(orphan_subq).all()] + return result.rowcount + + +def get_unreferenced_unhashed_asset_ids(session: Session) -> list[str]: + """Get IDs of unhashed assets (hash=None) with no active cache states. + + An asset is considered unreferenced if it has no cache states, + or all its cache states are marked as missing. + + Returns: + List of asset IDs that are unreferenced + """ + active_cache_state_exists = ( + sa.select(sa.literal(1)) + .where(AssetCacheState.asset_id == Asset.id) + .where(AssetCacheState.is_missing == False) # noqa: E712 + .correlate(Asset) + .exists() + ) + unreferenced_subq = sa.select(Asset.id).where( + Asset.hash.is_(None), ~active_cache_state_exists + ) + return [row[0] for row in session.execute(unreferenced_subq).all()] def delete_assets_by_ids(session: Session, asset_ids: list[str]) -> int: @@ -142,12 +187,15 @@ def delete_assets_by_ids(session: Session, asset_ids: list[str]) -> int: def get_cache_states_for_prefixes( session: Session, prefixes: list[str], + *, + include_missing: bool = False, ) -> list[CacheStateRow]: """Get all cache states with paths matching any of the given prefixes. 
Args: session: Database session prefixes: List of absolute directory prefixes to match + include_missing: If False (default), exclude cache states marked as missing Returns: List of cache state rows with joined asset data, ordered by asset_id, state_id @@ -163,7 +211,7 @@ def get_cache_states_for_prefixes( escaped, esc = escape_sql_like_string(base) conds.append(AssetCacheState.file_path.like(escaped + "%", escape=esc)) - rows = session.execute( + query = ( sa.select( AssetCacheState.id, AssetCacheState.file_path, @@ -175,7 +223,13 @@ def get_cache_states_for_prefixes( ) .join(Asset, Asset.id == AssetCacheState.asset_id) .where(sa.or_(*conds)) - .order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc()) + ) + + if not include_missing: + query = query.where(AssetCacheState.is_missing == False) # noqa: E712 + + rows = session.execute( + query.order_by(AssetCacheState.asset_id.asc(), AssetCacheState.id.asc()) ).all() return [ @@ -240,13 +294,15 @@ def bulk_insert_cache_states_ignore_conflicts( """Bulk insert cache state rows with ON CONFLICT DO NOTHING on file_path. Each dict should have: asset_id, file_path, mtime_ns + The is_missing field is automatically set to False for new inserts. 
""" if not rows: return + enriched_rows = [{**row, "is_missing": False} for row in rows] ins = sqlite.insert(AssetCacheState).on_conflict_do_nothing( index_elements=[AssetCacheState.file_path] ) - for chunk in iter_chunks(rows, calculate_rows_per_statement(3)): + for chunk in iter_chunks(enriched_rows, calculate_rows_per_statement(4)): session.execute(ins, chunk) diff --git a/app/assets/scanner.py b/app/assets/scanner.py index 3455943ce..c6b68dced 100644 --- a/app/assets/scanner.py +++ b/app/assets/scanner.py @@ -17,7 +17,7 @@ from app.assets.database.queries import ( from app.assets.services.bulk_ingest import ( SeedAssetSpec, batch_insert_seed_assets, - prune_orphaned_assets, + mark_assets_missing_outside_prefixes, ) from app.assets.services.file_utils import ( get_mtime_ns, @@ -221,18 +221,18 @@ def sync_root_safely(root: RootType) -> set[str]: return set() -def prune_orphans_safely(prefixes: list[str]) -> int: - """Prune orphaned assets outside the given prefixes. +def mark_missing_outside_prefixes_safely(prefixes: list[str]) -> int: + """Mark cache states as missing when outside the given prefixes. - Returns count pruned or 0 on failure. + This is a non-destructive soft-delete. Returns count marked or 0 on failure. """ try: with create_session() as sess: - count = prune_orphaned_assets(sess, prefixes) + count = mark_assets_missing_outside_prefixes(sess, prefixes) sess.commit() return count except Exception as e: - logging.exception("orphan pruning failed: %s", e) + logging.exception("marking missing assets failed: %s", e) return 0 @@ -319,7 +319,7 @@ def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int: def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None: """Scan the given roots and seed the assets into the database. - Note: This function does not prune orphaned assets. Call prune_orphaned_assets + Note: This function does not mark missing assets. 
Call mark_missing_outside_prefixes_safely separately if cleanup is needed. """ if not dependencies_available(): diff --git a/app/assets/seeder.py b/app/assets/seeder.py index c63cd7a4c..23e001e5c 100644 --- a/app/assets/seeder.py +++ b/app/assets/seeder.py @@ -15,7 +15,7 @@ from app.assets.scanner import ( get_all_known_prefixes, get_prefixes_for_root, insert_asset_specs, - prune_orphans_safely, + mark_missing_outside_prefixes_safely, sync_root_safely, ) from app.database.db import dependencies_available @@ -174,35 +174,41 @@ class AssetSeeder: with self._lock: self._thread = None - def prune_orphans(self) -> int: - """Prune orphaned assets that are outside all known root prefixes. + def mark_missing_outside_prefixes(self) -> int: + """Mark cache states as missing when outside all known root prefixes. + + This is a non-destructive soft-delete operation. Assets and their + metadata are preserved, but cache states are flagged as missing. + They can be restored if the file reappears in a future scan. This operation is decoupled from scanning to prevent partial scans - from accidentally deleting assets belonging to other roots. + from accidentally marking assets belonging to other roots. Should be called explicitly when cleanup is desired, typically after a full scan of all roots or during maintenance. 
Returns: - Number of orphaned assets pruned, or 0 if dependencies unavailable - or a scan is currently running + Number of cache states marked as missing, or 0 if dependencies + unavailable or a scan is currently running """ with self._lock: if self._state != State.IDLE: - logging.warning("Cannot prune orphans while scan is running") + logging.warning( + "Cannot mark missing assets while scan is running" + ) return 0 if not dependencies_available(): logging.warning( - "Database dependencies not available, skipping orphan pruning" + "Database dependencies not available, skipping mark missing" ) return 0 all_prefixes = get_all_known_prefixes() - pruned = prune_orphans_safely(all_prefixes) - if pruned > 0: - logging.info("Pruned %d orphaned assets", pruned) - return pruned + marked = mark_missing_outside_prefixes_safely(all_prefixes) + if marked > 0: + logging.info("Marked %d cache states as missing", marked) + return marked def _is_cancelled(self) -> bool: """Check if cancellation has been requested.""" @@ -290,9 +296,9 @@ class AssetSeeder: if self._prune_first: all_prefixes = get_all_known_prefixes() - pruned = prune_orphans_safely(all_prefixes) - if pruned > 0: - logging.info("Pruned %d orphaned assets before scan", pruned) + marked = mark_missing_outside_prefixes_safely(all_prefixes) + if marked > 0: + logging.info("Marked %d cache states as missing before scan", marked) if self._is_cancelled(): logging.info("Asset scan cancelled after pruning phase") diff --git a/app/assets/services/__init__.py b/app/assets/services/__init__.py index 03209043f..0ae025858 100644 --- a/app/assets/services/__init__.py +++ b/app/assets/services/__init__.py @@ -11,7 +11,8 @@ from app.assets.services.asset_management import ( from app.assets.services.bulk_ingest import ( BulkInsertResult, batch_insert_seed_assets, - prune_orphaned_assets, + cleanup_unreferenced_assets, + mark_assets_missing_outside_prefixes, ) from app.assets.services.file_utils import ( get_mtime_ns, @@ -77,7 +78,8 
@@ __all__ = [ "list_assets_page", "list_files_recursively", "list_tags", - "prune_orphaned_assets", + "cleanup_unreferenced_assets", + "mark_assets_missing_outside_prefixes", "remove_tags", "resolve_asset_for_download", "set_asset_preview", diff --git a/app/assets/services/bulk_ingest.py b/app/assets/services/bulk_ingest.py index f2949ee62..ad7a12eb2 100644 --- a/app/assets/services/bulk_ingest.py +++ b/app/assets/services/bulk_ingest.py @@ -28,10 +28,10 @@ from app.assets.database.queries import ( bulk_insert_cache_states_ignore_conflicts, bulk_insert_tags_and_meta, delete_assets_by_ids, - delete_cache_states_outside_prefixes, get_asset_info_ids_by_ids, get_cache_states_by_paths_and_asset_ids, - get_orphaned_seed_asset_ids, + get_unreferenced_unhashed_asset_ids, + mark_cache_states_missing_outside_prefixes, ) from app.assets.helpers import get_utc_now @@ -210,16 +210,37 @@ def batch_insert_seed_assets( ) -def prune_orphaned_assets(session: Session, valid_prefixes: list[str]) -> int: - """Prune cache states outside valid prefixes, then delete orphaned seed assets. +def mark_assets_missing_outside_prefixes( + session: Session, valid_prefixes: list[str] +) -> int: + """Mark cache states as missing when outside valid prefixes. + + This is a non-destructive operation that soft-deletes cache states + by setting is_missing=True. User metadata is preserved and assets + can be restored if the file reappears in a future scan. + + Note: This does NOT delete + unreferenced unhashed assets. Those are preserved so user metadata + remains intact even when base directories change. 
Args: session: Database session valid_prefixes: List of absolute directory prefixes that are valid Returns: - Number of orphaned assets deleted + Number of cache states marked as missing """ - delete_cache_states_outside_prefixes(session, valid_prefixes) - orphan_ids = get_orphaned_seed_asset_ids(session) - return delete_assets_by_ids(session, orphan_ids) + return mark_cache_states_missing_outside_prefixes(session, valid_prefixes) + + +def cleanup_unreferenced_assets(session: Session) -> int: + """Hard-delete unhashed assets with no active cache states. + + This is a destructive operation intended for explicit cleanup. + Only deletes assets where hash=None that have no cache states or only missing ones. + + Returns: + Number of assets deleted + """ + unreferenced_ids = get_unreferenced_unhashed_asset_ids(session) + return delete_assets_by_ids(session, unreferenced_ids) diff --git a/tests-unit/assets_test/queries/test_cache_state.py b/tests-unit/assets_test/queries/test_cache_state.py index 594d33f8a..46603195f 100644 --- a/tests-unit/assets_test/queries/test_cache_state.py +++ b/tests-unit/assets_test/queries/test_cache_state.py @@ -6,8 +6,7 @@ from app.assets.database.models import Asset, AssetCacheState, AssetInfo from app.assets.database.queries import ( list_cache_states_by_asset_id, upsert_cache_state, - delete_cache_states_outside_prefixes, - get_orphaned_seed_asset_ids, + get_unreferenced_unhashed_asset_ids, delete_assets_by_ids, get_cache_states_for_prefixes, bulk_set_needs_verify, @@ -15,6 +14,8 @@ from app.assets.database.queries import ( delete_orphaned_seed_asset, bulk_insert_cache_states_ignore_conflicts, get_cache_states_by_paths_and_asset_ids, + mark_cache_states_missing_outside_prefixes, + restore_cache_states_by_paths, ) from app.assets.helpers import select_best_live_path, get_utc_now @@ -168,9 +169,51 @@ class TestUpsertCacheState: state = session.query(AssetCacheState).filter_by(file_path=file_path).one() assert state.mtime_ns == final_mtime + def 
test_upsert_restores_missing_state(self, session: Session): + """Upserting a cache state that was marked missing should restore it.""" + asset = _make_asset(session, "hash1") + file_path = "/restored/file.bin" -class TestDeleteCacheStatesOutsidePrefixes: - def test_deletes_states_outside_prefixes(self, session: Session, tmp_path): + state = _make_cache_state(session, asset, file_path, mtime_ns=100) + state.is_missing = True + session.commit() + + created, updated = upsert_cache_state( + session, asset_id=asset.id, file_path=file_path, mtime_ns=100 + ) + session.commit() + + assert created is False + assert updated is True + restored_state = session.query(AssetCacheState).filter_by(file_path=file_path).one() + assert restored_state.is_missing is False + + +class TestRestoreCacheStatesByPaths: + def test_restores_missing_states(self, session: Session): + asset = _make_asset(session, "hash1") + missing_path = "/missing/file.bin" + active_path = "/active/file.bin" + + missing_state = _make_cache_state(session, asset, missing_path) + missing_state.is_missing = True + _make_cache_state(session, asset, active_path) + session.commit() + + restored = restore_cache_states_by_paths(session, [missing_path]) + session.commit() + + assert restored == 1 + state = session.query(AssetCacheState).filter_by(file_path=missing_path).one() + assert state.is_missing is False + + def test_empty_list_restores_nothing(self, session: Session): + restored = restore_cache_states_by_paths(session, []) + assert restored == 0 + + +class TestMarkCacheStatesMissingOutsidePrefixes: + def test_marks_states_missing_outside_prefixes(self, session: Session, tmp_path): asset = _make_asset(session, "hash1") valid_dir = tmp_path / "valid" valid_dir.mkdir() @@ -184,39 +227,48 @@ class TestDeleteCacheStatesOutsidePrefixes: _make_cache_state(session, asset, invalid_path) session.commit() - deleted = delete_cache_states_outside_prefixes(session, [str(valid_dir)]) + marked = 
mark_cache_states_missing_outside_prefixes(session, [str(valid_dir)]) session.commit() - assert deleted == 1 - remaining = session.query(AssetCacheState).all() - assert len(remaining) == 1 - assert remaining[0].file_path == valid_path + assert marked == 1 + all_states = session.query(AssetCacheState).all() + assert len(all_states) == 2 - def test_empty_prefixes_deletes_nothing(self, session: Session): + valid_state = next(s for s in all_states if s.file_path == valid_path) + invalid_state = next(s for s in all_states if s.file_path == invalid_path) + assert valid_state.is_missing is False + assert invalid_state.is_missing is True + + def test_empty_prefixes_marks_nothing(self, session: Session): asset = _make_asset(session, "hash1") _make_cache_state(session, asset, "/some/path.bin") session.commit() - deleted = delete_cache_states_outside_prefixes(session, []) + marked = mark_cache_states_missing_outside_prefixes(session, []) - assert deleted == 0 + assert marked == 0 -class TestGetOrphanedSeedAssetIds: - def test_returns_orphaned_seed_assets(self, session: Session): - # Seed asset (hash=None) with no cache states - orphan = _make_asset(session, hash_val=None) - # Seed asset with cache state (not orphaned) - with_state = _make_asset(session, hash_val=None) - _make_cache_state(session, with_state, "/has/state.bin") +class TestGetUnreferencedUnhashedAssetIds: + def test_returns_unreferenced_unhashed_assets(self, session: Session): + # Unhashed asset (hash=None) with no cache states + no_states = _make_asset(session, hash_val=None) + # Unhashed asset with active cache state (not unreferenced) + with_active_state = _make_asset(session, hash_val=None) + _make_cache_state(session, with_active_state, "/has/state.bin") + # Unhashed asset with only missing cache state (should be unreferenced) + with_missing_state = _make_asset(session, hash_val=None) + missing_state = _make_cache_state(session, with_missing_state, "/missing/state.bin") + missing_state.is_missing = True # 
Regular asset (hash not None) - should not be returned _make_asset(session, hash_val="blake3:regular") session.commit() - orphaned = get_orphaned_seed_asset_ids(session) + unreferenced = get_unreferenced_unhashed_asset_ids(session) - assert orphan.id in orphaned - assert with_state.id not in orphaned + assert no_states.id in unreferenced + assert with_missing_state.id in unreferenced + assert with_active_state.id not in unreferenced class TestDeleteAssetsByIds: diff --git a/tests-unit/seeder_test/test_seeder.py b/tests-unit/seeder_test/test_seeder.py index 0c1e17bc6..9acb3f797 100644 --- a/tests-unit/seeder_test/test_seeder.py +++ b/tests-unit/seeder_test/test_seeder.py @@ -349,10 +349,10 @@ class TestSeederThreadSafety: ) -class TestSeederPruneOrphans: - """Test prune_orphans behavior.""" +class TestSeederMarkMissing: + """Test mark_missing_outside_prefixes behavior.""" - def test_prune_orphans_when_idle(self, fresh_seeder: AssetSeeder): + def test_mark_missing_when_idle(self, fresh_seeder: AssetSeeder): with ( patch("app.assets.seeder.dependencies_available", return_value=True), patch( @@ -360,14 +360,14 @@ class TestSeederPruneOrphans: return_value=["/models", "/input", "/output"], ), patch( - "app.assets.seeder.prune_orphans_safely", return_value=5 - ) as mock_prune, + "app.assets.seeder.mark_missing_outside_prefixes_safely", return_value=5 + ) as mock_mark, ): - result = fresh_seeder.prune_orphans() + result = fresh_seeder.mark_missing_outside_prefixes() assert result == 5 - mock_prune.assert_called_once_with(["/models", "/input", "/output"]) + mock_mark.assert_called_once_with(["/models", "/input", "/output"]) - def test_prune_orphans_returns_zero_when_running( + def test_mark_missing_returns_zero_when_running( self, fresh_seeder: AssetSeeder, mock_dependencies ): barrier = threading.Event() @@ -382,35 +382,35 @@ class TestSeederPruneOrphans: fresh_seeder.start(roots=("models",)) time.sleep(0.05) - result = fresh_seeder.prune_orphans() + result = 
fresh_seeder.mark_missing_outside_prefixes() assert result == 0 barrier.set() - def test_prune_orphans_returns_zero_when_dependencies_unavailable( + def test_mark_missing_returns_zero_when_dependencies_unavailable( self, fresh_seeder: AssetSeeder ): with patch("app.assets.seeder.dependencies_available", return_value=False): - result = fresh_seeder.prune_orphans() + result = fresh_seeder.mark_missing_outside_prefixes() assert result == 0 - def test_prune_first_flag_triggers_pruning_before_scan( + def test_prune_first_flag_triggers_mark_missing_before_scan( self, fresh_seeder: AssetSeeder ): - prune_call_order = [] + call_order = [] - def track_prune(prefixes): - prune_call_order.append("prune") + def track_mark(prefixes): + call_order.append("mark_missing") return 3 def track_sync(root): - prune_call_order.append(f"sync_{root}") + call_order.append(f"sync_{root}") return set() with ( patch("app.assets.seeder.dependencies_available", return_value=True), patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models"]), - patch("app.assets.seeder.prune_orphans_safely", side_effect=track_prune), + patch("app.assets.seeder.mark_missing_outside_prefixes_safely", side_effect=track_mark), patch("app.assets.seeder.sync_root_safely", side_effect=track_sync), patch("app.assets.seeder.collect_paths_for_roots", return_value=[]), patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)), @@ -419,5 +419,5 @@ class TestSeederPruneOrphans: fresh_seeder.start(roots=("models",), prune_first=True) fresh_seeder.wait(timeout=5.0) - assert prune_call_order[0] == "prune" - assert "sync_models" in prune_call_order + assert call_order[0] == "mark_missing" + assert "sync_models" in call_order