# Review annotation for this patch. Everything above the first "diff --git"
# header is descriptive text only and is not part of any hunk.
#
# Summary of what the hunks below change:
#   app/assets/api/routes.py
#     - Adds POST /api/assets/prune (prune_orphans). The handler calls
#       asset_seeder.prune_orphans() and answers
#       200 {"status": "completed", "pruned": N}, or
#       409 {"status": "scan_running", "pruned": 0} when N == 0 and
#       asset_seeder.get_status().state.value != "IDLE".
#   app/assets/scanner.py
#     - Adds get_all_known_prefixes(): os.path.abspath() of every prefix that
#       get_prefixes_for_root() returns for "models", "input" and "output".
#     - seed_assets() no longer calls _prune_orphans_safely(); its completion
#       log message drops the orphans_pruned field.
#   app/assets/seeder.py
#     - AssetSeeder.start(): default roots become ("models", "input", "output");
#       new prune_first=False parameter, stored as self._prune_first.
#     - New AssetSeeder.prune_orphans(): returns 0 when the state is not
#       State.IDLE or when dependencies_available() is false; otherwise calls
#       _prune_orphans_safely(get_all_known_prefixes()) and returns its count.
#     - Scan body: when self._prune_first is set, prunes against all known
#       prefixes before _log_scan_config(), followed by a cancellation check.
#       The former per-scan prune, which used only the scanned roots'
#       prefixes, and its cancellation check are removed.
#   main.py
#     - Startup autoscan now calls
#       asset_seeder.start(roots=("models", "input", "output"), prune_first=True).
#   tests-unit/seeder_test/test_seeder.py
#     - Removes the _prune_orphans_safely patch from three existing patch
#       groups and adds TestSeederPruneOrphans with four tests.
#
# NOTE(review):
#   - This copy of the patch has had its line breaks and indentation collapsed.
#     Statement nesting (for example, which statements are inside
#     `with self._lock:` in prune_orphans) and blank context lines cannot be
#     confirmed from this text; verify against the original patch before
#     applying it.
#   - The route derives 409 by re-reading get_status() after prune_orphans()
#     has already returned, so the two reads are not atomic: a scan that
#     starts or finishes in between changes the response. Consider having the
#     seeder report "busy" explicitly instead of overloading a 0 return.
#   - The route compares state.value with the literal "IDLE"; this is only
#     correct if the State enum's value for IDLE is exactly that string —
#     TODO confirm, or compare against State.IDLE directly.
#   - AssetSeeder.prune_orphans() returns 0 for three different outcomes
#     (nothing pruned, scan running, dependencies unavailable); callers cannot
#     tell them apart from the return value alone.
#   - prune_orphans() in the route is a synchronous call inside an async
#     handler; presumably it performs database work — TODO confirm whether it
#     should be run off the event loop.
#   - The new seed_assets() docstring refers to `prune_orphaned_assets`, a name
#     this patch does not define; verify it exists, or point the text at
#     AssetSeeder.prune_orphans.
#   - The route docstring's summary line ("no longer exist on the filesystem")
#     and its body ("outside all known root prefixes") describe different
#     criteria; the seeder docstring uses the prefix-based wording.
#   - start()'s default roots change from ("models",) to all three roots;
#     callers relying on the default will scan more — confirm this is intended.
diff --git a/app/assets/api/routes.py b/app/assets/api/routes.py index d3039e894..af29b5f22 100644 --- a/app/assets/api/routes.py +++ b/app/assets/api/routes.py @@ -695,3 +695,23 @@ async def cancel_seed(request: web.Request) -> web.Response: if cancelled: return web.json_response({"status": "cancelling"}, status=200) return web.json_response({"status": "idle"}, status=200) + + +@ROUTES.post("/api/assets/prune") +async def prune_orphans(request: web.Request) -> web.Response: + """Prune orphaned assets that no longer exist on the filesystem. + + This removes assets whose cache states point to files outside all known + root prefixes (models, input, output). + + Returns: + 200 OK with count of pruned assets + 409 Conflict if a scan is currently running + """ + pruned = asset_seeder.prune_orphans() + if pruned == 0 and asset_seeder.get_status().state.value != "IDLE": + return web.json_response( + {"status": "scan_running", "pruned": 0}, + status=409, + ) + return web.json_response({"status": "completed", "pruned": pruned}, status=200) diff --git a/app/assets/scanner.py b/app/assets/scanner.py index 9c95736d4..e7cad94b8 100644 --- a/app/assets/scanner.py +++ b/app/assets/scanner.py @@ -61,6 +61,14 @@ def get_prefixes_for_root(root: RootType) -> list[str]: return [] +def get_all_known_prefixes() -> list[str]: + """Get all known asset prefixes across all root types.""" + all_roots: tuple[RootType, ...] = ("models", "input", "output") + return [ + os.path.abspath(p) for root in all_roots for p in get_prefixes_for_root(root) + ] + + def collect_models_files() -> list[str]: out: list[str] = [] for folder_name, bases in get_comfy_models_folders(): @@ -287,7 +295,11 @@ def _insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int: def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None: - """Scan the given roots and seed the assets into the database.""" + """Scan the given roots and seed the assets into the database. 
+ + Note: This function does not prune orphaned assets. Call prune_orphaned_assets + separately if cleanup is needed. + """ if not dependencies_available(): if enable_logging: logging.warning("Database dependencies not available, skipping assets scan") @@ -299,20 +311,16 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No for r in roots: existing_paths.update(_sync_root_safely(r)) - all_prefixes = [os.path.abspath(p) for r in roots for p in get_prefixes_for_root(r)] - orphans_pruned = _prune_orphans_safely(all_prefixes) - paths = _collect_paths_for_roots(roots) specs, tag_pool, skipped_existing = _build_asset_specs(paths, existing_paths) created = _insert_asset_specs(specs, tag_pool) if enable_logging: logging.info( - "Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)", + "Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)", roots, time.perf_counter() - t_start, created, skipped_existing, - orphans_pruned, len(paths), ) diff --git a/app/assets/seeder.py b/app/assets/seeder.py index 8722e3d1a..cdf28f0b9 100644 --- a/app/assets/seeder.py +++ b/app/assets/seeder.py @@ -15,6 +15,7 @@ from app.assets.scanner import ( _insert_asset_specs, _prune_orphans_safely, _sync_root_safely, + get_all_known_prefixes, get_prefixes_for_root, ) from app.database.db import dependencies_available @@ -85,14 +86,16 @@ class AssetSeeder: def start( self, - roots: tuple[RootType, ...] = ("models",), + roots: tuple[RootType, ...] = ("models", "input", "output"), progress_callback: ProgressCallback | None = None, + prune_first: bool = False, ) -> bool: """Start a background scan for the given roots. 
Args: roots: Tuple of root types to scan (models, input, output) progress_callback: Optional callback called with progress updates + prune_first: If True, prune orphaned assets before scanning Returns: True if scan was started, False if already running @@ -104,6 +107,7 @@ class AssetSeeder: self._progress = Progress() self._errors = [] self._roots = roots + self._prune_first = prune_first self._progress_callback = progress_callback self._cancel_event.clear() self._thread = threading.Thread( @@ -170,6 +174,34 @@ class AssetSeeder: with self._lock: self._thread = None + def prune_orphans(self) -> int: + """Prune orphaned assets that are outside all known root prefixes. + + This operation is decoupled from scanning to prevent partial scans + from accidentally deleting assets belonging to other roots. + + Should be called explicitly when cleanup is desired, typically after + a full scan of all roots or during maintenance. + + Returns: + Number of orphaned assets pruned, or 0 if dependencies unavailable + or a scan is currently running + """ + with self._lock: + if self._state != State.IDLE: + logging.warning("Cannot prune orphans while scan is running") + return 0 + + if not dependencies_available(): + logging.warning("Database dependencies not available, skipping orphan pruning") + return 0 + + all_prefixes = get_all_known_prefixes() + pruned = _prune_orphans_safely(all_prefixes) + if pruned > 0: + logging.info("Pruned %d orphaned assets", pruned) + return pruned + def _is_cancelled(self) -> bool: """Check if cancellation has been requested.""" return self._cancel_event.is_set() @@ -254,6 +286,17 @@ class AssetSeeder: ) return + if self._prune_first: + all_prefixes = get_all_known_prefixes() + pruned = _prune_orphans_safely(all_prefixes) + if pruned > 0: + logging.info("Pruned %d orphaned assets before scan", pruned) + + if self._is_cancelled(): + logging.info("Asset scan cancelled after pruning phase") + cancelled = True + return + self._log_scan_config(roots) 
existing_paths: set[str] = set() @@ -269,16 +312,6 @@ class AssetSeeder: cancelled = True return - all_prefixes = [ - os.path.abspath(p) for r in roots for p in get_prefixes_for_root(r) - ] - orphans_pruned = _prune_orphans_safely(all_prefixes) - - if self._is_cancelled(): - logging.info("Asset scan cancelled after orphan pruning") - cancelled = True - return - paths = _collect_paths_for_roots(roots) total_paths = len(paths) self._update_progress(total=total_paths) @@ -335,12 +368,11 @@ class AssetSeeder: elapsed = time.perf_counter() - t_start logging.info( - "Asset scan(roots=%s) completed in %.3fs (created=%d, skipped=%d, orphans_pruned=%d, total=%d)", + "Asset scan(roots=%s) completed in %.3fs (created=%d, skipped=%d, total=%d)", roots, elapsed, total_created, skipped_existing, - orphans_pruned, len(paths), ) diff --git a/main.py b/main.py index 3119b58ad..aa59b706d 100644 --- a/main.py +++ b/main.py @@ -354,8 +354,8 @@ def setup_database(): if dependencies_available(): init_db() if not args.disable_assets_autoscan: - if asset_seeder.start(roots=("models",)): - logging.info("Background asset scan initiated for models") + if asset_seeder.start(roots=("models", "input", "output"), prune_first=True): + logging.info("Background asset scan initiated for models, input, output") except Exception as e: logging.error(f"Failed to initialize database. Please ensure you have installed the latest requirements. 
If the error persists, please report this as in future the database will be required: {e}") diff --git a/tests-unit/seeder_test/test_seeder.py b/tests-unit/seeder_test/test_seeder.py index bfb3c5839..eecee8f4d 100644 --- a/tests-unit/seeder_test/test_seeder.py +++ b/tests-unit/seeder_test/test_seeder.py @@ -25,7 +25,6 @@ def mock_dependencies(): with ( patch("app.assets.seeder.dependencies_available", return_value=True), patch("app.assets.seeder._sync_root_safely", return_value=set()), - patch("app.assets.seeder._prune_orphans_safely", return_value=0), patch("app.assets.seeder._collect_paths_for_roots", return_value=[]), patch("app.assets.seeder._build_asset_specs", return_value=([], set(), 0)), patch("app.assets.seeder._insert_asset_specs", return_value=0), @@ -210,7 +209,6 @@ class TestSeederCancellation: with ( patch("app.assets.seeder.dependencies_available", return_value=True), patch("app.assets.seeder._sync_root_safely", return_value=set()), - patch("app.assets.seeder._prune_orphans_safely", return_value=0), patch("app.assets.seeder._collect_paths_for_roots", return_value=paths), patch("app.assets.seeder._build_asset_specs", return_value=(specs, set(), 0)), patch("app.assets.seeder._insert_asset_specs", side_effect=slow_insert), @@ -232,7 +230,6 @@ class TestSeederErrorHandling: with ( patch("app.assets.seeder.dependencies_available", return_value=True), patch("app.assets.seeder._sync_root_safely", return_value=set()), - patch("app.assets.seeder._prune_orphans_safely", return_value=0), patch( "app.assets.seeder._collect_paths_for_roots", return_value=["/path/file.safetensors"], @@ -345,3 +342,72 @@ class TestSeederThreadSafety: barrier.set() assert all(s.state in (State.RUNNING, State.IDLE, State.CANCELLING) for s in statuses) + + +class TestSeederPruneOrphans: + """Test prune_orphans behavior.""" + + def test_prune_orphans_when_idle(self, fresh_seeder: AssetSeeder): + with ( + patch("app.assets.seeder.dependencies_available", return_value=True), + 
patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models", "/input", "/output"]), + patch("app.assets.seeder._prune_orphans_safely", return_value=5) as mock_prune, + ): + result = fresh_seeder.prune_orphans() + assert result == 5 + mock_prune.assert_called_once_with(["/models", "/input", "/output"]) + + def test_prune_orphans_returns_zero_when_running( + self, fresh_seeder: AssetSeeder, mock_dependencies + ): + barrier = threading.Event() + + def slow_collect(*args): + barrier.wait(timeout=5.0) + return [] + + with patch( + "app.assets.seeder._collect_paths_for_roots", side_effect=slow_collect + ): + fresh_seeder.start(roots=("models",)) + time.sleep(0.05) + + result = fresh_seeder.prune_orphans() + assert result == 0 + + barrier.set() + + def test_prune_orphans_returns_zero_when_dependencies_unavailable( + self, fresh_seeder: AssetSeeder + ): + with patch("app.assets.seeder.dependencies_available", return_value=False): + result = fresh_seeder.prune_orphans() + assert result == 0 + + def test_prune_first_flag_triggers_pruning_before_scan( + self, fresh_seeder: AssetSeeder + ): + prune_call_order = [] + + def track_prune(prefixes): + prune_call_order.append("prune") + return 3 + + def track_sync(root): + prune_call_order.append(f"sync_{root}") + return set() + + with ( + patch("app.assets.seeder.dependencies_available", return_value=True), + patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models"]), + patch("app.assets.seeder._prune_orphans_safely", side_effect=track_prune), + patch("app.assets.seeder._sync_root_safely", side_effect=track_sync), + patch("app.assets.seeder._collect_paths_for_roots", return_value=[]), + patch("app.assets.seeder._build_asset_specs", return_value=([], set(), 0)), + patch("app.assets.seeder._insert_asset_specs", return_value=0), + ): + fresh_seeder.start(roots=("models",), prune_first=True) + fresh_seeder.wait(timeout=5.0) + + assert prune_call_order[0] == "prune" + assert "sync_models" in 
prune_call_order