Decouple orphan pruning from asset seeding

- Remove automatic pruning from scan loop to prevent partial scans from
  deleting assets belonging to other roots
- Add get_all_known_prefixes() helper to get prefixes for all root types
- Add prune_orphans() method to AssetSeeder for explicit pruning
- Add prune_first parameter to start() for optional pre-scan pruning
- Add POST /api/assets/prune endpoint for explicit pruning via API
- Update main.py startup to use prune_first=True for full startup scans
- Add tests for new prune_orphans functionality

Fixes issue where a models-only scan would delete all input/output assets.

Amp-Thread-ID: https://ampcode.com/threads/T-019c2ba0-e004-7229-81bf-452b2f7f57a1
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-02-04 19:38:29 -08:00
parent 8fb77c080f
commit 56e9a75ca2
5 changed files with 150 additions and 24 deletions

View File

@@ -695,3 +695,23 @@ async def cancel_seed(request: web.Request) -> web.Response:
if cancelled:
return web.json_response({"status": "cancelling"}, status=200)
return web.json_response({"status": "idle"}, status=200)
@ROUTES.post("/api/assets/prune")
async def prune_orphans(request: web.Request) -> web.Response:
    """Handle POST /api/assets/prune by asking the seeder to prune orphans.

    Delegates to ``asset_seeder.prune_orphans()`` and reports how many assets
    it removed. A zero count combined with a non-idle seeder state is
    reported as a conflict; every other outcome is reported as completed.

    Returns:
        200 OK with the count of pruned assets
        409 Conflict if nothing was pruned and the seeder is not idle
    """
    removed = asset_seeder.prune_orphans()

    # A non-zero count means the prune really ran; no need to look at state.
    if removed != 0:
        return web.json_response({"status": "completed", "pruned": removed}, status=200)

    # Zero is ambiguous: either there was nothing to prune or the seeder
    # declined to prune, so the seeder state is consulted to tell them apart.
    seeder_state = asset_seeder.get_status().state.value
    if seeder_state == "IDLE":
        return web.json_response({"status": "completed", "pruned": removed}, status=200)

    return web.json_response(
        {"status": "scan_running", "pruned": 0},
        status=409,
    )

View File

@@ -61,6 +61,14 @@ def get_prefixes_for_root(root: RootType) -> list[str]:
return []
def get_all_known_prefixes() -> list[str]:
    """Return the absolute prefixes of the models, input and output roots.

    Roots are visited in the order models, input, output; within each root
    the prefixes keep the order returned by ``get_prefixes_for_root``.
    """
    prefixes: list[str] = []
    for root_name in ("models", "input", "output"):
        for raw_prefix in get_prefixes_for_root(root_name):
            prefixes.append(os.path.abspath(raw_prefix))
    return prefixes
def collect_models_files() -> list[str]:
out: list[str] = []
for folder_name, bases in get_comfy_models_folders():
@@ -287,7 +295,11 @@ def _insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> None:
"""Scan the given roots and seed the assets into the database."""
"""Scan the given roots and seed the assets into the database.
Note: This function does not prune orphaned assets. Call prune_orphaned_assets
separately if cleanup is needed.
"""
if not dependencies_available():
if enable_logging:
logging.warning("Database dependencies not available, skipping assets scan")
@@ -299,20 +311,16 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
for r in roots:
existing_paths.update(_sync_root_safely(r))
all_prefixes = [os.path.abspath(p) for r in roots for p in get_prefixes_for_root(r)]
orphans_pruned = _prune_orphans_safely(all_prefixes)
paths = _collect_paths_for_roots(roots)
specs, tag_pool, skipped_existing = _build_asset_specs(paths, existing_paths)
created = _insert_asset_specs(specs, tag_pool)
if enable_logging:
logging.info(
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, orphans_pruned=%d, total_seen=%d)",
"Assets scan(roots=%s) completed in %.3fs (created=%d, skipped_existing=%d, total_seen=%d)",
roots,
time.perf_counter() - t_start,
created,
skipped_existing,
orphans_pruned,
len(paths),
)

View File

@@ -15,6 +15,7 @@ from app.assets.scanner import (
_insert_asset_specs,
_prune_orphans_safely,
_sync_root_safely,
get_all_known_prefixes,
get_prefixes_for_root,
)
from app.database.db import dependencies_available
@@ -85,14 +86,16 @@ class AssetSeeder:
def start(
self,
roots: tuple[RootType, ...] = ("models",),
roots: tuple[RootType, ...] = ("models", "input", "output"),
progress_callback: ProgressCallback | None = None,
prune_first: bool = False,
) -> bool:
"""Start a background scan for the given roots.
Args:
roots: Tuple of root types to scan (models, input, output)
progress_callback: Optional callback called with progress updates
prune_first: If True, prune orphaned assets before scanning
Returns:
True if scan was started, False if already running
@@ -104,6 +107,7 @@ class AssetSeeder:
self._progress = Progress()
self._errors = []
self._roots = roots
self._prune_first = prune_first
self._progress_callback = progress_callback
self._cancel_event.clear()
self._thread = threading.Thread(
@@ -170,6 +174,34 @@ class AssetSeeder:
with self._lock:
self._thread = None
def prune_orphans(self) -> int:
    """Prune orphaned assets that are outside all known root prefixes.

    Pruning is decoupled from scanning so that a partial scan cannot
    accidentally delete assets belonging to other roots. Call it explicitly
    when cleanup is desired, typically after a full scan of all roots or
    during maintenance.

    The seeder state is set to RUNNING for the duration of the prune and
    restored to IDLE afterwards, so the idle check and the prune itself are
    not separated by a window in which another operation could begin.

    Returns:
        Number of orphaned assets pruned, or 0 if dependencies are
        unavailable or a scan is currently running.
    """
    with self._lock:
        if self._state != State.IDLE:
            logging.warning("Cannot prune orphans while scan is running")
            return 0
        # Claim the seeder while still holding the lock. Checking the state
        # and then pruning after the lock is released would let a scan (or a
        # second prune) start in between.
        # NOTE(review): assumes start() refuses to launch unless the state is
        # IDLE - confirm against start().
        self._state = State.RUNNING

    try:
        if not dependencies_available():
            logging.warning("Database dependencies not available, skipping orphan pruning")
            return 0

        pruned = _prune_orphans_safely(get_all_known_prefixes())
        if pruned > 0:
            logging.info("Pruned %d orphaned assets", pruned)
        return pruned
    finally:
        # Always release the claim, including on early return or exception.
        with self._lock:
            self._state = State.IDLE
def _is_cancelled(self) -> bool:
"""Check if cancellation has been requested."""
return self._cancel_event.is_set()
@@ -254,6 +286,17 @@ class AssetSeeder:
)
return
if self._prune_first:
all_prefixes = get_all_known_prefixes()
pruned = _prune_orphans_safely(all_prefixes)
if pruned > 0:
logging.info("Pruned %d orphaned assets before scan", pruned)
if self._is_cancelled():
logging.info("Asset scan cancelled after pruning phase")
cancelled = True
return
self._log_scan_config(roots)
existing_paths: set[str] = set()
@@ -269,16 +312,6 @@ class AssetSeeder:
cancelled = True
return
all_prefixes = [
os.path.abspath(p) for r in roots for p in get_prefixes_for_root(r)
]
orphans_pruned = _prune_orphans_safely(all_prefixes)
if self._is_cancelled():
logging.info("Asset scan cancelled after orphan pruning")
cancelled = True
return
paths = _collect_paths_for_roots(roots)
total_paths = len(paths)
self._update_progress(total=total_paths)
@@ -335,12 +368,11 @@ class AssetSeeder:
elapsed = time.perf_counter() - t_start
logging.info(
"Asset scan(roots=%s) completed in %.3fs (created=%d, skipped=%d, orphans_pruned=%d, total=%d)",
"Asset scan(roots=%s) completed in %.3fs (created=%d, skipped=%d, total=%d)",
roots,
elapsed,
total_created,
skipped_existing,
orphans_pruned,
len(paths),
)

View File

@@ -354,8 +354,8 @@ def setup_database():
if dependencies_available():
init_db()
if not args.disable_assets_autoscan:
if asset_seeder.start(roots=("models",)):
logging.info("Background asset scan initiated for models")
if asset_seeder.start(roots=("models", "input", "output"), prune_first=True):
logging.info("Background asset scan initiated for models, input, output")
except Exception as e:
logging.error(f"Failed to initialize database. Please ensure you have installed the latest requirements. If the error persists, please report this as in future the database will be required: {e}")

View File

@@ -25,7 +25,6 @@ def mock_dependencies():
with (
patch("app.assets.seeder.dependencies_available", return_value=True),
patch("app.assets.seeder._sync_root_safely", return_value=set()),
patch("app.assets.seeder._prune_orphans_safely", return_value=0),
patch("app.assets.seeder._collect_paths_for_roots", return_value=[]),
patch("app.assets.seeder._build_asset_specs", return_value=([], set(), 0)),
patch("app.assets.seeder._insert_asset_specs", return_value=0),
@@ -210,7 +209,6 @@ class TestSeederCancellation:
with (
patch("app.assets.seeder.dependencies_available", return_value=True),
patch("app.assets.seeder._sync_root_safely", return_value=set()),
patch("app.assets.seeder._prune_orphans_safely", return_value=0),
patch("app.assets.seeder._collect_paths_for_roots", return_value=paths),
patch("app.assets.seeder._build_asset_specs", return_value=(specs, set(), 0)),
patch("app.assets.seeder._insert_asset_specs", side_effect=slow_insert),
@@ -232,7 +230,6 @@ class TestSeederErrorHandling:
with (
patch("app.assets.seeder.dependencies_available", return_value=True),
patch("app.assets.seeder._sync_root_safely", return_value=set()),
patch("app.assets.seeder._prune_orphans_safely", return_value=0),
patch(
"app.assets.seeder._collect_paths_for_roots",
return_value=["/path/file.safetensors"],
@@ -345,3 +342,72 @@ class TestSeederThreadSafety:
barrier.set()
assert all(s.state in (State.RUNNING, State.IDLE, State.CANCELLING) for s in statuses)
class TestSeederPruneOrphans:
    """Test prune_orphans behavior."""

    def test_prune_orphans_when_idle(self, fresh_seeder: AssetSeeder):
        """An idle seeder prunes using every known prefix and returns the count."""
        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models", "/input", "/output"]),
            patch("app.assets.seeder._prune_orphans_safely", return_value=5) as mock_prune,
        ):
            result = fresh_seeder.prune_orphans()

        assert result == 5
        mock_prune.assert_called_once_with(["/models", "/input", "/output"])

    def test_prune_orphans_returns_zero_when_running(
        self, fresh_seeder: AssetSeeder, mock_dependencies
    ):
        """prune_orphans returns 0 while a scan is in progress."""
        release_scan = threading.Event()
        scan_blocked = threading.Event()

        def slow_collect(*args):
            # Signal that the scan thread is mid-scan, then hold it there.
            scan_blocked.set()
            release_scan.wait(timeout=5.0)
            return []

        with patch(
            "app.assets.seeder._collect_paths_for_roots", side_effect=slow_collect
        ):
            fresh_seeder.start(roots=("models",))
            try:
                # Wait for the scan to actually be in progress instead of
                # sleeping for a fixed interval and hoping it got there.
                assert scan_blocked.wait(timeout=5.0)

                result = fresh_seeder.prune_orphans()
                assert result == 0
            finally:
                # Unblock and join the scan thread even if an assert failed,
                # so it does not outlive the test or the patch.
                release_scan.set()
                fresh_seeder.wait(timeout=5.0)

    def test_prune_orphans_returns_zero_when_dependencies_unavailable(
        self, fresh_seeder: AssetSeeder
    ):
        """Without database dependencies prune_orphans returns 0."""
        with patch("app.assets.seeder.dependencies_available", return_value=False):
            result = fresh_seeder.prune_orphans()

        assert result == 0

    def test_prune_first_flag_triggers_pruning_before_scan(
        self, fresh_seeder: AssetSeeder
    ):
        """With prune_first=True the prune call precedes the root sync."""
        prune_call_order = []

        def track_prune(prefixes):
            prune_call_order.append("prune")
            return 3

        def track_sync(root):
            prune_call_order.append(f"sync_{root}")
            return set()

        with (
            patch("app.assets.seeder.dependencies_available", return_value=True),
            patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models"]),
            patch("app.assets.seeder._prune_orphans_safely", side_effect=track_prune),
            patch("app.assets.seeder._sync_root_safely", side_effect=track_sync),
            patch("app.assets.seeder._collect_paths_for_roots", return_value=[]),
            patch("app.assets.seeder._build_asset_specs", return_value=([], set(), 0)),
            patch("app.assets.seeder._insert_asset_specs", return_value=0),
        ):
            fresh_seeder.start(roots=("models",), prune_first=True)
            fresh_seeder.wait(timeout=5.0)

        assert prune_call_order[0] == "prune"
        assert "sync_models" in prune_call_order