diff --git a/app/assets/seeder.py b/app/assets/seeder.py
index 029448464..c6107bc55 100644
--- a/app/assets/seeder.py
+++ b/app/assets/seeder.py
@@ -92,6 +92,7 @@ class _AssetSeeder:
         self._prune_first: bool = False
         self._progress_callback: ProgressCallback | None = None
         self._disabled: bool = False
+        self._pending_enrich: dict | None = None
 
     def disable(self) -> None:
         """Disable the asset seeder, preventing any scans from starting."""
@@ -196,6 +197,42 @@ class _AssetSeeder:
             compute_hashes=compute_hashes,
         )
 
+    def enqueue_enrich(
+        self,
+        roots: tuple[RootType, ...] = ("models", "input", "output"),
+        compute_hashes: bool = False,
+    ) -> bool:
+        """Start an enrichment scan now, or queue it for after the current scan.
+
+        If the seeder is idle, starts immediately. Otherwise, the enrich
+        request is stored and will run automatically when the current scan
+        finishes.
+
+        Args:
+            roots: Tuple of root types to scan
+            compute_hashes: If True, compute blake3 hashes
+
+        Returns:
+            True if started immediately, False if queued for later
+        """
+        if self.start_enrich(roots=roots, compute_hashes=compute_hashes):
+            return True
+        with self._lock:
+            if self._pending_enrich is not None:
+                existing_roots = set(self._pending_enrich["roots"])
+                existing_roots.update(roots)
+                self._pending_enrich["roots"] = tuple(existing_roots)
+                self._pending_enrich["compute_hashes"] = (
+                    self._pending_enrich["compute_hashes"] or compute_hashes
+                )
+            else:
+                self._pending_enrich = {
+                    "roots": roots,
+                    "compute_hashes": compute_hashes,
+                }
+            logging.info("Enrich scan queued (roots=%s)", self._pending_enrich["roots"])
+        return False
+
     def cancel(self) -> bool:
         """Request cancellation of the current scan.
 
@@ -381,9 +418,13 @@ class _AssetSeeder:
             return marked
         finally:
             with self._lock:
-                self._last_progress = self._progress
-                self._state = State.IDLE
-                self._progress = None
+                self._reset_to_idle()
+
+    def _reset_to_idle(self) -> None:
+        """Reset state to IDLE, preserving last progress. Caller must hold _lock."""
+        self._last_progress = self._progress
+        self._state = State.IDLE
+        self._progress = None
 
     def _is_cancelled(self) -> bool:
         """Check if cancellation has been requested."""
@@ -594,9 +635,14 @@ class _AssetSeeder:
                 },
             )
             with self._lock:
-                self._last_progress = self._progress
-                self._state = State.IDLE
-                self._progress = None
+                self._reset_to_idle()
+                pending = self._pending_enrich
+                self._pending_enrich = None
+            if pending is not None:
+                self.start_enrich(
+                    roots=pending["roots"],
+                    compute_hashes=pending["compute_hashes"],
+                )
 
     def _run_fast_phase(self, roots: tuple[RootType, ...]) -> tuple[int, int, int]:
         """Run phase 1: fast scan to create stub records.
diff --git a/app/assets/services/__init__.py b/app/assets/services/__init__.py
index 11fcb4122..03990966b 100644
--- a/app/assets/services/__init__.py
+++ b/app/assets/services/__init__.py
@@ -23,6 +23,8 @@ from app.assets.services.ingest import (
     DependencyMissingError,
     HashMismatchError,
     create_from_hash,
+    ingest_existing_file,
+    register_output_files,
     upload_from_temp_path,
 )
 from app.assets.database.queries import (
@@ -72,6 +74,8 @@ __all__ = [
     "delete_asset_reference",
     "get_asset_by_hash",
     "get_asset_detail",
+    "ingest_existing_file",
+    "register_output_files",
     "get_mtime_ns",
     "get_size_and_mtime_ns",
     "list_assets_page",
diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py
index 44d7aef36..01d12b36a 100644
--- a/app/assets/services/ingest.py
+++ b/app/assets/services/ingest.py
@@ -23,9 +23,11 @@ from app.assets.database.queries import (
     validate_tags_exist,
 )
 from app.assets.helpers import normalize_tags
+from app.assets.services.bulk_ingest import batch_insert_seed_assets
 from app.assets.services.file_utils import get_size_and_mtime_ns
 from app.assets.services.path_utils import (
     compute_relative_filename,
+    get_name_and_tags_from_asset_path,
     resolve_destination_from_tags,
     validate_path_within_base,
 )
@@ -128,6 +130,61 @@ def _ingest_file_from_path(
     )
 
 
+def register_output_files(
+    file_paths: Sequence[str],
+    user_metadata: UserMetadata = None,
+) -> int:
+    """Register a batch of output file paths as assets.
+
+    Returns the number of files successfully registered.
+    """
+    registered = 0
+    for abs_path in file_paths:
+        if not os.path.isfile(abs_path):
+            continue
+        try:
+            ingest_existing_file(abs_path, user_metadata=user_metadata)
+            registered += 1
+        except Exception:
+            logging.exception("Failed to register output: %s", abs_path)
+    return registered
+
+
+def ingest_existing_file(
+    abs_path: str,
+    user_metadata: UserMetadata = None,
+    extra_tags: Sequence[str] = (),
+    owner_id: str = "",
+) -> None:
+    """Register an existing on-disk file as an asset stub.
+
+    Inserts a stub record (hash=NULL) for immediate UX visibility.
+    The caller is responsible for triggering background enrichment
+    (hash computation, metadata extraction) via the asset seeder.
+    """
+    size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
+    mime_type = mimetypes.guess_type(abs_path, strict=False)[0]
+    name, path_tags = get_name_and_tags_from_asset_path(abs_path)
+    tags = list(dict.fromkeys(path_tags + list(extra_tags)))
+
+    # NOTE(review): user_metadata is accepted but never stored by this stub
+    # insert ("metadata" stays None) -- confirm whether it should be persisted.
+    spec = {
+        "abs_path": abs_path,
+        "size_bytes": size_bytes,
+        "mtime_ns": mtime_ns,
+        "info_name": name,
+        "tags": tags,
+        "fname": os.path.basename(abs_path),
+        "metadata": None,
+        "hash": None,
+        "mime_type": mime_type,
+    }
+    with create_session() as session:
+        batch_insert_seed_assets(session, [spec], owner_id=owner_id)
+        session.commit()
+
+
 def _register_existing_asset(
     asset_hash: str,
     name: str,
diff --git a/main.py b/main.py
index 1977f9362..71c49fda0 100644
--- a/main.py
+++ b/main.py
@@ -8,6 +8,8 @@ import folder_paths
 import time
 from comfy.cli_args import args, enables_dynamic_vram
 from app.logger import setup_logger
+from app.assets.seeder import asset_seeder
+from app.assets.services import register_output_files
 import itertools
 import utils.extra_config
 from utils.mime_types import init_mime_types
@@ -182,7 +184,6 @@ if 'torch' in sys.modules:
 
 import comfy.utils
 
-from app.assets.seeder import asset_seeder
 import execution
 import server
 
@@ -230,6 +231,24 @@ def cuda_malloc_warning():
             logging.warning("\nWARNING: this card most likely does not support cuda-malloc, if you get \"CUDA error\" please run ComfyUI with: --disable-cuda-malloc\n")
 
 
+def _collect_output_absolute_paths(history_result: dict) -> list[str]:
+    """Extract absolute file paths for output items from a history result."""
+    paths = []
+    base_dir = folder_paths.get_directory_by_type("output")
+    for node_output in history_result.get("outputs", {}).values():
+        for items in node_output.values():
+            if not isinstance(items, list):
+                continue
+            for item in items:
+                if not isinstance(item, dict) or item.get("type") != "output":
+                    continue
+                filename = item.get("filename")
+                if not filename:
+                    continue
+                paths.append(os.path.join(base_dir, item.get("subfolder") or "", filename))
+    return paths
+
+
 def prompt_worker(q, server_instance):
     current_time: float = 0.0
     cache_type = execution.CacheType.CLASSIC
@@ -264,6 +283,7 @@ def prompt_worker(q, server_instance):
             asset_seeder.pause()
 
             e.execute(item[2], prompt_id, extra_data, item[4])
+
             need_gc = True
 
             remove_sensitive = lambda prompt: prompt[:5] + prompt[6:]
@@ -307,6 +327,11 @@ def prompt_worker(q, server_instance):
                 last_gc_collect = current_time
                 need_gc = False
                 hook_breaker_ac10a0.restore_functions()
+
+                if not asset_seeder.is_disabled():
+                    paths = _collect_output_absolute_paths(e.history_result)
+                    if register_output_files(paths, user_metadata={"prompt_id": prompt_id}) > 0:
+                        asset_seeder.enqueue_enrich(roots=("output",), compute_hashes=True)
 
                 asset_seeder.resume()
 
diff --git a/tests/test_asset_seeder.py b/tests/test_asset_seeder.py
new file mode 100644
index 000000000..4274dab8e
--- /dev/null
+++ b/tests/test_asset_seeder.py
@@ -0,0 +1,250 @@
+"""Tests for app.assets.seeder – enqueue_enrich and pending-queue behaviour."""
+
+import threading
+from unittest.mock import patch
+
+import pytest
+
+from app.assets.seeder import Progress, _AssetSeeder, State
+
+
+@pytest.fixture()
+def seeder():
+    """Fresh seeder instance for each test."""
+    return _AssetSeeder()
+
+
+# ---------------------------------------------------------------------------
+# _reset_to_idle
+# ---------------------------------------------------------------------------
+
+
+class TestResetToIdle:
+    def test_sets_idle_and_clears_progress(self, seeder):
+        """_reset_to_idle should move state to IDLE and snapshot progress."""
+        progress = Progress(scanned=10, total=20, created=5, skipped=3)
+        seeder._state = State.RUNNING
+        seeder._progress = progress
+
+        with seeder._lock:
+            seeder._reset_to_idle()
+
+        assert seeder._state is State.IDLE
+        assert seeder._progress is None
+        assert seeder._last_progress is progress
+
+    def test_noop_when_progress_already_none(self, seeder):
+        """_reset_to_idle should handle None progress gracefully."""
+        seeder._state = State.CANCELLING
+        seeder._progress = None
+
+        with seeder._lock:
+            seeder._reset_to_idle()
+
+        assert seeder._state is State.IDLE
+        assert seeder._progress is None
+        assert seeder._last_progress is None
+
+
+# ---------------------------------------------------------------------------
+# enqueue_enrich – immediate start when idle
+# ---------------------------------------------------------------------------
+
+
+class TestEnqueueEnrichStartsImmediately:
+    def test_starts_when_idle(self, seeder):
+        """enqueue_enrich should delegate to start_enrich and return True when idle."""
+        with patch.object(seeder, "start_enrich", return_value=True) as mock:
+            assert seeder.enqueue_enrich(roots=("output",), compute_hashes=True) is True
+            mock.assert_called_once_with(roots=("output",), compute_hashes=True)
+
+    def test_no_pending_when_started_immediately(self, seeder):
+        """No pending request should be stored when start_enrich succeeds."""
+        with patch.object(seeder, "start_enrich", return_value=True):
+            seeder.enqueue_enrich(roots=("output",))
+        assert seeder._pending_enrich is None
+
+
+# ---------------------------------------------------------------------------
+# enqueue_enrich – queuing when busy
+# ---------------------------------------------------------------------------
+
+
+class TestEnqueueEnrichQueuesWhenBusy:
+    def test_queues_when_busy(self, seeder):
+        """enqueue_enrich should store a pending request when seeder is busy."""
+        with patch.object(seeder, "start_enrich", return_value=False):
+            result = seeder.enqueue_enrich(roots=("models",), compute_hashes=False)
+
+        assert result is False
+        assert seeder._pending_enrich == {
+            "roots": ("models",),
+            "compute_hashes": False,
+        }
+
+    def test_queues_preserves_compute_hashes_true(self, seeder):
+        with patch.object(seeder, "start_enrich", return_value=False):
+            seeder.enqueue_enrich(roots=("input",), compute_hashes=True)
+
+        assert seeder._pending_enrich["compute_hashes"] is True
+
+
+# ---------------------------------------------------------------------------
+# enqueue_enrich – merging when a pending request already exists
+# ---------------------------------------------------------------------------
+
+
+class TestEnqueueEnrichMergesPending:
+    def _make_busy(self, seeder):
+        """Patch start_enrich to always return False (seeder busy)."""
+        return patch.object(seeder, "start_enrich", return_value=False)
+
+    def test_merges_roots(self, seeder):
+        """A second enqueue should merge roots with the existing pending request."""
+        with self._make_busy(seeder):
+            seeder.enqueue_enrich(roots=("models",))
+            seeder.enqueue_enrich(roots=("output",))
+
+        merged = set(seeder._pending_enrich["roots"])
+        assert merged == {"models", "output"}
+
+    def test_merges_overlapping_roots(self, seeder):
+        """Duplicate roots should be deduplicated."""
+        with self._make_busy(seeder):
+            seeder.enqueue_enrich(roots=("models", "input"))
+            seeder.enqueue_enrich(roots=("input", "output"))
+
+        merged = set(seeder._pending_enrich["roots"])
+        assert merged == {"models", "input", "output"}
+
+    def test_compute_hashes_sticky_true(self, seeder):
+        """Once compute_hashes is True it should stay True after merging."""
+        with self._make_busy(seeder):
+            seeder.enqueue_enrich(roots=("models",), compute_hashes=True)
+            seeder.enqueue_enrich(roots=("output",), compute_hashes=False)
+
+        assert seeder._pending_enrich["compute_hashes"] is True
+
+    def test_compute_hashes_upgrades_to_true(self, seeder):
+        """A later enqueue with compute_hashes=True should upgrade the pending request."""
+        with self._make_busy(seeder):
+            seeder.enqueue_enrich(roots=("models",), compute_hashes=False)
+            seeder.enqueue_enrich(roots=("output",), compute_hashes=True)
+
+        assert seeder._pending_enrich["compute_hashes"] is True
+
+    def test_compute_hashes_stays_false(self, seeder):
+        """If both enqueues have compute_hashes=False it stays False."""
+        with self._make_busy(seeder):
+            seeder.enqueue_enrich(roots=("models",), compute_hashes=False)
+            seeder.enqueue_enrich(roots=("output",), compute_hashes=False)
+
+        assert seeder._pending_enrich["compute_hashes"] is False
+
+    def test_triple_merge(self, seeder):
+        """Three successive enqueues should all merge correctly."""
+        with self._make_busy(seeder):
+            seeder.enqueue_enrich(roots=("models",), compute_hashes=False)
+            seeder.enqueue_enrich(roots=("input",), compute_hashes=False)
+            seeder.enqueue_enrich(roots=("output",), compute_hashes=True)
+
+        merged = set(seeder._pending_enrich["roots"])
+        assert merged == {"models", "input", "output"}
+        assert seeder._pending_enrich["compute_hashes"] is True
+
+
+# ---------------------------------------------------------------------------
+# Pending enrich drains after scan completes
+# ---------------------------------------------------------------------------
+
+
+class TestPendingEnrichDrain:
+    """Verify that _run_scan drains _pending_enrich via start_enrich."""
+
+    @patch("app.assets.seeder.dependencies_available", return_value=True)
+    @patch("app.assets.seeder.get_all_known_prefixes", return_value=[])
+    @patch("app.assets.seeder.sync_root_safely", return_value=set())
+    @patch("app.assets.seeder.collect_paths_for_roots", return_value=[])
+    @patch("app.assets.seeder.build_asset_specs", return_value=([], {}, 0))
+    def test_pending_enrich_starts_after_scan(self, *_mocks):
+        """After a fast scan finishes, the pending enrich should be started."""
+        seeder = _AssetSeeder()
+
+        seeder._pending_enrich = {
+            "roots": ("output",),
+            "compute_hashes": True,
+        }
+
+        with patch.object(seeder, "start_enrich", return_value=True) as mock_start:
+            seeder.start_fast(roots=("models",))
+            seeder.wait(timeout=5)
+
+        mock_start.assert_called_once_with(
+            roots=("output",),
+            compute_hashes=True,
+        )
+
+        assert seeder._pending_enrich is None
+
+    @patch("app.assets.seeder.dependencies_available", return_value=True)
+    @patch("app.assets.seeder.get_all_known_prefixes", return_value=[])
+    @patch("app.assets.seeder.sync_root_safely", return_value=set())
+    @patch("app.assets.seeder.collect_paths_for_roots", return_value=[])
+    @patch("app.assets.seeder.build_asset_specs", return_value=([], {}, 0))
+    def test_pending_cleared_even_when_start_fails(self, *_mocks):
+        """_pending_enrich should be cleared even if start_enrich returns False."""
+        seeder = _AssetSeeder()
+        seeder._pending_enrich = {
+            "roots": ("output",),
+            "compute_hashes": False,
+        }
+
+        with patch.object(seeder, "start_enrich", return_value=False):
+            seeder.start_fast(roots=("models",))
+            seeder.wait(timeout=5)
+
+        assert seeder._pending_enrich is None
+
+    @patch("app.assets.seeder.dependencies_available", return_value=True)
+    @patch("app.assets.seeder.get_all_known_prefixes", return_value=[])
+    @patch("app.assets.seeder.sync_root_safely", return_value=set())
+    @patch("app.assets.seeder.collect_paths_for_roots", return_value=[])
+    @patch("app.assets.seeder.build_asset_specs", return_value=([], {}, 0))
+    def test_no_drain_when_no_pending(self, *_mocks):
+        """start_enrich should not be called when there is no pending request."""
+        seeder = _AssetSeeder()
+        assert seeder._pending_enrich is None
+
+        with patch.object(seeder, "start_enrich", return_value=True) as mock_start:
+            seeder.start_fast(roots=("models",))
+            seeder.wait(timeout=5)
+
+        mock_start.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Thread-safety of enqueue_enrich
+# ---------------------------------------------------------------------------
+
+
+class TestEnqueueEnrichThreadSafety:
+    def test_concurrent_enqueues(self, seeder):
+        """Multiple threads enqueuing should not lose roots."""
+        with patch.object(seeder, "start_enrich", return_value=False):
+            barrier = threading.Barrier(3)
+
+            def enqueue(root):
+                barrier.wait()
+                seeder.enqueue_enrich(roots=(root,), compute_hashes=False)
+
+            threads = [
+                threading.Thread(target=enqueue, args=(r,))
+                for r in ("models", "input", "output")
+            ]
+            for t in threads:
+                t.start()
+            for t in threads:
+                t.join(timeout=5)
+
+        merged = set(seeder._pending_enrich["roots"])
+        assert merged == {"models", "input", "output"}