feat(assets): register output files as assets after prompt execution

Add ingest_existing_file() to services/ingest.py as a public wrapper for
registering on-disk files (stat, BLAKE3 hash, MIME detection, path-based
tag derivation).

After each prompt execution in the main loop, iterate
history_result['outputs'] and register files with type 'output' as
assets. Runs while the asset seeder is paused, gated behind
asset_seeder.is_disabled(). Stores prompt_id in user_metadata for
provenance tracking.

Ingest uses a two-phase approach: insert a stub record (hash=NULL) first
for instant visibility, then defer hashing to the background seeder
enrich phase to avoid blocking the prompt worker thread.

When multiple enrich scans are enqueued while the seeder is busy, roots
are now unioned and compute_hashes uses sticky-true (OR) logic so no
queued work is silently dropped.

Extract _reset_to_idle helper in the asset seeder to deduplicate the
state reset pattern shared by _run_scan and mark_missing_outside_prefixes.

Separate history parsing from output file registration: move generic
file registration logic into register_output_files() in
app/assets/services/ingest.py, keeping only the ComfyUI history format
parsing (_collect_output_absolute_paths) in main.py.
This commit is contained in:
Luke Mino-Altherr
2026-03-06 14:51:22 -08:00
parent 06f85e2c79
commit 42eda2b6fc
5 changed files with 387 additions and 7 deletions

View File

@@ -92,6 +92,7 @@ class _AssetSeeder:
self._prune_first: bool = False
self._progress_callback: ProgressCallback | None = None
self._disabled: bool = False
self._pending_enrich: dict | None = None
def disable(self) -> None:
"""Disable the asset seeder, preventing any scans from starting."""
@@ -196,6 +197,42 @@ class _AssetSeeder:
compute_hashes=compute_hashes,
)
def enqueue_enrich(
self,
roots: tuple[RootType, ...] = ("models", "input", "output"),
compute_hashes: bool = False,
) -> bool:
"""Start an enrichment scan now, or queue it for after the current scan.
If the seeder is idle, starts immediately. Otherwise, the enrich
request is stored and will run automatically when the current scan
finishes.
Args:
roots: Tuple of root types to scan
compute_hashes: If True, compute blake3 hashes
Returns:
True if started immediately, False if queued for later
"""
if self.start_enrich(roots=roots, compute_hashes=compute_hashes):
return True
with self._lock:
if self._pending_enrich is not None:
existing_roots = set(self._pending_enrich["roots"])
existing_roots.update(roots)
self._pending_enrich["roots"] = tuple(existing_roots)
self._pending_enrich["compute_hashes"] = (
self._pending_enrich["compute_hashes"] or compute_hashes
)
else:
self._pending_enrich = {
"roots": roots,
"compute_hashes": compute_hashes,
}
logging.info("Enrich scan queued (roots=%s)", self._pending_enrich["roots"])
return False
def cancel(self) -> bool:
"""Request cancellation of the current scan.
@@ -381,9 +418,13 @@ class _AssetSeeder:
return marked
finally:
with self._lock:
self._last_progress = self._progress
self._state = State.IDLE
self._progress = None
self._reset_to_idle()
def _reset_to_idle(self) -> None:
"""Reset state to IDLE, preserving last progress. Caller must hold _lock."""
self._last_progress = self._progress
self._state = State.IDLE
self._progress = None
def _is_cancelled(self) -> bool:
"""Check if cancellation has been requested."""
@@ -594,9 +635,14 @@ class _AssetSeeder:
},
)
with self._lock:
self._last_progress = self._progress
self._state = State.IDLE
self._progress = None
self._reset_to_idle()
pending = self._pending_enrich
self._pending_enrich = None
if pending is not None:
self.start_enrich(
roots=pending["roots"],
compute_hashes=pending["compute_hashes"],
)
def _run_fast_phase(self, roots: tuple[RootType, ...]) -> tuple[int, int, int]:
"""Run phase 1: fast scan to create stub records.

View File

@@ -23,6 +23,8 @@ from app.assets.services.ingest import (
DependencyMissingError,
HashMismatchError,
create_from_hash,
ingest_existing_file,
register_output_files,
upload_from_temp_path,
)
from app.assets.database.queries import (
@@ -72,6 +74,8 @@ __all__ = [
"delete_asset_reference",
"get_asset_by_hash",
"get_asset_detail",
"ingest_existing_file",
"register_output_files",
"get_mtime_ns",
"get_size_and_mtime_ns",
"list_assets_page",

View File

@@ -23,9 +23,11 @@ from app.assets.database.queries import (
validate_tags_exist,
)
from app.assets.helpers import normalize_tags
from app.assets.services.bulk_ingest import batch_insert_seed_assets
from app.assets.services.file_utils import get_size_and_mtime_ns
from app.assets.services.path_utils import (
compute_relative_filename,
get_name_and_tags_from_asset_path,
resolve_destination_from_tags,
validate_path_within_base,
)
@@ -128,6 +130,59 @@ def _ingest_file_from_path(
)
def register_output_files(
file_paths: Sequence[str],
user_metadata: UserMetadata = None,
) -> int:
"""Register a batch of output file paths as assets.
Returns the number of files successfully registered.
"""
registered = 0
for abs_path in file_paths:
if not os.path.isfile(abs_path):
continue
try:
ingest_existing_file(abs_path, user_metadata=user_metadata)
registered += 1
except Exception:
logging.exception("Failed to register output: %s", abs_path)
return registered
def ingest_existing_file(
abs_path: str,
user_metadata: UserMetadata = None,
extra_tags: Sequence[str] = (),
owner_id: str = "",
) -> None:
"""Register an existing on-disk file as an asset stub.
Inserts a stub record (hash=NULL) for immediate UX visibility.
The caller is responsible for triggering background enrichment
(hash computation, metadata extraction) via the asset seeder.
"""
size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
mime_type = mimetypes.guess_type(abs_path, strict=False)[0]
name, path_tags = get_name_and_tags_from_asset_path(abs_path)
tags = list(dict.fromkeys(path_tags + list(extra_tags)))
spec = {
"abs_path": abs_path,
"size_bytes": size_bytes,
"mtime_ns": mtime_ns,
"info_name": name,
"tags": tags,
"fname": os.path.basename(abs_path),
"metadata": None,
"hash": None,
"mime_type": mime_type,
}
with create_session() as session:
batch_insert_seed_assets(session, [spec], owner_id=owner_id)
session.commit()
def _register_existing_asset(
asset_hash: str,
name: str,