From 53869fb0c7046ee552e3a12e1f21f7cc7a83470d Mon Sep 17 00:00:00 2001
From: Luke Mino-Altherr
Date: Fri, 6 Feb 2026 20:09:54 -0800
Subject: [PATCH] Fix FK constraint violation in bulk_ingest by filtering dropped assets

Co-authored-by: Amp
Amp-Thread-ID: https://ampcode.com/threads/T-019c3626-c6ad-7139-a570-62da4e656a1a
---
Review note: get_existing_asset_ids now issues its IN (...) lookup in
chunks, mirroring bulk_insert_assets, so a large batch cannot exceed
SQLite's bound-parameter limit. The post-image blob id on the asset.py
index line predates this amendment. Indentation of context lines was
reconstructed from a whitespace-collapsed copy of the patch; re-check
against the tree before applying.

 app/assets/database/queries/__init__.py |  2 ++
 app/assets/database/queries/asset.py    | 21 +++++++++++++++++++++
 app/assets/services/bulk_ingest.py      | 11 +++++++++++
 main.py                                 |  2 +-
 4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/app/assets/database/queries/__init__.py b/app/assets/database/queries/__init__.py
index 474355f85..fa365fad0 100644
--- a/app/assets/database/queries/__init__.py
+++ b/app/assets/database/queries/__init__.py
@@ -2,6 +2,7 @@ from app.assets.database.queries.asset import (
     asset_exists_by_hash,
     bulk_insert_assets,
     get_asset_by_hash,
+    get_existing_asset_ids,
     upsert_asset,
 )
 from app.assets.database.queries.asset_info import (
@@ -76,6 +77,7 @@ __all__ = [
     "fetch_asset_info_and_asset",
     "fetch_asset_info_asset_and_tags",
     "get_asset_by_hash",
+    "get_existing_asset_ids",
     "get_asset_info_by_id",
     "get_asset_info_ids_by_ids",
     "get_asset_tags",
diff --git a/app/assets/database/queries/asset.py b/app/assets/database/queries/asset.py
index cba937b1e..020e3bdcb 100644
--- a/app/assets/database/queries/asset.py
+++ b/app/assets/database/queries/asset.py
@@ -88,3 +88,24 @@ def bulk_insert_assets(
     ins = sqlite.insert(Asset).on_conflict_do_nothing(index_elements=[Asset.hash])
     for chunk in iter_chunks(rows, calculate_rows_per_statement(5)):
         session.execute(ins, chunk)
+
+
+def get_existing_asset_ids(
+    session: Session,
+    asset_ids: list[str],
+) -> set[str]:
+    """Return the subset of asset_ids that exist in the database.
+
+    The lookup is issued in chunks so that a large batch never puts every
+    id into a single ``IN (...)`` clause.
+    """
+    if not asset_ids:
+        return set()
+    found: set[str] = set()
+    # NOTE(review): assumes the helper's argument is bind params per row (1 per id) - confirm.
+    for chunk in iter_chunks(asset_ids, calculate_rows_per_statement(1)):
+        rows = session.execute(
+            select(Asset.id).where(Asset.id.in_(chunk))
+        ).fetchall()
+        found.update(row[0] for row in rows)
+    return found
diff --git a/app/assets/services/bulk_ingest.py b/app/assets/services/bulk_ingest.py
index ba7872488..c0aabcc1e 100644
--- a/app/assets/services/bulk_ingest.py
+++ b/app/assets/services/bulk_ingest.py
@@ -16,6 +16,7 @@ from app.assets.database.queries import (
     delete_assets_by_ids,
     get_asset_info_ids_by_ids,
     get_cache_states_by_paths_and_asset_ids,
+    get_existing_asset_ids,
     get_unreferenced_unhashed_asset_ids,
     mark_cache_states_missing_outside_prefixes,
     restore_cache_states_by_paths,
@@ -202,6 +203,16 @@ def batch_insert_seed_assets(
         }
 
     bulk_insert_assets(session, asset_rows)
+
+    # Filter cache states to only those whose assets were actually inserted
+    # (assets with duplicate hashes are silently dropped by ON CONFLICT DO NOTHING)
+    inserted_asset_ids = get_existing_asset_ids(
+        session, [r["asset_id"] for r in cache_state_rows]
+    )
+    cache_state_rows = [
+        r for r in cache_state_rows if r["asset_id"] in inserted_asset_ids
+    ]
+
     bulk_insert_cache_states_ignore_conflicts(session, cache_state_rows)
     restore_cache_states_by_paths(session, absolute_path_list)
     winning_paths = get_cache_states_by_paths_and_asset_ids(session, path_to_asset_id)
diff --git a/main.py b/main.py
index 32339e70b..d6d0e5210 100644
--- a/main.py
+++ b/main.py
@@ -357,7 +357,7 @@ def setup_database():
         if dependencies_available():
             init_db()
             if not args.disable_assets_autoscan:
-                if asset_seeder.start(roots=("models", "input", "output"), prune_first=True):
+                if asset_seeder.start(roots=("models", "input", "output"), prune_first=True, compute_hashes=True):
                     logging.info("Background asset scan initiated for models, input, output")
     except Exception as e:
         logging.error(f"Failed to initialize database. Please ensure you have installed the latest requirements. If the error persists, please report this as in future the database will be required: {e}")