mirror of
https://github.com/comfyanonymous/ComfyUI.git
synced 2026-02-24 00:44:03 +00:00
refactor: move bulk_ops to queries and scanner service
- Delete bulk_ops.py, moving logic to appropriate layers - Add bulk insert query functions: - queries/asset.bulk_insert_assets - queries/cache_state.bulk_insert_cache_states_ignore_conflicts - queries/cache_state.get_cache_states_by_paths_and_asset_ids - queries/asset_info.bulk_insert_asset_infos_ignore_conflicts - queries/asset_info.get_asset_info_ids_by_ids - queries/tags.bulk_insert_tags_and_meta - Move seed_from_paths_batch orchestration to scanner._seed_from_paths_batch Amp-Thread-ID: https://ampcode.com/threads/T-019c24fd-157d-776a-ad24-4f19cf5d3afe Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
@@ -2,9 +2,11 @@ import contextlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import folder_paths
|
||||
from app.assets.database.bulk_ops import seed_from_paths_batch
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.assets.database.queries import (
|
||||
add_missing_tag_for_asset_id,
|
||||
ensure_tags_exist,
|
||||
@@ -16,6 +18,12 @@ from app.assets.database.queries import (
|
||||
bulk_set_needs_verify,
|
||||
delete_cache_states_by_ids,
|
||||
delete_orphaned_seed_asset,
|
||||
bulk_insert_assets,
|
||||
bulk_insert_cache_states_ignore_conflicts,
|
||||
get_cache_states_by_paths_and_asset_ids,
|
||||
bulk_insert_asset_infos_ignore_conflicts,
|
||||
get_asset_info_ids_by_ids,
|
||||
bulk_insert_tags_and_meta,
|
||||
)
|
||||
from app.assets.helpers import (
|
||||
collect_models_files,
|
||||
@@ -25,10 +33,157 @@ from app.assets.helpers import (
|
||||
list_tree,
|
||||
prefixes_for_root,
|
||||
RootType,
|
||||
utcnow,
|
||||
)
|
||||
from app.database.db import create_session, dependencies_available
|
||||
|
||||
|
||||
def _seed_from_paths_batch(
|
||||
session: Session,
|
||||
specs: list[dict],
|
||||
owner_id: str = "",
|
||||
) -> dict:
|
||||
"""Seed assets from filesystem specs in batch.
|
||||
|
||||
Each spec is a dict with keys:
|
||||
- abs_path: str
|
||||
- size_bytes: int
|
||||
- mtime_ns: int
|
||||
- info_name: str
|
||||
- tags: list[str]
|
||||
- fname: Optional[str]
|
||||
|
||||
This function orchestrates:
|
||||
1. Insert seed Assets (hash=NULL)
|
||||
2. Claim cache states with ON CONFLICT DO NOTHING
|
||||
3. Query to find winners (paths where our asset_id was inserted)
|
||||
4. Delete Assets for losers (path already claimed by another asset)
|
||||
5. Insert AssetInfo for winners
|
||||
6. Insert tags and metadata for successfully inserted AssetInfos
|
||||
|
||||
Returns:
|
||||
dict with keys: inserted_infos, won_states, lost_states
|
||||
"""
|
||||
if not specs:
|
||||
return {"inserted_infos": 0, "won_states": 0, "lost_states": 0}
|
||||
|
||||
now = utcnow()
|
||||
asset_rows: list[dict] = []
|
||||
state_rows: list[dict] = []
|
||||
path_to_asset: dict[str, str] = {}
|
||||
asset_to_info: dict[str, dict] = {}
|
||||
path_list: list[str] = []
|
||||
|
||||
for sp in specs:
|
||||
ap = os.path.abspath(sp["abs_path"])
|
||||
aid = str(uuid.uuid4())
|
||||
iid = str(uuid.uuid4())
|
||||
path_list.append(ap)
|
||||
path_to_asset[ap] = aid
|
||||
|
||||
asset_rows.append({
|
||||
"id": aid,
|
||||
"hash": None,
|
||||
"size_bytes": sp["size_bytes"],
|
||||
"mime_type": None,
|
||||
"created_at": now,
|
||||
})
|
||||
state_rows.append({
|
||||
"asset_id": aid,
|
||||
"file_path": ap,
|
||||
"mtime_ns": sp["mtime_ns"],
|
||||
})
|
||||
asset_to_info[aid] = {
|
||||
"id": iid,
|
||||
"owner_id": owner_id,
|
||||
"name": sp["info_name"],
|
||||
"asset_id": aid,
|
||||
"preview_id": None,
|
||||
"user_metadata": {"filename": sp["fname"]} if sp["fname"] else None,
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"last_access_time": now,
|
||||
"_tags": sp["tags"],
|
||||
"_filename": sp["fname"],
|
||||
}
|
||||
|
||||
# 1. Insert all seed Assets (hash=NULL)
|
||||
bulk_insert_assets(session, asset_rows)
|
||||
|
||||
# 2. Try to claim cache states (file_path unique)
|
||||
bulk_insert_cache_states_ignore_conflicts(session, state_rows)
|
||||
|
||||
# 3. Query to find which paths we won
|
||||
winners_by_path = get_cache_states_by_paths_and_asset_ids(session, path_to_asset)
|
||||
|
||||
all_paths_set = set(path_list)
|
||||
losers_by_path = all_paths_set - winners_by_path
|
||||
lost_assets = [path_to_asset[p] for p in losers_by_path]
|
||||
|
||||
# 4. Delete Assets for losers
|
||||
if lost_assets:
|
||||
delete_assets_by_ids(session, lost_assets)
|
||||
|
||||
if not winners_by_path:
|
||||
return {"inserted_infos": 0, "won_states": 0, "lost_states": len(losers_by_path)}
|
||||
|
||||
# 5. Insert AssetInfo for winners
|
||||
winner_info_rows = [asset_to_info[path_to_asset[p]] for p in winners_by_path]
|
||||
db_info_rows = [
|
||||
{
|
||||
"id": row["id"],
|
||||
"owner_id": row["owner_id"],
|
||||
"name": row["name"],
|
||||
"asset_id": row["asset_id"],
|
||||
"preview_id": row["preview_id"],
|
||||
"user_metadata": row["user_metadata"],
|
||||
"created_at": row["created_at"],
|
||||
"updated_at": row["updated_at"],
|
||||
"last_access_time": row["last_access_time"],
|
||||
}
|
||||
for row in winner_info_rows
|
||||
]
|
||||
bulk_insert_asset_infos_ignore_conflicts(session, db_info_rows)
|
||||
|
||||
# 6. Find which info rows were actually inserted
|
||||
all_info_ids = [row["id"] for row in winner_info_rows]
|
||||
inserted_info_ids = get_asset_info_ids_by_ids(session, all_info_ids)
|
||||
|
||||
# 7. Build and insert tag + meta rows
|
||||
tag_rows: list[dict] = []
|
||||
meta_rows: list[dict] = []
|
||||
if inserted_info_ids:
|
||||
for row in winner_info_rows:
|
||||
iid = row["id"]
|
||||
if iid not in inserted_info_ids:
|
||||
continue
|
||||
for t in row["_tags"]:
|
||||
tag_rows.append({
|
||||
"asset_info_id": iid,
|
||||
"tag_name": t,
|
||||
"origin": "automatic",
|
||||
"added_at": now,
|
||||
})
|
||||
if row["_filename"]:
|
||||
meta_rows.append({
|
||||
"asset_info_id": iid,
|
||||
"key": "filename",
|
||||
"ordinal": 0,
|
||||
"val_str": row["_filename"],
|
||||
"val_num": None,
|
||||
"val_bool": None,
|
||||
"val_json": None,
|
||||
})
|
||||
|
||||
bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=meta_rows)
|
||||
|
||||
return {
|
||||
"inserted_infos": len(inserted_info_ids),
|
||||
"won_states": len(winners_by_path),
|
||||
"lost_states": len(losers_by_path),
|
||||
}
|
||||
|
||||
|
||||
def prune_orphaned_assets(session, valid_prefixes: list[str]) -> int:
|
||||
"""Prune cache states outside valid prefixes, then delete orphaned seed assets.
|
||||
|
||||
@@ -229,7 +384,7 @@ def seed_assets(roots: tuple[RootType, ...], enable_logging: bool = False) -> No
|
||||
with create_session() as sess:
|
||||
if tag_pool:
|
||||
ensure_tags_exist(sess, tag_pool, tag_type="user")
|
||||
result = seed_from_paths_batch(sess, specs=specs, owner_id="")
|
||||
result = _seed_from_paths_batch(sess, specs=specs, owner_id="")
|
||||
created += result["inserted_infos"]
|
||||
sess.commit()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user