refactor(assets): extract scanner logic into service modules

- Create file_utils.py with shared file utilities:
  - get_mtime_ns() - extract mtime in nanoseconds from stat
  - get_size_and_mtime_ns() - get both size and mtime
  - verify_file_unchanged() - check file matches DB mtime/size
  - list_files_recursively() - recursive directory listing

- Create bulk_ingest.py for bulk operations:
  - BulkInsertResult dataclass
  - batch_insert_seed_assets() - batch insert with conflict handling
  - prune_orphaned_assets() - clean up orphaned assets

- Update scanner.py to use new service modules instead of
  calling database queries directly

- Update ingest.py to use shared get_size_and_mtime_ns()

- Export new functions from services/__init__.py

Amp-Thread-ID: https://ampcode.com/threads/T-019c2ae7-f701-716a-a0dd-1feb988732fb
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-02-04 15:17:31 -08:00
parent 8c4eb9a659
commit bc92ae4a0d
6 changed files with 331 additions and 278 deletions

View File

@@ -52,48 +52,48 @@ from app.assets.database.queries.tags import (
)
__all__ = [
"asset_exists_by_hash",
"get_asset_by_hash",
"upsert_asset",
"bulk_insert_assets",
"asset_info_exists_for_asset_id",
"get_asset_info_by_id",
"insert_asset_info",
"get_or_create_asset_info",
"update_asset_info_timestamps",
"list_asset_infos_page",
"fetch_asset_info_asset_and_tags",
"fetch_asset_info_and_asset",
"update_asset_info_access_time",
"update_asset_info_name",
"update_asset_info_updated_at",
"set_asset_info_metadata",
"delete_asset_info_by_id",
"set_asset_info_preview",
"bulk_insert_asset_infos_ignore_conflicts",
"get_asset_info_ids_by_ids",
"CacheStateRow",
"list_cache_states_by_asset_id",
"upsert_cache_state",
"delete_cache_states_outside_prefixes",
"get_orphaned_seed_asset_ids",
"delete_assets_by_ids",
"get_cache_states_for_prefixes",
"bulk_set_needs_verify",
"delete_cache_states_by_ids",
"delete_orphaned_seed_asset",
"bulk_insert_cache_states_ignore_conflicts",
"get_cache_states_by_paths_and_asset_ids",
"ensure_tags_exist",
"get_asset_tags",
"set_asset_info_tags",
"add_tags_to_asset_info",
"remove_tags_from_asset_info",
"add_missing_tag_for_asset_id",
"remove_missing_tag_for_asset_id",
"list_tags_with_usage",
"bulk_insert_tags_and_meta",
"AddTagsDict",
"CacheStateRow",
"RemoveTagsDict",
"SetTagsDict",
"add_missing_tag_for_asset_id",
"add_tags_to_asset_info",
"asset_exists_by_hash",
"asset_info_exists_for_asset_id",
"bulk_insert_asset_infos_ignore_conflicts",
"bulk_insert_assets",
"bulk_insert_cache_states_ignore_conflicts",
"bulk_insert_tags_and_meta",
"bulk_set_needs_verify",
"delete_asset_info_by_id",
"delete_assets_by_ids",
"delete_cache_states_by_ids",
"delete_cache_states_outside_prefixes",
"delete_orphaned_seed_asset",
"ensure_tags_exist",
"fetch_asset_info_and_asset",
"fetch_asset_info_asset_and_tags",
"get_asset_by_hash",
"get_asset_info_by_id",
"get_asset_info_ids_by_ids",
"get_asset_tags",
"get_cache_states_by_paths_and_asset_ids",
"get_cache_states_for_prefixes",
"get_or_create_asset_info",
"get_orphaned_seed_asset_ids",
"insert_asset_info",
"list_asset_infos_page",
"list_cache_states_by_asset_id",
"list_tags_with_usage",
"remove_missing_tag_for_asset_id",
"remove_tags_from_asset_info",
"set_asset_info_metadata",
"set_asset_info_preview",
"set_asset_info_tags",
"update_asset_info_access_time",
"update_asset_info_name",
"update_asset_info_timestamps",
"update_asset_info_updated_at",
"upsert_asset",
"upsert_cache_state",
]