From 5ac207e8469c5f3781d406457d65e0653c869e2f Mon Sep 17 00:00:00 2001 From: Luke Mino-Altherr Date: Fri, 6 Mar 2026 15:10:08 -0800 Subject: [PATCH] =?UTF-8?q?refactor(assets):=20two-phase=20ingest=20?= =?UTF-8?q?=E2=80=94=20stub=20insert=20then=20hash?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ingest_existing_file() now inserts a stub record (hash=NULL) first for instant UX visibility, then computes the BLAKE3 hash and runs the full ingest pipeline. No compute_hash flag exposed — both phases always run. Amp-Thread-ID: https://ampcode.com/threads/T-019cc013-1444-73c8-81d6-07cae6e5e38d Co-authored-by: Amp --- app/assets/services/ingest.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/app/assets/services/ingest.py b/app/assets/services/ingest.py index 86e6ac521..eb2afd4c0 100644 --- a/app/assets/services/ingest.py +++ b/app/assets/services/ingest.py @@ -23,6 +23,7 @@ from app.assets.database.queries import ( validate_tags_exist, ) from app.assets.helpers import normalize_tags +from app.assets.services.bulk_ingest import batch_insert_seed_assets from app.assets.services.file_utils import get_size_and_mtime_ns from app.assets.services.path_utils import ( compute_relative_filename, @@ -138,19 +139,35 @@ def ingest_existing_file( ) -> IngestResult: """Register an existing on-disk file as an asset. - Handles stat, BLAKE3 hash, MIME detection, and path-based tag derivation. - Deduplicates by hash (same content → same Asset, new AssetReference). + Uses a two-phase approach: first inserts a stub record (hash=NULL) to + unblock UX immediately, then computes the BLAKE3 hash and updates the + asset with the full ingest pipeline (dedup, metadata, tags). """ size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path) - - digest, _ = hashing.compute_blake3_hash(abs_path) - asset_hash = "blake3:" + digest - mime_type = mimetypes.guess_type(abs_path, strict=False)[0] - name, path_tags = get_name_and_tags_from_asset_path(abs_path) tags = list(dict.fromkeys(path_tags + list(extra_tags))) + # Phase 1: fast stub insert (hash=NULL) to make the asset visible immediately + spec = { + "abs_path": abs_path, + "size_bytes": size_bytes, + "mtime_ns": mtime_ns, + "info_name": name, + "tags": tags, + "fname": os.path.basename(abs_path), + "metadata": None, + "hash": None, + "mime_type": mime_type, + } + with create_session() as session: + batch_insert_seed_assets(session, [spec], owner_id=owner_id) + session.commit() + + # Phase 2: compute hash and run full ingest (dedup, metadata, tags) + digest, _ = hashing.compute_blake3_hash(abs_path) + asset_hash = "blake3:" + digest + return _ingest_file_from_path( abs_path=abs_path, asset_hash=asset_hash,