refactor(assets): two-phase ingest — stub insert then hash

ingest_existing_file() now inserts a stub record (hash=NULL) first so the
asset becomes visible immediately, then computes the BLAKE3 hash and runs
the full ingest pipeline (dedup, metadata, tags). No compute_hash flag is
exposed — both phases always run.

Amp-Thread-ID: https://ampcode.com/threads/T-019cc013-1444-73c8-81d6-07cae6e5e38d
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-03-06 15:10:08 -08:00
parent 508cae643b
commit 5ac207e846

View File

@@ -23,6 +23,7 @@ from app.assets.database.queries import (
validate_tags_exist,
)
from app.assets.helpers import normalize_tags
from app.assets.services.bulk_ingest import batch_insert_seed_assets
from app.assets.services.file_utils import get_size_and_mtime_ns
from app.assets.services.path_utils import (
compute_relative_filename,
@@ -138,19 +139,35 @@ def ingest_existing_file(
) -> IngestResult:
"""Register an existing on-disk file as an asset.
Handles stat, BLAKE3 hash, MIME detection, and path-based tag derivation.
Deduplicates by hash (same content → same Asset, new AssetReference).
Uses a two-phase approach: first inserts a stub record (hash=NULL) to
unblock UX immediately, then computes the BLAKE3 hash and updates the
asset with the full ingest pipeline (dedup, metadata, tags).
"""
size_bytes, mtime_ns = get_size_and_mtime_ns(abs_path)
digest, _ = hashing.compute_blake3_hash(abs_path)
asset_hash = "blake3:" + digest
mime_type = mimetypes.guess_type(abs_path, strict=False)[0]
name, path_tags = get_name_and_tags_from_asset_path(abs_path)
tags = list(dict.fromkeys(path_tags + list(extra_tags)))
# Phase 1: fast stub insert (hash=NULL) to make the asset visible immediately
spec = {
"abs_path": abs_path,
"size_bytes": size_bytes,
"mtime_ns": mtime_ns,
"info_name": name,
"tags": tags,
"fname": os.path.basename(abs_path),
"metadata": None,
"hash": None,
"mime_type": mime_type,
}
with create_session() as session:
batch_insert_seed_assets(session, [spec], owner_id=owner_id)
session.commit()
# Phase 2: compute hash and run full ingest (dedup, metadata, tags)
digest, _ = hashing.compute_blake3_hash(abs_path)
asset_hash = "blake3:" + digest
return _ingest_file_from_path(
abs_path=abs_path,
asset_hash=asset_hash,