fix: address code review feedback

- Fix missing import for compute_filename_for_reference in ingest.py - Apply code review fixes across routes, queries, scanner, seeder, hashing, ingest, path_utils, main, and server - Update and add tests for sync references and seeder Amp-Thread-ID: https://ampcode.com/threads/T-019cb61a-ed54-738c-a05f-9b5242e513f3 Co-authored-by: Amp <amp@ampcode.com>
2026-03-07 22:30:00 +00:00 · 2026-03-03 15:51:35 -08:00
parent 3232f48a41
commit 4d4c2cedd3
13 changed files with 675 additions and 218 deletions
--- a/app/assets/scanner.py
+++ b/app/assets/scanner.py
@@ -44,9 +44,9 @@ from app.database.db import create_session, dependencies_available

 class _RefInfo(TypedDict):
    ref_id: str
-    fp: str
+    file_path: str
    exists: bool
-    fast_ok: bool
+    stat_unchanged: bool
    needs_verify: bool


@@ -75,9 +75,7 @@ def get_prefixes_for_root(root: RootType) -> list[str]:
 def get_all_known_prefixes() -> list[str]:
    """Get all known asset prefixes across all root types."""
    all_roots: tuple[RootType, ...] = ("models", "input", "output")
-    return [
-        os.path.abspath(p) for root in all_roots for p in get_prefixes_for_root(root)
-    ]
+    return [p for root in all_roots for p in get_prefixes_for_root(root)]


 def collect_models_files() -> list[str]:
@@ -110,10 +108,10 @@ def sync_references_with_filesystem(
 ) -> set[str] | None:
    """Reconcile asset references with filesystem for a root.

-    - Toggle needs_verify per reference using fast mtime/size check
-    - For hashed assets with at least one fast-ok ref: delete stale missing refs
+    - Toggle needs_verify per reference using mtime/size stat check
+    - For hashed assets with at least one stat-unchanged ref: delete stale missing refs
    - For seed assets with all refs missing: delete Asset and its references
-    - Optionally add/remove 'missing' tags based on fast-ok in this root
+    - Optionally add/remove 'missing' tags based on stat check in this root
    - Optionally return surviving absolute paths

    Args:
@@ -140,10 +138,10 @@ def sync_references_with_filesystem(
            acc = {"hash": row.asset_hash, "size_db": row.size_bytes, "refs": []}
            by_asset[row.asset_id] = acc

-        fast_ok = False
+        stat_unchanged = False
        try:
            exists = True
-            fast_ok = verify_file_unchanged(
+            stat_unchanged = verify_file_unchanged(
                mtime_db=row.mtime_ns,
                size_db=acc["size_db"],
                stat_result=os.stat(row.file_path, follow_symlinks=True),
@@ -160,9 +158,9 @@ def sync_references_with_filesystem(
        acc["refs"].append(
            {
                "ref_id": row.reference_id,
-                "fp": row.file_path,
+                "file_path": row.file_path,
                "exists": exists,
-                "fast_ok": fast_ok,
+                "stat_unchanged": stat_unchanged,
                "needs_verify": row.needs_verify,
            }
        )
@@ -177,18 +175,18 @@ def sync_references_with_filesystem(
    for aid, acc in by_asset.items():
        a_hash = acc["hash"]
        refs = acc["refs"]
-        any_fast_ok = any(r["fast_ok"] for r in refs)
+        any_unchanged = any(r["stat_unchanged"] for r in refs)
        all_missing = all(not r["exists"] for r in refs)

        for r in refs:
            if not r["exists"]:
                to_mark_missing.append(r["ref_id"])
                continue
-            if r["fast_ok"]:
+            if r["stat_unchanged"]:
                to_clear_missing.append(r["ref_id"])
                if r["needs_verify"]:
                    to_clear_verify.append(r["ref_id"])
-            if not r["fast_ok"] and not r["needs_verify"]:
+            if not r["stat_unchanged"] and not r["needs_verify"]:
                to_set_verify.append(r["ref_id"])

        if a_hash is None:
@@ -197,10 +195,10 @@ def sync_references_with_filesystem(
            else:
                for r in refs:
                    if r["exists"]:
-                        survivors.add(os.path.abspath(r["fp"]))
+                        survivors.add(os.path.abspath(r["file_path"]))
            continue

-        if any_fast_ok:
+        if any_unchanged:
            for r in refs:
                if not r["exists"]:
                    stale_ref_ids.append(r["ref_id"])
@@ -219,7 +217,7 @@ def sync_references_with_filesystem(

        for r in refs:
            if r["exists"]:
-                survivors.add(os.path.abspath(r["fp"]))
+                survivors.add(os.path.abspath(r["file_path"]))

    delete_references_by_ids(session, stale_ref_ids)
    stale_set = set(stale_ref_ids)
@@ -349,58 +347,6 @@ def build_asset_specs(
    return specs, tag_pool, skipped


-def build_stub_specs(
-    paths: list[str],
-    existing_paths: set[str],
-) -> tuple[list[SeedAssetSpec], set[str], int]:
-    """Build minimal stub specs for fast phase scanning.
-
-    Only collects filesystem metadata (stat), no file content reading.
-    This is the fastest possible scan to populate the asset database.
-
-    Args:
-        paths: List of file paths to process
-        existing_paths: Set of paths that already exist in the database
-
-    Returns:
-        Tuple of (specs, tag_pool, skipped_count)
-    """
-    specs: list[SeedAssetSpec] = []
-    tag_pool: set[str] = set()
-    skipped = 0
-
-    for p in paths:
-        abs_p = os.path.abspath(p)
-        if abs_p in existing_paths:
-            skipped += 1
-            continue
-        try:
-            stat_p = os.stat(abs_p, follow_symlinks=True)
-        except OSError:
-            continue
-        if not stat_p.st_size:
-            continue
-
-        name, tags = get_name_and_tags_from_asset_path(abs_p)
-        rel_fname = compute_relative_filename(abs_p)
-
-        specs.append(
-            {
-                "abs_path": abs_p,
-                "size_bytes": stat_p.st_size,
-                "mtime_ns": get_mtime_ns(stat_p),
-                "info_name": name,
-                "tags": tags,
-                "fname": rel_fname,
-                "metadata": None,
-                "hash": None,
-                "mime_type": None,
-            }
-        )
-        tag_pool.update(tags)
-
-    return specs, tag_pool, skipped
-

 def insert_asset_specs(specs: list[SeedAssetSpec], tag_pool: set[str]) -> int:
    """Insert asset specs into database, returning count of created refs."""
@@ -538,7 +484,8 @@ def enrich_asset(
        try:
            digest = compute_blake3_hash(file_path)
            full_hash = f"blake3:{digest}"
-            if not extract_metadata or metadata:
+            metadata_ok = not extract_metadata or metadata is not None
+            if metadata_ok:
                new_level = ENRICHMENT_HASHED
        except Exception as e:
            logging.warning("Failed to hash %s: %s", file_path, e)