refactor(assets): modular architecture + async two-phase scanner & background seeder (#12621)

2026-03-09 15:20:06 +00:00 · 2026-03-07 17:37:25 -08:00
parent a7a6335be5
commit 29b24cb517
62 changed files with 10737 additions and 2878 deletions
--- a/tests-unit/assets_test/conftest.py
+++ b/tests-unit/assets_test/conftest.py
@@ -108,7 +108,7 @@ def comfy_url_and_proc(comfy_tmp_base_dir: Path, request: pytest.FixtureRequest)
            "main.py",
            f"--base-directory={str(comfy_tmp_base_dir)}",
            f"--database-url={db_url}",
-            "--disable-assets-autoscan",
+            "--enable-assets",
            "--listen",
            "127.0.0.1",
            "--port",
@@ -212,7 +212,7 @@ def asset_factory(http: requests.Session, api_base: str):

    for aid in created:
        with contextlib.suppress(Exception):
-            http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
+            http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)


@pytest.fixture
@@ -258,14 +258,4 @@ def autoclean_unit_test_assets(http: requests.Session, api_base: str):
            break
        for aid in ids:
            with contextlib.suppress(Exception):
-                http.delete(f"{api_base}/api/assets/{aid}", timeout=30)
-
-
-def trigger_sync_seed_assets(session: requests.Session, base_url: str) -> None:
-    """Force a fast sync/seed pass by calling the seed endpoint."""
-    session.post(base_url + "/api/assets/seed", json={"roots": ["models", "input", "output"]}, timeout=30)
-    time.sleep(0.2)
-
-
-def get_asset_filename(asset_hash: str, extension: str) -> str:
-    return asset_hash.removeprefix("blake3:") + extension
+                http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=30)
--- a/tests-unit/assets_test/helpers.py
+++ b/tests-unit/assets_test/helpers.py
@@ -0,0 +1,28 @@
+"""Helper functions for assets integration tests."""
+import time
+
+import requests
+
+
+def trigger_sync_seed_assets(session: requests.Session, base_url: str) -> None:
+    """Force a synchronous sync/seed pass by calling the seed endpoint with wait=true.
+
+    Retries on 409 (already running) until the previous scan finishes.
+    """
+    deadline = time.monotonic() + 60
+    while True:
+        r = session.post(
+            base_url + "/api/assets/seed?wait=true",
+            json={"roots": ["models", "input", "output"]},
+            timeout=60,
+        )
+        if r.status_code != 409:
+            assert r.status_code == 200, f"seed endpoint returned {r.status_code}: {r.text}"
+            return
+        if time.monotonic() > deadline:
+            raise TimeoutError("seed endpoint stuck in 409 (already running)")
+        time.sleep(0.25)
+
+
+def get_asset_filename(asset_hash: str, extension: str) -> str:
+    return asset_hash.removeprefix("blake3:") + extension
--- a/tests-unit/assets_test/queries/conftest.py
+++ b/tests-unit/assets_test/queries/conftest.py
@@ -0,0 +1,20 @@
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Base
+
+
+@pytest.fixture
+def session():
+    """In-memory SQLite session for fast unit tests."""
+    engine = create_engine("sqlite:///:memory:")
+    Base.metadata.create_all(engine)
+    with Session(engine) as sess:
+        yield sess
+
+
+@pytest.fixture(autouse=True)
+def autoclean_unit_test_assets():
+    """Override parent autouse fixture - query tests don't need server cleanup."""
+    yield
--- a/tests-unit/assets_test/queries/test_asset.py
+++ b/tests-unit/assets_test/queries/test_asset.py
@@ -0,0 +1,144 @@
+import uuid
+
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.helpers import get_utc_now
+from app.assets.database.models import Asset
+from app.assets.database.queries import (
+    asset_exists_by_hash,
+    get_asset_by_hash,
+    upsert_asset,
+    bulk_insert_assets,
+)
+
+
+class TestAssetExistsByHash:
+    @pytest.mark.parametrize(
+        "setup_hash,query_hash,expected",
+        [
+            (None, "nonexistent", False),  # No asset exists
+            ("blake3:abc123", "blake3:abc123", True),  # Asset exists with matching hash
+            (None, "", False),  # Null hash in DB doesn't match empty string
+        ],
+        ids=["nonexistent", "existing", "null_hash_no_match"],
+    )
+    def test_exists_by_hash(self, session: Session, setup_hash, query_hash, expected):
+        if setup_hash is not None or query_hash == "":
+            asset = Asset(hash=setup_hash, size_bytes=100)
+            session.add(asset)
+            session.commit()
+
+        assert asset_exists_by_hash(session, asset_hash=query_hash) is expected
+
+
+class TestGetAssetByHash:
+    @pytest.mark.parametrize(
+        "setup_hash,query_hash,should_find",
+        [
+            (None, "nonexistent", False),
+            ("blake3:def456", "blake3:def456", True),
+        ],
+        ids=["nonexistent", "existing"],
+    )
+    def test_get_by_hash(self, session: Session, setup_hash, query_hash, should_find):
+        if setup_hash is not None:
+            asset = Asset(hash=setup_hash, size_bytes=200, mime_type="image/png")
+            session.add(asset)
+            session.commit()
+
+        result = get_asset_by_hash(session, asset_hash=query_hash)
+        if should_find:
+            assert result is not None
+            assert result.size_bytes == 200
+            assert result.mime_type == "image/png"
+        else:
+            assert result is None
+
+
+class TestUpsertAsset:
+    @pytest.mark.parametrize(
+        "first_size,first_mime,second_size,second_mime,expect_created,expect_updated,final_size,final_mime",
+        [
+            # New asset creation
+            (None, None, 1024, "application/octet-stream", True, False, 1024, "application/octet-stream"),
+            # Existing asset, same values - no update
+            (500, "text/plain", 500, "text/plain", False, False, 500, "text/plain"),
+            # Existing asset with size 0, update with new values
+            (0, None, 2048, "image/png", False, True, 2048, "image/png"),
+            # Existing asset, second call with size 0 - no update
+            (1000, None, 0, None, False, False, 1000, None),
+        ],
+        ids=["new_asset", "existing_no_change", "update_from_zero", "zero_size_no_update"],
+    )
+    def test_upsert_scenarios(
+        self,
+        session: Session,
+        first_size,
+        first_mime,
+        second_size,
+        second_mime,
+        expect_created,
+        expect_updated,
+        final_size,
+        final_mime,
+    ):
+        asset_hash = f"blake3:test_{first_size}_{second_size}"
+
+        # First upsert (if first_size is not None, we're testing the second call)
+        if first_size is not None:
+            upsert_asset(
+                session,
+                asset_hash=asset_hash,
+                size_bytes=first_size,
+                mime_type=first_mime,
+            )
+            session.commit()
+
+        # The upsert call we're testing
+        asset, created, updated = upsert_asset(
+            session,
+            asset_hash=asset_hash,
+            size_bytes=second_size,
+            mime_type=second_mime,
+        )
+        session.commit()
+
+        assert created is expect_created
+        assert updated is expect_updated
+        assert asset.size_bytes == final_size
+        assert asset.mime_type == final_mime
+
+
+class TestBulkInsertAssets:
+    def test_inserts_multiple_assets(self, session: Session):
+        now = get_utc_now()
+        rows = [
+            {"id": str(uuid.uuid4()), "hash": "blake3:bulk1", "size_bytes": 100, "mime_type": "text/plain", "created_at": now},
+            {"id": str(uuid.uuid4()), "hash": "blake3:bulk2", "size_bytes": 200, "mime_type": "image/png", "created_at": now},
+            {"id": str(uuid.uuid4()), "hash": "blake3:bulk3", "size_bytes": 300, "mime_type": None, "created_at": now},
+        ]
+        bulk_insert_assets(session, rows)
+        session.commit()
+
+        assets = session.query(Asset).all()
+        assert len(assets) == 3
+        hashes = {a.hash for a in assets}
+        assert hashes == {"blake3:bulk1", "blake3:bulk2", "blake3:bulk3"}
+
+    def test_empty_list_is_noop(self, session: Session):
+        bulk_insert_assets(session, [])
+        session.commit()
+        assert session.query(Asset).count() == 0
+
+    def test_handles_large_batch(self, session: Session):
+        """Test chunking logic with more rows than MAX_BIND_PARAMS allows."""
+        now = get_utc_now()
+        rows = [
+            {"id": str(uuid.uuid4()), "hash": f"blake3:large{i}", "size_bytes": i, "mime_type": None, "created_at": now}
+            for i in range(200)
+        ]
+        bulk_insert_assets(session, rows)
+        session.commit()
+
+        assert session.query(Asset).count() == 200
--- a/tests-unit/assets_test/queries/test_asset_info.py
+++ b/tests-unit/assets_test/queries/test_asset_info.py
@@ -0,0 +1,517 @@
+import time
+import uuid
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
+from app.assets.database.queries import (
+    reference_exists_for_asset_id,
+    get_reference_by_id,
+    insert_reference,
+    get_or_create_reference,
+    update_reference_timestamps,
+    list_references_page,
+    fetch_reference_asset_and_tags,
+    fetch_reference_and_asset,
+    update_reference_access_time,
+    set_reference_metadata,
+    delete_reference_by_id,
+    set_reference_preview,
+    bulk_insert_references_ignore_conflicts,
+    get_reference_ids_by_ids,
+    ensure_tags_exist,
+    add_tags_to_reference,
+)
+from app.assets.helpers import get_utc_now
+
+
+def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024) -> Asset:
+    asset = Asset(hash=hash_val, size_bytes=size, mime_type="application/octet-stream")
+    session.add(asset)
+    session.flush()
+    return asset
+
+
+def _make_reference(
+    session: Session,
+    asset: Asset,
+    name: str = "test",
+    owner_id: str = "",
+) -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
+        owner_id=owner_id,
+        name=name,
+        asset_id=asset.id,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(ref)
+    session.flush()
+    return ref
+
+
+class TestReferenceExistsForAssetId:
+    def test_returns_false_when_no_reference(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        assert reference_exists_for_asset_id(session, asset_id=asset.id) is False
+
+    def test_returns_true_when_reference_exists(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset)
+        assert reference_exists_for_asset_id(session, asset_id=asset.id) is True
+
+
+class TestGetReferenceById:
+    def test_returns_none_for_nonexistent(self, session: Session):
+        assert get_reference_by_id(session, reference_id="nonexistent") is None
+
+    def test_returns_reference(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset, name="myfile.txt")
+
+        result = get_reference_by_id(session, reference_id=ref.id)
+        assert result is not None
+        assert result.name == "myfile.txt"
+
+
+class TestListReferencesPage:
+    def test_empty_db(self, session: Session):
+        refs, tag_map, total = list_references_page(session)
+        assert refs == []
+        assert tag_map == {}
+        assert total == 0
+
+    def test_returns_references_with_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset, name="test.bin")
+        ensure_tags_exist(session, ["alpha", "beta"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
+        session.commit()
+
+        refs, tag_map, total = list_references_page(session)
+        assert len(refs) == 1
+        assert refs[0].id == ref.id
+        assert set(tag_map[ref.id]) == {"alpha", "beta"}
+        assert total == 1
+
+    def test_name_contains_filter(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, name="model_v1.safetensors")
+        _make_reference(session, asset, name="config.json")
+        session.commit()
+
+        refs, _, total = list_references_page(session, name_contains="model")
+        assert total == 1
+        assert refs[0].name == "model_v1.safetensors"
+
+    def test_owner_visibility(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, name="public", owner_id="")
+        _make_reference(session, asset, name="private", owner_id="user1")
+        session.commit()
+
+        # Empty owner sees only public
+        refs, _, total = list_references_page(session, owner_id="")
+        assert total == 1
+        assert refs[0].name == "public"
+
+        # Owner sees both
+        refs, _, total = list_references_page(session, owner_id="user1")
+        assert total == 2
+
+    def test_include_tags_filter(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref1 = _make_reference(session, asset, name="tagged")
+        _make_reference(session, asset, name="untagged")
+        ensure_tags_exist(session, ["wanted"])
+        add_tags_to_reference(session, reference_id=ref1.id, tags=["wanted"])
+        session.commit()
+
+        refs, _, total = list_references_page(session, include_tags=["wanted"])
+        assert total == 1
+        assert refs[0].name == "tagged"
+
+    def test_exclude_tags_filter(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, name="keep")
+        ref_exclude = _make_reference(session, asset, name="exclude")
+        ensure_tags_exist(session, ["bad"])
+        add_tags_to_reference(session, reference_id=ref_exclude.id, tags=["bad"])
+        session.commit()
+
+        refs, _, total = list_references_page(session, exclude_tags=["bad"])
+        assert total == 1
+        assert refs[0].name == "keep"
+
+    def test_sorting(self, session: Session):
+        asset = _make_asset(session, "hash1", size=100)
+        asset2 = _make_asset(session, "hash2", size=500)
+        _make_reference(session, asset, name="small")
+        _make_reference(session, asset2, name="large")
+        session.commit()
+
+        refs, _, _ = list_references_page(session, sort="size", order="desc")
+        assert refs[0].name == "large"
+
+        refs, _, _ = list_references_page(session, sort="name", order="asc")
+        assert refs[0].name == "large"
+
+
+class TestFetchReferenceAssetAndTags:
+    def test_returns_none_for_nonexistent(self, session: Session):
+        result = fetch_reference_asset_and_tags(session, "nonexistent")
+        assert result is None
+
+    def test_returns_tuple(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset, name="test.bin")
+        ensure_tags_exist(session, ["tag1"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["tag1"])
+        session.commit()
+
+        result = fetch_reference_asset_and_tags(session, ref.id)
+        assert result is not None
+        ret_ref, ret_asset, ret_tags = result
+        assert ret_ref.id == ref.id
+        assert ret_asset.id == asset.id
+        assert ret_tags == ["tag1"]
+
+
+class TestFetchReferenceAndAsset:
+    def test_returns_none_for_nonexistent(self, session: Session):
+        result = fetch_reference_and_asset(session, reference_id="nonexistent")
+        assert result is None
+
+    def test_returns_tuple(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        result = fetch_reference_and_asset(session, reference_id=ref.id)
+        assert result is not None
+        ret_ref, ret_asset = result
+        assert ret_ref.id == ref.id
+        assert ret_asset.id == asset.id
+
+
+class TestUpdateReferenceAccessTime:
+    def test_updates_last_access_time(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        original_time = ref.last_access_time
+        session.commit()
+
+        import time
+        time.sleep(0.01)
+
+        update_reference_access_time(session, reference_id=ref.id)
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.last_access_time > original_time
+
+
+class TestDeleteReferenceById:
+    def test_deletes_existing(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        result = delete_reference_by_id(session, reference_id=ref.id, owner_id="")
+        assert result is True
+        assert get_reference_by_id(session, reference_id=ref.id) is None
+
+    def test_returns_false_for_nonexistent(self, session: Session):
+        result = delete_reference_by_id(session, reference_id="nonexistent", owner_id="")
+        assert result is False
+
+    def test_respects_owner_visibility(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset, owner_id="user1")
+        session.commit()
+
+        result = delete_reference_by_id(session, reference_id=ref.id, owner_id="user2")
+        assert result is False
+        assert get_reference_by_id(session, reference_id=ref.id) is not None
+
+
+class TestSetReferencePreview:
+    def test_sets_preview(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        preview_asset = _make_asset(session, "preview_hash")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        set_reference_preview(session, reference_id=ref.id, preview_asset_id=preview_asset.id)
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.preview_id == preview_asset.id
+
+    def test_clears_preview(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        preview_asset = _make_asset(session, "preview_hash")
+        ref = _make_reference(session, asset)
+        ref.preview_id = preview_asset.id
+        session.commit()
+
+        set_reference_preview(session, reference_id=ref.id, preview_asset_id=None)
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.preview_id is None
+
+    def test_raises_for_nonexistent_reference(self, session: Session):
+        with pytest.raises(ValueError, match="not found"):
+            set_reference_preview(session, reference_id="nonexistent", preview_asset_id=None)
+
+    def test_raises_for_nonexistent_preview(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        with pytest.raises(ValueError, match="Preview Asset"):
+            set_reference_preview(session, reference_id=ref.id, preview_asset_id="nonexistent")
+
+
+class TestInsertReference:
+    def test_creates_new_reference(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = insert_reference(
+            session, asset_id=asset.id, owner_id="user1", name="test.bin"
+        )
+        session.commit()
+
+        assert ref is not None
+        assert ref.name == "test.bin"
+        assert ref.owner_id == "user1"
+
+    def test_allows_duplicate_names(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref1 = insert_reference(session, asset_id=asset.id, owner_id="user1", name="dup.bin")
+        session.commit()
+
+        # Duplicate names are now allowed
+        ref2 = insert_reference(
+            session, asset_id=asset.id, owner_id="user1", name="dup.bin"
+        )
+        session.commit()
+
+        assert ref1 is not None
+        assert ref2 is not None
+        assert ref1.id != ref2.id
+
+
+class TestGetOrCreateReference:
+    def test_creates_new_reference(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref, created = get_or_create_reference(
+            session, asset_id=asset.id, owner_id="user1", name="new.bin"
+        )
+        session.commit()
+
+        assert created is True
+        assert ref.name == "new.bin"
+
+    def test_always_creates_new_reference(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref1, created1 = get_or_create_reference(
+            session, asset_id=asset.id, owner_id="user1", name="existing.bin"
+        )
+        session.commit()
+
+        # Duplicate names are allowed, so always creates new
+        ref2, created2 = get_or_create_reference(
+            session, asset_id=asset.id, owner_id="user1", name="existing.bin"
+        )
+        session.commit()
+
+        assert created1 is True
+        assert created2 is True
+        assert ref1.id != ref2.id
+
+
+class TestUpdateReferenceTimestamps:
+    def test_updates_timestamps(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        original_updated_at = ref.updated_at
+        session.commit()
+
+        time.sleep(0.01)
+        update_reference_timestamps(session, ref)
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.updated_at > original_updated_at
+
+    def test_updates_preview_id(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        preview_asset = _make_asset(session, "preview_hash")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        update_reference_timestamps(session, ref, preview_id=preview_asset.id)
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.preview_id == preview_asset.id
+
+
+class TestSetReferenceMetadata:
+    def test_sets_metadata(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"key": "value"}
+        )
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.user_metadata == {"key": "value"}
+        # Check metadata table
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
+        assert len(meta) == 1
+        assert meta[0].key == "key"
+        assert meta[0].val_str == "value"
+
+    def test_replaces_existing_metadata(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"old": "data"}
+        )
+        session.commit()
+
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"new": "data"}
+        )
+        session.commit()
+
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
+        assert len(meta) == 1
+        assert meta[0].key == "new"
+
+    def test_clears_metadata_with_empty_dict(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={"key": "value"}
+        )
+        session.commit()
+
+        set_reference_metadata(
+            session, reference_id=ref.id, user_metadata={}
+        )
+        session.commit()
+
+        session.refresh(ref)
+        assert ref.user_metadata == {}
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
+        assert len(meta) == 0
+
+    def test_raises_for_nonexistent(self, session: Session):
+        with pytest.raises(ValueError, match="not found"):
+            set_reference_metadata(
+                session, reference_id="nonexistent", user_metadata={"key": "value"}
+            )
+
+
+class TestBulkInsertReferencesIgnoreConflicts:
+    def test_inserts_multiple_references(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        now = get_utc_now()
+        rows = [
+            {
+                "id": str(uuid.uuid4()),
+                "owner_id": "",
+                "name": "bulk1.bin",
+                "asset_id": asset.id,
+                "preview_id": None,
+                "user_metadata": {},
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+            {
+                "id": str(uuid.uuid4()),
+                "owner_id": "",
+                "name": "bulk2.bin",
+                "asset_id": asset.id,
+                "preview_id": None,
+                "user_metadata": {},
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+        ]
+        bulk_insert_references_ignore_conflicts(session, rows)
+        session.commit()
+
+        refs = session.query(AssetReference).all()
+        assert len(refs) == 2
+
+    def test_allows_duplicate_names(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, name="existing.bin", owner_id="")
+        session.commit()
+
+        now = get_utc_now()
+        rows = [
+            {
+                "id": str(uuid.uuid4()),
+                "owner_id": "",
+                "name": "existing.bin",
+                "asset_id": asset.id,
+                "preview_id": None,
+                "user_metadata": {},
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+            {
+                "id": str(uuid.uuid4()),
+                "owner_id": "",
+                "name": "new.bin",
+                "asset_id": asset.id,
+                "preview_id": None,
+                "user_metadata": {},
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+        ]
+        bulk_insert_references_ignore_conflicts(session, rows)
+        session.commit()
+
+        # Duplicate names allowed, so all 3 rows exist
+        refs = session.query(AssetReference).all()
+        assert len(refs) == 3
+
+    def test_empty_list_is_noop(self, session: Session):
+        bulk_insert_references_ignore_conflicts(session, [])
+        assert session.query(AssetReference).count() == 0
+
+
+class TestGetReferenceIdsByIds:
+    def test_returns_existing_ids(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref1 = _make_reference(session, asset, name="a.bin")
+        ref2 = _make_reference(session, asset, name="b.bin")
+        session.commit()
+
+        found = get_reference_ids_by_ids(session, [ref1.id, ref2.id, "nonexistent"])
+
+        assert found == {ref1.id, ref2.id}
+
+    def test_empty_list_returns_empty(self, session: Session):
+        found = get_reference_ids_by_ids(session, [])
+        assert found == set()
--- a/tests-unit/assets_test/queries/test_cache_state.py
+++ b/tests-unit/assets_test/queries/test_cache_state.py
@@ -0,0 +1,499 @@
+"""Tests for cache_state (AssetReference file path) query functions."""
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference
+from app.assets.database.queries import (
+    list_references_by_asset_id,
+    upsert_reference,
+    get_unreferenced_unhashed_asset_ids,
+    delete_assets_by_ids,
+    get_references_for_prefixes,
+    bulk_update_needs_verify,
+    delete_references_by_ids,
+    delete_orphaned_seed_asset,
+    bulk_insert_references_ignore_conflicts,
+    get_references_by_paths_and_asset_ids,
+    mark_references_missing_outside_prefixes,
+    restore_references_by_paths,
+)
+from app.assets.helpers import select_best_live_path, get_utc_now
+
+
+def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024) -> Asset:
+    asset = Asset(hash=hash_val, size_bytes=size)
+    session.add(asset)
+    session.flush()
+    return asset
+
+
+def _make_reference(
+    session: Session,
+    asset: Asset,
+    file_path: str,
+    name: str = "test",
+    mtime_ns: int | None = None,
+    needs_verify: bool = False,
+) -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
+        asset_id=asset.id,
+        file_path=file_path,
+        name=name,
+        mtime_ns=mtime_ns,
+        needs_verify=needs_verify,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(ref)
+    session.flush()
+    return ref
+
+
+class TestListReferencesByAssetId:
+    def test_returns_empty_for_no_references(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        refs = list_references_by_asset_id(session, asset_id=asset.id)
+        assert list(refs) == []
+
+    def test_returns_references_for_asset(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "/path/a.bin", name="a")
+        _make_reference(session, asset, "/path/b.bin", name="b")
+        session.commit()
+
+        refs = list_references_by_asset_id(session, asset_id=asset.id)
+        paths = [r.file_path for r in refs]
+        assert set(paths) == {"/path/a.bin", "/path/b.bin"}
+
+    def test_does_not_return_other_assets_references(self, session: Session):
+        asset1 = _make_asset(session, "hash1")
+        asset2 = _make_asset(session, "hash2")
+        _make_reference(session, asset1, "/path/asset1.bin", name="a1")
+        _make_reference(session, asset2, "/path/asset2.bin", name="a2")
+        session.commit()
+
+        refs = list_references_by_asset_id(session, asset_id=asset1.id)
+        paths = [r.file_path for r in refs]
+        assert paths == ["/path/asset1.bin"]
+
+
+class TestSelectBestLivePath:
+    def test_returns_empty_for_empty_list(self):
+        result = select_best_live_path([])
+        assert result == ""
+
+    def test_returns_empty_when_no_files_exist(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset, "/nonexistent/path.bin")
+        session.commit()
+
+        result = select_best_live_path([ref])
+        assert result == ""
+
+    def test_prefers_verified_path(self, session: Session, tmp_path):
+        """needs_verify=False should be preferred."""
+        asset = _make_asset(session, "hash1")
+
+        verified_file = tmp_path / "verified.bin"
+        verified_file.write_bytes(b"data")
+
+        unverified_file = tmp_path / "unverified.bin"
+        unverified_file.write_bytes(b"data")
+
+        ref_verified = _make_reference(
+            session, asset, str(verified_file), name="verified", needs_verify=False
+        )
+        ref_unverified = _make_reference(
+            session, asset, str(unverified_file), name="unverified", needs_verify=True
+        )
+        session.commit()
+
+        refs = [ref_unverified, ref_verified]
+        result = select_best_live_path(refs)
+        assert result == str(verified_file)
+
+    def test_falls_back_to_existing_unverified(self, session: Session, tmp_path):
+        """If all references need verification, return first existing path."""
+        asset = _make_asset(session, "hash1")
+
+        existing_file = tmp_path / "exists.bin"
+        existing_file.write_bytes(b"data")
+
+        ref = _make_reference(session, asset, str(existing_file), needs_verify=True)
+        session.commit()
+
+        result = select_best_live_path([ref])
+        assert result == str(existing_file)
+
+
+class TestSelectBestLivePathWithMocking:
+    def test_handles_missing_file_path_attr(self):
+        """Gracefully handle references with None file_path."""
+
+        class MockRef:
+            file_path = None
+            needs_verify = False
+
+        result = select_best_live_path([MockRef()])
+        assert result == ""
+
+
+class TestUpsertReference:
+    @pytest.mark.parametrize(
+        "initial_mtime,second_mtime,expect_created,expect_updated,final_mtime",
+        [
+            # New reference creation
+            (None, 12345, True, False, 12345),
+            # Existing reference, same mtime - no update
+            (100, 100, False, False, 100),
+            # Existing reference, different mtime - update
+            (100, 200, False, True, 200),
+        ],
+        ids=["new_reference", "existing_no_change", "existing_update_mtime"],
+    )
+    def test_upsert_scenarios(
+        self, session: Session, initial_mtime, second_mtime, expect_created, expect_updated, final_mtime
+    ):
+        asset = _make_asset(session, "hash1")
+        file_path = f"/path_{initial_mtime}_{second_mtime}.bin"
+        name = f"file_{initial_mtime}_{second_mtime}"
+
+        # Create initial reference if needed
+        if initial_mtime is not None:
+            upsert_reference(session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=initial_mtime)
+            session.commit()
+
+        # The upsert call we're testing
+        created, updated = upsert_reference(
+            session, asset_id=asset.id, file_path=file_path, name=name, mtime_ns=second_mtime
+        )
+        session.commit()
+
+        assert created is expect_created
+        assert updated is expect_updated
+        ref = session.query(AssetReference).filter_by(file_path=file_path).one()
+        assert ref.mtime_ns == final_mtime
+
+    def test_upsert_restores_missing_reference(self, session: Session):
+        """Upserting a reference that was marked missing should restore it."""
+        asset = _make_asset(session, "hash1")
+        file_path = "/restored/file.bin"
+
+        ref = _make_reference(session, asset, file_path, mtime_ns=100)
+        ref.is_missing = True
+        session.commit()
+
+        created, updated = upsert_reference(
+            session, asset_id=asset.id, file_path=file_path, name="restored", mtime_ns=100
+        )
+        session.commit()
+
+        assert created is False
+        assert updated is True
+        restored_ref = session.query(AssetReference).filter_by(file_path=file_path).one()
+        assert restored_ref.is_missing is False
+
+
+class TestRestoreReferencesByPaths:
+    def test_restores_missing_references(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        missing_path = "/missing/file.bin"
+        active_path = "/active/file.bin"
+
+        missing_ref = _make_reference(session, asset, missing_path, name="missing")
+        missing_ref.is_missing = True
+        _make_reference(session, asset, active_path, name="active")
+        session.commit()
+
+        restored = restore_references_by_paths(session, [missing_path])
+        session.commit()
+
+        assert restored == 1
+        ref = session.query(AssetReference).filter_by(file_path=missing_path).one()
+        assert ref.is_missing is False
+
+    def test_empty_list_restores_nothing(self, session: Session):
+        restored = restore_references_by_paths(session, [])
+        assert restored == 0
+
+
+class TestMarkReferencesMissingOutsidePrefixes:
+    def test_marks_references_missing_outside_prefixes(self, session: Session, tmp_path):
+        asset = _make_asset(session, "hash1")
+        valid_dir = tmp_path / "valid"
+        valid_dir.mkdir()
+        invalid_dir = tmp_path / "invalid"
+        invalid_dir.mkdir()
+
+        valid_path = str(valid_dir / "file.bin")
+        invalid_path = str(invalid_dir / "file.bin")
+
+        _make_reference(session, asset, valid_path, name="valid")
+        _make_reference(session, asset, invalid_path, name="invalid")
+        session.commit()
+
+        marked = mark_references_missing_outside_prefixes(session, [str(valid_dir)])
+        session.commit()
+
+        assert marked == 1
+        all_refs = session.query(AssetReference).all()
+        assert len(all_refs) == 2
+
+        valid_ref = next(r for r in all_refs if r.file_path == valid_path)
+        invalid_ref = next(r for r in all_refs if r.file_path == invalid_path)
+        assert valid_ref.is_missing is False
+        assert invalid_ref.is_missing is True
+
+    def test_empty_prefixes_marks_nothing(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "/some/path.bin")
+        session.commit()
+
+        marked = mark_references_missing_outside_prefixes(session, [])
+
+        assert marked == 0
+
+
+class TestGetUnreferencedUnhashedAssetIds:
+    def test_returns_unreferenced_unhashed_assets(self, session: Session):
+        # Unhashed asset (hash=None) with no references (no file_path)
+        no_refs = _make_asset(session, hash_val=None)
+        # Unhashed asset with active reference (not unreferenced)
+        with_active_ref = _make_asset(session, hash_val=None)
+        _make_reference(session, with_active_ref, "/has/ref.bin", name="has_ref")
+        # Unhashed asset with only missing reference (should be unreferenced)
+        with_missing_ref = _make_asset(session, hash_val=None)
+        missing_ref = _make_reference(session, with_missing_ref, "/missing/ref.bin", name="missing_ref")
+        missing_ref.is_missing = True
+        # Regular asset (hash not None) - should not be returned
+        _make_asset(session, hash_val="blake3:regular")
+        session.commit()
+
+        unreferenced = get_unreferenced_unhashed_asset_ids(session)
+
+        assert no_refs.id in unreferenced
+        assert with_missing_ref.id in unreferenced
+        assert with_active_ref.id not in unreferenced
+
+
+class TestDeleteAssetsByIds:
+    def test_deletes_assets_and_references(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "/test/path.bin", name="test")
+        session.commit()
+
+        deleted = delete_assets_by_ids(session, [asset.id])
+        session.commit()
+
+        assert deleted == 1
+        assert session.query(Asset).count() == 0
+        assert session.query(AssetReference).count() == 0
+
+    def test_empty_list_deletes_nothing(self, session: Session):
+        _make_asset(session, "hash1")
+        session.commit()
+
+        deleted = delete_assets_by_ids(session, [])
+
+        assert deleted == 0
+        assert session.query(Asset).count() == 1
+
+
+class TestGetReferencesForPrefixes:
+    def test_returns_references_matching_prefix(self, session: Session, tmp_path):
+        asset = _make_asset(session, "hash1")
+        dir1 = tmp_path / "dir1"
+        dir1.mkdir()
+        dir2 = tmp_path / "dir2"
+        dir2.mkdir()
+
+        path1 = str(dir1 / "file.bin")
+        path2 = str(dir2 / "file.bin")
+
+        _make_reference(session, asset, path1, name="file1", mtime_ns=100)
+        _make_reference(session, asset, path2, name="file2", mtime_ns=200)
+        session.commit()
+
+        rows = get_references_for_prefixes(session, [str(dir1)])
+
+        assert len(rows) == 1
+        assert rows[0].file_path == path1
+
+    def test_empty_prefixes_returns_empty(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "/some/path.bin")
+        session.commit()
+
+        rows = get_references_for_prefixes(session, [])
+
+        assert rows == []
+
+
+class TestBulkSetNeedsVerify:
+    def test_sets_needs_verify_flag(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref1 = _make_reference(session, asset, "/path1.bin", needs_verify=False)
+        ref2 = _make_reference(session, asset, "/path2.bin", needs_verify=False)
+        session.commit()
+
+        updated = bulk_update_needs_verify(session, [ref1.id, ref2.id], True)
+        session.commit()
+
+        assert updated == 2
+        session.refresh(ref1)
+        session.refresh(ref2)
+        assert ref1.needs_verify is True
+        assert ref2.needs_verify is True
+
+    def test_empty_list_updates_nothing(self, session: Session):
+        updated = bulk_update_needs_verify(session, [], True)
+        assert updated == 0
+
+
+class TestDeleteReferencesByIds:
+    def test_deletes_references_by_id(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref1 = _make_reference(session, asset, "/path1.bin")
+        _make_reference(session, asset, "/path2.bin")
+        session.commit()
+
+        deleted = delete_references_by_ids(session, [ref1.id])
+        session.commit()
+
+        assert deleted == 1
+        assert session.query(AssetReference).count() == 1
+
+    def test_empty_list_deletes_nothing(self, session: Session):
+        deleted = delete_references_by_ids(session, [])
+        assert deleted == 0
+
+
+class TestDeleteOrphanedSeedAsset:
+    @pytest.mark.parametrize(
+        "create_asset,expected_deleted,expected_count",
+        [
+            (True, True, 0),   # Existing asset gets deleted
+            (False, False, 0),  # Nonexistent returns False
+        ],
+        ids=["deletes_existing", "nonexistent_returns_false"],
+    )
+    def test_delete_orphaned_seed_asset(
+        self, session: Session, create_asset, expected_deleted, expected_count
+    ):
+        asset_id = "nonexistent-id"
+        if create_asset:
+            asset = _make_asset(session, hash_val=None)
+            asset_id = asset.id
+            _make_reference(session, asset, "/test/path.bin", name="test")
+            session.commit()
+
+        deleted = delete_orphaned_seed_asset(session, asset_id)
+        if create_asset:
+            session.commit()
+
+        assert deleted is expected_deleted
+        assert session.query(Asset).count() == expected_count
+
+
+class TestBulkInsertReferencesIgnoreConflicts:
+    def test_inserts_multiple_references(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        now = get_utc_now()
+        rows = [
+            {
+                "asset_id": asset.id,
+                "file_path": "/bulk1.bin",
+                "name": "bulk1",
+                "mtime_ns": 100,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+            {
+                "asset_id": asset.id,
+                "file_path": "/bulk2.bin",
+                "name": "bulk2",
+                "mtime_ns": 200,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+        ]
+        bulk_insert_references_ignore_conflicts(session, rows)
+        session.commit()
+
+        assert session.query(AssetReference).count() == 2
+
+    def test_ignores_conflicts(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "/existing.bin", mtime_ns=100)
+        session.commit()
+
+        now = get_utc_now()
+        rows = [
+            {
+                "asset_id": asset.id,
+                "file_path": "/existing.bin",
+                "name": "existing",
+                "mtime_ns": 999,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+            {
+                "asset_id": asset.id,
+                "file_path": "/new.bin",
+                "name": "new",
+                "mtime_ns": 200,
+                "created_at": now,
+                "updated_at": now,
+                "last_access_time": now,
+            },
+        ]
+        bulk_insert_references_ignore_conflicts(session, rows)
+        session.commit()
+
+        assert session.query(AssetReference).count() == 2
+        existing = session.query(AssetReference).filter_by(file_path="/existing.bin").one()
+        assert existing.mtime_ns == 100  # Original value preserved
+
+    def test_empty_list_is_noop(self, session: Session):
+        bulk_insert_references_ignore_conflicts(session, [])
+        assert session.query(AssetReference).count() == 0
+
+
+class TestGetReferencesByPathsAndAssetIds:
+    def test_returns_matching_paths(self, session: Session):
+        asset1 = _make_asset(session, "hash1")
+        asset2 = _make_asset(session, "hash2")
+
+        _make_reference(session, asset1, "/path1.bin")
+        _make_reference(session, asset2, "/path2.bin")
+        session.commit()
+
+        path_to_asset = {
+            "/path1.bin": asset1.id,
+            "/path2.bin": asset2.id,
+        }
+        winners = get_references_by_paths_and_asset_ids(session, path_to_asset)
+
+        assert winners == {"/path1.bin", "/path2.bin"}
+
+    def test_excludes_non_matching_asset_ids(self, session: Session):
+        asset1 = _make_asset(session, "hash1")
+        asset2 = _make_asset(session, "hash2")
+
+        _make_reference(session, asset1, "/path1.bin")
+        session.commit()
+
+        # Path exists but with different asset_id
+        path_to_asset = {"/path1.bin": asset2.id}
+        winners = get_references_by_paths_and_asset_ids(session, path_to_asset)
+
+        assert winners == set()
+
+    def test_empty_dict_returns_empty(self, session: Session):
+        winners = get_references_by_paths_and_asset_ids(session, {})
+        assert winners == set()
--- a/tests-unit/assets_test/queries/test_metadata.py
+++ b/tests-unit/assets_test/queries/test_metadata.py
@@ -0,0 +1,184 @@
+"""Tests for metadata filtering logic in asset_reference queries."""
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference, AssetReferenceMeta
+from app.assets.database.queries import list_references_page
+from app.assets.database.queries.asset_reference import convert_metadata_to_rows
+from app.assets.helpers import get_utc_now
+
+
+def _make_asset(session: Session, hash_val: str) -> Asset:
+    asset = Asset(hash=hash_val, size_bytes=1024)
+    session.add(asset)
+    session.flush()
+    return asset
+
+
+def _make_reference(
+    session: Session,
+    asset: Asset,
+    name: str,
+    metadata: dict | None = None,
+) -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
+        owner_id="",
+        name=name,
+        asset_id=asset.id,
+        user_metadata=metadata,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(ref)
+    session.flush()
+
+    if metadata:
+        for key, val in metadata.items():
+            for row in convert_metadata_to_rows(key, val):
+                meta_row = AssetReferenceMeta(
+                    asset_reference_id=ref.id,
+                    key=row["key"],
+                    ordinal=row.get("ordinal", 0),
+                    val_str=row.get("val_str"),
+                    val_num=row.get("val_num"),
+                    val_bool=row.get("val_bool"),
+                    val_json=row.get("val_json"),
+                )
+                session.add(meta_row)
+        session.flush()
+
+    return ref
+
+
+class TestMetadataFilterByType:
+    """Table-driven tests for metadata filtering by different value types."""
+
+    @pytest.mark.parametrize(
+        "match_meta,nomatch_meta,filter_key,filter_val",
+        [
+            # String matching
+            ({"category": "models"}, {"category": "images"}, "category", "models"),
+            # Integer matching
+            ({"epoch": 5}, {"epoch": 10}, "epoch", 5),
+            # Float matching
+            ({"score": 0.95}, {"score": 0.5}, "score", 0.95),
+            # Boolean True matching
+            ({"enabled": True}, {"enabled": False}, "enabled", True),
+            # Boolean False matching
+            ({"enabled": False}, {"enabled": True}, "enabled", False),
+        ],
+        ids=["string", "int", "float", "bool_true", "bool_false"],
+    )
+    def test_filter_matches_correct_value(
+        self, session: Session, match_meta, nomatch_meta, filter_key, filter_val
+    ):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "match", match_meta)
+        _make_reference(session, asset, "nomatch", nomatch_meta)
+        session.commit()
+
+        refs, _, total = list_references_page(
+            session, metadata_filter={filter_key: filter_val}
+        )
+        assert total == 1
+        assert refs[0].name == "match"
+
+    @pytest.mark.parametrize(
+        "stored_meta,filter_key,filter_val",
+        [
+            # String no match
+            ({"category": "models"}, "category", "other"),
+            # Int no match
+            ({"epoch": 5}, "epoch", 99),
+            # Float no match
+            ({"score": 0.5}, "score", 0.99),
+        ],
+        ids=["string_no_match", "int_no_match", "float_no_match"],
+    )
+    def test_filter_returns_empty_when_no_match(
+        self, session: Session, stored_meta, filter_key, filter_val
+    ):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "item", stored_meta)
+        session.commit()
+
+        refs, _, total = list_references_page(
+            session, metadata_filter={filter_key: filter_val}
+        )
+        assert total == 0
+
+
+class TestMetadataFilterNull:
+    """Tests for null/missing key filtering."""
+
+    @pytest.mark.parametrize(
+        "match_name,match_meta,nomatch_name,nomatch_meta,filter_key",
+        [
+            # Null matches missing key
+            ("missing_key", {}, "has_key", {"optional": "value"}, "optional"),
+            # Null matches explicit null
+            ("explicit_null", {"nullable": None}, "has_value", {"nullable": "present"}, "nullable"),
+        ],
+        ids=["missing_key", "explicit_null"],
+    )
+    def test_null_filter_matches(
+        self, session: Session, match_name, match_meta, nomatch_name, nomatch_meta, filter_key
+    ):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, match_name, match_meta)
+        _make_reference(session, asset, nomatch_name, nomatch_meta)
+        session.commit()
+
+        refs, _, total = list_references_page(session, metadata_filter={filter_key: None})
+        assert total == 1
+        assert refs[0].name == match_name
+
+
+class TestMetadataFilterList:
+    """Tests for list-based (OR) filtering."""
+
+    def test_filter_by_list_matches_any(self, session: Session):
+        """List values should match ANY of the values (OR)."""
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "cat_a", {"category": "a"})
+        _make_reference(session, asset, "cat_b", {"category": "b"})
+        _make_reference(session, asset, "cat_c", {"category": "c"})
+        session.commit()
+
+        refs, _, total = list_references_page(session, metadata_filter={"category": ["a", "b"]})
+        assert total == 2
+        names = {r.name for r in refs}
+        assert names == {"cat_a", "cat_b"}
+
+
+class TestMetadataFilterMultipleKeys:
+    """Tests for multiple filter keys (AND semantics)."""
+
+    def test_multiple_keys_must_all_match(self, session: Session):
+        """Multiple keys should ALL match (AND)."""
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "match", {"type": "model", "version": 2})
+        _make_reference(session, asset, "wrong_type", {"type": "config", "version": 2})
+        _make_reference(session, asset, "wrong_version", {"type": "model", "version": 1})
+        session.commit()
+
+        refs, _, total = list_references_page(
+            session, metadata_filter={"type": "model", "version": 2}
+        )
+        assert total == 1
+        assert refs[0].name == "match"
+
+
+class TestMetadataFilterEmptyDict:
+    """Tests for empty filter behavior."""
+
+    def test_empty_filter_returns_all(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        _make_reference(session, asset, "a", {"key": "val"})
+        _make_reference(session, asset, "b", {})
+        session.commit()
+
+        refs, _, total = list_references_page(session, metadata_filter={})
+        assert total == 2
--- a/tests-unit/assets_test/queries/test_tags.py
+++ b/tests-unit/assets_test/queries/test_tags.py
@@ -0,0 +1,366 @@
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference, AssetReferenceTag, AssetReferenceMeta, Tag
+from app.assets.database.queries import (
+    ensure_tags_exist,
+    get_reference_tags,
+    set_reference_tags,
+    add_tags_to_reference,
+    remove_tags_from_reference,
+    add_missing_tag_for_asset_id,
+    remove_missing_tag_for_asset_id,
+    list_tags_with_usage,
+    bulk_insert_tags_and_meta,
+)
+from app.assets.helpers import get_utc_now
+
+
+def _make_asset(session: Session, hash_val: str | None = None) -> Asset:
+    asset = Asset(hash=hash_val, size_bytes=1024)
+    session.add(asset)
+    session.flush()
+    return asset
+
+
+def _make_reference(session: Session, asset: Asset, name: str = "test", owner_id: str = "") -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
+        owner_id=owner_id,
+        name=name,
+        asset_id=asset.id,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(ref)
+    session.flush()
+    return ref
+
+
+class TestEnsureTagsExist:
+    def test_creates_new_tags(self, session: Session):
+        ensure_tags_exist(session, ["alpha", "beta"], tag_type="user")
+        session.commit()
+
+        tags = session.query(Tag).all()
+        assert {t.name for t in tags} == {"alpha", "beta"}
+
+    def test_is_idempotent(self, session: Session):
+        ensure_tags_exist(session, ["alpha"], tag_type="user")
+        ensure_tags_exist(session, ["alpha"], tag_type="user")
+        session.commit()
+
+        assert session.query(Tag).count() == 1
+
+    def test_normalizes_tags(self, session: Session):
+        ensure_tags_exist(session, ["  ALPHA  ", "Beta", "alpha"])
+        session.commit()
+
+        tags = session.query(Tag).all()
+        assert {t.name for t in tags} == {"alpha", "beta"}
+
+    def test_empty_list_is_noop(self, session: Session):
+        ensure_tags_exist(session, [])
+        session.commit()
+        assert session.query(Tag).count() == 0
+
+    def test_tag_type_is_set(self, session: Session):
+        ensure_tags_exist(session, ["system-tag"], tag_type="system")
+        session.commit()
+
+        tag = session.query(Tag).filter_by(name="system-tag").one()
+        assert tag.tag_type == "system"
+
+
+class TestGetReferenceTags:
+    def test_returns_empty_for_no_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        tags = get_reference_tags(session, reference_id=ref.id)
+        assert tags == []
+
+    def test_returns_tags_for_reference(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        ensure_tags_exist(session, ["tag1", "tag2"])
+        session.add_all([
+            AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag1", origin="manual", added_at=get_utc_now()),
+            AssetReferenceTag(asset_reference_id=ref.id, tag_name="tag2", origin="manual", added_at=get_utc_now()),
+        ])
+        session.flush()
+
+        tags = get_reference_tags(session, reference_id=ref.id)
+        assert set(tags) == {"tag1", "tag2"}
+
+
+class TestSetReferenceTags:
+    def test_adds_new_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        result = set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
+        session.commit()
+
+        assert set(result.added) == {"a", "b"}
+        assert result.removed == []
+        assert set(result.total) == {"a", "b"}
+
+    def test_removes_old_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        set_reference_tags(session, reference_id=ref.id, tags=["a", "b", "c"])
+        result = set_reference_tags(session, reference_id=ref.id, tags=["a"])
+        session.commit()
+
+        assert result.added == []
+        assert set(result.removed) == {"b", "c"}
+        assert result.total == ["a"]
+
+    def test_replaces_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        set_reference_tags(session, reference_id=ref.id, tags=["a", "b"])
+        result = set_reference_tags(session, reference_id=ref.id, tags=["b", "c"])
+        session.commit()
+
+        assert result.added == ["c"]
+        assert result.removed == ["a"]
+        assert set(result.total) == {"b", "c"}
+
+
+class TestAddTagsToReference:
+    def test_adds_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
+        session.commit()
+
+        assert set(result.added) == {"x", "y"}
+        assert result.already_present == []
+
+    def test_reports_already_present(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        add_tags_to_reference(session, reference_id=ref.id, tags=["x"])
+        result = add_tags_to_reference(session, reference_id=ref.id, tags=["x", "y"])
+        session.commit()
+
+        assert result.added == ["y"]
+        assert result.already_present == ["x"]
+
+    def test_raises_for_missing_reference(self, session: Session):
+        with pytest.raises(ValueError, match="not found"):
+            add_tags_to_reference(session, reference_id="nonexistent", tags=["x"])
+
+
+class TestRemoveTagsFromReference:
+    def test_removes_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
+        result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "b"])
+        session.commit()
+
+        assert set(result.removed) == {"a", "b"}
+        assert result.not_present == []
+        assert result.total_tags == ["c"]
+
+    def test_reports_not_present(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+
+        add_tags_to_reference(session, reference_id=ref.id, tags=["a"])
+        result = remove_tags_from_reference(session, reference_id=ref.id, tags=["a", "x"])
+        session.commit()
+
+        assert result.removed == ["a"]
+        assert result.not_present == ["x"]
+
+    def test_raises_for_missing_reference(self, session: Session):
+        with pytest.raises(ValueError, match="not found"):
+            remove_tags_from_reference(session, reference_id="nonexistent", tags=["x"])
+
+
+class TestMissingTagFunctions:
+    def test_add_missing_tag_for_asset_id(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["missing"], tag_type="system")
+
+        add_missing_tag_for_asset_id(session, asset_id=asset.id)
+        session.commit()
+
+        tags = get_reference_tags(session, reference_id=ref.id)
+        assert "missing" in tags
+
+    def test_add_missing_tag_is_idempotent(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["missing"], tag_type="system")
+
+        add_missing_tag_for_asset_id(session, asset_id=asset.id)
+        add_missing_tag_for_asset_id(session, asset_id=asset.id)
+        session.commit()
+
+        links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="missing").all()
+        assert len(links) == 1
+
+    def test_remove_missing_tag_for_asset_id(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["missing"], tag_type="system")
+        add_missing_tag_for_asset_id(session, asset_id=asset.id)
+
+        remove_missing_tag_for_asset_id(session, asset_id=asset.id)
+        session.commit()
+
+        tags = get_reference_tags(session, reference_id=ref.id)
+        assert "missing" not in tags
+
+
+class TestListTagsWithUsage:
+    def test_returns_tags_with_counts(self, session: Session):
+        ensure_tags_exist(session, ["used", "unused"])
+
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
+        session.commit()
+
+        rows, total = list_tags_with_usage(session)
+
+        tag_dict = {name: count for name, _, count in rows}
+        assert tag_dict["used"] == 1
+        assert tag_dict["unused"] == 0
+        assert total == 2
+
+    def test_exclude_zero_counts(self, session: Session):
+        ensure_tags_exist(session, ["used", "unused"])
+
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
+        session.commit()
+
+        rows, total = list_tags_with_usage(session, include_zero=False)
+
+        tag_names = {name for name, _, _ in rows}
+        assert "used" in tag_names
+        assert "unused" not in tag_names
+
+    def test_prefix_filter(self, session: Session):
+        ensure_tags_exist(session, ["alpha", "beta", "alphabet"])
+        session.commit()
+
+        rows, total = list_tags_with_usage(session, prefix="alph")
+
+        tag_names = {name for name, _, _ in rows}
+        assert tag_names == {"alpha", "alphabet"}
+
+    def test_order_by_name(self, session: Session):
+        ensure_tags_exist(session, ["zebra", "alpha", "middle"])
+        session.commit()
+
+        rows, _ = list_tags_with_usage(session, order="name_asc")
+
+        names = [name for name, _, _ in rows]
+        assert names == ["alpha", "middle", "zebra"]
+
+    def test_owner_visibility(self, session: Session):
+        ensure_tags_exist(session, ["shared-tag", "owner-tag"])
+
+        asset = _make_asset(session, "hash1")
+        shared_ref = _make_reference(session, asset, name="shared", owner_id="")
+        owner_ref = _make_reference(session, asset, name="owned", owner_id="user1")
+
+        add_tags_to_reference(session, reference_id=shared_ref.id, tags=["shared-tag"])
+        add_tags_to_reference(session, reference_id=owner_ref.id, tags=["owner-tag"])
+        session.commit()
+
+        # Empty owner sees only shared
+        rows, _ = list_tags_with_usage(session, owner_id="", include_zero=False)
+        tag_dict = {name: count for name, _, count in rows}
+        assert tag_dict.get("shared-tag", 0) == 1
+        assert tag_dict.get("owner-tag", 0) == 0
+
+        # User1 sees both
+        rows, _ = list_tags_with_usage(session, owner_id="user1", include_zero=False)
+        tag_dict = {name: count for name, _, count in rows}
+        assert tag_dict.get("shared-tag", 0) == 1
+        assert tag_dict.get("owner-tag", 0) == 1
+
+
+class TestBulkInsertTagsAndMeta:
+    def test_inserts_tags(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["bulk-tag1", "bulk-tag2"])
+        session.commit()
+
+        now = get_utc_now()
+        tag_rows = [
+            {"asset_reference_id": ref.id, "tag_name": "bulk-tag1", "origin": "manual", "added_at": now},
+            {"asset_reference_id": ref.id, "tag_name": "bulk-tag2", "origin": "manual", "added_at": now},
+        ]
+        bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
+        session.commit()
+
+        tags = get_reference_tags(session, reference_id=ref.id)
+        assert set(tags) == {"bulk-tag1", "bulk-tag2"}
+
+    def test_inserts_meta(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        meta_rows = [
+            {
+                "asset_reference_id": ref.id,
+                "key": "meta-key",
+                "ordinal": 0,
+                "val_str": "meta-value",
+                "val_num": None,
+                "val_bool": None,
+                "val_json": None,
+            },
+        ]
+        bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=meta_rows)
+        session.commit()
+
+        meta = session.query(AssetReferenceMeta).filter_by(asset_reference_id=ref.id).all()
+        assert len(meta) == 1
+        assert meta[0].key == "meta-key"
+        assert meta[0].val_str == "meta-value"
+
+    def test_ignores_conflicts(self, session: Session):
+        asset = _make_asset(session, "hash1")
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["existing-tag"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["existing-tag"])
+        session.commit()
+
+        now = get_utc_now()
+        tag_rows = [
+            {"asset_reference_id": ref.id, "tag_name": "existing-tag", "origin": "duplicate", "added_at": now},
+        ]
+        bulk_insert_tags_and_meta(session, tag_rows=tag_rows, meta_rows=[])
+        session.commit()
+
+        # Should still have only one tag link
+        links = session.query(AssetReferenceTag).filter_by(asset_reference_id=ref.id, tag_name="existing-tag").all()
+        assert len(links) == 1
+        # Origin should be original, not overwritten
+        assert links[0].origin == "manual"
+
+    def test_empty_lists_is_noop(self, session: Session):
+        bulk_insert_tags_and_meta(session, tag_rows=[], meta_rows=[])
+        assert session.query(AssetReferenceTag).count() == 0
+        assert session.query(AssetReferenceMeta).count() == 0
--- a/tests-unit/assets_test/services/init.py
+++ b/tests-unit/assets_test/services/init.py
@@ -0,0 +1 @@
+# Service layer tests
--- a/tests-unit/assets_test/services/conftest.py
+++ b/tests-unit/assets_test/services/conftest.py
@@ -0,0 +1,54 @@
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Base
+
+
+@pytest.fixture(autouse=True)
+def autoclean_unit_test_assets():
+    """Override parent autouse fixture - service unit tests don't need server cleanup."""
+    yield
+
+
+@pytest.fixture
+def db_engine():
+    """In-memory SQLite engine for fast unit tests."""
+    engine = create_engine("sqlite:///:memory:")
+    Base.metadata.create_all(engine)
+    return engine
+
+
+@pytest.fixture
+def session(db_engine):
+    """Session fixture for tests that need direct DB access."""
+    with Session(db_engine) as sess:
+        yield sess
+
+
+@pytest.fixture
+def mock_create_session(db_engine):
+    """Patch create_session to use our in-memory database."""
+    from contextlib import contextmanager
+    from sqlalchemy.orm import Session as SASession
+
+    @contextmanager
+    def _create_session():
+        with SASession(db_engine) as sess:
+            yield sess
+
+    with patch("app.assets.services.ingest.create_session", _create_session), \
+         patch("app.assets.services.asset_management.create_session", _create_session), \
+         patch("app.assets.services.tagging.create_session", _create_session):
+        yield _create_session
+
+
+@pytest.fixture
+def temp_dir():
+    """Temporary directory for file operations."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield Path(tmpdir)
--- a/tests-unit/assets_test/services/test_asset_management.py
+++ b/tests-unit/assets_test/services/test_asset_management.py
@@ -0,0 +1,268 @@
+"""Tests for asset_management services."""
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference
+from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
+from app.assets.helpers import get_utc_now
+from app.assets.services import (
+    get_asset_detail,
+    update_asset_metadata,
+    delete_asset_reference,
+    set_asset_preview,
+)
+
+
+def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 1024) -> Asset:
+    asset = Asset(hash=hash_val, size_bytes=size, mime_type="application/octet-stream")
+    session.add(asset)
+    session.flush()
+    return asset
+
+
+def _make_reference(
+    session: Session,
+    asset: Asset,
+    name: str = "test",
+    owner_id: str = "",
+) -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
+        owner_id=owner_id,
+        name=name,
+        asset_id=asset.id,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(ref)
+    session.flush()
+    return ref
+
+
+class TestGetAssetDetail:
+    def test_returns_none_for_nonexistent(self, mock_create_session):
+        result = get_asset_detail(reference_id="nonexistent")
+        assert result is None
+
+    def test_returns_asset_with_tags(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, name="test.bin")
+        ensure_tags_exist(session, ["alpha", "beta"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["alpha", "beta"])
+        session.commit()
+
+        result = get_asset_detail(reference_id=ref.id)
+
+        assert result is not None
+        assert result.ref.id == ref.id
+        assert result.asset.hash == asset.hash
+        assert set(result.tags) == {"alpha", "beta"}
+
+    def test_respects_owner_visibility(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, owner_id="user1")
+        session.commit()
+
+        # Wrong owner cannot see
+        result = get_asset_detail(reference_id=ref.id, owner_id="user2")
+        assert result is None
+
+        # Correct owner can see
+        result = get_asset_detail(reference_id=ref.id, owner_id="user1")
+        assert result is not None
+
+
+class TestUpdateAssetMetadata:
+    def test_updates_name(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, name="old_name.bin")
+        ref_id = ref.id
+        session.commit()
+
+        update_asset_metadata(
+            reference_id=ref_id,
+            name="new_name.bin",
+        )
+
+        # Verify by re-fetching from DB
+        session.expire_all()
+        updated_ref = session.get(AssetReference, ref_id)
+        assert updated_ref.name == "new_name.bin"
+
+    def test_updates_tags(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["old"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["old"])
+        session.commit()
+
+        result = update_asset_metadata(
+            reference_id=ref.id,
+            tags=["new1", "new2"],
+        )
+
+        assert set(result.tags) == {"new1", "new2"}
+        assert "old" not in result.tags
+
+    def test_updates_user_metadata(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        ref_id = ref.id
+        session.commit()
+
+        update_asset_metadata(
+            reference_id=ref_id,
+            user_metadata={"key": "value", "num": 42},
+        )
+
+        # Verify by re-fetching from DB
+        session.expire_all()
+        updated_ref = session.get(AssetReference, ref_id)
+        assert updated_ref.user_metadata["key"] == "value"
+        assert updated_ref.user_metadata["num"] == 42
+
+    def test_raises_for_nonexistent(self, mock_create_session):
+        with pytest.raises(ValueError, match="not found"):
+            update_asset_metadata(reference_id="nonexistent", name="fail")
+
+    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, owner_id="user1")
+        session.commit()
+
+        with pytest.raises(PermissionError, match="not owner"):
+            update_asset_metadata(
+                reference_id=ref.id,
+                name="new",
+                owner_id="user2",
+            )
+
+
+class TestDeleteAssetReference:
+    def test_soft_deletes_reference(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        ref_id = ref.id
+        session.commit()
+
+        result = delete_asset_reference(
+            reference_id=ref_id,
+            owner_id="",
+            delete_content_if_orphan=False,
+        )
+
+        assert result is True
+        # Row still exists but is marked as soft-deleted
+        session.expire_all()
+        row = session.get(AssetReference, ref_id)
+        assert row is not None
+        assert row.deleted_at is not None
+
+    def test_returns_false_for_nonexistent(self, mock_create_session):
+        result = delete_asset_reference(
+            reference_id="nonexistent",
+            owner_id="",
+        )
+        assert result is False
+
+    def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, owner_id="user1")
+        ref_id = ref.id
+        session.commit()
+
+        result = delete_asset_reference(
+            reference_id=ref_id,
+            owner_id="user2",
+        )
+
+        assert result is False
+        assert session.get(AssetReference, ref_id) is not None
+
+    def test_keeps_asset_if_other_references_exist(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref1 = _make_reference(session, asset, name="ref1")
+        _make_reference(session, asset, name="ref2")  # Second ref keeps asset alive
+        asset_id = asset.id
+        session.commit()
+
+        delete_asset_reference(
+            reference_id=ref1.id,
+            owner_id="",
+            delete_content_if_orphan=True,
+        )
+
+        # Asset should still exist
+        assert session.get(Asset, asset_id) is not None
+
+    def test_deletes_orphaned_asset(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        asset_id = asset.id
+        ref_id = ref.id
+        session.commit()
+
+        delete_asset_reference(
+            reference_id=ref_id,
+            owner_id="",
+            delete_content_if_orphan=True,
+        )
+
+        # Both ref and asset should be gone
+        assert session.get(AssetReference, ref_id) is None
+        assert session.get(Asset, asset_id) is None
+
+
+class TestSetAssetPreview:
+    def test_sets_preview(self, mock_create_session, session: Session):
+        asset = _make_asset(session, hash_val="blake3:main")
+        preview_asset = _make_asset(session, hash_val="blake3:preview")
+        ref = _make_reference(session, asset)
+        ref_id = ref.id
+        preview_id = preview_asset.id
+        session.commit()
+
+        set_asset_preview(
+            reference_id=ref_id,
+            preview_asset_id=preview_id,
+        )
+
+        # Verify by re-fetching from DB
+        session.expire_all()
+        updated_ref = session.get(AssetReference, ref_id)
+        assert updated_ref.preview_id == preview_id
+
+    def test_clears_preview(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        preview_asset = _make_asset(session, hash_val="blake3:preview")
+        ref = _make_reference(session, asset)
+        ref.preview_id = preview_asset.id
+        ref_id = ref.id
+        session.commit()
+
+        set_asset_preview(
+            reference_id=ref_id,
+            preview_asset_id=None,
+        )
+
+        # Verify by re-fetching from DB
+        session.expire_all()
+        updated_ref = session.get(AssetReference, ref_id)
+        assert updated_ref.preview_id is None
+
+    def test_raises_for_nonexistent_ref(self, mock_create_session):
+        with pytest.raises(ValueError, match="not found"):
+            set_asset_preview(reference_id="nonexistent")
+
+    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, owner_id="user1")
+        session.commit()
+
+        with pytest.raises(PermissionError, match="not owner"):
+            set_asset_preview(
+                reference_id=ref.id,
+                preview_asset_id=None,
+                owner_id="user2",
+            )
--- a/tests-unit/assets_test/services/test_bulk_ingest.py
+++ b/tests-unit/assets_test/services/test_bulk_ingest.py
@@ -0,0 +1,137 @@
+"""Tests for bulk ingest services."""
+
+from pathlib import Path
+
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference
+from app.assets.services.bulk_ingest import SeedAssetSpec, batch_insert_seed_assets
+
+
+class TestBatchInsertSeedAssets:
+    def test_populates_mime_type_for_model_files(self, session: Session, temp_dir: Path):
+        """Verify mime_type is stored in the Asset table for model files."""
+        file_path = temp_dir / "model.safetensors"
+        file_path.write_bytes(b"fake safetensors content")
+
+        specs: list[SeedAssetSpec] = [
+            {
+                "abs_path": str(file_path),
+                "size_bytes": 24,
+                "mtime_ns": 1234567890000000000,
+                "info_name": "Test Model",
+                "tags": ["models"],
+                "fname": "model.safetensors",
+                "metadata": None,
+                "hash": None,
+                "mime_type": "application/safetensors",
+            }
+        ]
+
+        result = batch_insert_seed_assets(session, specs=specs, owner_id="")
+
+        assert result.inserted_refs == 1
+
+        # Verify Asset has mime_type populated
+        assets = session.query(Asset).all()
+        assert len(assets) == 1
+        assert assets[0].mime_type == "application/safetensors"
+
+    def test_mime_type_none_when_not_provided(self, session: Session, temp_dir: Path):
+        """Verify mime_type is None when not provided in spec."""
+        file_path = temp_dir / "unknown.bin"
+        file_path.write_bytes(b"binary data")
+
+        specs: list[SeedAssetSpec] = [
+            {
+                "abs_path": str(file_path),
+                "size_bytes": 11,
+                "mtime_ns": 1234567890000000000,
+                "info_name": "Unknown File",
+                "tags": [],
+                "fname": "unknown.bin",
+                "metadata": None,
+                "hash": None,
+                "mime_type": None,
+            }
+        ]
+
+        result = batch_insert_seed_assets(session, specs=specs, owner_id="")
+
+        assert result.inserted_refs == 1
+
+        assets = session.query(Asset).all()
+        assert len(assets) == 1
+        assert assets[0].mime_type is None
+
+    def test_various_model_mime_types(self, session: Session, temp_dir: Path):
+        """Verify various model file types get correct mime_type."""
+        test_cases = [
+            ("model.safetensors", "application/safetensors"),
+            ("model.pt", "application/pytorch"),
+            ("model.ckpt", "application/pickle"),
+            ("model.gguf", "application/gguf"),
+        ]
+
+        specs: list[SeedAssetSpec] = []
+        for filename, mime_type in test_cases:
+            file_path = temp_dir / filename
+            file_path.write_bytes(b"content")
+            specs.append(
+                {
+                    "abs_path": str(file_path),
+                    "size_bytes": 7,
+                    "mtime_ns": 1234567890000000000,
+                    "info_name": filename,
+                    "tags": [],
+                    "fname": filename,
+                    "metadata": None,
+                    "hash": None,
+                    "mime_type": mime_type,
+                }
+            )
+
+        result = batch_insert_seed_assets(session, specs=specs, owner_id="")
+
+        assert result.inserted_refs == len(test_cases)
+
+        for filename, expected_mime in test_cases:
+            ref = session.query(AssetReference).filter_by(name=filename).first()
+            assert ref is not None
+            asset = session.query(Asset).filter_by(id=ref.asset_id).first()
+            assert asset.mime_type == expected_mime, f"Expected {expected_mime} for {filename}, got {asset.mime_type}"
+
+
+class TestMetadataExtraction:
+    def test_extracts_mime_type_for_model_files(self, temp_dir: Path):
+        """Verify metadata extraction returns correct mime_type for model files."""
+        from app.assets.services.metadata_extract import extract_file_metadata
+
+        file_path = temp_dir / "model.safetensors"
+        file_path.write_bytes(b"fake safetensors content")
+
+        meta = extract_file_metadata(str(file_path))
+
+        assert meta.content_type == "application/safetensors"
+
+    def test_mime_type_for_various_model_formats(self, temp_dir: Path):
+        """Verify various model file types get correct mime_type from metadata."""
+        from app.assets.services.metadata_extract import extract_file_metadata
+
+        test_cases = [
+            ("model.safetensors", "application/safetensors"),
+            ("model.sft", "application/safetensors"),
+            ("model.pt", "application/pytorch"),
+            ("model.pth", "application/pytorch"),
+            ("model.ckpt", "application/pickle"),
+            ("model.pkl", "application/pickle"),
+            ("model.gguf", "application/gguf"),
+        ]
+
+        for filename, expected_mime in test_cases:
+            file_path = temp_dir / filename
+            file_path.write_bytes(b"content")
+
+            meta = extract_file_metadata(str(file_path))
+
+            assert meta.content_type == expected_mime, f"Expected {expected_mime} for {filename}, got {meta.content_type}"
--- a/tests-unit/assets_test/services/test_enrich.py
+++ b/tests-unit/assets_test/services/test_enrich.py
@@ -0,0 +1,207 @@
+"""Tests for asset enrichment (mime_type and hash population)."""
+from pathlib import Path
+
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference
+from app.assets.scanner import (
+    ENRICHMENT_HASHED,
+    ENRICHMENT_METADATA,
+    ENRICHMENT_STUB,
+    enrich_asset,
+)
+
+
+def _create_stub_asset(
+    session: Session,
+    file_path: str,
+    asset_id: str = "test-asset-id",
+    reference_id: str = "test-ref-id",
+    name: str | None = None,
+) -> tuple[Asset, AssetReference]:
+    """Create a stub asset with reference for testing enrichment."""
+    asset = Asset(
+        id=asset_id,
+        hash=None,
+        size_bytes=100,
+        mime_type=None,
+    )
+    session.add(asset)
+    session.flush()
+
+    ref = AssetReference(
+        id=reference_id,
+        asset_id=asset_id,
+        name=name or f"test-asset-{asset_id}",
+        owner_id="system",
+        file_path=file_path,
+        mtime_ns=1234567890000000000,
+        enrichment_level=ENRICHMENT_STUB,
+    )
+    session.add(ref)
+    session.flush()
+
+    return asset, ref
+
+
+class TestEnrichAsset:
+    def test_extracts_mime_type_and_updates_asset(
+        self, db_engine, temp_dir: Path, session: Session
+    ):
+        """Verify mime_type is written to the Asset table during enrichment."""
+        file_path = temp_dir / "model.safetensors"
+        file_path.write_bytes(b"\x00" * 100)
+
+        asset, ref = _create_stub_asset(
+            session, str(file_path), "asset-1", "ref-1"
+        )
+        session.commit()
+
+        new_level = enrich_asset(
+            session,
+            file_path=str(file_path),
+            reference_id=ref.id,
+            asset_id=asset.id,
+            extract_metadata=True,
+            compute_hash=False,
+        )
+
+        assert new_level == ENRICHMENT_METADATA
+
+        session.expire_all()
+        updated_asset = session.get(Asset, "asset-1")
+        assert updated_asset is not None
+        assert updated_asset.mime_type == "application/safetensors"
+
+    def test_computes_hash_and_updates_asset(
+        self, db_engine, temp_dir: Path, session: Session
+    ):
+        """Verify hash is written to the Asset table during enrichment."""
+        file_path = temp_dir / "data.bin"
+        file_path.write_bytes(b"test content for hashing")
+
+        asset, ref = _create_stub_asset(
+            session, str(file_path), "asset-2", "ref-2"
+        )
+        session.commit()
+
+        new_level = enrich_asset(
+            session,
+            file_path=str(file_path),
+            reference_id=ref.id,
+            asset_id=asset.id,
+            extract_metadata=True,
+            compute_hash=True,
+        )
+
+        assert new_level == ENRICHMENT_HASHED
+
+        session.expire_all()
+        updated_asset = session.get(Asset, "asset-2")
+        assert updated_asset is not None
+        assert updated_asset.hash is not None
+        assert updated_asset.hash.startswith("blake3:")
+
+    def test_enrichment_updates_both_mime_and_hash(
+        self, db_engine, temp_dir: Path, session: Session
+    ):
+        """Verify both mime_type and hash are set when full enrichment runs."""
+        file_path = temp_dir / "model.safetensors"
+        file_path.write_bytes(b"\x00" * 50)
+
+        asset, ref = _create_stub_asset(
+            session, str(file_path), "asset-3", "ref-3"
+        )
+        session.commit()
+
+        enrich_asset(
+            session,
+            file_path=str(file_path),
+            reference_id=ref.id,
+            asset_id=asset.id,
+            extract_metadata=True,
+            compute_hash=True,
+        )
+
+        session.expire_all()
+        updated_asset = session.get(Asset, "asset-3")
+        assert updated_asset is not None
+        assert updated_asset.mime_type == "application/safetensors"
+        assert updated_asset.hash is not None
+        assert updated_asset.hash.startswith("blake3:")
+
+    def test_missing_file_returns_stub_level(
+        self, db_engine, temp_dir: Path, session: Session
+    ):
+        """Verify missing files don't cause errors and return STUB level."""
+        file_path = temp_dir / "nonexistent.bin"
+
+        asset, ref = _create_stub_asset(
+            session, str(file_path), "asset-4", "ref-4"
+        )
+        session.commit()
+
+        new_level = enrich_asset(
+            session,
+            file_path=str(file_path),
+            reference_id=ref.id,
+            asset_id=asset.id,
+            extract_metadata=True,
+            compute_hash=True,
+        )
+
+        assert new_level == ENRICHMENT_STUB
+
+        session.expire_all()
+        updated_asset = session.get(Asset, "asset-4")
+        assert updated_asset.mime_type is None
+        assert updated_asset.hash is None
+
+    def test_duplicate_hash_merges_into_existing_asset(
+        self, db_engine, temp_dir: Path, session: Session
+    ):
+        """Verify duplicate files merge into existing asset instead of failing."""
+        file_path_1 = temp_dir / "file1.bin"
+        file_path_2 = temp_dir / "file2.bin"
+        content = b"identical content"
+        file_path_1.write_bytes(content)
+        file_path_2.write_bytes(content)
+
+        asset1, ref1 = _create_stub_asset(
+            session, str(file_path_1), "asset-dup-1", "ref-dup-1"
+        )
+        asset2, ref2 = _create_stub_asset(
+            session, str(file_path_2), "asset-dup-2", "ref-dup-2"
+        )
+        session.commit()
+
+        enrich_asset(
+            session,
+            file_path=str(file_path_1),
+            reference_id=ref1.id,
+            asset_id=asset1.id,
+            extract_metadata=True,
+            compute_hash=True,
+        )
+
+        enrich_asset(
+            session,
+            file_path=str(file_path_2),
+            reference_id=ref2.id,
+            asset_id=asset2.id,
+            extract_metadata=True,
+            compute_hash=True,
+        )
+
+        session.expire_all()
+
+        updated_asset1 = session.get(Asset, "asset-dup-1")
+        assert updated_asset1 is not None
+        assert updated_asset1.hash is not None
+
+        updated_asset2 = session.get(Asset, "asset-dup-2")
+        assert updated_asset2 is None
+
+        updated_ref2 = session.get(AssetReference, "ref-dup-2")
+        assert updated_ref2 is not None
+        assert updated_ref2.asset_id == "asset-dup-1"
--- a/tests-unit/assets_test/services/test_ingest.py
+++ b/tests-unit/assets_test/services/test_ingest.py
@@ -0,0 +1,229 @@
+"""Tests for ingest services."""
+from pathlib import Path
+
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference, Tag
+from app.assets.database.queries import get_reference_tags
+from app.assets.services.ingest import _ingest_file_from_path, _register_existing_asset
+
+
+class TestIngestFileFromPath:
+    def test_creates_asset_and_reference(self, mock_create_session, temp_dir: Path, session: Session):
+        file_path = temp_dir / "test_file.bin"
+        file_path.write_bytes(b"test content")
+
+        result = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:abc123",
+            size_bytes=12,
+            mtime_ns=1234567890000000000,
+            mime_type="application/octet-stream",
+        )
+
+        assert result.asset_created is True
+        assert result.ref_created is True
+        assert result.reference_id is not None
+
+        # Verify DB state
+        assets = session.query(Asset).all()
+        assert len(assets) == 1
+        assert assets[0].hash == "blake3:abc123"
+
+        refs = session.query(AssetReference).all()
+        assert len(refs) == 1
+        assert refs[0].file_path == str(file_path)
+
+    def test_creates_reference_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
+        file_path = temp_dir / "model.safetensors"
+        file_path.write_bytes(b"model data")
+
+        result = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:def456",
+            size_bytes=10,
+            mtime_ns=1234567890000000000,
+            mime_type="application/octet-stream",
+            info_name="My Model",
+            owner_id="user1",
+        )
+
+        assert result.asset_created is True
+        assert result.reference_id is not None
+
+        ref = session.query(AssetReference).first()
+        assert ref is not None
+        assert ref.name == "My Model"
+        assert ref.owner_id == "user1"
+
+    def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
+        file_path = temp_dir / "tagged.bin"
+        file_path.write_bytes(b"data")
+
+        result = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:ghi789",
+            size_bytes=4,
+            mtime_ns=1234567890000000000,
+            info_name="Tagged Asset",
+            tags=["models", "checkpoints"],
+        )
+
+        assert result.reference_id is not None
+
+        # Verify tags were created and linked
+        tags = session.query(Tag).all()
+        tag_names = {t.name for t in tags}
+        assert "models" in tag_names
+        assert "checkpoints" in tag_names
+
+        ref_tags = get_reference_tags(session, reference_id=result.reference_id)
+        assert set(ref_tags) == {"models", "checkpoints"}
+
+    def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
+        file_path = temp_dir / "dup.bin"
+        file_path.write_bytes(b"content")
+
+        # First ingest
+        r1 = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:repeat",
+            size_bytes=7,
+            mtime_ns=1234567890000000000,
+        )
+        assert r1.asset_created is True
+
+        # Second ingest with same hash - should update, not create
+        r2 = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:repeat",
+            size_bytes=7,
+            mtime_ns=1234567890000000001,  # different mtime
+        )
+        assert r2.asset_created is False
+        assert r2.ref_created is False
+        assert r2.ref_updated is True
+
+        # Still only one asset
+        assets = session.query(Asset).all()
+        assert len(assets) == 1
+
+    def test_validates_preview_id(self, mock_create_session, temp_dir: Path, session: Session):
+        file_path = temp_dir / "with_preview.bin"
+        file_path.write_bytes(b"data")
+
+        # Create a preview asset first
+        preview_asset = Asset(hash="blake3:preview", size_bytes=100)
+        session.add(preview_asset)
+        session.commit()
+        preview_id = preview_asset.id
+
+        result = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:main",
+            size_bytes=4,
+            mtime_ns=1234567890000000000,
+            info_name="With Preview",
+            preview_id=preview_id,
+        )
+
+        assert result.reference_id is not None
+        ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
+        assert ref.preview_id == preview_id
+
+    def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
+        file_path = temp_dir / "bad_preview.bin"
+        file_path.write_bytes(b"data")
+
+        result = _ingest_file_from_path(
+            abs_path=str(file_path),
+            asset_hash="blake3:badpreview",
+            size_bytes=4,
+            mtime_ns=1234567890000000000,
+            info_name="Bad Preview",
+            preview_id="nonexistent-uuid",
+        )
+
+        assert result.reference_id is not None
+        ref = session.query(AssetReference).filter_by(id=result.reference_id).first()
+        assert ref.preview_id is None
+
+
+class TestRegisterExistingAsset:
+    def test_creates_reference_for_existing_asset(self, mock_create_session, session: Session):
+        # Create existing asset
+        asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
+        session.add(asset)
+        session.commit()
+
+        result = _register_existing_asset(
+            asset_hash="blake3:existing",
+            name="Registered Asset",
+            user_metadata={"key": "value"},
+            tags=["models"],
+        )
+
+        assert result.created is True
+        assert "models" in result.tags
+
+        # Verify by re-fetching from DB
+        session.expire_all()
+        refs = session.query(AssetReference).filter_by(name="Registered Asset").all()
+        assert len(refs) == 1
+
+    def test_creates_new_reference_even_with_same_name(self, mock_create_session, session: Session):
+        # Create asset and reference
+        asset = Asset(hash="blake3:withref", size_bytes=512)
+        session.add(asset)
+        session.flush()
+
+        from app.assets.helpers import get_utc_now
+        ref = AssetReference(
+            owner_id="",
+            name="Existing Ref",
+            asset_id=asset.id,
+            created_at=get_utc_now(),
+            updated_at=get_utc_now(),
+            last_access_time=get_utc_now(),
+        )
+        session.add(ref)
+        session.flush()
+        ref_id = ref.id
+        session.commit()
+
+        result = _register_existing_asset(
+            asset_hash="blake3:withref",
+            name="Existing Ref",
+            owner_id="",
+        )
+
+        # Multiple files with same name are allowed
+        assert result.created is True
+
+        # Verify two AssetReferences exist for this name
+        session.expire_all()
+        refs = session.query(AssetReference).filter_by(name="Existing Ref").all()
+        assert len(refs) == 2
+        assert ref_id in [r.id for r in refs]
+
+    def test_raises_for_nonexistent_hash(self, mock_create_session):
+        with pytest.raises(ValueError, match="No asset with hash"):
+            _register_existing_asset(
+                asset_hash="blake3:doesnotexist",
+                name="Fail",
+            )
+
+    def test_applies_tags_to_new_reference(self, mock_create_session, session: Session):
+        asset = Asset(hash="blake3:tagged", size_bytes=256)
+        session.add(asset)
+        session.commit()
+
+        result = _register_existing_asset(
+            asset_hash="blake3:tagged",
+            name="Tagged Ref",
+            tags=["alpha", "beta"],
+        )
+
+        assert result.created is True
+        assert set(result.tags) == {"alpha", "beta"}
--- a/tests-unit/assets_test/services/test_tagging.py
+++ b/tests-unit/assets_test/services/test_tagging.py
@@ -0,0 +1,197 @@
+"""Tests for tagging services."""
+import pytest
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import Asset, AssetReference
+from app.assets.database.queries import ensure_tags_exist, add_tags_to_reference
+from app.assets.helpers import get_utc_now
+from app.assets.services import apply_tags, remove_tags, list_tags
+
+
+def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset:
+    asset = Asset(hash=hash_val, size_bytes=1024)
+    session.add(asset)
+    session.flush()
+    return asset
+
+
+def _make_reference(
+    session: Session,
+    asset: Asset,
+    name: str = "test",
+    owner_id: str = "",
+) -> AssetReference:
+    now = get_utc_now()
+    ref = AssetReference(
+        owner_id=owner_id,
+        name=name,
+        asset_id=asset.id,
+        created_at=now,
+        updated_at=now,
+        last_access_time=now,
+    )
+    session.add(ref)
+    session.flush()
+    return ref
+
+
+class TestApplyTags:
+    def test_adds_new_tags(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        session.commit()
+
+        result = apply_tags(
+            reference_id=ref.id,
+            tags=["alpha", "beta"],
+        )
+
+        assert set(result.added) == {"alpha", "beta"}
+        assert result.already_present == []
+        assert set(result.total_tags) == {"alpha", "beta"}
+
+    def test_reports_already_present(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["existing"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["existing"])
+        session.commit()
+
+        result = apply_tags(
+            reference_id=ref.id,
+            tags=["existing", "new"],
+        )
+
+        assert result.added == ["new"]
+        assert result.already_present == ["existing"]
+
+    def test_raises_for_nonexistent_ref(self, mock_create_session):
+        with pytest.raises(ValueError, match="not found"):
+            apply_tags(reference_id="nonexistent", tags=["x"])
+
+    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, owner_id="user1")
+        session.commit()
+
+        with pytest.raises(PermissionError, match="not owner"):
+            apply_tags(
+                reference_id=ref.id,
+                tags=["new"],
+                owner_id="user2",
+            )
+
+
+class TestRemoveTags:
+    def test_removes_tags(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["a", "b", "c"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["a", "b", "c"])
+        session.commit()
+
+        result = remove_tags(
+            reference_id=ref.id,
+            tags=["a", "b"],
+        )
+
+        assert set(result.removed) == {"a", "b"}
+        assert result.not_present == []
+        assert result.total_tags == ["c"]
+
+    def test_reports_not_present(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        ensure_tags_exist(session, ["present"])
+        add_tags_to_reference(session, reference_id=ref.id, tags=["present"])
+        session.commit()
+
+        result = remove_tags(
+            reference_id=ref.id,
+            tags=["present", "absent"],
+        )
+
+        assert result.removed == ["present"]
+        assert result.not_present == ["absent"]
+
+    def test_raises_for_nonexistent_ref(self, mock_create_session):
+        with pytest.raises(ValueError, match="not found"):
+            remove_tags(reference_id="nonexistent", tags=["x"])
+
+    def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset, owner_id="user1")
+        session.commit()
+
+        with pytest.raises(PermissionError, match="not owner"):
+            remove_tags(
+                reference_id=ref.id,
+                tags=["x"],
+                owner_id="user2",
+            )
+
+
+class TestListTags:
+    def test_returns_tags_with_counts(self, mock_create_session, session: Session):
+        ensure_tags_exist(session, ["used", "unused"])
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
+        session.commit()
+
+        rows, total = list_tags()
+
+        tag_dict = {name: count for name, _, count in rows}
+        assert tag_dict["used"] == 1
+        assert tag_dict["unused"] == 0
+        assert total == 2
+
+    def test_excludes_zero_counts(self, mock_create_session, session: Session):
+        ensure_tags_exist(session, ["used", "unused"])
+        asset = _make_asset(session)
+        ref = _make_reference(session, asset)
+        add_tags_to_reference(session, reference_id=ref.id, tags=["used"])
+        session.commit()
+
+        rows, total = list_tags(include_zero=False)
+
+        tag_names = {name for name, _, _ in rows}
+        assert "used" in tag_names
+        assert "unused" not in tag_names
+
+    def test_prefix_filter(self, mock_create_session, session: Session):
+        ensure_tags_exist(session, ["alpha", "beta", "alphabet"])
+        session.commit()
+
+        rows, _ = list_tags(prefix="alph")
+
+        tag_names = {name for name, _, _ in rows}
+        assert tag_names == {"alpha", "alphabet"}
+
+    def test_order_by_name(self, mock_create_session, session: Session):
+        ensure_tags_exist(session, ["zebra", "alpha", "middle"])
+        session.commit()
+
+        rows, _ = list_tags(order="name_asc")
+
+        names = [name for name, _, _ in rows]
+        assert names == ["alpha", "middle", "zebra"]
+
+    def test_pagination(self, mock_create_session, session: Session):
+        ensure_tags_exist(session, ["a", "b", "c", "d", "e"])
+        session.commit()
+
+        rows, total = list_tags(limit=2, offset=1, order="name_asc")
+
+        assert total == 5
+        assert len(rows) == 2
+        names = [name for name, _, _ in rows]
+        assert names == ["b", "c"]
+
+    def test_clamps_limit(self, mock_create_session, session: Session):
+        ensure_tags_exist(session, ["a"])
+        session.commit()
+
+        # Service should clamp limit to max 1000
+        rows, _ = list_tags(limit=2000)
+        assert len(rows) <= 1000
--- a/tests-unit/assets_test/test_assets_missing_sync.py
+++ b/tests-unit/assets_test/test_assets_missing_sync.py
@@ -4,7 +4,7 @@ from pathlib import Path

 import pytest
 import requests
-from conftest import get_asset_filename, trigger_sync_seed_assets
+from helpers import get_asset_filename, trigger_sync_seed_assets



--- a/tests-unit/assets_test/test_crud.py
+++ b/tests-unit/assets_test/test_crud.py
@@ -4,7 +4,7 @@ from pathlib import Path

 import pytest
 import requests
-from conftest import get_asset_filename, trigger_sync_seed_assets
+from helpers import get_asset_filename, trigger_sync_seed_assets


 def test_create_from_hash_success(
@@ -24,11 +24,11 @@ def test_create_from_hash_success(
    assert b1["created_new"] is False
    aid = b1["id"]

-    # Calling again with the same name should return the same AssetInfo id
+    # Calling again with the same name creates a new AssetReference (duplicates allowed)
    r2 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
    b2 = r2.json()
    assert r2.status_code == 201, b2
-    assert b2["id"] == aid
+    assert b2["id"] != aid  # new reference, not the same one


 def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asset: dict):
@@ -42,8 +42,8 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse
    assert "user_metadata" in detail
    assert "filename" in detail["user_metadata"]

-    # DELETE
-    rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
+    # DELETE (hard delete to also remove underlying asset and file)
+    rd = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
    assert rd.status_code == 204

    # GET again -> 404
@@ -53,6 +53,35 @@ def test_get_and_delete_asset(http: requests.Session, api_base: str, seeded_asse
    assert body["error"]["code"] == "ASSET_NOT_FOUND"


+def test_soft_delete_hides_from_get(http: requests.Session, api_base: str, seeded_asset: dict):
+    aid = seeded_asset["id"]
+    asset_hash = seeded_asset["asset_hash"]
+
+    # Soft-delete (default, no delete_content param)
+    rd = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
+    assert rd.status_code == 204
+
+    # GET by reference ID -> 404 (soft-deleted references are hidden)
+    rg = http.get(f"{api_base}/api/assets/{aid}", timeout=120)
+    assert rg.status_code == 404
+
+    # Asset identity is preserved (underlying content still exists)
+    rh = http.head(f"{api_base}/api/assets/hash/{asset_hash}", timeout=120)
+    assert rh.status_code == 200
+
+    # Soft-deleted reference should not appear in listings
+    rl = http.get(
+        f"{api_base}/api/assets",
+        params={"include_tags": "unit-tests", "limit": "500"},
+        timeout=120,
+    )
+    ids = [a["id"] for a in rl.json().get("assets", [])]
+    assert aid not in ids
+
+    # Clean up: hard-delete the soft-deleted reference and orphaned asset
+    http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
+
+
 def test_delete_upon_reference_count(
    http: requests.Session, api_base: str, seeded_asset: dict
 ):
@@ -70,21 +99,32 @@ def test_delete_upon_reference_count(
    assert copy["asset_hash"] == src_hash
    assert copy["created_new"] is False

-    # Delete original reference -> asset identity must remain
+    # Soft-delete original reference (default) -> asset identity must remain
    aid1 = seeded_asset["id"]
    rd1 = http.delete(f"{api_base}/api/assets/{aid1}", timeout=120)
    assert rd1.status_code == 204

    rh1 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
-    assert rh1.status_code == 200  # identity still present
+    assert rh1.status_code == 200  # identity still present (second ref exists)

-    # Delete the last reference with default semantics -> identity and cached files removed
+    # Soft-delete the last reference -> asset identity preserved (no hard delete)
    aid2 = copy["id"]
    rd2 = http.delete(f"{api_base}/api/assets/{aid2}", timeout=120)
    assert rd2.status_code == 204

    rh2 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
-    assert rh2.status_code == 404  # orphan content removed
+    assert rh2.status_code == 200  # asset identity preserved (soft delete)
+
+    # Re-associate via from-hash, then hard-delete -> orphan content removed
+    r3 = http.post(f"{api_base}/api/assets/from-hash", json=payload, timeout=120)
+    assert r3.status_code == 201, r3.json()
+    aid3 = r3.json()["id"]
+
+    rd3 = http.delete(f"{api_base}/api/assets/{aid3}?delete_content=true", timeout=120)
+    assert rd3.status_code == 204
+
+    rh3 = http.head(f"{api_base}/api/assets/hash/{src_hash}", timeout=120)
+    assert rh3.status_code == 404  # orphan content removed


 def test_update_asset_fields(http: requests.Session, api_base: str, seeded_asset: dict):
@@ -126,42 +166,52 @@ def test_head_asset_bad_hash_returns_400_and_no_body(http: requests.Session, api
    assert body == b""


-def test_delete_nonexistent_returns_404(http: requests.Session, api_base: str):
-    bogus = str(uuid.uuid4())
-    r = http.delete(f"{api_base}/api/assets/{bogus}", timeout=120)
+@pytest.mark.parametrize(
+    "method,endpoint_template,payload,expected_status,error_code",
+    [
+        # Delete nonexistent asset
+        ("delete", "/api/assets/{uuid}", None, 404, "ASSET_NOT_FOUND"),
+        # Bad hash algorithm in from-hash
+        (
+            "post",
+            "/api/assets/from-hash",
+            {"hash": "sha256:" + "0" * 64, "name": "x.bin", "tags": ["models", "checkpoints", "unit-tests"]},
+            400,
+            "INVALID_BODY",
+        ),
+        # Get with bad UUID format
+        ("get", "/api/assets/not-a-uuid", None, 404, None),
+        # Get content with bad UUID format
+        ("get", "/api/assets/not-a-uuid/content", None, 404, None),
+    ],
+    ids=["delete_nonexistent", "bad_hash_algorithm", "get_bad_uuid", "content_bad_uuid"],
+)
+def test_error_responses(
+    http: requests.Session, api_base: str, method, endpoint_template, payload, expected_status, error_code
+):
+    # Replace {uuid} placeholder with a random UUID for delete test
+    endpoint = endpoint_template.replace("{uuid}", str(uuid.uuid4()))
+    url = f"{api_base}{endpoint}"
+
+    if method == "get":
+        r = http.get(url, timeout=120)
+    elif method == "post":
+        r = http.post(url, json=payload, timeout=120)
+    elif method == "delete":
+        r = http.delete(url, timeout=120)
+
+    assert r.status_code == expected_status
+    if error_code:
+        body = r.json()
+        assert body["error"]["code"] == error_code
+
+
+def test_create_from_hash_invalid_json(http: requests.Session, api_base: str):
+    """Invalid JSON body requires special handling (data= instead of json=)."""
+    r = http.post(f"{api_base}/api/assets/from-hash", data=b"{not json}", timeout=120)
    body = r.json()
-    assert r.status_code == 404
-    assert body["error"]["code"] == "ASSET_NOT_FOUND"
-
-
-def test_create_from_hash_invalids(http: requests.Session, api_base: str):
-    # Bad hash algorithm
-    bad = {
-        "hash": "sha256:" + "0" * 64,
-        "name": "x.bin",
-        "tags": ["models", "checkpoints", "unit-tests"],
-    }
-    r1 = http.post(f"{api_base}/api/assets/from-hash", json=bad, timeout=120)
-    b1 = r1.json()
-    assert r1.status_code == 400
-    assert b1["error"]["code"] == "INVALID_BODY"
-
-    # Invalid JSON body
-    r2 = http.post(f"{api_base}/api/assets/from-hash", data=b"{not json}", timeout=120)
-    b2 = r2.json()
-    assert r2.status_code == 400
-    assert b2["error"]["code"] == "INVALID_JSON"
-
-
-def test_get_update_download_bad_ids(http: requests.Session, api_base: str):
-    # All endpoints should be not found, as we UUID regex directly in the route definition.
-    bad_id = "not-a-uuid"
-
-    r1 = http.get(f"{api_base}/api/assets/{bad_id}", timeout=120)
-    assert r1.status_code == 404
-
-    r3 = http.get(f"{api_base}/api/assets/{bad_id}/content", timeout=120)
-    assert r3.status_code == 404
+    assert r.status_code == 400
+    assert body["error"]["code"] == "INVALID_JSON"


 def test_update_requires_at_least_one_field(http: requests.Session, api_base: str, seeded_asset: dict):
--- a/tests-unit/assets_test/test_downloads.py
+++ b/tests-unit/assets_test/test_downloads.py
@@ -6,7 +6,7 @@ from typing import Optional

 import pytest
 import requests
-from conftest import get_asset_filename, trigger_sync_seed_assets
+from helpers import get_asset_filename, trigger_sync_seed_assets


 def test_download_attachment_and_inline(http: requests.Session, api_base: str, seeded_asset: dict):
@@ -117,7 +117,7 @@ def test_download_missing_file_returns_404(
        assert body["error"]["code"] == "FILE_NOT_FOUND"
    finally:
        # We created asset without the "unit-tests" tag(see `autoclean_unit_test_assets`), we need to clear it manually.
-        dr = http.delete(f"{api_base}/api/assets/{aid}", timeout=120)
+        dr = http.delete(f"{api_base}/api/assets/{aid}?delete_content=true", timeout=120)
        dr.content


--- a/tests-unit/assets_test/test_file_utils.py
+++ b/tests-unit/assets_test/test_file_utils.py
@@ -0,0 +1,121 @@
+import os
+import sys
+
+import pytest
+
+from app.assets.services.file_utils import is_visible, list_files_recursively
+
+
+class TestIsVisible:
+    def test_visible_file(self):
+        assert is_visible("file.txt") is True
+
+    def test_hidden_file(self):
+        assert is_visible(".hidden") is False
+
+    def test_hidden_directory(self):
+        assert is_visible(".git") is False
+
+    def test_visible_directory(self):
+        assert is_visible("src") is True
+
+    def test_dotdot_is_hidden(self):
+        assert is_visible("..") is False
+
+    def test_dot_is_hidden(self):
+        assert is_visible(".") is False
+
+
+class TestListFilesRecursively:
+    def test_skips_hidden_files(self, tmp_path):
+        (tmp_path / "visible.txt").write_text("a")
+        (tmp_path / ".hidden").write_text("b")
+
+        result = list_files_recursively(str(tmp_path))
+
+        assert len(result) == 1
+        assert result[0].endswith("visible.txt")
+
+    def test_skips_hidden_directories(self, tmp_path):
+        hidden_dir = tmp_path / ".hidden_dir"
+        hidden_dir.mkdir()
+        (hidden_dir / "file.txt").write_text("a")
+
+        visible_dir = tmp_path / "visible_dir"
+        visible_dir.mkdir()
+        (visible_dir / "file.txt").write_text("b")
+
+        result = list_files_recursively(str(tmp_path))
+
+        assert len(result) == 1
+        assert "visible_dir" in result[0]
+        assert ".hidden_dir" not in result[0]
+
+    def test_empty_directory(self, tmp_path):
+        result = list_files_recursively(str(tmp_path))
+        assert result == []
+
+    def test_nonexistent_directory(self, tmp_path):
+        result = list_files_recursively(str(tmp_path / "nonexistent"))
+        assert result == []
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
+    def test_follows_symlinked_directories(self, tmp_path):
+        target = tmp_path / "real_dir"
+        target.mkdir()
+        (target / "model.safetensors").write_text("data")
+
+        root = tmp_path / "root"
+        root.mkdir()
+        (root / "link").symlink_to(target)
+
+        result = list_files_recursively(str(root))
+
+        assert len(result) == 1
+        assert result[0].endswith("model.safetensors")
+        assert "link" in result[0]
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
+    def test_follows_symlinked_files(self, tmp_path):
+        real_file = tmp_path / "real.txt"
+        real_file.write_text("content")
+
+        root = tmp_path / "root"
+        root.mkdir()
+        (root / "link.txt").symlink_to(real_file)
+
+        result = list_files_recursively(str(root))
+
+        assert len(result) == 1
+        assert result[0].endswith("link.txt")
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
+    def test_circular_symlinks_do_not_loop(self, tmp_path):
+        dir_a = tmp_path / "a"
+        dir_a.mkdir()
+        (dir_a / "file.txt").write_text("a")
+        # a/b -> a  (circular)
+        (dir_a / "b").symlink_to(dir_a)
+
+        result = list_files_recursively(str(dir_a))
+
+        assert len(result) == 1
+        assert result[0].endswith("file.txt")
+
+    @pytest.mark.skipif(sys.platform == "win32", reason="symlinks need privileges on Windows")
+    def test_mutual_circular_symlinks(self, tmp_path):
+        dir_a = tmp_path / "a"
+        dir_b = tmp_path / "b"
+        dir_a.mkdir()
+        dir_b.mkdir()
+        (dir_a / "file_a.txt").write_text("a")
+        (dir_b / "file_b.txt").write_text("b")
+        # a/link_b -> b and b/link_a -> a
+        (dir_a / "link_b").symlink_to(dir_b)
+        (dir_b / "link_a").symlink_to(dir_a)
+
+        result = list_files_recursively(str(dir_a))
+        basenames = sorted(os.path.basename(p) for p in result)
+
+        assert "file_a.txt" in basenames
+        assert "file_b.txt" in basenames
--- a/tests-unit/assets_test/test_list_filter.py
+++ b/tests-unit/assets_test/test_list_filter.py
@@ -1,6 +1,7 @@
 import time
 import uuid

+import pytest
 import requests


@@ -283,30 +284,21 @@ def test_list_assets_offset_beyond_total_and_limit_boundary(http, api_base, asse
    assert b2["has_more"] is False


-def test_list_assets_offset_negative_and_limit_nonint_rejected(http, api_base):
-    r1 = http.get(api_base + "/api/assets", params={"offset": "-1"}, timeout=120)
-    b1 = r1.json()
-    assert r1.status_code == 400
-    assert b1["error"]["code"] == "INVALID_QUERY"
-
-    r2 = http.get(api_base + "/api/assets", params={"limit": "abc"}, timeout=120)
-    b2 = r2.json()
-    assert r2.status_code == 400
-    assert b2["error"]["code"] == "INVALID_QUERY"
-
-
-def test_list_assets_invalid_query_rejected(http: requests.Session, api_base: str):
-    # limit too small
-    r1 = http.get(api_base + "/api/assets", params={"limit": "0"}, timeout=120)
-    b1 = r1.json()
-    assert r1.status_code == 400
-    assert b1["error"]["code"] == "INVALID_QUERY"
-
-    # bad metadata JSON
-    r2 = http.get(api_base + "/api/assets", params={"metadata_filter": "{not json"}, timeout=120)
-    b2 = r2.json()
-    assert r2.status_code == 400
-    assert b2["error"]["code"] == "INVALID_QUERY"
+@pytest.mark.parametrize(
+    "params,error_code",
+    [
+        ({"offset": "-1"}, "INVALID_QUERY"),
+        ({"limit": "abc"}, "INVALID_QUERY"),
+        ({"limit": "0"}, "INVALID_QUERY"),
+        ({"metadata_filter": "{not json"}, "INVALID_QUERY"),
+    ],
+    ids=["negative_offset", "non_int_limit", "zero_limit", "invalid_metadata_json"],
+)
+def test_list_assets_invalid_query_rejected(http: requests.Session, api_base: str, params, error_code):
+    r = http.get(api_base + "/api/assets", params=params, timeout=120)
+    body = r.json()
+    assert r.status_code == 400
+    assert body["error"]["code"] == error_code


 def test_list_assets_name_contains_literal_underscore(
--- a/tests-unit/assets_test/test_prune_orphaned_assets.py
+++ b/tests-unit/assets_test/test_prune_orphaned_assets.py
@@ -3,7 +3,7 @@ from pathlib import Path

 import pytest
 import requests
-from conftest import get_asset_filename, trigger_sync_seed_assets
+from helpers import get_asset_filename, trigger_sync_seed_assets


@pytest.fixture
--- a/tests-unit/assets_test/test_sync_references.py
+++ b/tests-unit/assets_test/test_sync_references.py
@@ -0,0 +1,482 @@
+"""Tests for sync_references_with_filesystem in scanner.py."""
+
+import os
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+
+from app.assets.database.models import (
+    Asset,
+    AssetReference,
+    AssetReferenceTag,
+    Base,
+    Tag,
+)
+from app.assets.database.queries.asset_reference import (
+    bulk_insert_references_ignore_conflicts,
+    get_references_for_prefixes,
+    get_unenriched_references,
+    restore_references_by_paths,
+)
+from app.assets.scanner import sync_references_with_filesystem
+from app.assets.services.file_utils import get_mtime_ns
+
+
+@pytest.fixture
+def db_engine():
+    engine = create_engine("sqlite:///:memory:")
+    Base.metadata.create_all(engine)
+    return engine
+
+
+@pytest.fixture
+def session(db_engine):
+    with Session(db_engine) as sess:
+        yield sess
+
+
+@pytest.fixture
+def temp_dir():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield Path(tmpdir)
+
+
+def _create_file(temp_dir: Path, name: str, content: bytes = b"\x00" * 100) -> str:
+    """Create a file and return its absolute path (no symlink resolution)."""
+    p = temp_dir / name
+    p.parent.mkdir(parents=True, exist_ok=True)
+    p.write_bytes(content)
+    return os.path.abspath(str(p))
+
+
+def _stat_mtime_ns(path: str) -> int:
+    return get_mtime_ns(os.stat(path, follow_symlinks=True))
+
+
+def _make_asset(
+    session: Session,
+    asset_id: str,
+    file_path: str,
+    ref_id: str,
+    *,
+    asset_hash: str | None = None,
+    size_bytes: int = 100,
+    mtime_ns: int | None = None,
+    needs_verify: bool = False,
+    is_missing: bool = False,
+) -> tuple[Asset, AssetReference]:
+    """Insert an Asset + AssetReference and flush."""
+    asset = session.get(Asset, asset_id)
+    if asset is None:
+        asset = Asset(id=asset_id, hash=asset_hash, size_bytes=size_bytes)
+        session.add(asset)
+        session.flush()
+
+    ref = AssetReference(
+        id=ref_id,
+        asset_id=asset_id,
+        name=f"test-{ref_id}",
+        owner_id="system",
+        file_path=file_path,
+        mtime_ns=mtime_ns,
+        needs_verify=needs_verify,
+        is_missing=is_missing,
+    )
+    session.add(ref)
+    session.flush()
+    return asset, ref
+
+
+def _ensure_missing_tag(session: Session):
+    """Ensure the 'missing' tag exists."""
+    if not session.get(Tag, "missing"):
+        session.add(Tag(name="missing", tag_type="system"))
+        session.flush()
+
+
+class _VerifyCase:
+    def __init__(self, id, stat_unchanged, needs_verify_before, expect_needs_verify):
+        self.id = id
+        self.stat_unchanged = stat_unchanged
+        self.needs_verify_before = needs_verify_before
+        self.expect_needs_verify = expect_needs_verify
+
+
+VERIFY_CASES = [
+    _VerifyCase(
+        id="unchanged_clears_verify",
+        stat_unchanged=True,
+        needs_verify_before=True,
+        expect_needs_verify=False,
+    ),
+    _VerifyCase(
+        id="unchanged_keeps_clear",
+        stat_unchanged=True,
+        needs_verify_before=False,
+        expect_needs_verify=False,
+    ),
+    _VerifyCase(
+        id="changed_sets_verify",
+        stat_unchanged=False,
+        needs_verify_before=False,
+        expect_needs_verify=True,
+    ),
+    _VerifyCase(
+        id="changed_keeps_verify",
+        stat_unchanged=False,
+        needs_verify_before=True,
+        expect_needs_verify=True,
+    ),
+]
+
+
+@pytest.mark.parametrize("case", VERIFY_CASES, ids=lambda c: c.id)
+def test_needs_verify_toggling(session, temp_dir, case):
+    """needs_verify is set/cleared based on mtime+size match."""
+    fp = _create_file(temp_dir, "model.bin")
+    real_mtime = _stat_mtime_ns(fp)
+
+    mtime_for_db = real_mtime if case.stat_unchanged else real_mtime + 1
+    _make_asset(
+        session, "a1", fp, "r1",
+        asset_hash="blake3:abc",
+        mtime_ns=mtime_for_db,
+        needs_verify=case.needs_verify_before,
+    )
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    session.expire_all()
+    ref = session.get(AssetReference, "r1")
+    assert ref.needs_verify is case.expect_needs_verify
+
+
+class _MissingCase:
+    def __init__(self, id, file_exists, expect_is_missing):
+        self.id = id
+        self.file_exists = file_exists
+        self.expect_is_missing = expect_is_missing
+
+
+MISSING_CASES = [
+    _MissingCase(id="existing_file_not_missing", file_exists=True, expect_is_missing=False),
+    _MissingCase(id="missing_file_marked_missing", file_exists=False, expect_is_missing=True),
+]
+
+
+@pytest.mark.parametrize("case", MISSING_CASES, ids=lambda c: c.id)
+def test_is_missing_flag(session, temp_dir, case):
+    """is_missing reflects whether the file exists on disk."""
+    if case.file_exists:
+        fp = _create_file(temp_dir, "model.bin")
+        mtime = _stat_mtime_ns(fp)
+    else:
+        fp = str(temp_dir / "gone.bin")
+        mtime = 999
+
+    _make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    session.expire_all()
+    ref = session.get(AssetReference, "r1")
+    assert ref.is_missing is case.expect_is_missing
+
+
+def test_seed_asset_all_missing_deletes_asset(session, temp_dir):
+    """Seed asset with all refs missing gets deleted entirely."""
+    fp = str(temp_dir / "gone.bin")
+    _make_asset(session, "seed1", fp, "r1", asset_hash=None, mtime_ns=999)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    assert session.get(Asset, "seed1") is None
+    assert session.get(AssetReference, "r1") is None
+
+
+def test_seed_asset_some_exist_returns_survivors(session, temp_dir):
+    """Seed asset with at least one existing ref survives and is returned."""
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "seed1", fp, "r1", asset_hash=None, mtime_ns=mtime)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        survivors = sync_references_with_filesystem(
+            session, "models", collect_existing_paths=True,
+        )
+        session.commit()
+
+    assert session.get(Asset, "seed1") is not None
+    assert os.path.abspath(fp) in survivors
+
+
+def test_hashed_asset_prunes_missing_refs_when_one_is_ok(session, temp_dir):
+    """Hashed asset with one stat-unchanged ref deletes missing refs."""
+    fp_ok = _create_file(temp_dir, "good.bin")
+    fp_gone = str(temp_dir / "gone.bin")
+    mtime = _stat_mtime_ns(fp_ok)
+
+    _make_asset(session, "h1", fp_ok, "r_ok", asset_hash="blake3:aaa", mtime_ns=mtime)
+    # Second ref on same asset, file missing
+    ref_gone = AssetReference(
+        id="r_gone", asset_id="h1", name="gone",
+        owner_id="system", file_path=fp_gone, mtime_ns=999,
+    )
+    session.add(ref_gone)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    session.expire_all()
+    assert session.get(AssetReference, "r_ok") is not None
+    assert session.get(AssetReference, "r_gone") is None
+
+
+def test_hashed_asset_all_missing_keeps_refs(session, temp_dir):
+    """Hashed asset with all refs missing keeps refs (no pruning)."""
+    fp = str(temp_dir / "gone.bin")
+    _make_asset(session, "h1", fp, "r1", asset_hash="blake3:aaa", mtime_ns=999)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    session.expire_all()
+    assert session.get(AssetReference, "r1") is not None
+    ref = session.get(AssetReference, "r1")
+    assert ref.is_missing is True
+
+
+def test_missing_tag_added_when_all_refs_gone(session, temp_dir):
+    """Missing tag is added to hashed asset when all refs are missing."""
+    _ensure_missing_tag(session)
+    fp = str(temp_dir / "gone.bin")
+    _make_asset(session, "h1", fp, "r1", asset_hash="blake3:aaa", mtime_ns=999)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(
+            session, "models", update_missing_tags=True,
+        )
+        session.commit()
+
+    session.expire_all()
+    tag_link = session.get(AssetReferenceTag, ("r1", "missing"))
+    assert tag_link is not None
+
+
+def test_missing_tag_removed_when_ref_ok(session, temp_dir):
+    """Missing tag is removed from hashed asset when a ref is stat-unchanged."""
+    _ensure_missing_tag(session)
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "h1", fp, "r1", asset_hash="blake3:aaa", mtime_ns=mtime)
+    # Pre-add a stale missing tag
+    session.add(AssetReferenceTag(
+        asset_reference_id="r1", tag_name="missing", origin="automatic",
+    ))
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(
+            session, "models", update_missing_tags=True,
+        )
+        session.commit()
+
+    session.expire_all()
+    tag_link = session.get(AssetReferenceTag, ("r1", "missing"))
+    assert tag_link is None
+
+
+def test_missing_tags_not_touched_when_flag_false(session, temp_dir):
+    """Missing tags are not modified when update_missing_tags=False."""
+    _ensure_missing_tag(session)
+    fp = str(temp_dir / "gone.bin")
+    _make_asset(session, "h1", fp, "r1", asset_hash="blake3:aaa", mtime_ns=999)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(
+            session, "models", update_missing_tags=False,
+        )
+        session.commit()
+
+    tag_link = session.get(AssetReferenceTag, ("r1", "missing"))
+    assert tag_link is None  # tag was never added
+
+
+def test_returns_none_when_collect_false(session, temp_dir):
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        result = sync_references_with_filesystem(
+            session, "models", collect_existing_paths=False,
+        )
+
+    assert result is None
+
+
+def test_returns_empty_set_for_no_prefixes(session):
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[]):
+        result = sync_references_with_filesystem(
+            session, "models", collect_existing_paths=True,
+        )
+
+    assert result == set()
+
+
+def test_no_references_is_noop(session, temp_dir):
+    """No crash and no side effects when there are no references."""
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        survivors = sync_references_with_filesystem(
+            session, "models", collect_existing_paths=True,
+        )
+        session.commit()
+
+    assert survivors == set()
+
+
+# ---------------------------------------------------------------------------
+# Soft-delete persistence across scanner operations
+# ---------------------------------------------------------------------------
+
+def _soft_delete_ref(session: Session, ref_id: str) -> None:
+    """Mark a reference as soft-deleted (mimics the API DELETE behaviour)."""
+    ref = session.get(AssetReference, ref_id)
+    ref.deleted_at = datetime(2025, 1, 1)
+    session.flush()
+
+
+def test_soft_deleted_ref_excluded_from_get_references_for_prefixes(session, temp_dir):
+    """get_references_for_prefixes skips soft-deleted references."""
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
+    _soft_delete_ref(session, "r1")
+    session.commit()
+
+    rows = get_references_for_prefixes(session, [str(temp_dir)], include_missing=True)
+    assert len(rows) == 0
+
+
+def test_sync_does_not_resurrect_soft_deleted_ref(session, temp_dir):
+    """Scanner sync leaves soft-deleted refs untouched even when file exists on disk."""
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
+    _soft_delete_ref(session, "r1")
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    session.expire_all()
+    ref = session.get(AssetReference, "r1")
+    assert ref.deleted_at is not None, "soft-deleted ref must stay deleted after sync"
+
+
+def test_bulk_insert_does_not_overwrite_soft_deleted_ref(session, temp_dir):
+    """bulk_insert_references_ignore_conflicts cannot replace a soft-deleted row."""
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
+    _soft_delete_ref(session, "r1")
+    session.commit()
+
+    now = datetime.now(tz=None)
+    bulk_insert_references_ignore_conflicts(session, [
+        {
+            "id": "r_new",
+            "asset_id": "a1",
+            "file_path": fp,
+            "name": "model.bin",
+            "owner_id": "",
+            "mtime_ns": mtime,
+            "preview_id": None,
+            "user_metadata": None,
+            "created_at": now,
+            "updated_at": now,
+            "last_access_time": now,
+        }
+    ])
+    session.commit()
+
+    session.expire_all()
+    # Original row is still the soft-deleted one
+    ref = session.get(AssetReference, "r1")
+    assert ref is not None
+    assert ref.deleted_at is not None
+    # The new row was not inserted (conflict on file_path)
+    assert session.get(AssetReference, "r_new") is None
+
+
+def test_restore_references_by_paths_skips_soft_deleted(session, temp_dir):
+    """restore_references_by_paths does not clear is_missing on soft-deleted refs."""
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(
+        session, "a1", fp, "r1",
+        asset_hash="blake3:abc", mtime_ns=mtime, is_missing=True,
+    )
+    _soft_delete_ref(session, "r1")
+    session.commit()
+
+    restored = restore_references_by_paths(session, [fp])
+    session.commit()
+
+    assert restored == 0
+    session.expire_all()
+    ref = session.get(AssetReference, "r1")
+    assert ref.is_missing is True, "is_missing must not be cleared on soft-deleted ref"
+    assert ref.deleted_at is not None
+
+
+def test_get_unenriched_references_excludes_soft_deleted(session, temp_dir):
+    """Enrichment queries do not pick up soft-deleted references."""
+    fp = _create_file(temp_dir, "model.bin")
+    mtime = _stat_mtime_ns(fp)
+    _make_asset(session, "a1", fp, "r1", asset_hash="blake3:abc", mtime_ns=mtime)
+    _soft_delete_ref(session, "r1")
+    session.commit()
+
+    rows = get_unenriched_references(session, [str(temp_dir)], max_level=2)
+    assert len(rows) == 0
+
+
+def test_sync_ignores_soft_deleted_seed_asset(session, temp_dir):
+    """Soft-deleted seed ref is not garbage-collected even when file is missing."""
+    fp = str(temp_dir / "gone.bin")  # file does not exist
+    _make_asset(session, "seed1", fp, "r1", asset_hash=None, mtime_ns=999)
+    _soft_delete_ref(session, "r1")
+    session.commit()
+
+    with patch("app.assets.scanner.get_prefixes_for_root", return_value=[str(temp_dir)]):
+        sync_references_with_filesystem(session, "models")
+        session.commit()
+
+    session.expire_all()
+    # Asset and ref must still exist — scanner did not see the soft-deleted row
+    assert session.get(Asset, "seed1") is not None
+    assert session.get(AssetReference, "r1") is not None
--- a/tests-unit/assets_test/test_tags_api.py
+++ b/tests-unit/assets_test/test_tags_api.py
@@ -69,8 +69,8 @@ def test_tags_empty_usage(http: requests.Session, api_base: str, asset_factory,
    used_names = [t["name"] for t in body2["tags"]]
    assert custom_tag in used_names

-    # Delete the asset so the tag usage drops to zero
-    rd = http.delete(f"{api_base}/api/assets/{_asset['id']}", timeout=120)
+    # Hard-delete the asset so the tag usage drops to zero
+    rd = http.delete(f"{api_base}/api/assets/{_asset['id']}?delete_content=true", timeout=120)
    assert rd.status_code == 204

    # Now the custom tag must not be returned when include_zero=false
--- a/tests-unit/assets_test/test_uploads.py
+++ b/tests-unit/assets_test/test_uploads.py
@@ -18,25 +18,25 @@ def test_upload_ok_duplicate_reference(http: requests.Session, api_base: str, ma
    assert r1.status_code == 201, a1
    assert a1["created_new"] is True

-    # Second upload with the same data and name should return created_new == False and the same asset
+    # Second upload with the same data and name creates a new AssetReference (duplicates allowed)
+    # Returns 200 because Asset already exists, but a new AssetReference is created
    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name, "user_metadata": json.dumps(meta)}
    r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
    a2 = r2.json()
-    assert r2.status_code == 200, a2
-    assert a2["created_new"] is False
+    assert r2.status_code in (200, 201), a2
    assert a2["asset_hash"] == a1["asset_hash"]
-    assert a2["id"] == a1["id"]  # old reference
+    assert a2["id"] != a1["id"]  # new reference with same content

-    # Third upload with the same data but new name should return created_new == False and the new AssetReference
+    # Third upload with the same data but different name also creates new AssetReference
    files = {"file": (name, data, "application/octet-stream")}
    form = {"tags": json.dumps(tags), "name": name + "_d", "user_metadata": json.dumps(meta)}
-    r2 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
-    a3 = r2.json()
-    assert r2.status_code == 200, a3
-    assert a3["created_new"] is False
+    r3 = http.post(api_base + "/api/assets", data=form, files=files, timeout=120)
+    a3 = r3.json()
+    assert r3.status_code in (200, 201), a3
    assert a3["asset_hash"] == a1["asset_hash"]
-    assert a3["id"] != a1["id"]  # old reference
+    assert a3["id"] != a1["id"]
+    assert a3["id"] != a2["id"]


 def test_upload_fastpath_from_existing_hash_no_file(http: requests.Session, api_base: str):
@@ -116,7 +116,7 @@ def test_concurrent_upload_identical_bytes_different_names(
 ):
    """
    Two concurrent uploads of identical bytes but different names.
-    Expect a single Asset (same hash), two AssetInfo rows, and exactly one created_new=True.
+    Expect a single Asset (same hash), two AssetReference rows, and exactly one created_new=True.
    """
    scope = f"concupload-{uuid.uuid4().hex[:6]}"
    name1, name2 = "cu_a.bin", "cu_b.bin"
--- a/tests-unit/requirements.txt
+++ b/tests-unit/requirements.txt
@@ -2,4 +2,3 @@ pytest>=7.8.0
 pytest-aiohttp
 pytest-asyncio
 websocket-client
-blake3
--- a/tests-unit/seeder_test/test_seeder.py
+++ b/tests-unit/seeder_test/test_seeder.py
@@ -0,0 +1,900 @@
+"""Unit tests for the _AssetSeeder background scanning class."""
+
+import threading
+from unittest.mock import patch
+
+import pytest
+
+from app.assets.database.queries.asset_reference import UnenrichedReferenceRow
+from app.assets.seeder import _AssetSeeder, Progress, ScanInProgressError, ScanPhase, State
+
+
+@pytest.fixture
+def fresh_seeder():
+    """Create a fresh _AssetSeeder instance for testing."""
+    seeder = _AssetSeeder()
+    yield seeder
+    seeder.shutdown(timeout=1.0)
+
+
+@pytest.fixture
+def mock_dependencies():
+    """Mock all external dependencies for isolated testing."""
+    with (
+        patch("app.assets.seeder.dependencies_available", return_value=True),
+        patch("app.assets.seeder.sync_root_safely", return_value=set()),
+        patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+        patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+        patch("app.assets.seeder.insert_asset_specs", return_value=0),
+        patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+        patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+    ):
+        yield
+
+
+class TestSeederStateTransitions:
+    """Test state machine transitions."""
+
+    def test_initial_state_is_idle(self, fresh_seeder: _AssetSeeder):
+        assert fresh_seeder.get_status().state == State.IDLE
+
+    def test_start_transitions_to_running(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            started = fresh_seeder.start(roots=("models",))
+            assert started is True
+            assert reached.wait(timeout=2.0)
+            assert fresh_seeder.get_status().state == State.RUNNING
+
+            barrier.set()
+
+    def test_start_while_running_returns_false(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            second_start = fresh_seeder.start(roots=("models",))
+            assert second_start is False
+
+            barrier.set()
+
+    def test_cancel_transitions_to_cancelling(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            cancelled = fresh_seeder.cancel()
+            assert cancelled is True
+            assert fresh_seeder.get_status().state == State.CANCELLING
+
+            barrier.set()
+
+    def test_cancel_when_idle_returns_false(self, fresh_seeder: _AssetSeeder):
+        cancelled = fresh_seeder.cancel()
+        assert cancelled is False
+
+    def test_state_returns_to_idle_after_completion(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        fresh_seeder.start(roots=("models",))
+        completed = fresh_seeder.wait(timeout=5.0)
+        assert completed is True
+        assert fresh_seeder.get_status().state == State.IDLE
+
+
+class TestSeederWait:
+    """Test wait() behavior."""
+
+    def test_wait_blocks_until_complete(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        fresh_seeder.start(roots=("models",))
+        completed = fresh_seeder.wait(timeout=5.0)
+        assert completed is True
+        assert fresh_seeder.get_status().state == State.IDLE
+
+    def test_wait_returns_false_on_timeout(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+
+        def slow_collect(*args):
+            barrier.wait(timeout=10.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            completed = fresh_seeder.wait(timeout=0.1)
+            assert completed is False
+
+            barrier.set()
+
+    def test_wait_when_idle_returns_true(self, fresh_seeder: _AssetSeeder):
+        completed = fresh_seeder.wait(timeout=1.0)
+        assert completed is True
+
+
+class TestSeederProgress:
+    """Test progress tracking."""
+
+    def test_get_status_returns_progress_during_scan(
+        self, fresh_seeder: _AssetSeeder
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_build(*args, **kwargs):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return ([], set(), 0)
+
+        paths = ["/path/file1.safetensors", "/path/file2.safetensors"]
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=paths),
+            patch("app.assets.seeder.build_asset_specs", side_effect=slow_build),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            status = fresh_seeder.get_status()
+            assert status.state == State.RUNNING
+            assert status.progress is not None
+            assert status.progress.total == 2
+
+            barrier.set()
+
+    def test_progress_callback_is_invoked(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        progress_updates: list[Progress] = []
+
+        def callback(p: Progress):
+            progress_updates.append(p)
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots",
+            return_value=[f"/path/file{i}.safetensors" for i in range(10)],
+        ):
+            fresh_seeder.start(roots=("models",), progress_callback=callback)
+            fresh_seeder.wait(timeout=5.0)
+
+        assert len(progress_updates) > 0
+
+
+class TestSeederCancellation:
+    """Test cancellation behavior."""
+
+    def test_scan_commits_partial_progress_on_cancellation(
+        self, fresh_seeder: _AssetSeeder
+    ):
+        insert_count = 0
+        barrier = threading.Event()
+        first_insert_done = threading.Event()
+
+        def slow_insert(specs, tags):
+            nonlocal insert_count
+            insert_count += 1
+            if insert_count == 1:
+                first_insert_done.set()
+            if insert_count >= 2:
+                barrier.wait(timeout=5.0)
+            return len(specs)
+
+        paths = [f"/path/file{i}.safetensors" for i in range(1500)]
+        specs = [
+            {
+                "abs_path": p,
+                "size_bytes": 100,
+                "mtime_ns": 0,
+                "info_name": f"file{i}",
+                "tags": [],
+                "fname": f"file{i}",
+                "metadata": None,
+                "hash": None,
+                "mime_type": None,
+            }
+            for i, p in enumerate(paths)
+        ]
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=paths),
+            patch(
+                "app.assets.seeder.build_asset_specs", return_value=(specs, set(), 0)
+            ),
+            patch("app.assets.seeder.insert_asset_specs", side_effect=slow_insert),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert first_insert_done.wait(timeout=2.0)
+
+            fresh_seeder.cancel()
+            barrier.set()
+            fresh_seeder.wait(timeout=5.0)
+
+            assert 1 <= insert_count < 3  # 1500 paths / 500 batch = 3; cancel stopped early
+
+
+class TestSeederErrorHandling:
+    """Test error handling behavior."""
+
+    def test_database_errors_captured_in_status(self, fresh_seeder: _AssetSeeder):
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch(
+                "app.assets.seeder.collect_paths_for_roots",
+                return_value=["/path/file.safetensors"],
+            ),
+            patch(
+                "app.assets.seeder.build_asset_specs",
+                return_value=(
+                    [
+                        {
+                            "abs_path": "/path/file.safetensors",
+                            "size_bytes": 100,
+                            "mtime_ns": 0,
+                            "info_name": "file",
+                            "tags": [],
+                            "fname": "file",
+                            "metadata": None,
+                            "hash": None,
+                            "mime_type": None,
+                        }
+                    ],
+                    set(),
+                    0,
+                ),
+            ),
+            patch(
+                "app.assets.seeder.insert_asset_specs",
+                side_effect=Exception("DB connection failed"),
+            ),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("models",))
+            fresh_seeder.wait(timeout=5.0)
+
+            status = fresh_seeder.get_status()
+            assert len(status.errors) > 0
+            assert "DB connection failed" in status.errors[0]
+
+    def test_dependencies_unavailable_captured_in_errors(
+        self, fresh_seeder: _AssetSeeder
+    ):
+        with patch("app.assets.seeder.dependencies_available", return_value=False):
+            fresh_seeder.start(roots=("models",))
+            fresh_seeder.wait(timeout=5.0)
+
+            status = fresh_seeder.get_status()
+            assert len(status.errors) > 0
+            assert "dependencies" in status.errors[0].lower()
+
+    def test_thread_crash_resets_state_to_idle(self, fresh_seeder: _AssetSeeder):
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch(
+                "app.assets.seeder.sync_root_safely",
+                side_effect=RuntimeError("Unexpected crash"),
+            ),
+        ):
+            fresh_seeder.start(roots=("models",))
+            fresh_seeder.wait(timeout=5.0)
+
+            status = fresh_seeder.get_status()
+            assert status.state == State.IDLE
+            assert len(status.errors) > 0
+
+
+class TestSeederThreadSafety:
+    """Test thread safety of concurrent operations."""
+
+    def test_concurrent_start_calls_spawn_only_one_thread(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+
+        def slow_collect(*args):
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            results = []
+
+            def try_start():
+                results.append(fresh_seeder.start(roots=("models",)))
+
+            threads = [threading.Thread(target=try_start) for _ in range(10)]
+            for t in threads:
+                t.start()
+            for t in threads:
+                t.join()
+
+            barrier.set()
+
+            assert sum(results) == 1
+
+    def test_get_status_safe_during_scan(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            statuses = []
+            for _ in range(100):
+                statuses.append(fresh_seeder.get_status())
+
+            barrier.set()
+
+            assert all(
+                s.state in (State.RUNNING, State.IDLE, State.CANCELLING)
+                for s in statuses
+            )
+
+
+class TestSeederMarkMissing:
+    """Test mark_missing_outside_prefixes behavior."""
+
+    def test_mark_missing_when_idle(self, fresh_seeder: _AssetSeeder):
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch(
+                "app.assets.seeder.get_all_known_prefixes",
+                return_value=["/models", "/input", "/output"],
+            ),
+            patch(
+                "app.assets.seeder.mark_missing_outside_prefixes_safely", return_value=5
+            ) as mock_mark,
+        ):
+            result = fresh_seeder.mark_missing_outside_prefixes()
+            assert result == 5
+            mock_mark.assert_called_once_with(["/models", "/input", "/output"])
+
+    def test_mark_missing_raises_when_running(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            with pytest.raises(ScanInProgressError):
+                fresh_seeder.mark_missing_outside_prefixes()
+
+            barrier.set()
+
+    def test_mark_missing_returns_zero_when_dependencies_unavailable(
+        self, fresh_seeder: _AssetSeeder
+    ):
+        with patch("app.assets.seeder.dependencies_available", return_value=False):
+            result = fresh_seeder.mark_missing_outside_prefixes()
+            assert result == 0
+
+    def test_prune_first_flag_triggers_mark_missing_before_scan(
+        self, fresh_seeder: _AssetSeeder
+    ):
+        call_order = []
+
+        def track_mark(prefixes):
+            call_order.append("mark_missing")
+            return 3
+
+        def track_sync(root):
+            call_order.append(f"sync_{root}")
+            return set()
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.get_all_known_prefixes", return_value=["/models"]),
+            patch("app.assets.seeder.mark_missing_outside_prefixes_safely", side_effect=track_mark),
+            patch("app.assets.seeder.sync_root_safely", side_effect=track_sync),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("models",), prune_first=True)
+            fresh_seeder.wait(timeout=5.0)
+
+            assert call_order[0] == "mark_missing"
+            assert "sync_models" in call_order
+
+
+class TestSeederPhases:
+    """Test phased scanning behavior."""
+
+    def test_start_fast_only_runs_fast_phase(self, fresh_seeder: _AssetSeeder):
+        """Verify start_fast only runs the fast phase."""
+        fast_called = []
+        enrich_called = []
+
+        def track_fast(*args, **kwargs):
+            fast_called.append(True)
+            return ([], set(), 0)
+
+        def track_enrich(*args, **kwargs):
+            enrich_called.append(True)
+            return []
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", side_effect=track_fast),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=track_enrich),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start_fast(roots=("models",))
+            fresh_seeder.wait(timeout=5.0)
+
+            assert len(fast_called) == 1
+            assert len(enrich_called) == 0
+
+    def test_start_enrich_only_runs_enrich_phase(self, fresh_seeder: _AssetSeeder):
+        """Verify start_enrich only runs the enrich phase."""
+        fast_called = []
+        enrich_called = []
+
+        def track_fast(*args, **kwargs):
+            fast_called.append(True)
+            return ([], set(), 0)
+
+        def track_enrich(*args, **kwargs):
+            enrich_called.append(True)
+            return []
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", side_effect=track_fast),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=track_enrich),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start_enrich(roots=("models",))
+            fresh_seeder.wait(timeout=5.0)
+
+            assert len(fast_called) == 0
+            assert len(enrich_called) == 1
+
+    def test_full_scan_runs_both_phases(self, fresh_seeder: _AssetSeeder):
+        """Verify full scan runs both fast and enrich phases."""
+        fast_called = []
+        enrich_called = []
+
+        def track_fast(*args, **kwargs):
+            fast_called.append(True)
+            return ([], set(), 0)
+
+        def track_enrich(*args, **kwargs):
+            enrich_called.append(True)
+            return []
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", side_effect=track_fast),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=track_enrich),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("models",), phase=ScanPhase.FULL)
+            fresh_seeder.wait(timeout=5.0)
+
+            assert len(fast_called) == 1
+            assert len(enrich_called) == 1
+
+
+class TestSeederPauseResume:
+    """Test pause/resume behavior."""
+
+    def test_pause_transitions_to_paused(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            paused = fresh_seeder.pause()
+            assert paused is True
+            assert fresh_seeder.get_status().state == State.PAUSED
+
+            barrier.set()
+
+    def test_pause_when_idle_returns_false(self, fresh_seeder: _AssetSeeder):
+        paused = fresh_seeder.pause()
+        assert paused is False
+
+    def test_resume_returns_to_running(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            fresh_seeder.pause()
+            assert fresh_seeder.get_status().state == State.PAUSED
+
+            resumed = fresh_seeder.resume()
+            assert resumed is True
+            assert fresh_seeder.get_status().state == State.RUNNING
+
+            barrier.set()
+
+    def test_resume_when_not_paused_returns_false(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            resumed = fresh_seeder.resume()
+            assert resumed is False
+
+            barrier.set()
+
+    def test_cancel_while_paused_works(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached_checkpoint = threading.Event()
+
+        def slow_collect(*args):
+            reached_checkpoint.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached_checkpoint.wait(timeout=2.0)
+
+            fresh_seeder.pause()
+            assert fresh_seeder.get_status().state == State.PAUSED
+
+            cancelled = fresh_seeder.cancel()
+            assert cancelled is True
+
+            barrier.set()
+            fresh_seeder.wait(timeout=5.0)
+            assert fresh_seeder.get_status().state == State.IDLE
+
+class TestSeederStopRestart:
+    """Test stop and restart behavior."""
+
+    def test_stop_is_alias_for_cancel(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+
+        def slow_collect(*args):
+            reached.set()
+            barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            stopped = fresh_seeder.stop()
+            assert stopped is True
+            assert fresh_seeder.get_status().state == State.CANCELLING
+
+            barrier.set()
+
+    def test_restart_cancels_and_starts_new_scan(
+        self, fresh_seeder: _AssetSeeder, mock_dependencies
+    ):
+        barrier = threading.Event()
+        reached = threading.Event()
+        start_count = 0
+
+        def slow_collect(*args):
+            nonlocal start_count
+            start_count += 1
+            if start_count == 1:
+                reached.set()
+                barrier.wait(timeout=5.0)
+            return []
+
+        with patch(
+            "app.assets.seeder.collect_paths_for_roots", side_effect=slow_collect
+        ):
+            fresh_seeder.start(roots=("models",))
+            assert reached.wait(timeout=2.0)
+
+            barrier.set()
+            restarted = fresh_seeder.restart()
+            assert restarted is True
+
+            fresh_seeder.wait(timeout=5.0)
+            assert start_count == 2
+
+    def test_restart_preserves_previous_params(self, fresh_seeder: _AssetSeeder):
+        """Verify restart uses previous params when not overridden."""
+        collected_roots = []
+
+        def track_collect(roots):
+            collected_roots.append(roots)
+            return []
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", side_effect=track_collect),
+            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("input", "output"))
+            fresh_seeder.wait(timeout=5.0)
+
+            fresh_seeder.restart()
+            fresh_seeder.wait(timeout=5.0)
+
+            assert len(collected_roots) == 2
+            assert collected_roots[0] == ("input", "output")
+            assert collected_roots[1] == ("input", "output")
+
+    def test_restart_can_override_params(self, fresh_seeder: _AssetSeeder):
+        """Verify restart can override previous params."""
+        collected_roots = []
+
+        def track_collect(roots):
+            collected_roots.append(roots)
+            return []
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", side_effect=track_collect),
+            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", return_value=[]),
+            patch("app.assets.seeder.enrich_assets_batch", return_value=(0, 0)),
+        ):
+            fresh_seeder.start(roots=("models",))
+            fresh_seeder.wait(timeout=5.0)
+
+            fresh_seeder.restart(roots=("input",))
+            fresh_seeder.wait(timeout=5.0)
+
+            assert len(collected_roots) == 2
+            assert collected_roots[0] == ("models",)
+            assert collected_roots[1] == ("input",)
+
+
+def _make_row(ref_id: str, asset_id: str = "a1") -> UnenrichedReferenceRow:
+    return UnenrichedReferenceRow(
+        reference_id=ref_id, asset_id=asset_id,
+        file_path=f"/fake/{ref_id}.bin", enrichment_level=0,
+    )
+
+
+class TestEnrichPhaseDefensiveLogic:
+    """Test skip_ids filtering and consecutive_empty termination."""
+
+    def test_failed_refs_are_skipped_on_subsequent_batches(
+        self, fresh_seeder: _AssetSeeder,
+    ):
+        """References that fail enrichment are filtered out of future batches."""
+        row_a = _make_row("r1")
+        row_b = _make_row("r2")
+        call_count = 0
+
+        def fake_get_unenriched(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count <= 2:
+                return [row_a, row_b]
+            return []
+
+        enriched_refs: list[list[str]] = []
+
+        def fake_enrich(rows, **kwargs):
+            ref_ids = [r.reference_id for r in rows]
+            enriched_refs.append(ref_ids)
+            # r1 always fails, r2 succeeds
+            failed = [r.reference_id for r in rows if r.reference_id == "r1"]
+            enriched = len(rows) - len(failed)
+            return enriched, failed
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=fake_get_unenriched),
+            patch("app.assets.seeder.enrich_assets_batch", side_effect=fake_enrich),
+        ):
+            fresh_seeder.start(roots=("models",), phase=ScanPhase.ENRICH)
+            fresh_seeder.wait(timeout=5.0)
+
+        # First batch: both refs attempted
+        assert "r1" in enriched_refs[0]
+        assert "r2" in enriched_refs[0]
+        # Second batch: r1 filtered out
+        assert "r1" not in enriched_refs[1]
+        assert "r2" in enriched_refs[1]
+
+    def test_stops_after_consecutive_empty_batches(
+        self, fresh_seeder: _AssetSeeder,
+    ):
+        """Enrich phase terminates after 3 consecutive batches with zero progress."""
+        row = _make_row("r1")
+        batch_count = 0
+
+        def fake_get_unenriched(*args, **kwargs):
+            nonlocal batch_count
+            batch_count += 1
+            # Always return the same row (simulating a permanently failing ref)
+            return [row]
+
+        def fake_enrich(rows, **kwargs):
+            # Always fail — zero enriched, all failed
+            return 0, [r.reference_id for r in rows]
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=fake_get_unenriched),
+            patch("app.assets.seeder.enrich_assets_batch", side_effect=fake_enrich),
+        ):
+            fresh_seeder.start(roots=("models",), phase=ScanPhase.ENRICH)
+            fresh_seeder.wait(timeout=5.0)
+
+        # Should stop after exactly 3 consecutive empty batches
+        # Batch 1: returns row, enrich fails → filtered out in batch 2+
+        # But get_unenriched keeps returning it, filter removes it → empty → break
+        # Actually: batch 1 has row, fails. Batch 2 get_unenriched returns [row],
+        # skip_ids filters it → empty list → breaks via `if not unenriched: break`
+        # So it terminates in 2 calls to get_unenriched.
+        assert batch_count == 2
+
+    def test_consecutive_empty_counter_resets_on_success(
+        self, fresh_seeder: _AssetSeeder,
+    ):
+        """A successful batch resets the consecutive empty counter."""
+        call_count = 0
+
+        def fake_get_unenriched(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count <= 6:
+                return [_make_row(f"r{call_count}", f"a{call_count}")]
+            return []
+
+        def fake_enrich(rows, **kwargs):
+            ref_id = rows[0].reference_id
+            # Fail batches 1-2, succeed batch 3, fail batches 4-5, succeed batch 6
+            if ref_id in ("r1", "r2", "r4", "r5"):
+                return 0, [ref_id]
+            return 1, []
+
+        with (
+            patch("app.assets.seeder.dependencies_available", return_value=True),
+            patch("app.assets.seeder.sync_root_safely", return_value=set()),
+            patch("app.assets.seeder.collect_paths_for_roots", return_value=[]),
+            patch("app.assets.seeder.build_asset_specs", return_value=([], set(), 0)),
+            patch("app.assets.seeder.insert_asset_specs", return_value=0),
+            patch("app.assets.seeder.get_unenriched_assets_for_roots", side_effect=fake_get_unenriched),
+            patch("app.assets.seeder.enrich_assets_batch", side_effect=fake_enrich),
+        ):
+            fresh_seeder.start(roots=("models",), phase=ScanPhase.ENRICH)
+            fresh_seeder.wait(timeout=5.0)
+
+        # All 6 batches should run + 1 final call returning empty
+        assert call_count == 7
+        status = fresh_seeder.get_status()
+        assert status.state == State.IDLE