feat: soft-delete for AssetReference with scanner persistence

- Add deleted_at column to AssetReference model and migration
- soft_delete_reference_by_id sets deleted_at instead of removing rows
- DELETE /api/assets/{id} defaults to soft-delete; delete_content=true
  for hard-delete
- Add deleted_at IS NULL filters to read queries, tag queries, and
  scanner queries so soft-deleted refs are invisible
- restore_references_by_paths skips soft-deleted refs
- upsert_reference clears deleted_at on explicit re-ingest
- Add tests for soft-delete API behavior, scanner persistence, bulk
  insert, enrichment exclusion, and seed asset garbage collection

Amp-Thread-ID: https://ampcode.com/threads/T-019cb6fc-c05c-761f-b855-6d5d1c9defa2
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-03-03 20:04:01 -08:00
parent ce17e303fc
commit d5cac66405
12 changed files with 247 additions and 17 deletions

View File

@@ -38,6 +38,7 @@ from app.assets.database.queries.asset_reference import (
restore_references_by_paths,
set_reference_metadata,
set_reference_preview,
soft_delete_reference_by_id,
update_reference_access_time,
update_reference_name,
update_reference_timestamps,
@@ -107,6 +108,7 @@ __all__ = [
"restore_references_by_paths",
"set_reference_metadata",
"set_reference_preview",
"soft_delete_reference_by_id",
"set_reference_tags",
"update_asset_hash_and_mime",
"update_reference_access_time",

View File

@@ -173,11 +173,11 @@ def get_reference_with_owner_check(
"""Fetch a reference and verify ownership.
Raises:
ValueError: if reference not found
ValueError: if reference not found or soft-deleted
PermissionError: if owner_id doesn't match
"""
ref = get_reference_by_id(session, reference_id=reference_id)
if not ref:
if not ref or ref.deleted_at is not None:
raise ValueError(f"AssetReference {reference_id} not found")
if ref.owner_id and ref.owner_id != owner_id:
raise PermissionError("not owner")
@@ -206,6 +206,7 @@ def reference_exists_for_asset_id(
select(sa.literal(True))
.select_from(AssetReference)
.where(AssetReference.asset_id == asset_id)
.where(AssetReference.deleted_at.is_(None))
.limit(1)
)
return session.execute(q).first() is not None
@@ -327,6 +328,7 @@ def list_references_page(
.join(Asset, Asset.id == AssetReference.asset_id)
.where(build_visible_owner_clause(owner_id))
.where(AssetReference.is_missing == False) # noqa: E712
.where(AssetReference.deleted_at.is_(None))
.options(noload(AssetReference.tags))
)
@@ -357,6 +359,7 @@ def list_references_page(
.join(Asset, Asset.id == AssetReference.asset_id)
.where(build_visible_owner_clause(owner_id))
.where(AssetReference.is_missing == False) # noqa: E712
.where(AssetReference.deleted_at.is_(None))
)
if name_contains:
escaped, esc = escape_sql_like_string(name_contains)
@@ -400,6 +403,7 @@ def fetch_reference_asset_and_tags(
.join(Tag, Tag.name == AssetReferenceTag.tag_name, isouter=True)
.where(
AssetReference.id == reference_id,
AssetReference.deleted_at.is_(None),
build_visible_owner_clause(owner_id),
)
.options(noload(AssetReference.tags))
@@ -430,6 +434,7 @@ def fetch_reference_and_asset(
.join(Asset, Asset.id == AssetReference.asset_id)
.where(
AssetReference.id == reference_id,
AssetReference.deleted_at.is_(None),
build_visible_owner_clause(owner_id),
)
.limit(1)
@@ -541,6 +546,28 @@ def delete_reference_by_id(
return int(session.execute(stmt).rowcount or 0) > 0
def soft_delete_reference_by_id(
session: Session,
reference_id: str,
owner_id: str,
) -> bool:
"""Mark a reference as soft-deleted by setting deleted_at timestamp.
Returns True if the reference was found and marked deleted.
"""
now = get_utc_now()
stmt = (
sa.update(AssetReference)
.where(
AssetReference.id == reference_id,
AssetReference.deleted_at.is_(None),
build_visible_owner_clause(owner_id),
)
.values(deleted_at=now)
)
return int(session.execute(stmt).rowcount or 0) > 0
def set_reference_preview(
session: Session,
reference_id: str,
@@ -633,10 +660,12 @@ def upsert_reference(
AssetReference.mtime_ns.is_(None),
AssetReference.mtime_ns != int(mtime_ns),
AssetReference.is_missing == True, # noqa: E712
AssetReference.deleted_at.isnot(None),
)
)
.values(
asset_id=asset_id, mtime_ns=int(mtime_ns), is_missing=False, updated_at=now
asset_id=asset_id, mtime_ns=int(mtime_ns), is_missing=False,
deleted_at=None, updated_at=now,
)
)
res2 = session.execute(upd)
@@ -660,6 +689,7 @@ def mark_references_missing_outside_prefixes(
result = session.execute(
sa.update(AssetReference)
.where(AssetReference.file_path.isnot(None))
.where(AssetReference.deleted_at.is_(None))
.where(~matches_valid_prefix)
.where(AssetReference.is_missing == False) # noqa: E712
.values(is_missing=True)
@@ -681,6 +711,7 @@ def restore_references_by_paths(session: Session, file_paths: list[str]) -> int:
sa.update(AssetReference)
.where(AssetReference.file_path.in_(chunk))
.where(AssetReference.is_missing == True) # noqa: E712
.where(AssetReference.deleted_at.is_(None))
.values(is_missing=False)
)
total += result.rowcount
@@ -699,6 +730,7 @@ def get_unreferenced_unhashed_asset_ids(session: Session) -> list[str]:
sa.select(sa.literal(1))
.where(AssetReference.asset_id == Asset.id)
.where(AssetReference.is_missing == False) # noqa: E712
.where(AssetReference.deleted_at.is_(None))
.correlate(Asset)
.exists()
)
@@ -758,6 +790,7 @@ def get_references_for_prefixes(
)
.join(Asset, Asset.id == AssetReference.asset_id)
.where(AssetReference.file_path.isnot(None))
.where(AssetReference.deleted_at.is_(None))
.where(sa.or_(*conds))
)
@@ -894,6 +927,7 @@ def get_unenriched_references(
AssetReference.enrichment_level,
)
.where(AssetReference.file_path.isnot(None))
.where(AssetReference.deleted_at.is_(None))
.where(sa.or_(*conds))
.where(AssetReference.is_missing == False) # noqa: E712
.where(AssetReference.enrichment_level <= max_level)

View File

@@ -272,6 +272,7 @@ def list_tags_with_usage(
.select_from(AssetReferenceTag)
.join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
.where(build_visible_owner_clause(owner_id))
.where(AssetReference.deleted_at.is_(None))
.group_by(AssetReferenceTag.tag_name)
.subquery()
)
@@ -307,6 +308,7 @@ def list_tags_with_usage(
select(AssetReferenceTag.tag_name)
.join(AssetReference, AssetReference.id == AssetReferenceTag.asset_reference_id)
.where(build_visible_owner_clause(owner_id))
.where(AssetReference.deleted_at.is_(None))
.group_by(AssetReferenceTag.tag_name)
)
total_q = total_q.where(Tag.name.in_(visible_tags_sq))