fix: address code review feedback - round 2

- Reject path separators (/, \, os.sep) in tag components for defense-in-depth
- Add comment explaining double-relpath normalization trick
- Add _require_assets_feature_enabled decorator returning 503 when disabled
- Call asset_seeder.disable() when --enable-assets is not passed
- Batch IDs with iter_chunks in bulk_update_needs_verify,
  bulk_update_is_missing, and delete_references_by_ids so each statement
  stays within SQLite's bind parameter limit
- Fix CacheStateRow.size_bytes NULL coercion: keep NULL as None instead of
  coercing it to 0, to avoid false needs_verify flags on assets with
  unknown size
- Add PermissionError catch in delete_asset_tags route (403 vs 500)
- Add hash-is-None guard in delete_orphaned_seed_asset (return False
  without deleting when the asset has a hash)
- Validate from_asset_id in reassign_asset_references (reassign only when
  the reference currently points at from_asset_id)
- Initialize _prune_first in __init__, remove getattr workaround
- Cap error accumulation in _add_error to 200
- Remove confirmed dead code: seed_assets, compute_filename_for_asset,
  ALLOWED_ROOTS, AssetNotFoundError, SetTagsResult, update_enrichment_level,
  Asset.to_dict, AssetReference.to_dict, _AssetSeeder.enable

Amp-Thread-ID: https://ampcode.com/threads/T-019cb610-1b55-74b6-8dbb-381d73c387c0
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-03-03 16:35:55 -08:00
parent 4d4c2cedd3
commit 32a6fcf7a8
12 changed files with 50 additions and 136 deletions

View File

@@ -20,7 +20,7 @@ from sqlalchemy import (
from sqlalchemy.orm import Mapped, foreign, mapped_column, relationship
from app.assets.helpers import get_utc_now
from app.database.models import Base, to_dict
from app.database.models import Base
class Asset(Base):
@@ -59,9 +59,6 @@ class Asset(Base):
CheckConstraint("size_bytes >= 0", name="ck_assets_size_nonneg"),
)
def to_dict(self, include_none: bool = False) -> dict[str, Any]:
return to_dict(self, include_none=include_none)
def __repr__(self) -> str:
return f"<Asset id={self.id} hash={(self.hash or '')[:12]}>"
@@ -161,11 +158,6 @@ class AssetReference(Base):
),
)
def to_dict(self, include_none: bool = False) -> dict[str, Any]:
data = to_dict(self, include_none=include_none)
data["tags"] = [t.name for t in self.tags]
return data
def __repr__(self) -> str:
path_part = f" path={self.file_path!r}" if self.file_path else ""
return f"<AssetReference id={self.id} name={self.name!r}{path_part}>"

View File

@@ -37,7 +37,6 @@ from app.assets.database.queries.asset_reference import (
restore_references_by_paths,
set_reference_metadata,
set_reference_preview,
update_enrichment_level,
update_reference_access_time,
update_reference_name,
update_reference_timestamps,
@@ -108,7 +107,6 @@ __all__ = [
"set_reference_preview",
"set_reference_tags",
"update_asset_hash_and_mime",
"update_enrichment_level",
"update_reference_access_time",
"update_reference_name",
"update_reference_timestamps",

View File

@@ -134,7 +134,7 @@ def reassign_asset_references(
Used when merging a stub asset into an existing asset with the same hash.
"""
ref = session.get(AssetReference, reference_id)
if ref:
if ref and ref.asset_id == from_asset_id:
ref.asset_id = to_asset_id
session.flush()

View File

@@ -552,7 +552,7 @@ class CacheStateRow(NamedTuple):
needs_verify: bool
asset_id: str
asset_hash: str | None
size_bytes: int
size_bytes: int | None
def list_references_by_asset_id(
@@ -767,7 +767,7 @@ def get_references_for_prefixes(
needs_verify=row[3],
asset_id=row[4],
asset_hash=row[5],
size_bytes=int(row[6] or 0),
size_bytes=int(row[6]) if row[6] is not None else None,
)
for row in rows
]
@@ -782,12 +782,15 @@ def bulk_update_needs_verify(
"""
if not reference_ids:
return 0
result = session.execute(
sa.update(AssetReference)
.where(AssetReference.id.in_(reference_ids))
.values(needs_verify=value)
)
return result.rowcount
total = 0
for chunk in iter_chunks(reference_ids, MAX_BIND_PARAMS):
result = session.execute(
sa.update(AssetReference)
.where(AssetReference.id.in_(chunk))
.values(needs_verify=value)
)
total += result.rowcount
return total
def bulk_update_is_missing(
@@ -799,12 +802,15 @@ def bulk_update_is_missing(
"""
if not reference_ids:
return 0
result = session.execute(
sa.update(AssetReference)
.where(AssetReference.id.in_(reference_ids))
.values(is_missing=value)
)
return result.rowcount
total = 0
for chunk in iter_chunks(reference_ids, MAX_BIND_PARAMS):
result = session.execute(
sa.update(AssetReference)
.where(AssetReference.id.in_(chunk))
.values(is_missing=value)
)
total += result.rowcount
return total
def delete_references_by_ids(session: Session, reference_ids: list[str]) -> int:
@@ -814,25 +820,30 @@ def delete_references_by_ids(session: Session, reference_ids: list[str]) -> int:
"""
if not reference_ids:
return 0
result = session.execute(
sa.delete(AssetReference).where(AssetReference.id.in_(reference_ids))
)
return result.rowcount
total = 0
for chunk in iter_chunks(reference_ids, MAX_BIND_PARAMS):
result = session.execute(
sa.delete(AssetReference).where(AssetReference.id.in_(chunk))
)
total += result.rowcount
return total
def delete_orphaned_seed_asset(session: Session, asset_id: str) -> bool:
"""Delete a seed asset (hash is None) and its references.
Returns: True if asset was deleted, False if not found
Returns: True if asset was deleted, False if not found or has a hash
"""
asset = session.get(Asset, asset_id)
if not asset:
return False
if asset.hash is not None:
return False
session.execute(
sa.delete(AssetReference).where(AssetReference.asset_id == asset_id)
)
asset = session.get(Asset, asset_id)
if asset:
session.delete(asset)
return True
return False
session.delete(asset)
return True
class UnenrichedReferenceRow(NamedTuple):
@@ -899,19 +910,6 @@ def get_unenriched_references(
]
def update_enrichment_level(
session: Session,
reference_id: str,
level: int,
) -> None:
"""Update the enrichment level for a reference."""
session.execute(
sa.update(AssetReference)
.where(AssetReference.id == reference_id)
.values(enrichment_level=level)
)
def bulk_update_enrichment_level(
session: Session,
reference_ids: list[str],