feat: remove Asset when there are no references left + bugfixes + more tests

This commit is contained in:
bigcat88
2025-09-08 22:37:39 +03:00
parent 0e9de2b7c9
commit dfb5703d40
7 changed files with 332 additions and 51 deletions

View File

@@ -277,9 +277,13 @@ async def upload_asset(request: web.Request) -> web.Response:
os.remove(tmp_path)
msg = str(e)
if "HASH_MISMATCH" in msg or msg.strip().upper() == "HASH_MISMATCH":
return _error_response(400, "HASH_MISMATCH", "Uploaded file hash does not match provided hash.")
return _error_response(
400,
"HASH_MISMATCH",
"Uploaded file hash does not match provided hash.",
)
return _error_response(400, "BAD_REQUEST", "Invalid inputs.")
except Exception:
except Exception as e:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
return _error_response(500, "INTERNAL", "Unexpected server error.")
@@ -343,10 +347,14 @@ async def delete_asset(request: web.Request) -> web.Response:
except Exception:
return _error_response(400, "INVALID_ID", f"AssetInfo id '{asset_info_id_raw}' is not a valid UUID.")
delete_content = request.query.get("delete_content")
delete_content = True if delete_content is None else delete_content.lower() not in {"0", "false", "no"}
try:
deleted = await assets_manager.delete_asset_reference(
asset_info_id=asset_info_id,
owner_id=UserManager.get_request_user_id(request),
delete_content_if_orphan=delete_content,
)
except Exception:
return _error_response(500, "INTERNAL", "Unexpected server error.")

View File

@@ -27,6 +27,8 @@ from .database.services import (
create_asset_info_for_existing_asset,
fetch_asset_info_asset_and_tags,
get_asset_info_by_id,
list_cache_states_by_asset_hash,
asset_info_exists_for_hash,
)
from .api import schemas_in, schemas_out
from ._assets_helpers import (
@@ -371,11 +373,40 @@ async def update_asset(
)
async def delete_asset_reference(*, asset_info_id: str, owner_id: str, delete_content_if_orphan: bool = True) -> bool:
    """Delete a single AssetInfo and, optionally, the Asset it orphaned.

    If this was the last AssetInfo referencing the Asset and
    ``delete_content_if_orphan`` is true (the default), the Asset row is
    deleted as well and every cached file recorded for that ``asset_hash``
    is removed from disk on a best-effort basis.

    Args:
        asset_info_id: Id of the AssetInfo row to delete.
        owner_id: Owner id forwarded to ``delete_asset_info_by_id``.
        delete_content_if_orphan: When false, only the AssetInfo row is
            deleted; the Asset row and cached files are left untouched.

    Returns:
        False when ``delete_asset_info_by_id`` reports that nothing was
        deleted, True otherwise.
    """
    async with await create_session() as session:
        # The hash must be captured BEFORE the delete: once the AssetInfo row
        # is gone there is no way to learn which Asset it referenced.
        info_row = await get_asset_info_by_id(session, asset_info_id=asset_info_id)
        asset_hash = info_row.asset_hash if info_row else None

        deleted = await delete_asset_info_by_id(session, asset_info_id=asset_info_id, owner_id=owner_id)
        if not deleted:
            await session.commit()
            return False

        # Caller opted out of content removal, or there is no hash to check.
        if not delete_content_if_orphan or not asset_hash:
            await session.commit()
            return True

        # Another AssetInfo still references this Asset: keep the content.
        still_exists = await asset_info_exists_for_hash(session, asset_hash=asset_hash)
        if still_exists:
            await session.commit()
            return True

        # Collect file paths before deleting the Asset row, so they are still
        # available for the on-disk cleanup below.
        states = await list_cache_states_by_asset_hash(session, asset_hash=asset_hash)
        file_paths = [s.file_path for s in (states or []) if getattr(s, "file_path", None)]

        asset_row = await get_asset_by_hash(session, asset_hash=asset_hash)
        if asset_row is not None:
            await session.delete(asset_row)

        await session.commit()

    # Files are removed only after the commit above has gone through.
    # Deliberately best-effort: a file that cannot be removed must not turn
    # an already-committed delete into an error.
    # NOTE(review): the original nesting of this loop relative to the session
    # block is not recoverable from the flattened source - confirm.
    for p in file_paths:
        with contextlib.suppress(Exception):
            if p and os.path.isfile(p):
                os.remove(p)
    return True
async def create_asset_from_hash(

View File

@@ -36,6 +36,17 @@ async def get_asset_info_by_id(session: AsyncSession, *, asset_info_id: str) ->
return await session.get(AssetInfo, asset_info_id)
async def asset_info_exists_for_hash(session: AsyncSession, *, asset_hash: str) -> bool:
    """Return True if at least one AssetInfo row has the given ``asset_hash``."""
    # Select a constant and cap at one row: only existence matters, so no
    # AssetInfo columns need to be loaded.
    probe = (
        sa.select(sa.literal(True))
        .select_from(AssetInfo)
        .where(AssetInfo.asset_hash == asset_hash)
        .limit(1)
    )
    result = await session.execute(probe)
    return result.first() is not None
async def check_fs_asset_exists_quick(
session,
*,
@@ -586,7 +597,7 @@ async def create_asset_info_for_existing_asset(
tag_origin: str = "manual",
owner_id: str = "",
) -> AssetInfo:
"""Create a new AssetInfo referencing an existing Asset (no content write)."""
"""Create a new AssetInfo referencing an existing Asset. If row already exists, return it unchanged."""
now = utcnow()
info = AssetInfo(
owner_id=owner_id,
@@ -597,8 +608,25 @@ async def create_asset_info_for_existing_asset(
updated_at=now,
last_access_time=now,
)
session.add(info)
await session.flush() # get info.id
try:
async with session.begin_nested():
session.add(info)
await session.flush() # get info.id
except IntegrityError:
existing = (
await session.execute(
select(AssetInfo)
.where(
AssetInfo.asset_hash == asset_hash,
AssetInfo.name == name,
AssetInfo.owner_id == owner_id,
)
.limit(1)
)
).scalars().first()
if not existing:
raise RuntimeError("AssetInfo upsert failed to find existing row after conflict.")
return existing
# Uncomment next code, and remove code after it, once the hack with "metadata[filename" is not needed anymore
# if user_metadata is not None: