Refactor asset database: separate business logic from queries

Architecture changes:
- API Routes -> manager.py (thin adapter) -> services/ (business logic) -> queries/ (atomic DB ops)
- Services own session lifecycle via create_session()
- Queries accept Session as parameter, do single-table atomic operations

New app/assets/services/ layer:
- __init__.py - exports all service functions
- ingest.py - ingest_file_from_path(), register_existing_asset()
- asset_management.py - get_asset_detail(), update_asset_metadata(), delete_asset_reference(), set_asset_preview()
- tagging.py - apply_tags(), remove_tags(), list_tags()

Removed from queries/asset_info.py:
- ingest_fs_asset (moved to services/ingest.py as ingest_file_from_path)
- update_asset_info_full (moved to services/asset_management.py as update_asset_metadata)
- create_asset_info_for_existing_asset (moved to services/ingest.py as register_existing_asset)

Updated manager.py:
- Now a thin adapter that transforms API schemas to/from service calls
- Delegates all business logic to services layer
- No longer imports sqlalchemy.orm.Session or models directly

Test updates:
- Fixed test_cache_state.py import of pick_best_live_path (moved to helpers.py)
- Added comprehensive service layer tests (41 new tests)
- All 112 query + service tests pass

Amp-Thread-ID: https://ampcode.com/threads/T-019c24e2-7ae4-707f-ad19-c775ed8b82b5
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Luke Mino-Altherr
2026-02-03 11:08:57 -08:00
parent e708054f7d
commit 02117ae130
16 changed files with 1748 additions and 632 deletions

View File

@@ -2,10 +2,8 @@
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState
from app.assets.database.queries import (
list_cache_states_by_asset_id,
pick_best_live_path,
)
from app.assets.database.queries import list_cache_states_by_asset_id
from app.assets.helpers import pick_best_live_path
def _make_asset(session: Session, hash_val: str | None = None, size: int = 1024) -> Asset:

View File

@@ -0,0 +1 @@
# Service layer tests

View File

@@ -0,0 +1,49 @@
import os
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from app.assets.database.models import Base
@pytest.fixture
def db_engine():
"""In-memory SQLite engine for fast unit tests."""
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
return engine
@pytest.fixture
def session(db_engine):
"""Session fixture for tests that need direct DB access."""
with Session(db_engine) as sess:
yield sess
@pytest.fixture
def mock_create_session(db_engine):
"""Patch create_session to use our in-memory database."""
from contextlib import contextmanager
from sqlalchemy.orm import Session as SASession
@contextmanager
def _create_session():
with SASession(db_engine) as sess:
yield sess
with patch("app.assets.services.ingest.create_session", _create_session), \
patch("app.assets.services.asset_management.create_session", _create_session), \
patch("app.assets.services.tagging.create_session", _create_session):
yield _create_session
@pytest.fixture
def temp_dir():
"""Temporary directory for file operations."""
with tempfile.TemporaryDirectory() as tmpdir:
yield Path(tmpdir)

View File

@@ -0,0 +1,264 @@
"""Tests for asset_management services."""
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, Tag
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info
from app.assets.helpers import utcnow
from app.assets.services import (
get_asset_detail,
update_asset_metadata,
delete_asset_reference,
set_asset_preview,
)
def _make_asset(session: Session, hash_val: str = "blake3:test", size: int = 1024) -> Asset:
asset = Asset(hash=hash_val, size_bytes=size, mime_type="application/octet-stream")
session.add(asset)
session.flush()
return asset
def _make_asset_info(
session: Session,
asset: Asset,
name: str = "test",
owner_id: str = "",
) -> AssetInfo:
now = utcnow()
info = AssetInfo(
owner_id=owner_id,
name=name,
asset_id=asset.id,
created_at=now,
updated_at=now,
last_access_time=now,
)
session.add(info)
session.flush()
return info
class TestGetAssetDetail:
def test_returns_none_for_nonexistent(self, mock_create_session):
result = get_asset_detail(asset_info_id="nonexistent")
assert result is None
def test_returns_asset_with_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, name="test.bin")
ensure_tags_exist(session, ["alpha", "beta"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["alpha", "beta"])
session.commit()
result = get_asset_detail(asset_info_id=info.id)
assert result is not None
assert result["info"].id == info.id
assert result["asset"].id == asset.id
assert set(result["tags"]) == {"alpha", "beta"}
def test_respects_owner_visibility(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
session.commit()
# Wrong owner cannot see
result = get_asset_detail(asset_info_id=info.id, owner_id="user2")
assert result is None
# Correct owner can see
result = get_asset_detail(asset_info_id=info.id, owner_id="user1")
assert result is not None
class TestUpdateAssetMetadata:
def test_updates_name(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, name="old_name.bin")
info_id = info.id
session.commit()
update_asset_metadata(
asset_info_id=info_id,
name="new_name.bin",
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.name == "new_name.bin"
def test_updates_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ensure_tags_exist(session, ["old"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["old"])
session.commit()
result = update_asset_metadata(
asset_info_id=info.id,
tags=["new1", "new2"],
)
assert set(result["tags"]) == {"new1", "new2"}
assert "old" not in result["tags"]
def test_updates_user_metadata(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
info_id = info.id
session.commit()
update_asset_metadata(
asset_info_id=info_id,
user_metadata={"key": "value", "num": 42},
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.user_metadata["key"] == "value"
assert updated_info.user_metadata["num"] == 42
def test_raises_for_nonexistent(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
update_asset_metadata(asset_info_id="nonexistent", name="fail")
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
update_asset_metadata(
asset_info_id=info.id,
name="new",
owner_id="user2",
)
class TestDeleteAssetReference:
def test_deletes_asset_info(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
info_id = info.id
session.commit()
result = delete_asset_reference(
asset_info_id=info_id,
owner_id="",
delete_content_if_orphan=False,
)
assert result is True
assert session.get(AssetInfo, info_id) is None
def test_returns_false_for_nonexistent(self, mock_create_session):
result = delete_asset_reference(
asset_info_id="nonexistent",
owner_id="",
)
assert result is False
def test_returns_false_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
info_id = info.id
session.commit()
result = delete_asset_reference(
asset_info_id=info_id,
owner_id="user2",
)
assert result is False
assert session.get(AssetInfo, info_id) is not None
def test_keeps_asset_if_other_infos_exist(self, mock_create_session, session: Session):
asset = _make_asset(session)
info1 = _make_asset_info(session, asset, name="info1")
info2 = _make_asset_info(session, asset, name="info2")
asset_id = asset.id
session.commit()
delete_asset_reference(
asset_info_id=info1.id,
owner_id="",
delete_content_if_orphan=True,
)
# Asset should still exist
assert session.get(Asset, asset_id) is not None
def test_deletes_orphaned_asset(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
asset_id = asset.id
info_id = info.id
session.commit()
delete_asset_reference(
asset_info_id=info_id,
owner_id="",
delete_content_if_orphan=True,
)
# Both info and asset should be gone
assert session.get(AssetInfo, info_id) is None
assert session.get(Asset, asset_id) is None
class TestSetAssetPreview:
def test_sets_preview(self, mock_create_session, session: Session):
asset = _make_asset(session, hash_val="blake3:main")
preview_asset = _make_asset(session, hash_val="blake3:preview")
info = _make_asset_info(session, asset)
info_id = info.id
preview_id = preview_asset.id
session.commit()
set_asset_preview(
asset_info_id=info_id,
preview_asset_id=preview_id,
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.preview_id == preview_id
def test_clears_preview(self, mock_create_session, session: Session):
asset = _make_asset(session)
preview_asset = _make_asset(session, hash_val="blake3:preview")
info = _make_asset_info(session, asset)
info.preview_id = preview_asset.id
info_id = info.id
session.commit()
set_asset_preview(
asset_info_id=info_id,
preview_asset_id=None,
)
# Verify by re-fetching from DB
session.expire_all()
updated_info = session.get(AssetInfo, info_id)
assert updated_info.preview_id is None
def test_raises_for_nonexistent_info(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
set_asset_preview(asset_info_id="nonexistent")
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
set_asset_preview(
asset_info_id=info.id,
preview_asset_id=None,
owner_id="user2",
)

View File

@@ -0,0 +1,228 @@
"""Tests for ingest services."""
import os
from pathlib import Path
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetCacheState, AssetInfo, Tag
from app.assets.database.queries import ensure_tags_exist, get_asset_tags
from app.assets.services import ingest_file_from_path, register_existing_asset
class TestIngestFileFromPath:
def test_creates_asset_and_cache_state(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "test_file.bin"
file_path.write_bytes(b"test content")
result = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:abc123",
size_bytes=12,
mtime_ns=1234567890000000000,
mime_type="application/octet-stream",
)
assert result["asset_created"] is True
assert result["state_created"] is True
assert result["asset_info_id"] is None # no info_name provided
# Verify DB state
assets = session.query(Asset).all()
assert len(assets) == 1
assert assets[0].hash == "blake3:abc123"
states = session.query(AssetCacheState).all()
assert len(states) == 1
assert states[0].file_path == str(file_path)
def test_creates_asset_info_when_name_provided(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "model.safetensors"
file_path.write_bytes(b"model data")
result = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:def456",
size_bytes=10,
mtime_ns=1234567890000000000,
mime_type="application/octet-stream",
info_name="My Model",
owner_id="user1",
)
assert result["asset_created"] is True
assert result["asset_info_id"] is not None
info = session.query(AssetInfo).first()
assert info is not None
assert info.name == "My Model"
assert info.owner_id == "user1"
def test_creates_tags_when_provided(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "tagged.bin"
file_path.write_bytes(b"data")
result = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:ghi789",
size_bytes=4,
mtime_ns=1234567890000000000,
info_name="Tagged Asset",
tags=["models", "checkpoints"],
)
assert result["asset_info_id"] is not None
# Verify tags were created and linked
tags = session.query(Tag).all()
tag_names = {t.name for t in tags}
assert "models" in tag_names
assert "checkpoints" in tag_names
asset_tags = get_asset_tags(session, asset_info_id=result["asset_info_id"])
assert set(asset_tags) == {"models", "checkpoints"}
def test_idempotent_upsert(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "dup.bin"
file_path.write_bytes(b"content")
# First ingest
r1 = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:repeat",
size_bytes=7,
mtime_ns=1234567890000000000,
)
assert r1["asset_created"] is True
# Second ingest with same hash - should update, not create
r2 = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:repeat",
size_bytes=7,
mtime_ns=1234567890000000001, # different mtime
)
assert r2["asset_created"] is False
assert r2["state_updated"] is True or r2["state_created"] is False
# Still only one asset
assets = session.query(Asset).all()
assert len(assets) == 1
def test_validates_preview_id(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "with_preview.bin"
file_path.write_bytes(b"data")
# Create a preview asset first
preview_asset = Asset(hash="blake3:preview", size_bytes=100)
session.add(preview_asset)
session.commit()
preview_id = preview_asset.id
result = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:main",
size_bytes=4,
mtime_ns=1234567890000000000,
info_name="With Preview",
preview_id=preview_id,
)
assert result["asset_info_id"] is not None
info = session.query(AssetInfo).filter_by(id=result["asset_info_id"]).first()
assert info.preview_id == preview_id
def test_invalid_preview_id_is_cleared(self, mock_create_session, temp_dir: Path, session: Session):
file_path = temp_dir / "bad_preview.bin"
file_path.write_bytes(b"data")
result = ingest_file_from_path(
abs_path=str(file_path),
asset_hash="blake3:badpreview",
size_bytes=4,
mtime_ns=1234567890000000000,
info_name="Bad Preview",
preview_id="nonexistent-uuid",
)
assert result["asset_info_id"] is not None
info = session.query(AssetInfo).filter_by(id=result["asset_info_id"]).first()
assert info.preview_id is None
class TestRegisterExistingAsset:
def test_creates_info_for_existing_asset(self, mock_create_session, session: Session):
# Create existing asset
asset = Asset(hash="blake3:existing", size_bytes=1024, mime_type="image/png")
session.add(asset)
session.commit()
result = register_existing_asset(
asset_hash="blake3:existing",
name="Registered Asset",
user_metadata={"key": "value"},
tags=["models"],
)
assert result["created"] is True
assert "models" in result["tags"]
# Verify by re-fetching from DB
session.expire_all()
infos = session.query(AssetInfo).filter_by(name="Registered Asset").all()
assert len(infos) == 1
def test_returns_existing_info(self, mock_create_session, session: Session):
# Create asset and info
asset = Asset(hash="blake3:withinfo", size_bytes=512)
session.add(asset)
session.flush()
from app.assets.helpers import utcnow
info = AssetInfo(
owner_id="",
name="Existing Info",
asset_id=asset.id,
created_at=utcnow(),
updated_at=utcnow(),
last_access_time=utcnow(),
)
session.add(info)
session.flush() # Flush to get the ID
info_id = info.id
session.commit()
result = register_existing_asset(
asset_hash="blake3:withinfo",
name="Existing Info",
owner_id="",
)
assert result["created"] is False
# Verify only one AssetInfo exists for this name
session.expire_all()
infos = session.query(AssetInfo).filter_by(name="Existing Info").all()
assert len(infos) == 1
assert infos[0].id == info_id
def test_raises_for_nonexistent_hash(self, mock_create_session):
with pytest.raises(ValueError, match="No asset with hash"):
register_existing_asset(
asset_hash="blake3:doesnotexist",
name="Fail",
)
def test_applies_tags_to_new_info(self, mock_create_session, session: Session):
asset = Asset(hash="blake3:tagged", size_bytes=256)
session.add(asset)
session.commit()
result = register_existing_asset(
asset_hash="blake3:tagged",
name="Tagged Info",
tags=["alpha", "beta"],
)
assert result["created"] is True
assert set(result["tags"]) == {"alpha", "beta"}

View File

@@ -0,0 +1,197 @@
"""Tests for tagging services."""
import pytest
from sqlalchemy.orm import Session
from app.assets.database.models import Asset, AssetInfo, Tag
from app.assets.database.queries import ensure_tags_exist, add_tags_to_asset_info
from app.assets.helpers import utcnow
from app.assets.services import apply_tags, remove_tags, list_tags
def _make_asset(session: Session, hash_val: str = "blake3:test") -> Asset:
asset = Asset(hash=hash_val, size_bytes=1024)
session.add(asset)
session.flush()
return asset
def _make_asset_info(
session: Session,
asset: Asset,
name: str = "test",
owner_id: str = "",
) -> AssetInfo:
now = utcnow()
info = AssetInfo(
owner_id=owner_id,
name=name,
asset_id=asset.id,
created_at=now,
updated_at=now,
last_access_time=now,
)
session.add(info)
session.flush()
return info
class TestApplyTags:
def test_adds_new_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
session.commit()
result = apply_tags(
asset_info_id=info.id,
tags=["alpha", "beta"],
)
assert set(result["added"]) == {"alpha", "beta"}
assert result["already_present"] == []
assert set(result["total_tags"]) == {"alpha", "beta"}
def test_reports_already_present(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ensure_tags_exist(session, ["existing"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["existing"])
session.commit()
result = apply_tags(
asset_info_id=info.id,
tags=["existing", "new"],
)
assert result["added"] == ["new"]
assert result["already_present"] == ["existing"]
def test_raises_for_nonexistent_info(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
apply_tags(asset_info_id="nonexistent", tags=["x"])
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
apply_tags(
asset_info_id=info.id,
tags=["new"],
owner_id="user2",
)
class TestRemoveTags:
def test_removes_tags(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ensure_tags_exist(session, ["a", "b", "c"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["a", "b", "c"])
session.commit()
result = remove_tags(
asset_info_id=info.id,
tags=["a", "b"],
)
assert set(result["removed"]) == {"a", "b"}
assert result["not_present"] == []
assert result["total_tags"] == ["c"]
def test_reports_not_present(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset)
ensure_tags_exist(session, ["present"])
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["present"])
session.commit()
result = remove_tags(
asset_info_id=info.id,
tags=["present", "absent"],
)
assert result["removed"] == ["present"]
assert result["not_present"] == ["absent"]
def test_raises_for_nonexistent_info(self, mock_create_session):
with pytest.raises(ValueError, match="not found"):
remove_tags(asset_info_id="nonexistent", tags=["x"])
def test_raises_for_wrong_owner(self, mock_create_session, session: Session):
asset = _make_asset(session)
info = _make_asset_info(session, asset, owner_id="user1")
session.commit()
with pytest.raises(PermissionError, match="not owner"):
remove_tags(
asset_info_id=info.id,
tags=["x"],
owner_id="user2",
)
class TestListTags:
def test_returns_tags_with_counts(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session)
info = _make_asset_info(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
session.commit()
rows, total = list_tags()
tag_dict = {name: count for name, _, count in rows}
assert tag_dict["used"] == 1
assert tag_dict["unused"] == 0
assert total == 2
def test_excludes_zero_counts(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["used", "unused"])
asset = _make_asset(session)
info = _make_asset_info(session, asset)
add_tags_to_asset_info(session, asset_info_id=info.id, tags=["used"])
session.commit()
rows, total = list_tags(include_zero=False)
tag_names = {name for name, _, _ in rows}
assert "used" in tag_names
assert "unused" not in tag_names
def test_prefix_filter(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["alpha", "beta", "alphabet"])
session.commit()
rows, _ = list_tags(prefix="alph")
tag_names = {name for name, _, _ in rows}
assert tag_names == {"alpha", "alphabet"}
def test_order_by_name(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["zebra", "alpha", "middle"])
session.commit()
rows, _ = list_tags(order="name_asc")
names = [name for name, _, _ in rows]
assert names == ["alpha", "middle", "zebra"]
def test_pagination(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["a", "b", "c", "d", "e"])
session.commit()
rows, total = list_tags(limit=2, offset=1, order="name_asc")
assert total == 5
assert len(rows) == 2
names = [name for name, _, _ in rows]
assert names == ["b", "c"]
def test_clamps_limit(self, mock_create_session, session: Session):
ensure_tags_exist(session, ["a"])
session.commit()
# Service should clamp limit to max 1000
rows, _ = list_tags(limit=2000)
assert len(rows) <= 1000